# **GENERAL DATA**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings

# **SLEEPDAY**

In [27]:
# Load the raw sleep log and print its column / dtype / non-null summary.
df = pd.read_csv(filepath_or_buffer='/content/sleepDay_merged.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 413 entries, 0 to 412
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Id                  413 non-null    int64 
 1   SleepDay            413 non-null    object
 2   TotalSleepRecords   413 non-null    int64 
 3   TotalMinutesAsleep  413 non-null    int64 
 4   TotalTimeInBed      413 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 16.3+ KB


Understanding the data

In [28]:
# Preview the first five raw sleep records.
df.head(n=5)

Unnamed: 0,Id,SleepDay,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed
0,1503960366,4/12/2016 12:00:00 AM,1,327,346
1,1503960366,4/13/2016 12:00:00 AM,2,384,407
2,1503960366,4/15/2016 12:00:00 AM,1,412,442
3,1503960366,4/16/2016 12:00:00 AM,2,340,367
4,1503960366,4/17/2016 12:00:00 AM,1,700,712


In [21]:
# (rows, columns) of the sleep frame.
# NOTE(review): the saved output reports 8 columns and this cell's execution
# count (21) predates the reload in In [27], whose info() reports 5 columns.
# The saved output is likely stale; re-run after a kernel restart.
df.shape

(413, 8)

In [22]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,Id,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed
count,413.0,413.0,413.0,413.0
mean,5000979000.0,1.118644,419.467312,458.639225
std,2060360000.0,0.345521,118.344679,127.101607
min,1503960000.0,1.0,58.0,61.0
25%,3977334000.0,1.0,361.0,403.0
50%,4702922000.0,1.0,433.0,463.0
75%,6962181000.0,1.0,490.0,526.0
max,8792010000.0,3.0,796.0,961.0


In [23]:
# Column / dtype / non-null summary of the sleep frame.
# NOTE(review): the saved output lists Date, Time and Extra_Column, which the
# load in In [27] does not show; this output appears to come from an earlier
# kernel state (execution count 23). Re-run to refresh.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 413 entries, 0 to 412
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Id                  413 non-null    int64 
 1   SleepDay            413 non-null    object
 2   TotalSleepRecords   413 non-null    int64 
 3   TotalMinutesAsleep  413 non-null    int64 
 4   TotalTimeInBed      413 non-null    int64 
 5   Date                413 non-null    object
 6   Time                413 non-null    object
 7   Extra_Column        413 non-null    object
dtypes: int64(4), object(4)
memory usage: 25.9+ KB


In [24]:
# Per-column dtypes of the sleep frame.
# NOTE(review): the saved output still shows Date, Time and Extra_Column, which
# are absent from the frame loaded in In [27]; likely stale output. Re-run.
df.dtypes

Id                     int64
SleepDay              object
TotalSleepRecords      int64
TotalMinutesAsleep     int64
TotalTimeInBed         int64
Date                  object
Time                  object
Extra_Column          object
dtype: object

Cleaning the data

In [29]:
# Parse the raw 'SleepDay' strings (e.g. '4/12/2016 12:00:00 AM') as datetimes,
# then store them back as date-only 'YYYY-MM-DD' text.
# The explicit input format no longer matches once the column has been
# converted, so run this cell once per fresh load of the CSV.
df['SleepDay'] = (
    pd.to_datetime(df['SleepDay'], format='%m/%d/%Y %I:%M:%S %p')
    .dt.strftime('%Y-%m-%d')
)

In [30]:
# Preview the first five rows to confirm 'SleepDay' now holds date-only values.
df.head(n=5)

Unnamed: 0,Id,SleepDay,TotalSleepRecords,TotalMinutesAsleep,TotalTimeInBed
0,1503960366,2016-04-12,1,327,346
1,1503960366,2016-04-13,2,384,407
2,1503960366,2016-04-15,1,412,442
3,1503960366,2016-04-16,2,340,367
4,1503960366,2016-04-17,1,700,712


In [31]:
# Write the cleaned sleep data to CSV without the positional index column.
df.to_csv(path_or_buf='SleepDay_Cleaned.csv', index=False)

# **WEIGHTLOG INFO**

In [32]:
# Load the raw weight log and print its column / dtype / non-null summary.
df1 = pd.read_csv(filepath_or_buffer='/content/weightLogInfo_merged.csv')
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Id              67 non-null     int64  
 1   Date            67 non-null     object 
 2   WeightKg        67 non-null     float64
 3   WeightPounds    67 non-null     float64
 4   Fat             2 non-null      float64
 5   BMI             67 non-null     float64
 6   IsManualReport  67 non-null     bool   
 7   LogId           67 non-null     int64  
dtypes: bool(1), float64(4), int64(2), object(1)
memory usage: 3.9+ KB


In [33]:
# Preview the first five raw weight-log records.
df1.head(n=5)

Unnamed: 0,Id,Date,WeightKg,WeightPounds,Fat,BMI,IsManualReport,LogId
0,1503960366,5/2/2016 11:59:59 PM,52.599998,115.963147,22.0,22.65,True,1462233599000
1,1503960366,5/3/2016 11:59:59 PM,52.599998,115.963147,,22.65,True,1462319999000
2,1927972279,4/13/2016 1:08:52 AM,133.5,294.31712,,47.540001,False,1460509732000
3,2873212765,4/21/2016 11:59:59 PM,56.700001,125.002104,,21.450001,True,1461283199000
4,2873212765,5/12/2016 11:59:59 PM,57.299999,126.324875,,21.690001,True,1463097599000


In [34]:
# Parse the raw 'Date' strings (e.g. '5/2/2016 11:59:59 PM') as datetimes,
# then store them back as date-only 'YYYY-MM-DD' text; the time of day is dropped.
# The explicit input format no longer matches once the column has been
# converted, so run this cell once per fresh load of the CSV.
df1['Date'] = (
    pd.to_datetime(df1['Date'], format='%m/%d/%Y %I:%M:%S %p')
    .dt.strftime('%Y-%m-%d')
)

In [35]:
# Preview the first five rows to confirm 'Date' now holds date-only values.
df1.head(n=5)

Unnamed: 0,Id,Date,WeightKg,WeightPounds,Fat,BMI,IsManualReport,LogId
0,1503960366,2016-05-02,52.599998,115.963147,22.0,22.65,True,1462233599000
1,1503960366,2016-05-03,52.599998,115.963147,,22.65,True,1462319999000
2,1927972279,2016-04-13,133.5,294.31712,,47.540001,False,1460509732000
3,2873212765,2016-04-21,56.700001,125.002104,,21.450001,True,1461283199000
4,2873212765,2016-05-12,57.299999,126.324875,,21.690001,True,1463097599000


In [36]:
# Write the cleaned weight log to CSV without the positional index column.
df1.to_csv(path_or_buf='WeightlogInfo_Cleaned.csv', index=False)

# **HEARTRATE**

In [40]:
# Load the raw per-reading heart-rate log and print its column / dtype / non-null summary.
# NOTE(review): the saved info() output shows 289761 rows but only 289760
# non-null 'Value' entries. No cell visible here handles that missing reading,
# so confirm whether it should be dropped before export.
df2 = pd.read_csv(filepath_or_buffer='/content/heartrate_seconds_merged.csv')
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 289761 entries, 0 to 289760
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Id      289761 non-null  int64  
 1   Time    289761 non-null  object 
 2   Value   289760 non-null  float64
dtypes: float64(1), int64(1), object(1)
memory usage: 6.6+ MB


In [38]:
# Preview the first five raw heart-rate readings.
df2.head(n=5)

Unnamed: 0,Id,Time,Value
0,2022484408,4/12/2016 7:21:00 AM,97.0
1,2022484408,4/12/2016 7:21:05 AM,102.0
2,2022484408,4/12/2016 7:21:10 AM,105.0
3,2022484408,4/12/2016 7:21:20 AM,103.0
4,2022484408,4/12/2016 7:21:25 AM,101.0


In [42]:
# Parse the raw 'Time' strings (e.g. '4/12/2016 7:21:00 AM') as datetimes,
# then store them back as date-only 'YYYY-MM-DD' text.
# NOTE(review): this discards the time of day from each reading while the
# column keeps the name 'Time'; confirm that date-level granularity is intended.
# The explicit input format no longer matches once the column has been
# converted, so run this cell once per fresh load of the CSV.
df2['Time'] = (
    pd.to_datetime(df2['Time'], format='%m/%d/%Y %I:%M:%S %p')
    .dt.strftime('%Y-%m-%d')
)

In [43]:
# Preview the first five rows to confirm 'Time' now holds date-only values.
df2.head(n=5)

Unnamed: 0,Id,Time,Value
0,2022484408,2016-04-12,97.0
1,2022484408,2016-04-12,102.0
2,2022484408,2016-04-12,105.0
3,2022484408,2016-04-12,103.0
4,2022484408,2016-04-12,101.0


In [44]:
# Give the generic 'Value' column a descriptive name; the frame is modified in place.
df2.rename({'Value': 'HeartRate'}, axis='columns', inplace=True)

In [46]:
# Preview the first five rows to confirm the column is now named 'HeartRate'.
df2.head(n=5)

Unnamed: 0,Id,Time,HeartRate
0,2022484408,2016-04-12,97.0
1,2022484408,2016-04-12,102.0
2,2022484408,2016-04-12,105.0
3,2022484408,2016-04-12,103.0
4,2022484408,2016-04-12,101.0


In [47]:
# Write the cleaned heart-rate data to CSV without the positional index column.
df2.to_csv(path_or_buf='HeartRate_Cleaned.csv', index=False)