In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# **Heart Rate Monitor**

In [2]:
# Load the per-second heart-rate readings and preview the first rows.
heartrate_second = pd.read_csv(
    '../input/bellabeat/heartrate_seconds_merged.csv'
)
heartrate_second.head()

In [3]:
# Display-only preview: the sorted copy is not assigned, so `heartrate_second`
# itself keeps its original row order. 'Time' has not been converted to
# datetime yet at this point, so it is ordered as whatever dtype read_csv gave it.
heartrate_second.sort_values(by=['Id', 'Time'])

In [4]:
# Parse the timestamp column so the .dt accessor can be used on it.
heartrate_second['Time'] = pd.to_datetime(heartrate_second['Time'])

# Distinct user ids present in the heart-rate data.
hrt_id = heartrate_second['Id'].unique()
print(hrt_id)

In [5]:
# Build a per-user "typical day" heart-rate profile:
# keep only readings whose minute is a multiple of 5, then average the
# readings by (user, hour, minute).
on_five_minute_mark = heartrate_second['Time'].dt.minute.astype(int) % 5 == 0

# .assign() returns a new frame, so the helper columns are never written onto
# a slice of `heartrate_second` (the original triggered SettingWithCopyWarning).
fmin_hrt = heartrate_second.loc[on_five_minute_mark].assign(
    hour=lambda df: df['Time'].dt.hour.astype(int),
    minute=lambda df: df['Time'].dt.minute.astype(int),
)

# Average only 'Value'. Averaging the raw timestamp column is meaningless and
# how groupby().mean() treats it differs between pandas versions.
fmin_hrt = fmin_hrt.groupby(['Id', 'hour', 'minute'], as_index=False)['Value'].mean()

# Time of day in fractional hours. Minutes are divided by 60 (not 100) so the
# axis is linear: 10:30 -> 10.5, and there is no gap between x.55 and (x+1).00.
fmin_hrt['Time'] = fmin_hrt['hour'] + fmin_hrt['minute'] / 60
print(fmin_hrt)

In [6]:
# One subplot per user: averaged heart rate against the time of day.
col = 3
row = max(1, -(-len(hrt_id) // col))  # ceiling division, at least one row

# define subplots
# squeeze=False always yields a 2-D axes array, so the [row, column] indexing
# below also works when a single row of plots is enough.
fig, ax = plt.subplots(row, col, figsize=(col * 10, row * 4), squeeze=False)
for axis in ax.flat:
    axis.set(xlabel='Time (Hours)', ylabel='Heart Rate (BPM)')
    axis.label_outer()

for position, user_id in enumerate(hrt_id):
    user_rows = fmin_hrt[fmin_hrt['Id'] == user_id]
    panel = ax[position // col, position % col]
    panel.plot(user_rows['Time'], user_rows['Value'])
    panel.set_title(str(user_id))

# Lay out last so the per-panel titles are taken into account.
fig.tight_layout()


# **Daily Step**

In [7]:
# Load the hourly step counts and parse their timestamp column.
hourly_step = pd.read_csv('../input/bellabeat/hourlySteps_merged.csv')
hourly_step['ActivityHour'] = pd.to_datetime(hourly_step['ActivityHour'])

# Distinct user ids present in the hourly step data.
step_id = hourly_step['Id'].unique()

In [8]:
# Hour of day (0-23) for each record.
hourly_step['hour'] = hourly_step['ActivityHour'].dt.hour

# Mean step count per user and hour of day. Only 'StepTotal' is averaged:
# taking the mean of the 'ActivityHour' timestamps has no meaning and its
# handling by groupby().mean() differs between pandas versions.
# NOTE: despite its name, `daily_step` holds an hour-of-day profile here.
daily_step = hourly_step.groupby(['Id', 'hour'], as_index=False)['StepTotal'].mean()

In [9]:
# One subplot per user: mean step count for each hour of the day.
col = 3
row = max(1, -(-len(step_id) // col))  # ceiling division, at least one row

# define subplots
# squeeze=False keeps `ax` two-dimensional even when only one row is needed,
# so the [row, column] indexing below cannot fail on a small id list.
fig, ax = plt.subplots(row, col, figsize=(col * 10, row * 4), squeeze=False)
for axis in ax.flat:
    axis.set(xlabel='Time (Hours)', ylabel='Step Count')
    axis.label_outer()

for position, user_id in enumerate(step_id):
    user_rows = daily_step[daily_step['Id'] == user_id]
    panel = ax[position // col, position % col]
    panel.plot(user_rows['hour'], user_rows['StepTotal'])
    panel.set_title(str(user_id))

# Lay out last so the per-panel titles are taken into account.
fig.tight_layout()


In [10]:
# Daily step totals, one scatter panel per user.
# NOTE: this rebinds `daily_step`, replacing the hour-of-day frame built earlier.
daily_step = pd.read_csv('../input/bellabeat/dailySteps_merged.csv')

# rename column
daily_step = daily_step.rename(columns={'ActivityDay': 'Date'})
# Parse the dates so the x-axis is chronological instead of a list of
# string categories.
daily_step['Date'] = pd.to_datetime(daily_step['Date'])

ids = daily_step['Id'].unique()
daily_step = daily_step.groupby(['Id', 'Date']).mean()
daily_step = daily_step.reset_index()

# Size the grid from the number of users instead of assuming 11 x 3 panels,
# which raised IndexError as soon as there were more than 33 ids.
col = 3
row = max(1, -(-len(ids) // col))  # ceiling division, at least one row

# define subplots
fig, ax = plt.subplots(row, col, figsize=(col * 10, row * 4), squeeze=False)
for axis in ax.flat:
    # The x-axis shows calendar dates, not hours.
    axis.set(xlabel='Date', ylabel='Step Count')
    axis.label_outer()

for position, user_id in enumerate(ids):
    user_rows = daily_step[daily_step['Id'] == user_id]
    panel = ax[position // col, position % col]
    panel.scatter(user_rows['Date'], user_rows['StepTotal'])
    panel.set_title(str(user_id))

# Rotate the date ticks and lay out once the data and titles are in place.
fig.autofmt_xdate(rotation=90)
fig.tight_layout()


# **Daily Activity**

In [11]:
# Daily activity minutes by intensity, one panel per user.
daily_activity = pd.read_csv('../input/bellabeat/dailyActivity_merged.csv')
daily_activity = daily_activity.rename(columns={'ActivityDate': 'Date'})   # rename column from "ActivityDate" to "Date"
daily_activity['Date'] = pd.to_datetime(daily_activity['Date'])            # convert string to datetime
daily_id = daily_activity['Id'].unique()                                   # get category id (unique Id)

# Column -> line colour, in drawing order.
intensity_colors = {
    'LightlyActiveMinutes': 'purple',
    'FairlyActiveMinutes': 'blue',
    'VeryActiveMinutes': 'yellow',
    'SedentaryMinutes': 'red',
}

# Size the grid from the number of users instead of assuming 11 x 3 panels.
col = 3
row = max(1, -(-len(daily_id) // col))  # ceiling division, at least one row

# define subplots
fig, ax = plt.subplots(row, col, figsize=(col * 10, row * 4), squeeze=False)
for axis in ax.flat:
    # The x-axis shows calendar dates, not hours.
    axis.set(xlabel='Date', ylabel='Activity')
    axis.label_outer()

for position, user_id in enumerate(daily_id):
    user_rows = daily_activity[daily_activity['Id'] == user_id]
    panel = ax[position // col, position % col]
    for column, color in intensity_colors.items():
        panel.plot(user_rows['Date'], user_rows[column], color=color, label=column)
    panel.set_title(str(user_id))

# Without a legend the four colours cannot be told apart; one legend on the
# first panel is enough because every panel uses the same colours.
if len(daily_id) > 0:
    ax[0, 0].legend(loc='upper right')

# Rotate the date ticks and lay out once the data and titles are in place.
fig.autofmt_xdate(rotation=90)
fig.tight_layout()

# **Sleep Tracking**

In [12]:
# Load the daily sleep log.
daily_sleep = pd.read_csv('../input/bellabeat/sleepDay_merged.csv')

# Replace 'SleepDay' with 'Date' in the column names, then parse that column
# into datetimes.
renamed_columns = daily_sleep.columns.str.replace('SleepDay', 'Date')
daily_sleep.columns = renamed_columns
daily_sleep['Date'] = pd.to_datetime(daily_sleep['Date'])

print(daily_sleep)

In [13]:
from sklearn.linear_model import LinearRegression  # NOTE(review): not used in this cell

# Minutes asleep per day, one scatter panel per user.
# Iterate over the users that actually appear in the sleep log. The original
# looped over `hrt_id` (the heart-rate users), which drew empty panels for
# users without sleep data and skipped sleep users without heart-rate data.
sleep_ids = daily_sleep['Id'].unique()

# Size the grid from the number of users instead of assuming 4 x 4 panels.
col = 4
row = max(1, -(-len(sleep_ids) // col))  # ceiling division, at least one row

# define subplots
fig, ax = plt.subplots(row, col, figsize=(col * 5, row * 3), squeeze=False)
for axis in ax.flat:
    # Labels describe what is plotted here: dates against minutes asleep.
    axis.set(xlabel='Date', ylabel='Minutes Asleep')
    axis.label_outer()

for position, user_id in enumerate(sleep_ids):
    user_rows = daily_sleep[daily_sleep['Id'] == user_id]
    panel = ax[position // col, position % col]
    panel.scatter(user_rows['Date'], user_rows['TotalMinutesAsleep'])
    panel.set_title(str(user_id))

# Rotate the date ticks and lay out once the data and titles are in place.
fig.autofmt_xdate(rotation=90)
fig.tight_layout()

# **Average Tracking for Each ID**

Merge the dailyActivity table with the sleepDay table on Id and Date (outer join, so days that appear in only one of the two tables are kept).

In [14]:
# Outer join of activity and sleep on (Id, Date): rows present in only one of
# the two tables are kept, with NaN in the other table's columns.
# NOTE(review): if daily_sleep holds repeated (Id, Date) rows, the matching
# activity rows are repeated in the result as well - verify against the data.
merged_data = pd.merge(
    daily_activity,
    daily_sleep,
    how='outer',
    on=['Id', 'Date'],
)
merged_data.head()

In [15]:
# Per-user mean of every column, with 'Id' restored as a regular column.
avg_data = merged_data.groupby(['Id']).mean().reset_index()
avg_data.head()

In [16]:
# Users ordered from the highest to the lowest average daily step count.
sorted_data = avg_data.sort_values(by='TotalSteps', ascending=False)
x = sorted_data['Id'].astype(str)
y = sorted_data['TotalSteps']

# Bar chart: one bar per user.
fig = plt.figure(figsize=(15, 6))
plt.bar(x, y, width=0.8, color='blue')
plt.title("Average Total Step Count")
plt.xlabel("Id")
plt.ylabel("Step Count")
plt.xticks(rotation=45)
plt.show()

In [17]:
# Number of users whose average daily step count exceeds 10,000
# (missing averages compare as False and are not counted).
count = int((sorted_data.TotalSteps > 10000).sum())
percent = round(count / len(sorted_data.Id) * 100, 2)
print(f"{percent}% of users that walk more than 10,000 step per day.")

In [18]:
# Average daily calories per user, in the same user order as the step chart.
x = sorted_data['Id'].astype(str)
y = sorted_data['Calories']

# Bar chart: one bar per user.
fig = plt.figure(figsize=(15, 6))
plt.bar(x, y, width=0.8, color='blue')
plt.title("Average Daily Calories")
plt.xlabel("Id")
plt.ylabel("Calories")
plt.xticks(rotation=45)
plt.show()

In [19]:
# Number of users whose average daily calories are below 2,000
# (missing averages compare as False and are not counted).
count = int((sorted_data.Calories < 2000).sum())
percent = round(count / len(sorted_data.Id) * 100, 2)
print(f"{percent}% of users burning < 2,000 cal/day")

In [20]:
# Average minutes asleep per user, in the same user order as the step chart.
x = sorted_data['Id'].astype(str)
y = sorted_data['TotalMinutesAsleep']

# Bar chart: one bar per user.
fig = plt.figure(figsize=(15, 6))
plt.bar(x, y, width=0.8, color='blue')
plt.title("Average Daily Minute Asleep")
plt.xlabel("Id")
plt.ylabel("Minute Asleep")
plt.xticks(rotation=45)
plt.show()

In [21]:
# Keep only the user id and the four per-intensity minute averages.
active_data = sorted_data[[
    'Id',
    'SedentaryMinutes',
    'LightlyActiveMinutes',
    'FairlyActiveMinutes',
    'VeryActiveMinutes',
]]

In [22]:
# Grouped bars per user for the three active intensities (sedentary is left out).
active_data.plot.bar(
    x="Id",
    y=['LightlyActiveMinutes', 'FairlyActiveMinutes', 'VeryActiveMinutes'],
    figsize=(20, 6),
)
plt.title("Average Daily Active Minute")
plt.xlabel("Id")
plt.ylabel("Minute Active")
plt.show()

In [23]:
# Express each intensity as a percentage of the user's total tracked minutes.
percent_active = active_data.copy()
minute_columns = ['SedentaryMinutes', 'LightlyActiveMinutes', 'FairlyActiveMinutes', 'VeryActiveMinutes']

# Sum of all four columns (sedentary included, despite the column's name).
tracked_minutes = active_data[minute_columns[0]]
for column in minute_columns[1:]:
    tracked_minutes = tracked_minutes + active_data[column]
percent_active['TotalActiveMinutes'] = tracked_minutes

# Overwrite every minute column with its share of the total, rounded to 2 decimals.
for column in minute_columns:
    percent_active[column] = (active_data[column] / percent_active['TotalActiveMinutes'] * 100).round(2)
percent_active.head()

In [24]:
# Grouped bars per user: percentage of tracked minutes spent at each intensity.
percent_active.plot.bar(
    x="Id",
    y=['SedentaryMinutes', 'LightlyActiveMinutes', 'FairlyActiveMinutes', 'VeryActiveMinutes'],
    figsize=(20, 6),
)
plt.title("Percent of Daily Active Minute")
plt.xlabel("Id")
plt.ylabel("Percent Minute Active")
plt.show()

In [25]:
# Mean percentage share of each intensity across all users.
avg_seden = percent_active['SedentaryMinutes'].mean()
avg_light = percent_active['LightlyActiveMinutes'].mean()
avg_fair = percent_active['FairlyActiveMinutes'].mean()
avg_very = percent_active['VeryActiveMinutes'].mean()
total = avg_seden + avg_light + avg_fair + avg_very

# Slice labels and sizes are listed in the same order:
# sedentary, fairly active, lightly active, very active.
labels = ('SedentaryMinutes', 'FairlyActiveMinutes', 'LightlyActiveMinutes', 'VeryActiveMinutes')
sizes = [avg_seden, avg_fair, avg_light, avg_very]
explode = (0, 0, 0, 0)  # all zeros: no slice is pulled out of the pie

fig1, ax1 = plt.subplots()
ax1.pie(
    sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
ax1.axis('equal')  # equal aspect ratio so the pie is drawn as a circle

plt.show()
print(total)


In [26]:
# Load the weight log and pick each user's most recent and earliest entry.
weight_log = pd.read_csv('../input/bellabeat/weightLogInfo_merged.csv')

# read_csv does not parse dates by default, so 'Date' arrives as text.
# max()/min() on text compares character by character (e.g. '5/9/...' sorts
# after '5/12/...'), which selects the wrong "latest"/"first" rows. Parse first.
weight_log['Date'] = pd.to_datetime(weight_log['Date'])

# Per-user extreme dates, joined back to recover the full log rows for them.
latest_dates = weight_log.groupby(['Id'])['Date'].max().reset_index()
first_dates = weight_log.groupby(['Id'])['Date'].min().reset_index()
latest_weight_log = latest_dates.merge(weight_log, on=['Id', 'Date'], how='inner')
first_weight_log = first_dates.merge(weight_log, on=['Id', 'Date'], how='inner')

fig = plt.figure(figsize=(15, 6))
plt.bar(latest_weight_log.Id.astype(str), latest_weight_log.WeightKg, width=0.5)
plt.xticks(rotation=90)
plt.title("Lastest Weight (Kg)")
plt.xlabel("Id")
plt.ylabel("Weight (Kg)")
plt.show()

In [27]:
# Latest BMI per user, with horizontal guide lines at the category thresholds.
fig = plt.figure(figsize=(15, 6))
plt.bar(latest_weight_log.Id.astype(str), latest_weight_log.BMI, width=0.5)
plt.xticks(rotation=90)
plt.title("Lastest BMI")
plt.xlabel("Id")
plt.ylabel("BMI")

# Place the category labels just right of the last bar. Bars sit at
# x = 0 .. n-1, so n - 0.2 reproduces the previous fixed 7.8 for eight users
# while staying correct for any other number of users.
label_x = latest_weight_log.Id.nunique() - 0.2

# (line height, colour) for each category boundary.
thresholds = [(35, 'r'), (30, 'orange'), (25, 'yellow'), (18.5, 'g')]
# (label height, text, colour) for each category band.
band_labels = [
    (42, 'Extremely Obese', 'r'),
    (32.5, 'Obese', 'orange'),
    (27.5, 'Over Weight', 'yellow'),
    (21.5, 'Normal', 'g'),
    (8.5, 'Under Weight', 'b'),
]

for height, color in thresholds:
    plt.axhline(y=height, color=color, linestyle='-')
for height, text, color in band_labels:
    plt.text(label_x, height, text, horizontalalignment='left', color=color)
plt.show()

In [28]:
# Share of users whose latest BMI is 25 or higher.
# On the BMI chart, 25 is the "Over Weight" boundary and "Obese" only starts
# at 30, so the message names the category that matches the threshold used.
count = int((latest_weight_log.BMI >= 25).sum())
percent = round(count / len(latest_weight_log.BMI) * 100, 2)
print(str(percent) + "% of users has BMI >= 25 (Over Weight or above)")