## Daily Dataset

## Importing Libraries

In [99]:
import numpy as np
import pandas as pd

## Loading Dataset

In [100]:
# Read the daily-activity CSV into a DataFrame and show it as the cell output.
# NOTE(review): the absolute "/content/..." path looks Colab-specific — adjust when running elsewhere.
daily = pd.read_csv(
    filepath_or_buffer="/content/dailyActivity_merged.csv",
)
daily

Unnamed: 0,Id,ActivityDate,TotalSteps,TotalDistance,TrackerDistance,LoggedActivitiesDistance,VeryActiveDistance,ModeratelyActiveDistance,LightActiveDistance,SedentaryActiveDistance,VeryActiveMinutes,FairlyActiveMinutes,LightlyActiveMinutes,SedentaryMinutes,Calories
0,1503960366,4/12/2016,13162,8.500000,8.500000,0.0,1.88,0.55,6.06,0.00,25,13,328,728,1985
1,1503960366,4/13/2016,10735,6.970000,6.970000,0.0,1.57,0.69,4.71,0.00,21,19,217,776,1797
2,1503960366,4/14/2016,10460,6.740000,6.740000,0.0,2.44,0.40,3.91,0.00,30,11,181,1218,1776
3,1503960366,4/15/2016,9762,6.280000,6.280000,0.0,2.14,1.26,2.83,0.00,29,34,209,726,1745
4,1503960366,4/16/2016,12669,8.160000,8.160000,0.0,2.71,0.41,5.04,0.00,36,10,221,773,1863
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
935,8877689391,5/8/2016,10686,8.110000,8.110000,0.0,1.08,0.20,6.80,0.00,17,4,245,1174,2847
936,8877689391,5/9/2016,20226,18.250000,18.250000,0.0,11.10,0.80,6.24,0.05,73,19,217,1131,3710
937,8877689391,5/10/2016,10733,8.150000,8.150000,0.0,1.35,0.46,6.28,0.00,18,11,224,1187,2832
938,8877689391,5/11/2016,21420,19.559999,19.559999,0.0,13.22,0.41,5.89,0.00,88,12,213,1127,3832


* Checking the null values

In [101]:
# Count the missing values in each column (isna is the same check as isnull).
daily.isna().sum()

Id                          0
ActivityDate                0
TotalSteps                  0
TotalDistance               0
TrackerDistance             0
LoggedActivitiesDistance    0
VeryActiveDistance          0
ModeratelyActiveDistance    0
LightActiveDistance         0
SedentaryActiveDistance     0
VeryActiveMinutes           0
FairlyActiveMinutes         0
LightlyActiveMinutes        0
SedentaryMinutes            0
Calories                    0
dtype: int64

In [102]:
# Print each column's dtype and non-null count to check the schema before any conversion.
daily.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 940 entries, 0 to 939
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Id                        940 non-null    int64  
 1   ActivityDate              940 non-null    object 
 2   TotalSteps                940 non-null    int64  
 3   TotalDistance             940 non-null    float64
 4   TrackerDistance           940 non-null    float64
 5   LoggedActivitiesDistance  940 non-null    float64
 6   VeryActiveDistance        940 non-null    float64
 7   ModeratelyActiveDistance  940 non-null    float64
 8   LightActiveDistance       940 non-null    float64
 9   SedentaryActiveDistance   940 non-null    float64
 10  VeryActiveMinutes         940 non-null    int64  
 11  FairlyActiveMinutes       940 non-null    int64  
 12  LightlyActiveMinutes      940 non-null    int64  
 13  SedentaryMinutes          940 non-null    int64  
 14  Calories  

* Convert the `ActivityDate` column from text (object dtype) to a datetime dtype

In [103]:
# Convert ActivityDate from text to datetime64 so it can be grouped and plotted as a date.
# The raw preview shows month/day/year values (e.g. 4/13/2016), so the format is
# stated explicitly instead of being inferred.
# The dtype guard keeps the cell re-runnable: once the column is already datetime,
# the `.str` accessor would raise AttributeError on a second run.
if not pd.api.types.is_datetime64_any_dtype(daily['ActivityDate']):
    daily['ActivityDate'] = pd.to_datetime(
        daily['ActivityDate'].str.strip(),
        format='%m/%d/%Y',
    )

In [104]:
# List the current column names; these are the keys used by the rename in the next step.
daily.columns

Index(['Id', 'ActivityDate', 'TotalSteps', 'TotalDistance', 'TrackerDistance',
       'LoggedActivitiesDistance', 'VeryActiveDistance',
       'ModeratelyActiveDistance', 'LightActiveDistance',
       'SedentaryActiveDistance', 'VeryActiveMinutes', 'FairlyActiveMinutes',
       'LightlyActiveMinutes', 'SedentaryMinutes', 'Calories'],
      dtype='object')

In [105]:
# Switch the CamelCase headers to Snake_Case names.
# `Id` and `Calories` are not in the mapping, so they keep their original names.
daily = daily.rename(
    columns={
        'ActivityDate': 'Activity_Date',
        'TotalSteps': 'Total_Steps',
        'TotalDistance': 'Total_Distance',
        'TrackerDistance': 'Tracker_Distance',
        'LoggedActivitiesDistance': 'Logged_Activities_Distance',
        'VeryActiveDistance': 'Very_Active_Distance',
        'ModeratelyActiveDistance': 'Moderately_Active_Distance',
        'LightActiveDistance': 'Light_Active_Distance',
        'SedentaryActiveDistance': 'Sedentary_Active_Distance',
        'VeryActiveMinutes': 'Very_Active_Minutes',
        'FairlyActiveMinutes': 'Fairly_Active_Minutes',
        'LightlyActiveMinutes': 'Lightly_Active_Minutes',
        'SedentaryMinutes': 'Sedentary_Minutes',
    }
)

In [106]:
# Save the cleaned table to disk.
# index=False leaves out the auto-generated RangeIndex, which would otherwise be written
# as an extra unnamed first column and come back as "Unnamed: 0" when the file is reloaded.
daily.to_csv("daily_data.csv", index=False)

##  Visualizing the data


* Creating a Correlation heatmap to show the positive and negative correlation among the columns

In [107]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

## Activity Date vs Distance Categories

In [108]:
# Stacked bars: for every activity date, one segment per distance-intensity column.
distance_columns = [
    'Very_Active_Distance',
    'Moderately_Active_Distance',
    'Light_Active_Distance',
    'Sedentary_Active_Distance',
]
fig_distance = px.bar(
    daily,
    x='Activity_Date',
    y=distance_columns,
    barmode='stack',
    labels={'Activity_Date': 'Activity Date'},
    title='Categorical Bar Chart: Activity Date vs. Distance Categories',
)
fig_distance.show()

## **Activity_Date** vs. **Sum of Calories** Burned on That Day

In [109]:
# Add up Calories over all rows sharing a date, then draw one point per date.
aggregated_data = (
    daily
    .groupby('Activity_Date')['Calories']
    .sum()
    .reset_index()
)

fig = px.scatter(
    aggregated_data,
    x='Activity_Date',
    y='Calories',
    title='Calories Burned Over Time',
)
fig.update_xaxes(title_text='Activity Day')
fig.update_yaxes(title_text='Calories Burned')
fig.show()

## Sum of Activity Minutes for Each Day

In [119]:
# Sum each *minutes* column over all rows sharing a date.
# Only minute-valued columns belong here: the original aggregated
# 'Moderately_Active_Distance' (a distance), which put a distance series on a
# chart whose values and axis are labelled as minutes. 'Fairly_Active_Minutes'
# is the matching minutes column for that intensity level.
aggregated_data = daily.groupby('Activity_Date').agg({
    'Sedentary_Minutes': 'sum',
    'Fairly_Active_Minutes': 'sum',
    'Lightly_Active_Minutes': 'sum',
    'Very_Active_Minutes': 'sum'
}).reset_index()

# Long format: one row per (date, column), so each aggregated column is drawn as its own line.
aggregated_data_melted = aggregated_data.melt(
    id_vars=['Activity_Date'],
    var_name='ActivityType',
    value_name='Minutes',
)

fig = px.line(
    aggregated_data_melted,
    x='Activity_Date',
    y='Minutes',
    color='ActivityType',
    title='Activity Minutes Over Time',
)
fig.update_xaxes(title='Activity Date')
fig.update_yaxes(title='Minutes')
fig.show()


## Activity Distance Over Time

In [122]:
# Average each distance-intensity column over all rows sharing a date.
aggregated_data = daily.groupby('Activity_Date').agg({
    'Sedentary_Active_Distance': 'mean',
    'Light_Active_Distance': 'mean',
    'Moderately_Active_Distance': 'mean',
    'Very_Active_Distance': 'mean'
}).reset_index()

# Long format: one row per (date, distance type), so each type is drawn as its own line.
aggregated_data_melted = aggregated_data.melt(
    id_vars=['Activity_Date'],
    var_name='ActivityType',
    value_name='Distance',
)

fig = px.line(
    aggregated_data_melted,
    x='Activity_Date',
    y='Distance',
    color='ActivityType',
    title='Activity Distance Over Time',
)
fig.update_xaxes(title='Activity Day')
# NOTE(review): the axis previously said miles. In the raw preview 13162 steps map to a
# distance of 8.5 (about 1500 steps per unit), which fits kilometres rather than miles —
# confirm against the dataset's data dictionary.
fig.update_yaxes(title='Distance (in km)')
fig.show()
