### Are there differences in activity patterns between weekdays and weekends?
1. Create a new factor variable in the dataset with two levels - "weekday" and "weekend" indicating whether a given date is a weekday or weekend day.

2. Make a plot containing a time series plot of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekdays or weekend days (y-axis).

In [1]:
import pandas as pd
import plotly
import plotly.express as px
# Colab-specific setup: make 'colab' the default plotly renderer so figures display in the notebook
plotly.io.renderers.default = "colab"

In [2]:
# read the activity CSV into a DataFrame and display it
df = pd.read_csv(filepath_or_buffer="activity.csv")
df

Unnamed: 0,steps,date,interval
0,,2012-10-01,0
1,,2012-10-01,5
2,,2012-10-01,10
3,,2012-10-01,15
4,,2012-10-01,20
...,...,...,...
17563,,2012-11-30,2335
17564,,2012-11-30,2340
17565,,2012-11-30,2345
17566,,2012-11-30,2350


In [3]:
# inspect the dtype of each column, starting with "steps"
df["steps"].dtype

dtype('float64')

In [4]:
# dtype of the "date" column
df["date"].dtype

dtype('O')

In [5]:
# dtype of the "interval" column
df["interval"].dtype

dtype('int64')

In [6]:
# "date" was read in with dtype object, so parse it into datetimes
# (the .dt accessor used later requires a datetime column)
df["date"] = df["date"].pipe(pd.to_datetime)

In [7]:
# fill NaN in "steps" with 0
# Assign the filled column back instead of calling fillna(..., inplace=True) on
# df["steps"]: the in-place call operates on an intermediate Series, which
# pandas warns about (chained assignment) and which leaves df unchanged when
# Copy-on-Write is enabled.
# NOTE(review): counting missing readings as 0 steps lowers the per-interval
# averages computed later -- confirm this is the intended treatment.
df["steps"] = df["steps"].fillna(0)

In [8]:
# derive the day of the week as an index: Monday is 0, Tuesday is 1, and so on
df["day_no"] = df["date"].dt.weekday
df

Unnamed: 0,steps,date,interval,day_no
0,0.0,2012-10-01,0,0
1,0.0,2012-10-01,5,0
2,0.0,2012-10-01,10,0
3,0.0,2012-10-01,15,0
4,0.0,2012-10-01,20,0
...,...,...,...,...
17563,0.0,2012-11-30,2335,4
17564,0.0,2012-11-30,2340,4
17565,0.0,2012-11-30,2345,4
17566,0.0,2012-11-30,2350,4


In [9]:
# helper that labels a row as "weekday" or "weekend" from its day-of-week index
def day_type(row):
    """Classify a row by its "day_no" value.

    Args:
        row: any mapping-like object that supports row["day_no"].

    Returns:
        "weekday" when day_no is one of 0-4, "weekend" when it is 5 or 6,
        and None for any other value.
    """
    day_index = row["day_no"]
    if day_index in (5, 6):
        return "weekend"
    if day_index in (0, 1, 2, 3, 4):
        return "weekday"
    # anything outside 0-6 gets no label
    return None

In [10]:
# run day_type on every row (axis=1 hands the function one whole row at a time)
# and store the returned label in a new column, "day"
df["day"] = df.apply(day_type, axis=1)

In [11]:
# display the frame to check the newly added "day" column
df

Unnamed: 0,steps,date,interval,day_no,day
0,0.0,2012-10-01,0,0,weekday
1,0.0,2012-10-01,5,0,weekday
2,0.0,2012-10-01,10,0,weekday
3,0.0,2012-10-01,15,0,weekday
4,0.0,2012-10-01,20,0,weekday
...,...,...,...,...,...
17563,0.0,2012-11-30,2335,4,weekday
17564,0.0,2012-11-30,2340,4,weekday
17565,0.0,2012-11-30,2345,4,weekday
17566,0.0,2012-11-30,2350,4,weekday


In [12]:
# group the rows by ("interval", "day") and compute the mean, min and max of "steps"
# for each group; reset_index() then turns the two group keys back into ordinary
# columns instead of leaving them as the index
grouped_interval = (
    df.groupby(["interval", "day"])
    .agg({"steps": ["mean", "min", "max"]})
    .reset_index()
)
grouped_interval

Unnamed: 0_level_0,interval,day,steps,steps,steps
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,min,max
0,0,weekday,2.022222,0.0,47.0
1,0,weekend,0.000000,0.0,0.0
2,5,weekday,0.400000,0.0,18.0
3,5,weekend,0.000000,0.0,0.0
4,10,weekday,0.155556,0.0,7.0
...,...,...,...,...,...
571,2345,weekend,1.625000,0.0,26.0
572,2350,weekday,0.266667,0.0,8.0
573,2350,weekend,0.000000,0.0,0.0
574,2355,weekday,1.266667,0.0,29.0


In [13]:
# line chart of the mean step count per interval, one line per "day" value
# NOTE(review): "interval" values appear to be clock times encoded as HHMM
# (the largest is 2355); if so, a numeric x-axis leaves gaps between :55 and
# the next full hour -- confirm whether that matters for this plot.
fig = px.line(
    x=grouped_interval["interval"],
    y=grouped_interval["steps"]["mean"],
    color=grouped_interval["day"],
)
# label both axes in a single layout update
fig.update_layout(
    xaxis_title_text='5-min interval',
    yaxis_title_text='Avg. no. of steps',
)

fig.show()