In [1]:
import pandas as pd 
from IPython.display import display, HTML

import plotly.graph_objs as go
import plotly.express as px

##### workouts_df and sports_info_df are both required in order to map the name of the workout to ID

In [2]:
workouts_df = pd.read_pickle("../data/clean/records/workouts/2021-12-27_2022-08-01.csv")
sports_info_df = pd.read_csv("../data/csvs/records/sports_info.csv")

df = pd.merge(workouts_df, sports_info_df[['id','name']], how='left', left_on='sport_id', right_on='id')


In [3]:
# Filter dates 

# All data after select date
# df = df.loc[df['during_start'] > '2022-05-01'] # cjx data 

# Between two dates 
df = df.loc[df["during_start"].between("2022-05-06", "2022-07-31")]

In [4]:
# Adding columns we are going to need for graphing 
df['total_workout_time_min'] = df.tot_sec_diff / 60
df["cycle_start_dayname"] = df["cycle_start"].dt.day_name()
df["cycle_start_monthname"] = df["cycle_start"].dt.month_name()

In [5]:
display(HTML(df.head().to_html()))

Unnamed: 0,cycle_id,created_at,updated_at,sport_id,activity_id,gps_enabled,intensity_score,max_heart_rate,average_heart_rate,distance,raw_intensity_score,altitude_gain,altitude_change,cumulative_workout_intensity,projected_score,kilojoules,user_id,timezone_offset,survey_response_id,percent_recorded,auto_detected,state,responded,source,Zone1,Zone2,Zone3,Zone4,Zone5,Zone6,during_start,during_end,cycle_start,cycle_end,calories,tot_sec_diff,id,name,total_workout_time_min,cycle_start_dayname,cycle_start_monthname
269,223992931,2022-05-06 12:08:45.653,2022-05-06 12:17:15.932,63,430340515,True,4.2,106,92,380.058972,0.000141,13.950805,9.542785,0.0,0.000141,115.879616,3802464,-400,0.0,1.0,False,complete,False,user,271088,344186,0,0,0,0,2022-05-06 11:58:27.777,2022-05-06 12:08:43.208,2022-05-06,2022-05-07,27,615.431,63.0,Walking,10.257183,Friday,May
270,223992931,2022-05-06 13:14:06.415,2022-05-06 13:21:06.966,44,430361376,False,4.9221,119,89,0.0,0.000519,0.0,0.0,0.0,0.000519,524.036926,3802464,-400,0.0,1.0,False,complete,False,user,1591000,1399659,54794,0,0,0,2022-05-06 12:23:20.454,2022-05-06 13:14:06.210,2022-05-06,2022-05-07,125,3045.756,44.0,Yoga,50.7626,Friday,May
271,225582260,2022-05-10 20:41:05.561,2022-05-10 20:57:29.936,44,432896835,False,8.8529,144,117,0.0,0.002858,0.0,0.0,0.0,0.002858,1687.030273,3802464,-400,0.0,1.0,True,complete,False,user,239364,571979,1811108,708478,0,0,2022-05-10 19:45:34.684,2022-05-10 20:41:05.486,2022-05-10,2022-05-11,403,3330.802,44.0,Yoga,55.513367,Tuesday,May
272,226223555,2022-05-12 16:15:21.374,2022-05-12 16:17:51.972,45,434066249,False,8.5924,145,118,0.0,0.002683,0.0,0.0,0.0,0.002683,1519.78894,3802464,-400,0.0,1.0,False,complete,False,user,0,586396,1516977,692139,0,0,2022-05-12 15:28:45.311,2022-05-12 16:15:20.983,2022-05-12,2022-05-13,363,2795.672,45.0,Weightlifting,46.594533,Thursday,May
273,226630958,2022-05-13 18:27:13.628,2022-05-13 18:34:44.235,45,434718537,False,7.4811,142,110,0.0,0.001973,0.0,0.0,0.0,0.001973,1287.371094,3802464,-400,0.0,1.0,False,complete,False,user,28839,1351594,1383357,99975,0,0,2022-05-13 17:39:29.416,2022-05-13 18:27:13.117,2022-05-13,2022-05-14,307,2863.701,45.0,Weightlifting,47.72835,Friday,May


### Zone Duration by Day
> zone_duration() in VisableWhoop

In [6]:
zone_gb_df = df.groupby(["cycle_id", "cycle_start"])["Zone1", "Zone2", "Zone3", "Zone4", "Zone5", "Zone6"].sum().reset_index()

only_zone_df = zone_gb_df[["Zone1", "Zone2", "Zone3", "Zone4", "Zone5", "Zone6"]]

zone_gb_df["row_sum"] = only_zone_df.sum(axis=1)

zone_gb_df["Zone1_pct"] = (zone_gb_df['Zone1'] / zone_gb_df["row_sum"])
zone_gb_df["Zone2_pct"] = (zone_gb_df['Zone2'] / zone_gb_df["row_sum"]) 
zone_gb_df["Zone3_pct"] = (zone_gb_df['Zone3'] / zone_gb_df["row_sum"]) 
zone_gb_df["Zone4_pct"] = (zone_gb_df['Zone4'] / zone_gb_df["row_sum"])
zone_gb_df["Zone5_pct"] = (zone_gb_df['Zone5'] / zone_gb_df["row_sum"])
zone_gb_df["Zone6_pct"] = (zone_gb_df['Zone6'] / zone_gb_df["row_sum"])
zone_gb_df


plot = go.Figure()

plot.add_trace(go.Scatter(
    name = 'Zone 6 - (90-100%)',
    x = zone_gb_df["cycle_start"],
    y = zone_gb_df["Zone6_pct"],
    stackgroup='one'
    ))
plot.add_trace(go.Scatter(
    name = 'Zone 5 - (80-89%)',
    x = zone_gb_df["cycle_start"],
    y = zone_gb_df["Zone5_pct"],
    stackgroup='one'
    ))
plot.add_trace(go.Scatter(
    name = 'Zone 4 - (70-79%)',
    x = zone_gb_df["cycle_start"],
    y = zone_gb_df["Zone4_pct"],
    stackgroup='one'
    ))
plot.add_trace(go.Scatter(
    name = 'Zone 3 - (60-69%)',
    x = zone_gb_df["cycle_start"],
    y = zone_gb_df["Zone3_pct"],
    stackgroup='one'
    ))
plot.add_trace(go.Scatter(
    name = 'Zone 2 - (50-59%)',
    x = zone_gb_df["cycle_start"],
    y = zone_gb_df["Zone2_pct"],
    stackgroup='one'
    ))
plot.add_trace(go.Scatter(
    name = 'Zone 1 - (0-49%)',
    x = zone_gb_df["cycle_start"],
    y = zone_gb_df["Zone1_pct"],
    stackgroup='one'
    ))


plot.layout.yaxis.tickformat = ',.0%'
plot.update_xaxes(title='Date')
plot.update_yaxes(title='Zone Duration %')
plot.update_layout(title_text="Zone Duration % by Date", title_x=0.5)
plot.show()

  zone_gb_df = df.groupby(["cycle_id", "cycle_start"])["Zone1", "Zone2", "Zone3", "Zone4", "Zone5", "Zone6"].sum().reset_index()


In [7]:
workouts_df = pd.read_pickle("../data/clean/records/workouts/2021-12-27_2022-08-01.csv")
sports_info_df = pd.read_csv("../data/csvs/records/sports_info.csv")

df = pd.merge(workouts_df, sports_info_df[['id','name']], how='left', left_on='sport_id', right_on='id')
# Adding columns we are going to need for graphing 
df['total_workout_time_min'] = df.tot_sec_diff / 60
df["cycle_start_dayname"] = df["cycle_start"].dt.day_name()
df["cycle_start_monthname"] = df["cycle_start"].dt.month_name()


### Count of Activities based on Type
> activity_count() in VisableWhoop

In [8]:
counts = df['name'].value_counts().rename_axis('name').reset_index(name='count')

In [9]:
fig = px.bar(counts,
            x='name',
            y='count',
            title='Test',
            color='name',
            barmode='stack')

fig.update_layout(title_text='Count of Activities by Type', title_x=0.5,
                xaxis_title="Activity", yaxis_title="Count", legend_title="Activity Type")

fig.show()

### Workout Duration Hist by Type
> workout_duration() in VisableWhoop

In [10]:
fig = px.histogram(df, x="total_workout_time_min", color="name")

fig.update_traces(overwrite=True, marker={"opacity": 0.4}) 
fig.update_layout(barmode='overlay', title_text='Workout Duration (min)',
                title_x=0.5, xaxis_title="Minutes", yaxis_title="Count",
                legend_title="Activity Type")

fig.show()

### Workout Duration Box Plot by Day of Week 
> workout_duration_day() in VisableWhoop

In [11]:
fig = px.box(df, x="cycle_start_dayname", y="total_workout_time_min", color="name",
            labels={
                "total_workout_time_min": "Minutes",
                "cycle_start_dayname": ""},
            category_orders={"cycle_start_dayname": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]})

fig.update_traces(quartilemethod="exclusive")
fig.update_layout(title_text='Daily Workout Duration by Day', title_x=0.5,
                legend_title="Workout Type")

fig.show()

### Average HR by Workout Box Plot
> workout_hr() in VisableWhoop

In [12]:
fig = px.box(df, x="name", y=["average_heart_rate"], color="name")


fig.update_traces(quartilemethod="exclusive")
fig.update_layout(title_text='Average Heart Rate per Activity', title_x=0.5,
                xaxis_title="Activity Name", yaxis_title="Average Heart Rate", legend_title="Activity Type")
fig.show()