In [2]:
from datetime import datetime

In [3]:
# import secrets and tokens from config.py
from config import client_id, client_secret, refresh_token

import requests
import urllib3

# import packages for data manipulation
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

# Silence urllib3's InsecureRequestWarning.
# NOTE(review): this only matters for requests made with certificate
# verification disabled; prefer keeping verification on and dropping this.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Strava endpoints: OAuth token exchange and the athlete's activity list.
auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

# Form fields for the refresh-token grant; the credentials come from config.py.
payload = dict(
    client_id=client_id,
    client_secret=client_secret,
    refresh_token=refresh_token,
    grant_type="refresh_token",
    f='json',
)

In [4]:
# Exchange the refresh token for an access token.
print("Requesting Token...\n")
# Certificate verification is left at its default (enabled): this request
# carries the client secret and refresh token, so it must not be sent over an
# unverified TLS connection. A timeout keeps the cell from hanging forever.
res = requests.post(auth_url, data=payload, timeout=30)
# Fail loudly on an HTTP error instead of hitting a confusing KeyError below.
res.raise_for_status()
access_token = res.json()['access_token']
# Do not echo the token itself: notebook outputs are saved and shared.
print("Access token received.\n")

# Fetch the first page (up to 200 entries) of the athlete's activities.
# Only page 1 is requested, so older activities beyond it are not included.
header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}
activities_response = requests.get(activites_url, headers=header, params=param, timeout=30)
# Without this check an error payload would be passed on as if it were data.
activities_response.raise_for_status()
my_dataset = activities_response.json()

Requesting Token...

Access Token = <redacted>



In [5]:
# Flatten the JSON activity records into a table; nested objects become
# dotted column names (e.g. 'athlete.id', 'map.id').
activities = pd.json_normalize(data=my_dataset)

In [6]:
# Display the column labels of the flattened activities frame.
activities.columns 

Index(['resource_state', 'name', 'distance', 'moving_time', 'elapsed_time',
       'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id',
       'start_date', 'start_date_local', 'timezone', 'utc_offset',
       'location_city', 'location_state', 'location_country',
       'achievement_count', 'kudos_count', 'comment_count', 'athlete_count',
       'photo_count', 'trainer', 'commute', 'manual', 'private', 'visibility',
       'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed',
       'max_speed', 'average_cadence', 'average_watts', 'max_watts',
       'weighted_average_watts', 'kilojoules', 'device_watts', 'has_heartrate',
       'average_heartrate', 'max_heartrate', 'heartrate_opt_out',
       'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id',
       'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count',
       'total_photo_count', 'has_kudoed', 'suffer_score', 'athlete.id',
       'athlete.resource_state', 'map.id', 'map.sum

In [7]:
# Split the local start timestamp into a time-of-day column ('start_time')
# and a date-only 'start_date_local' column.
local_start = pd.to_datetime(activities['start_date_local'])
activities['start_time'] = local_start.dt.time
activities['start_date_local'] = local_start.dt.date
activities.head(n=5)

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,total_photo_count,has_kudoed,suffer_score,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_temp,start_time
0,2,Morning Run,6456.5,2354,2371,6.0,Run,Run,,9620123035,...,0,False,16.0,43537797,1,a9620123035,wtmjFpxidVBWRw@RaAH{@Nm@^kCXsAb@iCHUHONKL?f@Tn...,2,,06:32:09
1,2,Evening Run,6625.3,1994,2029,7.0,Run,Run,,9617068068,...,0,False,84.0,43537797,1,a9617068068,yggjF`irdVM~@c@`EUdBk@tBy@|HIVIBOBq@]_B_A{BgAG...,2,,18:12:25
2,2,Lunch Swim,1267.6,1444,1891,0.0,Swim,Swim,,9615465097,...,0,False,30.0,43537797,1,a9615465097,,2,,11:47:16
3,2,Afternoon Swim,1629.7,1765,2455,0.0,Swim,Swim,,9610042593,...,0,False,62.0,43537797,1,a9610042593,,2,,16:45:01
4,2,Evening HIIT,0.0,3584,3584,0.0,Workout,HighIntensityIntervalTraining,,9603649228,...,0,False,22.0,43537797,1,a9603649228,,2,,19:02:21


In [8]:
# Work on an independent (deep) copy so the fetched frame stays untouched.
activities_copy = activities.copy(deep=True)

In [9]:
# Drop columns that are not needed for the analysis.
# The result is derived from the untouched `activities` frame and bound to a
# fresh object (no inplace=True), so re-running this cell does not raise a
# KeyError on columns that were already removed.
activities_copy = activities.drop(
    columns=[
        'map.summary_polyline',
        'resource_state',
        'external_id',
        'upload_id',
        'location_city',
        'location_state',
        'has_kudoed',
        'start_date',
        'athlete.resource_state',
        'utc_offset',
        'map.resource_state',
        'athlete.id',
        'visibility',
        'heartrate_opt_out',
        'upload_id_str',
        'from_accepted_tag',
        'map.id',
        'manual',
        'private',
        'flagged',
    ]
)

In [10]:
# Keep only running activities (type == 'Run').
# .copy() makes the filtered frame independent of its parent so that the
# column assignments in later cells do not act on a slice of another frame.
activities_copy = activities_copy.query("type == 'Run'").copy()
print(len(activities_copy))

112


In [11]:
# Convert data types and units.

# Unit-conversion factors.
METERS_PER_MILE = 1609.344
MPS_TO_MPH = 2.23693629  # meters/second -> miles/hour

# The converted values are always computed from the raw `activities` frame
# (still in meters and meters/second) and aligned to the filtered rows by
# index. Re-running this cell therefore yields the same result instead of
# dividing/multiplying the already-converted columns a second time.
# `assign` replaces each column outright, so 'start_date_local' ends up with a
# proper datetime dtype; `tz_localize(None)` leaves it timezone-naive.
activities_copy = activities_copy.assign(
    start_date_local=lambda frame: pd.to_datetime(frame['start_date_local']).dt.tz_localize(None),
    distance=activities['distance'] / METERS_PER_MILE,
    average_speed=activities['average_speed'] * MPS_TO_MPH,
    max_speed=activities['max_speed'] * MPS_TO_MPH,
)
# The index is intentionally left as-is (not set to 'start_date_local').

  activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)


In [12]:
# Preview the first five rows after filtering and unit conversion.
activities_copy.head(n=5)

Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,start_date_local,...,average_heartrate,max_heartrate,display_hide_heartrate_option,elev_high,elev_low,pr_count,total_photo_count,suffer_score,average_temp,start_time
0,Morning Run,4.011883,2354,2371,6.0,Run,Run,,9620123035,2023-08-10,...,135.6,145.0,True,11.6,5.8,4,0,16.0,,06:32:09
1,Evening Run,4.116771,1994,2029,7.0,Run,Run,,9617068068,2023-08-09,...,165.4,184.0,True,13.6,5.8,2,0,84.0,,18:12:25
5,Morning Run,3.509504,1878,1902,4.0,Run,Run,,9601083751,2023-08-07,...,159.5,176.0,True,7.8,3.0,0,0,62.0,,09:39:54
6,NFS McKinley Magic Miles,3.132519,2140,2148,7.0,Run,Run,0.0,9594866140,2023-08-06,...,126.2,140.0,True,12.8,7.4,0,0,8.0,,08:04:57
7,Morning Run,8.22043,4146,4215,18.0,Run,Run,0.0,9588446455,2023-08-05,...,168.3,181.0,True,14.0,7.8,12,0,194.0,,07:20:36


In [13]:
# Label each activity with its year and month as a 'YYYY-MM' string.
# Approach: https://stackoverflow.com/questions/25146121/extracting-just-month-and-year-separately-from-pandas-datetime-column
activities_copy['Month_Year'] = (
    activities_copy['start_date_local']
    .pipe(pd.to_datetime)
    .dt.strftime('%Y-%m')
)

In [14]:
# Add a Week_of_Year column holding the '%U' week number as a string
# (strftime's week of the year, counting weeks from Sunday).
# Approach: https://stackoverflow.com/questions/2600775/how-to-get-Week-number-in-python
start_dates = pd.to_datetime(activities_copy['start_date_local'])
activities_copy['Week_of_Year'] = start_dates.dt.strftime('%U')

In [18]:
# Get weekly mileage and total weekly moving time.
# The aggregated columns are selected with a list (double brackets): selecting
# several GroupBy columns with a bare tuple is deprecated and rejected by
# newer pandas versions. `reset_index()` already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapper is needed.
df_miles_per_week = (
    activities_copy
    .groupby(['Week_of_Year'])[['distance', 'moving_time']]
    .sum()
    .reset_index()
)

  df_miles_per_week = pd.DataFrame(activities_copy.groupby(['Week_of_Year'])['distance', 'moving_time'].sum().reset_index())


In [19]:
# Add an average-speed column: weekly distance divided by weekly moving time,
# with moving_time scaled from seconds to hours (60 * 60 seconds per hour).
df_miles_per_week['Average Moving Speed (mph)'] = df_miles_per_week['distance'].div(
    df_miles_per_week['moving_time'].div(60 * 60)
)

In [20]:
# TODO (optional): add a column giving the first day of each week,
# derived from Week_of_Year.

In [21]:
# Show the last five weeks with reader-friendly column labels.
# The renamed frame is only displayed; df_miles_per_week itself keeps its
# original column names.
df_miles_per_week.tail().rename(
    columns={
        "distance": "Weekly Mileage",
        "moving_time": "Total Moving Time (seconds)",
    }
)

Unnamed: 0,Week_of_Year,Weekly Mileage,Total Moving Time (seconds),Average Moving Speed (mph)
26,28,5.525419,3222,6.173653
27,29,10.534914,5892,6.436811
28,30,9.913045,5226,6.828734
29,31,19.129347,10300,6.685985
30,32,14.770677,8366,6.356017


In [22]:
# Make function that calculates weekly mileage targets
def weekly_mileage_target(easy_runs_minutes, long_run_mileage, easy_pace_min_per_mile=10):
    """Return the total weekly mileage target.

    Easy-run time is converted to miles at a fixed pace and the long-run
    distance is added on top.

    Parameters
    ----------
    easy_runs_minutes : number
        Total minutes of easy running planned for the week.
    long_run_mileage : number
        Planned long-run distance in miles.
    easy_pace_min_per_mile : number, default 10
        Pace in minutes per mile used to convert easy-run minutes to miles.

    Returns
    -------
    number
        ``easy_runs_minutes / easy_pace_min_per_mile + long_run_mileage``.

    Raises
    ------
    ValueError
        If ``easy_pace_min_per_mile`` is not positive.
    """
    if easy_pace_min_per_mile <= 0:
        raise ValueError("easy_pace_min_per_mile must be positive")

    easy_miles = easy_runs_minutes / easy_pace_min_per_mile
    return easy_miles + long_run_mileage

In [23]:
# Week_of_Year was built as text; make it numeric so it can be compared
# against week numbers when filtering.
df_miles_per_week["Week_of_Year"] = df_miles_per_week["Week_of_Year"].pipe(pd.to_numeric)


In [24]:
# Preview the first five weekly totals.
df_miles_per_week.head(n=5)

Unnamed: 0,Week_of_Year,distance,moving_time,Average Moving Speed (mph)
0,2,4.127707,2158,6.885887
1,3,20.656802,11420,6.511776
2,4,22.829302,13070,6.288102
3,5,16.351259,8762,6.718162
4,6,8.293441,4317,6.916004


In [25]:
# Round weekly distance and average speed to one decimal place.
# Later cells compute from these rounded values.
df_miles_per_week["distance"] = df_miles_per_week["distance"].round(1)
df_miles_per_week["Average Moving Speed (mph)"] = (
    df_miles_per_week["Average Moving Speed (mph)"].round(1)
)

df_miles_per_week.head(n=5)


Unnamed: 0,Week_of_Year,distance,moving_time,Average Moving Speed (mph)
0,2,4.1,2158,6.9
1,3,20.7,11420,6.5
2,4,22.8,13070,6.3
3,5,16.4,8762,6.7
4,6,8.3,4317,6.9


In [26]:
# Keep only the weeks from the start of marathon training (week 31 onward).
df_marathon = df_miles_per_week.loc[df_miles_per_week["Week_of_Year"] >= 31]
df_marathon.head(n=5)
# Next step: calculate the difference to the weekly targets.

Unnamed: 0,Week_of_Year,distance,moving_time,Average Moving Speed (mph)
29,31,19.1,10300,6.7
30,32,14.8,8366,6.4


In [27]:
# Make function that calculates weekly mileage targets
# NOTE(review): an identical function is already defined in an earlier cell of
# this notebook; keep a single definition so one does not silently shadow the
# other.
def weekly_mileage_target(easy_runs_minutes, long_run_mileage, easy_pace_min_per_mile=10):
    """Return the total weekly mileage target.

    Easy-run time is converted to miles at a fixed pace and the long-run
    distance is added on top.

    Parameters
    ----------
    easy_runs_minutes : number
        Total minutes of easy running planned for the week.
    long_run_mileage : number
        Planned long-run distance in miles.
    easy_pace_min_per_mile : number, default 10
        Pace in minutes per mile used to convert easy-run minutes to miles.

    Returns
    -------
    number
        ``easy_runs_minutes / easy_pace_min_per_mile + long_run_mileage``.

    Raises
    ------
    ValueError
        If ``easy_pace_min_per_mile`` is not positive.
    """
    if easy_pace_min_per_mile <= 0:
        raise ValueError("easy_pace_min_per_mile must be positive")

    easy_miles = easy_runs_minutes / easy_pace_min_per_mile
    return easy_miles + long_run_mileage

In [51]:
# Mileage goals for the first two training weeks:
# 190 easy minutes each, with long runs of 8 and 10 miles.
# NOTE(review): as written these evaluate to 27.0 and 29.0 (190/10 + 8 and
# 190/10 + 10). The saved output of 9.0 / 19.0 does not match and looks stale
# from an earlier run -- re-run the notebook or confirm the intended inputs.
week_1_goal_mileage = weekly_mileage_target(190, 8)
week_2_goal_mileage = weekly_mileage_target(190, 10)
print(week_1_goal_mileage, week_2_goal_mileage, sep="\n")

9.0
19.0


In [52]:
# One row per training week, holding that week's mileage goal.
df_goal = pd.DataFrame(
    {'Weekly Mileage Goal': [week_1_goal_mileage, week_2_goal_mileage]}
)

In [53]:
# Attach the week-of-year number each goal belongs to (used as the merge key).
df_goal = df_goal.assign(Week_of_Year=[31, 32])

In [54]:
# Preview the goal table.
df_goal.head(n=5)

Unnamed: 0,Weekly Mileage Goal,Week_of_Year
0,9.0,31
1,19.0,32


In [55]:
# Attach each week's mileage goal to the marathon-training weeks.
# Left join: every row of df_marathon is kept, matched on Week_of_Year.
df_combined = pd.merge(df_marathon, df_goal, how='left', on='Week_of_Year')

In [56]:
# "Miles to Reach Goal" = weekly goal minus distance already run
# (negative when the goal has been exceeded).
df_combined["Miles to Reach Goal"] = df_combined["Weekly Mileage Goal"].sub(
    df_combined["distance"]
)

In [57]:
# "Average Pace (min/mile)": moving time in minutes divided by distance,
# rounded to one decimal place.
df_combined["Average Pace (min/mile)"] = (
    df_combined["moving_time"]
    .div(60)
    .div(df_combined["distance"])
    .round(1)
)

In [58]:
# Preview the combined actual-vs-goal table.
df_combined.head(n=5)

Unnamed: 0,Week_of_Year,distance,moving_time,Average Moving Speed (mph),Weekly Mileage Goal,Miles to Reach Goal,Average Pace (min/mile)
0,31,19.1,10300,6.7,9.0,-10.1,9.0
1,32,14.8,8366,6.4,19.0,4.2,9.4


In [63]:
# Bar chart of weekly mileage, built from the wide-format frame: both the
# distance run and the miles remaining to the goal are passed as y columns.
import plotly.express as px

fig0 = px.bar(
    df_combined,
    x="Week_of_Year",
    y=["distance", "Miles to Reach Goal"],
    labels={"Week_of_Year": "Training Week ", "distance": "Distance (miles) "},
    title="Weekly Mileage",
    width=1000,
)

# Styling: outlined bars, fixed y-axis range, and wider gaps between bars.
(
    fig0
    .update_traces(marker_line_width=2.0)
    .update_yaxes(range=[-10, 50])
    .update_layout(bargap=0.5)
)
fig0.show()