In [23]:
from datetime import datetime

In [24]:
# import secrets and tokens from config.py
from config import client_id, client_secret, refresh_token

import requests
import urllib3

# import packages for data manipulation
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

payload = {
    'client_id': client_id,
    'client_secret': client_secret,
    'refresh_token': refresh_token,
    'grant_type': "refresh_token",
    'f': 'json'
}

In [25]:
print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']
print("Access Token = {}\n".format(access_token))

header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}
my_dataset = requests.get(activites_url, headers=header, params=param).json()

Requesting Token...

Access Token = 9f4d924cd9a87564d20c8103a4a07b6b8bbae776



In [26]:
activities = pd.json_normalize(my_dataset)

In [27]:
activities.columns 

Index(['resource_state', 'name', 'distance', 'moving_time', 'elapsed_time',
       'total_elevation_gain', 'type', 'sport_type', 'id', 'start_date',
       'start_date_local', 'timezone', 'utc_offset', 'location_city',
       'location_state', 'location_country', 'achievement_count',
       'kudos_count', 'comment_count', 'athlete_count', 'photo_count',
       'trainer', 'commute', 'manual', 'private', 'visibility', 'flagged',
       'gear_id', 'start_latlng', 'end_latlng', 'average_speed', 'max_speed',
       'has_heartrate', 'average_heartrate', 'max_heartrate',
       'heartrate_opt_out', 'display_hide_heartrate_option', 'elev_high',
       'elev_low', 'upload_id', 'upload_id_str', 'external_id',
       'from_accepted_tag', 'pr_count', 'total_photo_count', 'has_kudoed',
       'suffer_score', 'athlete.id', 'athlete.resource_state', 'map.id',
       'map.summary_polyline', 'map.resource_state', 'workout_type',
       'average_cadence', 'average_watts', 'max_watts',
       'weighted_a

In [28]:
#Break date into start time and date
activities['start_date_local'] = pd.to_datetime(activities['start_date_local'])
activities['start_time'] = activities['start_date_local'].dt.time
activities['start_date_local'] = activities['start_date_local'].dt.date
activities.head(5)

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,id,start_date,...,map.resource_state,workout_type,average_cadence,average_watts,max_watts,weighted_average_watts,kilojoules,device_watts,average_temp,start_time
0,2,Lunch Swim,1267.6,1444,1891,0.0,Swim,Swim,9615465097,2023-08-09T18:47:16Z,...,2,,,,,,,,,11:47:16
1,2,Afternoon Swim,1629.7,1765,2455,0.0,Swim,Swim,9610042593,2023-08-08T23:45:01Z,...,2,,,,,,,,,16:45:01
2,2,Evening HIIT,0.0,3584,3584,0.0,Workout,HighIntensityIntervalTraining,9603649228,2023-08-08T02:02:21Z,...,2,,,,,,,,,19:02:21
3,2,Morning Run,5648.0,1878,1902,4.0,Run,Run,9601083751,2023-08-07T16:39:54Z,...,2,,81.7,302.3,401.0,299.0,567.8,True,,09:39:54
4,2,NFS McKinley Magic Miles,5041.3,2140,2148,7.0,Run,Run,9594866140,2023-08-06T15:04:57Z,...,2,0.0,80.1,238.6,409.0,238.0,510.7,True,,08:04:57


In [29]:
# Make a copy of activities DataFrame for testing.
activities_copy = activities.copy()

In [30]:
activities_copy.drop(
    [
        'map.summary_polyline', 
        'resource_state',
        'external_id', 
        'upload_id', 
        'location_city', 
        'location_state', 
        'has_kudoed', 
        'start_date', 
        'athlete.resource_state', 
        'utc_offset', 
        'map.resource_state', 
        'athlete.id', 
        'visibility', 
        'heartrate_opt_out', 
        'upload_id_str', 
        'from_accepted_tag', 
        'map.id', 
        'manual', 
        'private', 
        'flagged', 
    ], 
    axis=1, 
    inplace=True
)

In [31]:
# filter down to Ride, Run, and Swim activities
activities_copy = activities_copy.query("type == 'Run'")
print(len(activities_copy))

111


In [32]:
# convert data types

# what does this do?
# activities_copy.loc[:, 'start_date'] = pd.to_datetime(activities_copy['start_date']).dt.tz_localize(None)
# what does this do?
activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)
# convert values
activities_copy.loc[:, 'distance'] /= 1609.344 # convert from meters to miles
activities_copy.loc[:, 'average_speed'] *= 2.23693629 # convert from meters/second to miles/hour
activities_copy.loc[:, 'max_speed'] *= 2.23693629 # convert from meters/second to miles/hour
# set index
#activities_copy.set_index('start_date_local', inplace=True)

  activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)


In [33]:
activities_copy.head()

Unnamed: 0,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,id,start_date_local,timezone,...,suffer_score,workout_type,average_cadence,average_watts,max_watts,weighted_average_watts,kilojoules,device_watts,average_temp,start_time
3,Morning Run,3.509504,1878,1902,4.0,Run,Run,9601083751,2023-08-07,(GMT-08:00) America/Los_Angeles,...,62.0,,81.7,302.3,401.0,299.0,567.8,True,,09:39:54
4,NFS McKinley Magic Miles,3.132519,2140,2148,7.0,Run,Run,9594866140,2023-08-06,(GMT-08:00) America/Los_Angeles,...,8.0,0.0,80.1,238.6,409.0,238.0,510.7,True,,08:04:57
5,Morning Run,8.22043,4146,4215,18.0,Run,Run,9588446455,2023-08-05,(GMT-08:00) America/Los_Angeles,...,194.0,0.0,83.5,318.5,520.0,319.0,1320.4,True,,07:20:36
6,Lunch Run,3.344593,1801,1876,7.0,Run,Run,9583192626,2023-08-04,(GMT-08:00) America/Los_Angeles,...,57.0,,80.0,302.1,376.0,296.0,544.1,True,,11:16:50
7,Evening Run,3.911718,2157,2193,6.0,Run,Run,9572580248,2023-08-02,(GMT-08:00) America/Los_Angeles,...,86.0,0.0,81.7,299.5,609.0,304.0,646.1,True,,18:16:11


In [34]:
# https://stackoverflow.com/questions/25146121/extracting-just-month-and-year-separately-from-pandas-datetime-column
# create a column that extracts month and year from the activity
# df['yyyy-mm'] = pd.to_datetime(df['ArrivalDate']).dt.strftime('%Y-%m')
activities_copy['Month_Year'] = pd.to_datetime(activities_copy['start_date_local']).dt.strftime('%Y-%m')

In [35]:
# https://stackoverflow.com/questions/2600775/how-to-get-week-number-in-python
# make a Week_Of_Year column
activities_copy['Week_Of_Year'] = pd.to_datetime(activities_copy['start_date_local']).dt.strftime('%U')

In [36]:
# sort by start_date_local asc

In [37]:
# get weekly mileage and total weekly moving time
df_miles_per_week = pd.DataFrame(activities_copy.groupby(['Week_Of_Year'])['distance', 'moving_time'].sum().reset_index())

  df_miles_per_week = pd.DataFrame(activities_copy.groupby(['Week_Of_Year'])['distance', 'moving_time'].sum().reset_index())


In [38]:
# make a average mph column
df_miles_per_week['Average Moving Speed (mph)'] = df_miles_per_week['distance']/(df_miles_per_week['moving_time']/(60*60))

In [39]:
# option: make a column that gives the first day of the week
# lookup Week_Of_Year???

In [44]:
# rename columns
df_miles_per_week.rename(columns={"distance": "Weekly Mileage", "moving_time": "Total Moving Time (seconds)"}).tail()

Unnamed: 0,Week_Of_Year,Weekly Mileage,Total Moving Time (seconds),Average Moving Speed (mph)
26,28,5.525419,3222,6.173653
27,29,10.534914,5892,6.436811
28,30,9.913045,5226,6.828734
29,31,19.129347,10300,6.685985
30,32,6.642023,4018,5.951041


In [41]:
# Make function that calculates weekly mileage targets
def weekly_mileage_target(easy_runs_minutes, long_run_mileage):

    easy_miles = easy_runs_minutes/10
    total_weekly_mileage = easy_miles + long_run_mileage
    return total_weekly_mileage

In [49]:
# convert the Week_Of_Year column to numeric type so we can filter on it
df_miles_per_week["Week_Of_Year"] = pd.to_numeric(df_miles_per_week["Week_Of_Year"])


In [50]:
# sort down to marathon training start
df_marathon = df_miles_per_week.query("Week_Of_Year >= 31")
df_marathon.head()
# calculate difference to targets

Unnamed: 0,Week_Of_Year,distance,moving_time,Average Moving Speed (mph)
29,31,19.129347,10300,6.685985
30,32,6.642023,4018,5.951041


In [None]:
# Make function that calculates weekly mileage targets
def weekly_mileage_target(easy_runs_minutes, long_run_mileage):

    easy_miles = easy_runs_minutes/10
    total_weekly_mileage = easy_miles + long_run_mileage
    return total_weekly_mileage

In [53]:
week_1_goal_mileage = weekly_mileage_target(190, 8)
week_2_goal_mileage = weekly_mileage_target(190, 10)
print(week_1_goal_mileage)

27.0


In [64]:
df_goal = pd.DataFrame([week_1_goal_mileage,week_2_goal_mileage], columns= ['Weekly_Mileage_Goal'])

In [65]:
df_goal['Week_Of_Year']= [31,32]

In [66]:
df_goal.head()

Unnamed: 0,Weekly_Mileage_Goal,Week_Of_Year
0,27.0,31
1,29.0,32


In [72]:
# merge Weekly_Mileage_Goal onto the df_marathon
df_combined = df_marathon.merge(df_goal, how = 'left', left_on='Week_Of_Year', right_on='Week_Of_Year')

In [73]:
df_combined.head()

Unnamed: 0,Week_Of_Year,distance,moving_time,Average Moving Speed (mph),Weekly_Mileage_Goal
0,31,19.129347,10300,6.685985,27.0
1,32,6.642023,4018,5.951041,29.0


In [71]:
# plot weekly mileage
import plotly.express as px

fig0 = px.bar(
    df_marathon, x = "Week_Of_Year", y = "distance",
    labels = dict(Week_Of_Year ="Training Week", distance ="Distance (miles) "),
    #hover_data=["start_date_local"],
    title = "Weekly Mileage",
    width = 1000
)
# Hover over should be the day, not the first of the month
# fig0.add_hline(y = 20*8)
fig0.update_traces(marker_line_width = 2.5)
fig0.update_yaxes(range = [0, 50])
#fig0.update_layout(bargap = 0.8)
fig0.show() 

ValueError: 
    Invalid value of type 'builtins.str' received for the 'x' property of bar
        Received value: 'Week_Of_Year'

    The 'x' property is an array that may be specified as a tuple,
    list, numpy array, or pandas Series