In [1]:
from datetime import datetime

In [2]:
# import secrets and tokens from config.py
from config import client_id, client_secret, refresh_token

import requests
import urllib3

# import packages for data manipulation
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

payload = {
    'client_id': client_id,
    'client_secret': client_secret,
    'refresh_token': refresh_token,
    'grant_type': "refresh_token",
    'f': 'json'
}

In [3]:
print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']
print("Access Token = {}\n".format(access_token))

header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}
my_dataset = requests.get(activites_url, headers=header, params=param).json()

Requesting Token...

Access Token = 914152ebe6f19f7fa6c2d00d1c80140eb850a199



In [4]:
activities = pd.json_normalize(my_dataset)

In [5]:
activities.columns 

Index(['resource_state', 'name', 'distance', 'moving_time', 'elapsed_time',
       'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id',
       'start_date', 'start_date_local', 'timezone', 'utc_offset',
       'location_city', 'location_state', 'location_country',
       'achievement_count', 'kudos_count', 'comment_count', 'athlete_count',
       'photo_count', 'trainer', 'commute', 'manual', 'private', 'visibility',
       'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed',
       'max_speed', 'average_cadence', 'average_watts', 'max_watts',
       'weighted_average_watts', 'kilojoules', 'device_watts', 'has_heartrate',
       'average_heartrate', 'max_heartrate', 'heartrate_opt_out',
       'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id',
       'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count',
       'total_photo_count', 'has_kudoed', 'suffer_score', 'athlete.id',
       'athlete.resource_state', 'map.id', 'map.sum

In [6]:
#Break date into start time and date
activities['start_date_local'] = pd.to_datetime(activities['start_date_local'])
activities['start_time'] = activities['start_date_local'].dt.time
activities['start_date_local'] = activities['start_date_local'].dt.date
activities.head(5)

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,total_photo_count,has_kudoed,suffer_score,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_temp,start_time
0,2,Afternoon Run,4087.6,1345,1349,3.0,Run,Run,,10489592906,...,0,False,23.0,43537797,1,a10489592906,omhjFnyvcV]Ri@b@c@b@c@r@a@fAIDE?GEiAkAYOOSyBiB...,2,,17:00:41
1,2,Evening Weight Training,0.0,3602,3602,0.0,WeightTraining,WeightTraining,,10483472946,...,0,False,11.0,43537797,1,a10483472946,,2,,19:45:11
2,2,Afternoon Run,5020.6,1819,1837,3.0,Run,Run,,10476390069,...,0,False,37.0,43537797,1,a10476390069,aimjFdtidVZDdBl@TBd@Nf@TBHELWZAPF`@HZI\DHBTCHE...,2,,14:26:41
3,2,Morning Trail Run,21602.5,11258,11264,721.0,Run,TrailRun,0.0,10470281355,...,2,False,144.0,43537797,1,a10470281355,azjlFl}|aVCW|AsArBm@d@}@hD]dBiEqA_AMgBB}@pA_FJ...,2,,08:49:13
4,2,Afternoon Run,3650.2,1383,1456,1.0,Run,Run,,10465220968,...,0,False,5.0,43537797,1,a10465220968,ymhjFryvcVk@`@e@f@i@r@]p@M`@GFIAWYe@a@GKICKI_A...,2,,16:45:22


In [14]:
# Make a copy of activities DataFrame for testing.
activities_copy = activities.copy()

In [8]:
activities_copy.drop(
    [
        'map.summary_polyline', 
        'resource_state',
        'external_id', 
        'upload_id', 
        'location_city', 
        'location_state', 
        'has_kudoed', 
        'start_date', 
        'athlete.resource_state', 
        'utc_offset', 
        'map.resource_state', 
        'athlete.id', 
        'visibility', 
        'heartrate_opt_out', 
        'upload_id_str', 
        'from_accepted_tag', 
        'map.id', 
        'manual', 
        'private', 
        'flagged', 
    ], 
    axis=1, 
    inplace=True
)

In [16]:
# filter down to Ride, Run, and Swim activities
activities_copy = activities_copy.query("start_date >= '2023-08-01T00:00:00Z' & start_date <= '2023-12-03T00:00:00Z' & (type == 'Run' | type == 'TrailRun')")
print(len(activities_copy))

54


In [17]:
# convert data types

# what does this do?
# activities_copy.loc[:, 'start_date'] = pd.to_datetime(activities_copy['start_date']).dt.tz_localize(None)
# what does this do?
activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)
# convert values
activities_copy.loc[:, 'distance'] /= 1609.344 # convert from meters to miles
activities_copy.loc[:, 'average_speed'] *= 2.23693629 # convert from meters/second to miles/hour
activities_copy.loc[:, 'max_speed'] *= 2.23693629 # convert from meters/second to miles/hour
# set index
#activities_copy.set_index('start_date_local', inplace=True)

  activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)


In [18]:
activities_copy.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,total_photo_count,has_kudoed,suffer_score,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_temp,start_time
18,2,Lunch Run,2.021569,1062,1065,4.0,Run,Run,,10318307329,...,0,False,20.0,43537797,1,a10318307329,qhhjFtpvcVOIi@_@u@{@aAyAi@iAGEEACBkBtBURg@RYNI...,2,,12:10:17
19,2,Afternoon Run,1.006062,521,521,1.0,Run,Run,,10313452620,...,0,False,8.0,43537797,1,a10313452620,_nhjFryvcVoA`ASTa@r@c@bAEBI?w@u@cA{@oAmA{DeDEM...,2,,15:31:32
20,2,Evening Run,4.27957,2308,2500,7.0,Run,Run,,10304280924,...,0,False,69.0,43537797,1,a10304280924,{ggjFtjrdVe@`DIfAUvAETYt@Ib@OtAEh@YfBMlAIVKHG?...,2,,18:14:53
21,2,Afternoon Run,3.131773,1621,1642,2.0,Run,Run,,10292678447,...,0,False,52.0,43537797,1,a10292678447,ktmjF~vidVfAmGJcALq@T{@@W\mBDMPYHEJ@f@Xt@TBBBL...,2,,15:36:20
22,2,Morning Run,9.332188,5056,5353,34.0,Run,Run,,10286282969,...,0,False,82.0,43537797,1,a10286282969,umjjFbl_dVEu@]gBKoB]aAi@eCWcCM]o@aAUm@UaAOsAe@...,2,,07:19:50


In [19]:
# https://stackoverflow.com/questions/25146121/extracting-just-month-and-year-separately-from-pandas-datetime-column
# create a column that extracts month and year from the activity
# df['yyyy-mm'] = pd.to_datetime(df['ArrivalDate']).dt.strftime('%Y-%m')
activities_copy['Month_Year'] = pd.to_datetime(activities_copy['start_date_local']).dt.strftime('%Y-%m')

In [20]:
# https://stackoverflow.com/questions/2600775/how-to-get-week-number-in-python
# make a Week_Of_Year column
activities_copy['Week_Of_Year'] = pd.to_datetime(activities_copy['start_date_local']).dt.strftime('%U')

In [14]:
# sort by start_date_local asc

In [21]:
# get weekly mileage and total weekly moving time
df_miles_per_week = pd.DataFrame(activities_copy.groupby(['Week_Of_Year'])['distance', 'moving_time'].sum().reset_index())

  df_miles_per_week = pd.DataFrame(activities_copy.groupby(['Week_Of_Year'])['distance', 'moving_time'].sum().reset_index())


In [22]:
# make a average mph column
df_miles_per_week['Average Moving Speed (mph)'] = df_miles_per_week['distance']/(df_miles_per_week['moving_time']/(60*60))

In [25]:
df_miles_per_week.rename(columns={"distance": "Weekly Mileage", "moving_time": "Total Moving Time (seconds)"})

Unnamed: 0,Week_Of_Year,Weekly Mileage,Total Moving Time (seconds),Average Moving Speed (mph)
0,31,15.476741,8104,6.875156
1,32,14.770677,8366,6.356017
2,33,19.479738,10692,6.558834
3,34,14.133461,7854,6.478286
4,35,15.211975,8236,6.649236
5,36,24.359242,13719,6.392104
6,37,5.669018,3086,6.613242
7,38,21.595569,11881,6.543561
8,39,4.591933,2441,6.772208
9,40,20.116768,10984,6.59326


In [29]:
# Make function that calculates weekly mileage targets
def weekly_mileage_target(easy_runs_minutes, long_run_mileage):

    easy_miles = easy_runs_minutes/10
    total_weekly_mileage = easy_miles + long_run_mileage
    return total_weekly_mileage

In [30]:
# convert the Week_Of_Year column to numeric type so we can filter on it
df_miles_per_week["Week_Of_Year"] = pd.to_numeric(df_miles_per_week["Week_Of_Year"])


In [41]:
# Instead, read in the weekly mileage goals from a .csv file
goal_df = pd.read_csv('..\data\CIM Training Plans 2023 - Weekly Mileage Goals.csv')

In [43]:
goal_df.head()

Unnamed: 0,Week,Long Run,Total Easy Minutes,Total Mileage
0,31,8,190.0,27.0
1,32,10,190.0,29.0
2,33,10,210.0,31.0
3,34,12,212.0,33.2
4,35,14,226.5,36.65


In [31]:
# sort down to marathon training start
df_marathon = df_miles_per_week.query("Week_Of_Year >= 31")
df_marathon.head()
# calculate difference to targets

Unnamed: 0,Week_Of_Year,distance,moving_time,Average Moving Speed (mph)
0,31,15.476741,8104,6.875156
1,32,14.770677,8366,6.356017
2,33,19.479738,10692,6.558834
3,34,14.133461,7854,6.478286
4,35,15.211975,8236,6.649236


In [44]:
# merge Weekly_Mileage_Goal onto the df_marathon
df_combined = df_marathon.merge(goal_df, how = 'left', left_on='Week_Of_Year', right_on='Week')

In [45]:
df_combined.head()

Unnamed: 0,Week_Of_Year,distance,moving_time,Average Moving Speed (mph),Week,Long Run,Total Easy Minutes,Total Mileage
0,31,15.476741,8104,6.875156,31.0,8.0,190.0,27.0
1,32,14.770677,8366,6.356017,32.0,10.0,190.0,29.0
2,33,19.479738,10692,6.558834,33.0,10.0,210.0,31.0
3,34,14.133461,7854,6.478286,34.0,12.0,212.0,33.2
4,35,15.211975,8236,6.649236,35.0,14.0,226.5,36.65


In [46]:
df_combined['Training Week'] = df_combined['Week_Of_Year'] - 30

In [55]:
df_combined['distance']= round(df_combined['distance'],1)

In [56]:
df_combined['Mileage Difference'] = round(df_combined['distance'] - df_combined['Total Mileage'], 1)

In [57]:
df_combined.head()

Unnamed: 0,Week_Of_Year,distance,moving_time,Average Moving Speed (mph),Week,Long Run,Total Easy Minutes,Total Mileage,Training Week,Mileage Difference
0,31,15.5,8104,6.875156,31.0,8.0,190.0,27.0,1,-11.5
1,32,14.8,8366,6.356017,32.0,10.0,190.0,29.0,2,-14.2
2,33,19.5,10692,6.558834,33.0,10.0,210.0,31.0,3,-11.5
3,34,14.1,7854,6.478286,34.0,12.0,212.0,33.2,4,-19.1
4,35,15.2,8236,6.649236,35.0,14.0,226.5,36.65,5,-21.4


In [58]:
# plot weekly mileage
import plotly.express as px

fig0 = px.bar(
    df_combined, x = "Training Week", y = "distance",
    labels = dict(Week_Of_Year ="Training Week", distance ="Distance (miles) "),
    #hover_data=["start_date_local"],
    title = "CIM Weekly Mileage Log - Can you see which weeks I was injured?",
    width = 1000
)
# Hover over should be the day, not the first of the month
# fig0.add_hline(y = 20*8)
fig0.update_traces(marker_line_width = 2.5)
fig0.update_yaxes(range = [0, 50])
#fig0.update_layout(bargap = 0.8)
fig0.show() 

In [30]:
def combine_words(word_one, word_two):
    combined_word = f"{word_one} {word_two}"
    return combined_word

In [31]:
dog_description = combine_words("purple","poodle")
print(dog_description)

purple poodle


In [32]:
new_dog_description = combine_words(word_one = "blue", word_two = "heeler")
print(new_dog_description)

blue heeler


In [33]:
import random

random.seed(10)
print(random.random())

random.seed(10)
print(random.random())

random.seed(5)
print(random.random())

0.5714025946899135
0.5714025946899135
0.6229016948897019


In [34]:
def random_rgb():
    """Returns a list of random R, G, B color values.
    
    >>> random.seed(1)
    >>> random_rgb()
    [106, 184, 0]
    """
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    return [r, g, b]

In [35]:
random_rgb()

[183, 14, 238]

In [36]:
random.seed(1)
random_rgb()

[68, 32, 130]

In [37]:
random_rgb()

[60, 253, 230]

In [38]:
random.seed(1)
random_rgb()

[68, 32, 130]