In [2]:
# import secrets and tokens from config.py
from config import client_id, client_secret, refresh_token

import requests
import urllib3

# import packages for data manipulation
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

payload = {
    'client_id': client_id,
    'client_secret': client_secret,
    'refresh_token': refresh_token,
    'grant_type': "refresh_token",
    'f': 'json'
}

In [3]:
print("Requesting Token...\n")
res = requests.post(auth_url, data=payload, verify=False)
access_token = res.json()['access_token']
print("Access Token = {}\n".format(access_token))

header = {'Authorization': 'Bearer ' + access_token}
param = {'per_page': 200, 'page': 1}
my_dataset = requests.get(activites_url, headers=header, params=param).json()

Requesting Token...

Access Token = f02a6cb6a0500fd3802a2b8517ed3ff620870452



In [4]:
activities = pd.json_normalize(my_dataset)

In [5]:
activities.columns 

Index(['resource_state', 'name', 'distance', 'moving_time', 'elapsed_time',
       'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id',
       'start_date', 'start_date_local', 'timezone', 'utc_offset',
       'location_city', 'location_state', 'location_country',
       'achievement_count', 'kudos_count', 'comment_count', 'athlete_count',
       'photo_count', 'trainer', 'commute', 'manual', 'private', 'visibility',
       'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed',
       'max_speed', 'average_cadence', 'average_watts', 'max_watts',
       'weighted_average_watts', 'kilojoules', 'device_watts', 'has_heartrate',
       'average_heartrate', 'max_heartrate', 'heartrate_opt_out',
       'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id',
       'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count',
       'total_photo_count', 'has_kudoed', 'suffer_score', 'athlete.id',
       'athlete.resource_state', 'map.id', 'map.sum

In [6]:
#Break date into start time and date
activities['start_date_local'] = pd.to_datetime(activities['start_date_local'])
activities['start_time'] = activities['start_date_local'].dt.time
activities['start_date_local'] = activities['start_date_local'].dt.date
activities.head(5)

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,total_photo_count,has_kudoed,suffer_score,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_temp,start_time
0,2,Afternoon Run,5649.8,1872,1894,8.0,Run,Run,,9011734617,...,0,False,37.0,43537797,1,a9011734617,wuijFfwrdVTFTL|An@F?DELsAJa@BQAeA^sBf@}EFEB?hA...,2,,13:20:52
1,2,Morning Weight Training,0.0,2521,2521,0.0,WeightTraining,WeightTraining,,9009318868,...,0,False,9.0,43537797,1,a9009318868,,2,,06:34:46
2,2,Afternoon Run,1981.7,620,626,5.0,Run,Run,,9006270699,...,0,False,11.0,43537797,1,a9006270699,_pijFbzrdVh@TXHj@BPFf@@x@JbCGV?NA\?JCBOCeB?u@H...,2,,17:05:28
3,2,Evening Run,6481.3,2172,2186,9.0,Run,Run,,9000198422,...,0,False,46.0,43537797,1,a9000198422,epijF|erdV`Ab@XP\N\RVH\TV]^kCFe@@U@EH?jB~@ZJrB...,2,,18:40:02
4,2,Morning Weight Training,0.0,2892,2892,0.0,WeightTraining,WeightTraining,,8996767720,...,0,False,7.0,43537797,1,a8996767720,,2,,07:01:36


In [7]:
# import modules
import os
import time
import matplotlib.pyplot as plt
import folium
import polyline
import base64
from tqdm import tqdm

In [12]:
activities['map.summary_polyline'].head(3)

0                                                     
1    ovijFl`rdVL[LgATwA?EKK@_@TiANiA`@cCFKBAJ@TL^`@...
2                                                     
Name: map.summary_polyline, dtype: object

In [8]:
activities['map.polyline'] = activities['map.summary_polyline'].apply(polyline.decode)


In [14]:
activities['map.polyline'].head(3)


0                                                   []
1    [(38.55736, -121.50295), (38.55729, -121.50281...
2                                                   []
Name: map.polyline, dtype: object

In [24]:
# add decoded summary polylines
# activities['map.summary_polyline'] contains an encoded polyline
# .apply(polyline.decode) decodes that polyline into latitude and longitude
activities['map.polyline'] = activities['map.summary_polyline'].apply(polyline.decode)
# print("turkey")
#####################
# define function to get elevation data using the open-elevation API
def get_elevation(latitude, longitude):
    base_url = 'https://api.open-elevation.com/api/v1/lookup'
    payload = {'locations': f'{latitude},{longitude}'}
    r = requests.get(base_url, params=payload).json()['results'][0]
    return r['elevation']
# get elevation data
elevation_data = list()
for idx in tqdm(activities.index):
    activity = activities.loc[idx, :]
    elevation = [get_elevation(coord[0], coord[1]) for coord in activity['map.polyline']]
    elevation_data.append(elevation)

turkey


  1%|          | 1/94 [04:02<6:16:04, 242.63s/it]


KeyboardInterrupt: 

In [None]:
# add elevation data to dataframe
activities['map.elevation'] = elevation_data

In [136]:
# Make a copy of activities DataFrame for testing.
activities_copy = activities.copy()

In [137]:
# filter down to Ride, Run, and Swim activities
activities_copy = activities_copy.query("type == 'Ride' | type == 'Run' | type == 'Swim'")
print(len(activities_copy))

149


In [138]:
# convert data types
activities_copy.loc[:, 'start_date'] = pd.to_datetime(activities_copy['start_date']).dt.tz_localize(None)
activities_copy.loc[:, 'start_date_local'] = pd.to_datetime(activities_copy['start_date_local']).dt.tz_localize(None)
# convert values
activities_copy.loc[:, 'distance'] /= 1609.344 # convert from meters to miles
activities_copy.loc[:, 'average_speed'] *= 2.23693629 # convert from meters/second to miles/hour
activities_copy.loc[:, 'max_speed'] *= 2.23693629 # convert from meters/second to miles/hour
# set index
#activities_copy.set_index('start_date_local', inplace=True)


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`


In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)`



In [139]:
activities_copy.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,total_photo_count,has_kudoed,suffer_score,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_temp,start_time
0,2,Morning Run,5.023662,2664,2959,17.0,Run,Run,,8992309691,...,0,False,57.0,43537797,1,a8992309691,{uijFhwrdVXHz@b@z@Xn@^f@P`ALNDdAHHBd@Ar@B~@A\E...,2,,10:18:35
2,2,Morning Run,4.67675,2702,2707,40.0,Run,Run,,8984825996,...,0,False,44.0,43537797,1,a8984825996,wuijFjwrdVrChAp@ZH@|@X|@J~ADJEDGBWD{BH{AJ_@v@s...,2,,07:14:27
3,2,Morning Run,2.883473,1500,1520,5.0,Run,Run,,8978661487,...,0,False,41.0,43537797,1,a8978661487,mtijFjxrdV~@^r@RTNd@RvAXjBLbBGb@?d@Ed@?FI@EKoD...,2,,08:16:17
4,2,NFS Team Ride,20.051711,5542,6274,60.0,Ride,Ride,10.0,8972407676,...,2,False,26.0,43537797,1,a8972407676,kgjjF~hrdVkP{FcCs@SBGN]~BEHsBg@I@wBvLMX_@VIRq@...,2,,06:20:42
5,2,Sac State 5k,3.214664,1423,1429,10.0,Run,Run,0.0,8969699886,...,0,False,100.0,43537797,1,a8969699886,ymijFvebdV^I`AEbDLj@?PEFOAqFQgAECMCwA?eAHUFkB@...,2,,18:00:28


In [140]:
activities_copy.columns

Index(['resource_state', 'name', 'distance', 'moving_time', 'elapsed_time',
       'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id',
       'start_date', 'start_date_local', 'timezone', 'utc_offset',
       'location_city', 'location_state', 'location_country',
       'achievement_count', 'kudos_count', 'comment_count', 'athlete_count',
       'photo_count', 'trainer', 'commute', 'manual', 'private', 'visibility',
       'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed',
       'max_speed', 'average_cadence', 'average_watts', 'max_watts',
       'weighted_average_watts', 'kilojoules', 'device_watts', 'has_heartrate',
       'average_heartrate', 'max_heartrate', 'heartrate_opt_out',
       'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id',
       'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count',
       'total_photo_count', 'has_kudoed', 'suffer_score', 'athlete.id',
       'athlete.resource_state', 'map.id', 'map.sum

In [142]:
miles_per_activity = pd.DataFrame(activities_copy.groupby(['type'])['distance'].sum().reset_index())
miles_per_activity

Unnamed: 0,type,distance
0,Ride,317.111009
1,Run,358.743625
2,Swim,1.967883


In [52]:
type(miles_per_activity)

pandas.core.frame.DataFrame

In [10]:
# df.query('column_name.str.contains("abc")', engine='python')
activities_nfs_copy = activities.copy(deep = True)
# the line below is likely causing the "A value is trying to be set on a copy of a slice from a DataFrame." warning.
activities_nfs = activities_nfs_copy.query('name.str.contains("NFS")', engine = 'python')
len(activities_nfs)

4

In [144]:
activities_nfs.head()

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,id,...,total_photo_count,has_kudoed,suffer_score,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_temp,start_time
4,2,NFS Team Ride,20.051711,5542,6274,60.0,Ride,Ride,10.0,8972407676,...,2,False,26.0,43537797,1,a8972407676,kgjjF~hrdVkP{FcCs@SBGN]~BEHsBg@I@wBvLMX_@VIRq@...,2,,06:20:42
11,2,NFS Team Ride,17.588968,4883,5164,83.0,Ride,Ride,10.0,8925187613,...,0,False,21.0,43537797,1,a8925187613,unjjFnerdVsMgE_Ck@qDfSKPY@M^o@pEO`@iGz^}CdQc@|...,2,,06:28:09
25,2,NFS team ride,19.785888,5848,6666,74.0,Ride,Ride,10.0,8850777949,...,2,False,25.0,43537797,1,a8850777949,aijjFxgrdVaD_A{Q_Ge@IONkBpLkA~FcDdRwEbXgCbOS`A...,2,,15:19:54
104,2,First NFS team ride. Saw 10+ deer!,30.03174,8518,8962,84.6,Ride,Ride,10.0,8417951962,...,0,False,26.0,43537797,1,a8417951962,gtijFf{qdVCcAqNyGGk@zGmZhI}e@nA{DLmBm@kCxJ_m@K...,2,,05:49:02


In [157]:
activities_nfs.columns

Index(['resource_state', 'name', 'distance', 'moving_time', 'elapsed_time',
       'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id',
       'start_date', 'start_date_local', 'timezone', 'utc_offset',
       'location_city', 'location_state', 'location_country',
       'achievement_count', 'kudos_count', 'comment_count', 'athlete_count',
       'photo_count', 'trainer', 'commute', 'manual', 'private', 'visibility',
       'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed',
       'max_speed', 'average_cadence', 'average_watts', 'max_watts',
       'weighted_average_watts', 'kilojoules', 'device_watts', 'has_heartrate',
       'average_heartrate', 'max_heartrate', 'heartrate_opt_out',
       'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id',
       'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count',
       'total_photo_count', 'has_kudoed', 'suffer_score', 'athlete.id',
       'athlete.resource_state', 'map.id', 'map.sum

In [158]:
activities_nfs.loc[:, ('Month_Year')] = pd.to_datetime(activities.loc[:, ('start_date')]).dt.strftime('%Y-%m')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [149]:
# https://stackoverflow.com/questions/25146121/extracting-just-month-and-year-separately-from-pandas-datetime-column
# create a column that extracts month and year from the activity
# df['yyyy-mm'] = pd.to_datetime(df['ArrivalDate']).dt.strftime('%Y-%m')
activities_nfs['Month_Year'] = pd.to_datetime(activities_nfs['start_date']).dt.strftime('%Y-%m')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [159]:
activities_nfs.Month_Year

4      2023-04
11     2023-04
25     2023-04
104    2023-01
Name: Month_Year, dtype: object

In [151]:
activities_nfs.start_date_local

4     2023-04-28
11    2023-04-20
25    2023-04-07
104   2023-01-19
Name: start_date_local, dtype: datetime64[ns]

In [78]:
import plotly.express as px

In [161]:
fig0 = px.bar(
    activities_nfs, x = "Month_Year", y = "distance",
    labels = dict(Month_Year ="Month and Year ", distance ="Distance (miles) "),
    hover_data=["start_date_local"],
    title = "NFS Team Rides",
    width = 1000
)
# Hover over should be the day, not the first of the month
fig0.add_hline(y = 20*8)
fig0.update_traces(marker_line_width = 2.5)
fig0.update_yaxes(range = [0, 300])
fig0.update_layout(bargap = 0.8)
fig0.show() 

In [11]:
# df.query('column_name.str.contains("abc")', engine='python')
activities_nfs_copy = activities.copy(deep = True)
# the line below is likely causing the "A value is trying to be set on a copy of a slice from a DataFrame." warning.
activities_nfs = activities_nfs_copy.query('name.str.contains("NFS")', engine = 'python')
len(activities_nfs)

4

In [30]:
# add decoded summary polylines
# activities['map.summary_polyline'] contains an encoded polyline
# .apply(polyline.decode) decodes that polyline into latitude and longitude
activities_nfs['map.polyline'] = activities_nfs['map.summary_polyline'].apply(polyline.decode)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  activities_nfs['map.polyline'] = activities_nfs['map.summary_polyline'].apply(polyline.decode)


In [31]:
activities_nfs['map.polyline']

9      [(38.56006, -121.50432), (38.56284, -121.50306...
16     [(38.56123, -121.50376), (38.56357, -121.50276...
30     [(38.56033, -121.50413), (38.56114, -121.50381...
109    [(38.557, -121.50212), (38.55702, -121.50178),...
Name: map.polyline, dtype: object

In [38]:
# select one activity
activities_nfs_map = activities_nfs.iloc[0, :] # first activity (most recent)
# plot ride on map
# centroid = [
#     np.mean([coord[0] for coord in my_ride['map.polyline'][0]]), 
#     np.mean([coord[1] for coord in my_ride['map.polyline'][0]])
# ]
m = folium.Map(location=activities_nfs['map.polyline'][9][0], zoom_start=14)
folium.PolyLine(activities_nfs['map.polyline'], color='red').add_to(m)

activities_nfs_map = activities_nfs.iloc[1, :] # first activity (most recent)
m = folium.Map(location=activities_nfs['map.polyline'][16][0], zoom_start=14)
folium.PolyLine(activities_nfs['map.polyline'], color='blue').add_to(m)
m = folium.Map(location=activities_nfs['map.polyline'][30][0], zoom_start=14)
m = folium.Map(location=activities_nfs['map.polyline'][109][0], zoom_start=14)
folium.PolyLine(activities_nfs['map.polyline'], color='red').add_to(m)


display(m)

In [11]:
# Set monthly goals in miles per activity
practice_sessions = 8

# 20 mile ride per session
ride_goal = 20 * practice_sessions

# 4 mile run per session
run_goal = 4 * practice_sessions

# half-mile swim per session
swim_goal = 0.5 * practice_sessions
print(ride_goal, run_goal, swim_goal)

160 32 4.0


In [12]:
dif_ride_miles = ride_goal - miles_per_activity['distance'][0]
dif_ride_miles

-13.94907490256901

In [13]:
print("You need to ride " + str(dif_ride_miles) + " to meet your goal this month!")

You need to ride -13.94907490256901 to meet your goal this month!


In [None]:
# filter activities based on start time per month???

In [None]:
# next step: Refer to Pamela Fox's best practices video.
# not for linting, but for managing git

In [15]:
import plotly.express as px

In [9]:
activities_copy.average_heartrate.head(5)

1    140.0
4    153.2
5    143.3
6    160.8
8    159.3
Name: average_heartrate, dtype: float64

In [16]:
fig1 = px.box(
    activities_copy, x = "distance", 
    title="1. Boxplot: Distance by Activity Type", 
    color="sport_type", 
    points="all"
)
fig1.show() 