In [1]:
pwd

'/Users/edwardterrell/Desktop/Training/kindle/strava'

In [9]:
import requests
import json
import time
import os
from pprint import pprint
import pandas as pd
pd.set_option('display.precision', 1)
from clean_convert import cleanup, convert_speed
from clean_convert import order_columns

from IPython.core.display import HTML
with open("style-table.css") as f1, open("style-notebook.css") as f2:
    css = f1.read() + f2.read()
HTML(f"<style>{css}</style>")

import sys
sys.executable

'/Users/edwardterrell/opt/anaconda3/envs/mlml/bin/python'

In [3]:
from token_manager import get_valid_access_token

access_token = get_valid_access_token()
print("Using access token:", access_token)

# Make an authenticated API call
response = requests.get(
    'https://www.strava.com/api/v3/athlete',
    headers={'Authorization': f'Bearer {access_token}'}
)
print(response.json())

Using access token: 8e5e6a3dd85ce84a4003a72c0a7b1b163bb2d747
{'id': 723727, 'username': 'eterrell', 'resource_state': 2, 'firstname': 'Ed', 'lastname': 'Terrell', 'bio': '', 'city': 'Boulder', 'state': 'CO', 'country': 'United States', 'sex': 'M', 'premium': True, 'summit': True, 'created_at': '2012-07-05T00:04:09Z', 'updated_at': '2025-07-09T12:38:59Z', 'badge_type_id': 1, 'weight': 61.235, 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/723727/20826329/1/medium.jpg', 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/723727/20826329/1/large.jpg', 'friend': None, 'follower': None}


In [4]:
# Get recent activities
response = requests.get(
    'https://www.strava.com/api/v3/athlete/activities',
    headers={'Authorization': f'Bearer {access_token}'}
)
activities = response.json()
#response.status_code

# Show the first 3 activities with ID and name
for activity in activities[:3]:
    print(f"ID: {activity['id']}, Name: {activity['name']}, Date: {activity['start_date']}")

ID: 15061307314, Name: Nederland exploration and exploitation, Date: 2025-07-09T14:17:28Z
ID: 15048399185, Name: Jamestown and eats at Lucky's Bakery, Date: 2025-07-08T14:44:02Z
ID: 15037283785, Name: Old Stage, Date: 2025-07-07T12:38:38Z


### Build strava_df from activities

In [19]:
# Build out strava_df with only the necessary columns
strava_df = pd.DataFrame(activities)
cols_needed = ['id','name', 'distance', 'moving_time','total_elevation_gain', 'sport_type',
          'average_speed', 'average_heartrate','max_heartrate', 'suffer_score','start_date']
strava_df = strava_df.loc[:,cols_needed]
strava_df.head()

Unnamed: 0,id,name,distance,moving_time,total_elevation_gain,sport_type,average_speed,average_heartrate,max_heartrate,suffer_score,start_date
0,15061307314,Nederland exploration and exploitation,54140.4,11638,993.7,Ride,4.7,109.1,131.0,106.0,2025-07-09T14:17:28Z
1,15048399185,Jamestown and eats at Lucky's Bakery,60380.4,9467,757.0,Ride,6.4,110.9,140.0,101.0,2025-07-08T14:44:02Z
2,15037283785,Old Stage,44640.8,7128,430.0,Ride,6.3,109.6,134.0,54.0,2025-07-07T12:38:38Z
3,15027982989,North dirt roads to Longmont bike paths and Ba...,59511.9,10021,361.6,Ride,5.9,102.5,129.0,47.0,2025-07-06T14:14:42Z
4,15017668877,Base of LickSkillet++,75306.7,13372,1087.6,Ride,5.6,109.9,141.0,117.0,2025-07-05T13:55:37Z


In [20]:
# drop activities where no heartrate data was collected
strava_df = strava_df.dropna(subset=['average_heartrate'])
#strava_df.head(2)

# process data with function cleanup (from clean_convert.py)
cleanup (strava_df)

# add column: converted_speed ride-mph  and run-min/mile  (from clean_convert.py)
strava_df['converted_speed'] = strava_df.apply(convert_speed, axis=1)

In [21]:
strava_df = order_columns(strava_df)
strava_df.head(2)

Unnamed: 0,id,name,date,sport,distance,time,speed,elev_gain,avg_HR,max_HR,suffer_score,time-minutes,average_speed,start_date
0,15061307314,Nederland exploration and exploitation,2025-07-09,Ride,33.6,3:13,10.4,3260.2,109.1,131.0,106.0,194.0,4.7,2025-07-09 14:17:28+00:00
1,15048399185,Jamestown and eats at Lucky's Bakery,2025-07-08,Ride,37.5,2:37,14.3,2483.6,110.9,140.0,101.0,157.8,6.4,2025-07-08 14:44:02+00:00


### Dataframe approach to create strava_zone_df

In [22]:
# Create a zone dataframe
cols = ['id', 'name', 'date', 'suffer_score']
data = strava_df.iloc[:,[0,1,2,-4]]
strava_zone_df = pd.DataFrame(data, columns=cols)
strava_zone_df.tail(1)

Unnamed: 0,id,name,date,suffer_score
29,14621663272,The Long Way Home,2025-05-28,230.0


In [23]:
# activity ID to inspect
# activity_id = 15048399185

def get_zones_for_id(activity_id):
    print(activity_id)
    url = f'https://www.strava.com/api/v3/activities/{activity_id}/zones'
    # Set up Authorization header and make request
    headers = {'Authorization': f'Bearer {access_token}'}
    response = requests.get(url, headers=headers)

    # Check if the request was successful
    if response.status_code == 200:
        zones = response.json()
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
    # create time_in_zones series
    zone_series  = pd.DataFrame(zones[0]).iloc[:,1]
    time_in_zones = zone_series.apply(lambda z: z['time']/60)
    time_in_zones.index = ['Zone1', 'Zone2', 'Zone3', 'Zone4', 'Zone5']
    time.sleep(.8)
    return (time_in_zones)

In [24]:
zone_df = strava_zone_df['id'].apply(get_zones_for_id)

# Add in summary zones: moderate and intense
zone_df['moderate'] = zone_df[['Zone1', 'Zone2']].sum(axis=1)
zone_df['intense'] = zone_df[['Zone3', 'Zone4', 'Zone5']].sum(axis=1)
# zone_df.head()
# concat both dfs
strava_zone_df = pd.concat([strava_zone_df, zone_df], axis=1)

In [26]:
strava_zone_df.head()

Unnamed: 0,id,name,date,suffer_score,Zone1,Zone2,Zone3,Zone4,Zone5,moderate,intense
0,15061307314,Nederland exploration and exploitation,2025-07-09,106.0,31.9,132.7,29.4,0.0,0.0,164.6,29.4
1,15048399185,Jamestown and eats at Lucky's Bakery,2025-07-08,101.0,21.4,109.4,26.1,0.8,0.0,130.8,26.9
2,15037283785,Old Stage,2025-07-07,54.0,2.6,104.8,11.4,0.0,0.0,107.3,11.4
3,15027982989,North dirt roads to Longmont bike paths and Ba...,2025-07-06,47.0,16.4,149.1,1.5,0.0,0.0,165.5,1.5
4,15017668877,Base of LickSkillet++,2025-07-05,117.0,18.0,195.0,8.5,1.4,0.0,212.9,9.9


In [27]:
strava_df.head(2)

Unnamed: 0,id,name,date,sport,distance,time,speed,elev_gain,avg_HR,max_HR,suffer_score,time-minutes,average_speed,start_date
0,15061307314,Nederland exploration and exploitation,2025-07-09,Ride,33.6,3:13,10.4,3260.2,109.1,131.0,106.0,194.0,4.7,2025-07-09 14:17:28+00:00
1,15048399185,Jamestown and eats at Lucky's Bakery,2025-07-08,Ride,37.5,2:37,14.3,2483.6,110.9,140.0,101.0,157.8,6.4,2025-07-08 14:44:02+00:00


In [28]:
strava_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 29 entries, 0 to 29
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   id             29 non-null     int64              
 1   name           29 non-null     object             
 2   date           29 non-null     object             
 3   sport          29 non-null     object             
 4   distance       29 non-null     float64            
 5   time           29 non-null     object             
 6   speed          29 non-null     object             
 7   elev_gain      29 non-null     float64            
 8   avg_HR         29 non-null     float64            
 9   max_HR         29 non-null     float64            
 10  suffer_score   29 non-null     float64            
 11  time-minutes   29 non-null     float64            
 12  average_speed  29 non-null     float64            
 13  start_date     29 non-null     datetime64[ns, UTC]
d