In [1]:
import os
import sys

import pandas as pd
import requests

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.ingestion.auth import strava_auth  # noqa: E402

In [2]:
athlete_url = 'https://www.strava.com/api/v3/athlete'
access_token = strava_auth.get_access_token()
headers = {'Authorization': f'Bearer {access_token}'}

137549 1184ee4b53c17a4caec26f1fe6b1f0169d1136a4 c7b0516c8801fde1965ea36e40428b81941ba448
Refreshing Strava access token...
Successfully refreshed access token.


# General athlete information

In [3]:
athlete_url = 'https://www.strava.com/api/v3/athlete'
response = requests.get(athlete_url, headers=headers, timeout=10)
response.raise_for_status()
data = response.json()
data

{'id': 133094316,
 'username': None,
 'resource_state': 2,
 'firstname': 'Xaver',
 'lastname': 'Heuser',
 'bio': '',
 'city': 'Troisdorf',
 'state': 'Nordrhein-Westfalen',
 'country': 'Germany',
 'sex': 'M',
 'premium': False,
 'summit': False,
 'created_at': '2024-03-10T13:27:59Z',
 'updated_at': '2025-08-01T13:07:19Z',
 'badge_type_id': 0,
 'weight': None,
 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/133094316/33608972/1/medium.jpg',
 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/133094316/33608972/1/large.jpg',
 'friend': None,
 'follower': None}

In [4]:
data

{'id': 133094316,
 'username': None,
 'resource_state': 2,
 'firstname': 'Xaver',
 'lastname': 'Heuser',
 'bio': '',
 'city': 'Troisdorf',
 'state': 'Nordrhein-Westfalen',
 'country': 'Germany',
 'sex': 'M',
 'premium': False,
 'summit': False,
 'created_at': '2024-03-10T13:27:59Z',
 'updated_at': '2025-08-01T13:07:19Z',
 'badge_type_id': 0,
 'weight': None,
 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/133094316/33608972/1/medium.jpg',
 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/133094316/33608972/1/large.jpg',
 'friend': None,
 'follower': None}

In [5]:
athlete_id = data['id']
print(f'Athlete ID: {athlete_id}')

Athlete ID: 133094316


In [6]:
# Method using the strava extractor class
from src.ingestion.extractors.strava_extractor import StravaExtractor

extractor = StravaExtractor(access_token)
athlete_info = extractor.fetch_athlete_info()
athlete_info

Start fetching athlete information.


StravaAthleteInfo(id=133094316, username=None, resource_state=2, firstname='Xaver', lastname='Heuser', bio='', city='Troisdorf', state='Nordrhein-Westfalen', country='Germany', sex='M', premium=False, summit=False, created_at='2024-03-10T13:27:59Z', updated_at='2025-08-01T13:07:19Z', badge_type_id=0, weight=None, profile_medium='https://dgalywyr863hv.cloudfront.net/pictures/athletes/133094316/33608972/1/medium.jpg', profile='https://dgalywyr863hv.cloudfront.net/pictures/athletes/133094316/33608972/1/large.jpg', friend=None, follower=None)

In [7]:
athlete_info.id

133094316

In [8]:
df_athlete = pd.DataFrame([athlete_info.model_dump()])
df_athlete

Unnamed: 0,id,username,resource_state,firstname,lastname,bio,city,state,country,sex,premium,summit,created_at,updated_at,badge_type_id,weight,profile_medium,profile,friend,follower
0,133094316,,2,Xaver,Heuser,,Troisdorf,Nordrhein-Westfalen,Germany,M,False,False,2024-03-10T13:27:59Z,2025-08-01T13:07:19Z,0,,https://dgalywyr863hv.cloudfront.net/pictures/...,https://dgalywyr863hv.cloudfront.net/pictures/...,,


# Athlete stats

In [9]:
athlete_stats_url = f'https://www.strava.com/api/v3/athletes/{athlete_id}/stats'
r = requests.get(athlete_stats_url, headers=headers, timeout=10)
r.raise_for_status()

In [10]:
athlete_stats = r.json()
athlete_stats

{'biggest_ride_distance': 100376.0,
 'biggest_climb_elevation_gain': 284.2,
 'recent_ride_totals': {'count': 3,
  'distance': 57342.200000000004,
  'moving_time': 7987,
  'elapsed_time': 8110,
  'elevation_gain': 491.0,
  'achievement_count': 0},
 'all_ride_totals': {'count': 85,
  'distance': 2048763.2999999993,
  'moving_time': 312603,
  'elapsed_time': 330536,
  'elevation_gain': 17977.3},
 'recent_run_totals': {'count': 19,
  'distance': 191318.90000000002,
  'moving_time': 59643,
  'elapsed_time': 60024,
  'elevation_gain': 1207.0,
  'achievement_count': 0},
 'all_run_totals': {'count': 189,
  'distance': 1854962.7000000004,
  'moving_time': 567103,
  'elapsed_time': 572435,
  'elevation_gain': 13221.2},
 'recent_swim_totals': {'count': 0,
  'distance': 0,
  'moving_time': 0,
  'elapsed_time': 0,
  'elevation_gain': 0,
  'achievement_count': 0},
 'all_swim_totals': {'count': 6,
  'distance': 3017.5,
  'moving_time': 3462,
  'elapsed_time': 7108,
  'elevation_gain': 0.0},
 'ytd_rid

In [11]:
df_athlete_stats = pd.DataFrame([athlete_stats])
df_athlete_stats

Unnamed: 0,biggest_ride_distance,biggest_climb_elevation_gain,recent_ride_totals,all_ride_totals,recent_run_totals,all_run_totals,recent_swim_totals,all_swim_totals,ytd_ride_totals,ytd_run_totals,ytd_swim_totals
0,100376.0,284.2,"{'count': 3, 'distance': 57342.200000000004, '...","{'count': 85, 'distance': 2048763.2999999993, ...","{'count': 19, 'distance': 191318.90000000002, ...","{'count': 189, 'distance': 1854962.7000000004,...","{'count': 0, 'distance': 0, 'moving_time': 0, ...","{'count': 6, 'distance': 3017.5, 'moving_time'...","{'count': 77, 'distance': 1930280, 'moving_tim...","{'count': 176, 'distance': 1756941, 'moving_ti...","{'count': 6, 'distance': 3018, 'moving_time': ..."


In [12]:
from src.ingestion.extractors.strava_extractor import StravaExtractor
extractor = StravaExtractor(access_token)
athlete_stats = extractor.fetch_athlete_stats(athlete_id)

Start fetching athlete statistics.


In [13]:
athlete_stats

StravaAthleteStats(biggest_ride_distance=100376.0, biggest_climb_elevation_gain=284.2, recent_ride_totals=StravaAthleteStatsRecentTotals(count=3, distance=57342.200000000004, moving_time=7987, elapsed_time=8110, elevation_gain=491.0, achievement_count=0), recent_run_totals=StravaAthleteStatsRecentTotals(count=19, distance=191318.90000000002, moving_time=59643, elapsed_time=60024, elevation_gain=1207.0, achievement_count=0), recent_swim_totals=StravaAthleteStatsRecentTotals(count=0, distance=0.0, moving_time=0, elapsed_time=0, elevation_gain=0.0, achievement_count=0), all_ride_totals=StravaAthleteStatsAllTotals(count=85, distance=2048763.2999999993, moving_time=312603, elapsed_time=330536, elevation_gain=17977.3), all_run_totals=StravaAthleteStatsAllTotals(count=189, distance=1854962.7000000004, moving_time=567103, elapsed_time=572435, elevation_gain=13221.2), all_swim_totals=StravaAthleteStatsAllTotals(count=6, distance=3017.5, moving_time=3462, elapsed_time=7108, elevation_gain=0.0), 

In [16]:
df_athlete_stats_new = pd.DataFrame([athlete_stats.model_dump()])
df_athlete_stats_new

Unnamed: 0,biggest_ride_distance,biggest_climb_elevation_gain,recent_ride_totals,recent_run_totals,recent_swim_totals,all_ride_totals,all_run_totals,all_swim_totals,ytd_ride_totals,ytd_run_totals,ytd_swim_totals
0,100376.0,284.2,"{'count': 3, 'distance': 57342.200000000004, '...","{'count': 19, 'distance': 191318.90000000002, ...","{'count': 0, 'distance': 0.0, 'moving_time': 0...","{'count': 85, 'distance': 2048763.2999999993, ...","{'count': 189, 'distance': 1854962.7000000004,...","{'count': 6, 'distance': 3017.5, 'moving_time'...","{'count': 77, 'distance': 1930280.0, 'moving_t...","{'count': 176, 'distance': 1756941.0, 'moving_...","{'count': 6, 'distance': 3018.0, 'moving_time'..."


# Gears

- To get the id of a gear one have to check it manually or iterate over all activities and create a list
- Since they don't change that often, it makes sense to manually create a list with all gear ids

In [None]:
gear_id = 'g20984891'
gear_2 = 'b16370167'

In [None]:
gear_url = f'https://www.strava.com/api/v3/gear/{gear_2}'
r = requests.get(gear_url, headers=headers, timeout=10)
gear = r.json()
gear

# Activity Details

## Streams

In [None]:
activity_id = '16222086377'

In [None]:
stream_url = f'https://www.strava.com/api/v3/activities/{activity_id}/streams?heartrate'
r = requests.get(stream_url, headers=headers, timeout=10)
stream = r.json()

In [None]:
import requests

activity_id = '16222086377'
stream_url = f'https://www.strava.com/api/v3/activities/{activity_id}/streams'

params = {
    'keys': 'time,heartrate,altitude,velocity_smooth',  # or use 'types'
    'key_by_type': 'true',
}

r = requests.get(stream_url, headers=headers, params=params, timeout=10)
r.raise_for_status()
stream = r.json()

print(
    stream.keys()
)  # e.g. dict_keys(['time', 'heartrate', 'altitude', 'velocity_smooth'])

In [None]:
import pandas as pd

print(pd.__version__)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import requests

stream_url = f'https://www.strava.com/api/v3/activities/{activity_id}/streams'

params = {'keys': 'time,heartrate', 'key_by_type': 'true'}

r = requests.get(stream_url, headers=headers, params=params, timeout=10)
r.raise_for_status()
stream = r.json()

# --- Convert to DataFrame ---
df = pd.DataFrame({
    'time_s': stream['time']['data'],
    'heart_rate': stream['heartrate']['data'],
})

# Optional: convert seconds to minutes
df['time_min'] = df['time_s'] / 60

# --- Plot ---
plt.figure(figsize=(10, 5))
plt.plot(df['time_min'], df['heart_rate'], linewidth=1.5)
plt.title(f'Heart Rate Stream for Activity {activity_id}')
plt.xlabel('Time (minutes)')
plt.ylabel('Heart Rate (bpm)')
plt.grid(True)
plt.tight_layout()
plt.show()

## Comments

In [None]:
comment_url = f'https://www.strava.com/api/v3/activities/{activity_id}/comments'
r = requests.get(comment_url, headers=headers, timeout=10)
r.raise_for_status()
comments = r.json()
comments

## Kudos

In [None]:
kudos_url = f'https://www.strava.com/api/v3/activities/{activity_id}/kudos'
r = requests.get(kudos_url, headers=headers, timeout=10)
r.raise_for_status()
kudos = r.json()
kudos

## Laps

In [None]:
laps_url = f'https://www.strava.com/api/v3/activities/{activity_id}/laps'
r = requests.get(laps_url, headers=headers, timeout=10)
r.raise_for_status()
laps = r.json()
laps

## Zones

In [None]:
zones_url = f'https://www.strava.com/api/v3/activities/{activity_id}/zones'
r = requests.get(zones_url, headers=headers, timeout=10)
r.raise_for_status()
zones = r.json()
zones