In [1]:
# Importing libraries
import requests
import json
import polars as pl
from datetime import datetime
from utils import load_tokens, save_tokens, refresh_token_if_needed
import plotnine
#from dataprep.eda import create_report

In [3]:
# Load token from persisted JSON
token = load_tokens()

# Initial authentication.
# Both credentials must come from the persisted token dict; the previous code
# passed the literal list ['client_secret'] as the secret.
# NOTE(review): refresh_token_if_needed lives in the local utils module and is
# assumed to return a usable access token - confirm against utils.py.
access_token = refresh_token_if_needed(token['client_id'], token['client_secret'])

# Request activities data from Strava API
activities_url = 'https://www.strava.com/api/v3/athlete/activities'
headers = {'Authorization': f'Bearer {access_token}'}

# Loop through pages to get all activities.
# The endpoint is paginated; an empty page marks the end of the data.
activity_records = []

page = 1
while True:
    response = requests.get(
        url=activities_url,
        headers=headers,
        params={'per_page': 100, 'page': page},
        timeout=30,  # do not let a stalled connection hang the kernel forever
    )
    # Fail loudly on 401/429/5xx. Without this an error payload (a non-empty
    # dict) would be appended to the records and the loop would never stop.
    response.raise_for_status()

    page_items = response.json()
    if not page_items:
        break
    activity_records.extend(page_items)
    page += 1

# Raw activities frame, one row per activity as returned by the API
activities = pl.DataFrame(activity_records)

No token refresh needed.


**Identify units for Strava data**
|     Metric            | Unit              |
|-----------------------|-------------------|
|distance               | meters            |
|moving_time            | seconds           |
|total_elevation_gain   | meters            |
|average_speed          | meters per second |
|max_speed              | meters per second |

In [4]:
# Data cleaning
# Raw columns that get replaced by a renamed or unit-converted copy
columns_to_drop = ['distance', 'moving_time', 'id', 'name', 'max_speed', 'average_speed']

# Target dtype for every column that needs coercing
curated_dtypes = {
    'activity_id': pl.Int64,
    'gear_id': pl.Categorical,
    'activity_name': pl.Categorical,
    'sport_type': pl.Categorical,
    'start_date_local': pl.Datetime,
}

activities_curated = (
    activities
    # Keep only the fields used in the rest of the notebook
    .select(
        [
            "id",
            "name",
            "distance",
            "moving_time",
            "total_elevation_gain",
            "sport_type",
            "start_date_local",
            "gear_id",
            "start_latlng",
            "end_latlng",
            "average_speed",
            "max_speed",
            "pr_count",
        ]
    )
    # Rename identifiers and convert units: m -> km, s -> h, m/s -> km/h
    .with_columns(
        pl.col('id').alias('activity_id'),
        pl.col('name').alias('activity_name'),
        (pl.col('distance')/1000).round(2).alias('distance_km'),
        (pl.col('moving_time')/60/60).round(2).alias('moving_time_hr'),
        (pl.col('average_speed')*3.6).round(2).alias('average_speed_km_per_hr'),
        (pl.col('max_speed')*3.6).round(2).alias('max_speed_km_per_hr'),
    )
    .drop(columns_to_drop)
    # Data coercing
    .with_columns(
        [pl.col(column).cast(dtype) for column, dtype in curated_dtypes.items()]
    )
)

In [5]:
# Inspect the raw (uncurated) API frame: row/column counts, then one line per
# column showing its dtype and first few values.
activities.glimpse()

Rows: 420
Columns: 51
$ resource_state                      <i64> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
$ athlete                       <struct[2]> {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}, {'id': 6996379, 'resource_state': 1}
$ name                                <str> 'Friday early ride!🏞️', 'Evening Walk', 'Quick 10!', 'Afternoon Walk', '20K along the canal 🏞️', 'Morning Ride', 'Morning Walk', 'Morning Run', 'Port Dalhousie sunrise', 'First ride in Nicasio +'
$ distance                            <f64> 10272.6, 1327.4, 10208.0, 3182.6, 20550.0, 15950.8, 1213.3, 963.7, 9518.0, 10092.8
$ moving_time                         <i64> 2327, 1218, 2072, 2950, 4511, 4284, 857, 391, 2569, 2297
$ elapsed_time

In [6]:
# Feature Engineering
# Hour of day taken from the local start timestamp
activities_curated = activities_curated.with_columns(
    pl.col('start_date_local').dt.hour().alias('activity_hour')
)

# Number of activities per start hour, ordered by hour
activity_counts = (
    activities_curated
    .group_by("activity_hour")
    .agg(activity_count=pl.len())
    .sort("activity_hour")
)

In [56]:
import plotly.express as px

# Activity counts per start hour, sport type and calendar year (years after 2020 only)
hourly_counts_by_year = (
    activities_curated
    .with_columns(pl.col('start_date_local').dt.year().alias('activity_year'))
    .group_by('activity_hour', 'sport_type', 'activity_year')
    .agg(activity_count=pl.len())
    .filter(pl.col('activity_year') > 2020)
    .sort("activity_year", "activity_hour")
)

# One facet per year, bars coloured by sport type; kept as the cell's last
# expression so the figure is rendered as the cell output.
px.bar(
    hourly_counts_by_year,
    x="activity_hour",
    y="activity_count",
    color="sport_type",
    orientation="v",
    facet_col='activity_year',
)
                                                               

In [None]:
import plotly.express as px

# Bar chart of the number of activities starting in each hour of the day.
# activity_counts only holds activity_hour and activity_count, so there is no
# column to colour by: the dangling `color=` argument (a syntax error) is
# removed and the single series takes its colour from color_discrete_sequence.
fig = px.bar(
    activity_counts,
    x="activity_hour",
    y="activity_count",
    color_discrete_sequence=['skyblue'],
    title="Activity Distribution by Hour of Day",
    labels={"activity_hour": "Hour of Day", "activity_count": "Number of Activities"},
    orientation='v',  # Explicitly set to vertical
)

fig.show()

In [28]:
import plotly.graph_objects as go
import pandas as pd  # Plotly often works seamlessly with Pandas DataFrames

# Step 1: data - hand the hourly counts to Plotly as a pandas DataFrame
activity_counts_pd = activity_counts.to_pandas()

# Step 2: figure - a single bar trace built with graph_objects for explicit control
hourly_bars = go.Bar(
    x=activity_counts_pd["activity_hour"],
    y=activity_counts_pd["activity_count"],
    marker_color='skyblue',  # optional bar colour
)
fig = go.Figure(data=[hourly_bars])

fig.show()

In [11]:
import dash
from dash import html, dcc
import plotly.graph_objects as go
import pandas as pd  # Plotly often works seamlessly with Pandas DataFrames

# Step 1: data - hand the hourly counts to Plotly as a pandas DataFrame
activity_counts_pd = activity_counts.to_pandas()

# One tick per hour of the day, labelled "00:00" ... "23:00"
hours_of_day = list(range(24))
hour_labels = [f"{hour:02d}:00" for hour in hours_of_day]

# Step 2: figure - a single bar trace built with graph_objects for explicit control
fig = go.Figure(
    data=[
        go.Bar(
            x=activity_counts_pd["activity_hour"],
            y=activity_counts_pd["activity_count"],
            marker_color='skyblue',  # optional bar colour
        )
    ]
)

# No template is passed, so Plotly's default theme applies.
# update_layout returns the figure, so keeping it as the last expression is
# what renders the chart in the cell output.
fig.update_layout(
    title_text="Activity Distribution by Hour of Day",
    xaxis_title="Hour of Day",
    yaxis_title="Number of Activities",
    xaxis_tickmode='array',  # force every hour to be shown
    xaxis_tickvals=hours_of_day,
    xaxis_ticktext=hour_labels,
)



In [12]:
# Keep only the rows whose sport type is 'Ride' (bike rides); the result is
# displayed as the cell output and not stored.
is_ride = pl.col('sport_type').eq('Ride')
activities_curated.filter(is_ride)

total_elevation_gain,sport_type,start_date_local,gear_id,start_latlng,end_latlng,pr_count,activity_id,activity_name,distance_km,moving_time_hr,average_speed_km_per_hr,max_speed_km_per_hr,activity_hour
f64,cat,datetime[μs],cat,list[f64],list[f64],i64,i64,cat,f64,f64,f64,f64,i8
22.9,"""Ride""",2025-05-16 15:01:52,"""b9747906""","[43.188027, -79.24132]","[43.185231, -79.239928]",3,14501529917,"""Friday early ride!🏞️""",10.27,0.65,15.89,36.86,15
11.6,"""Ride""",2025-05-12 20:05:20,"""b15965201""","[43.185688, -79.238113]","[43.185237, -79.239943]",4,14461863801,"""Quick 10!""",10.21,0.58,17.74,36.43,20
51.1,"""Ride""",2025-05-08 06:56:40,"""b15965201""","[43.186056, -79.237056]","[43.185262, -79.239991]",1,14415009088,"""20K along the canal 🏞️""",20.55,1.25,16.4,35.86,6
50.7,"""Ride""",2025-05-05 06:38:13,"""b15965201""","[43.185237, -79.240003]","[43.185216, -79.239966]",3,14384045503,"""Morning Ride""",15.95,1.19,13.4,32.47,6
22.8,"""Ride""",2025-05-01 05:58:17,"""b15965201""","[43.185268, -79.239972]","[43.185299, -79.239944]",1,14342156475,"""Port Dalhousie sunrise""",9.52,0.71,13.34,33.05,5
…,…,…,…,…,…,…,…,…,…,…,…,…,…
0.0,"""Ride""",2018-01-22 08:23:43,,"[47.676095, -122.125355]","[47.675937, -122.125343]",0,1369817866,"""It's cold""",0.53,0.05,10.9,22.68,8
10.3,"""Ride""",2015-10-18 17:29:41,,"[28.069222, -82.410976]","[28.069146, -82.376819]",0,425416333,"""Afternoon Ride""",3.36,0.25,13.6,24.12,17
0.0,"""Ride""",2015-08-20 18:01:43,,"[28.055122, -82.413474]","[28.054901, -82.449535]",0,376712576,"""Evening Ride""",4.02,0.43,9.33,34.92,18
11.1,"""Ride""",2015-08-15 19:05:53,,"[28.068955, -82.405798]","[28.056996, -82.415831]",0,369872656,"""Evening Ride""",3.47,0.33,10.59,23.4,19


In [13]:
# Which part of the day am I active the most?

In [14]:
# Which gear made me more active in the first 3 months of having it?

In [15]:
# Does weather correlate with my activities?