In [None]:
import pandas as pd
import requests
import urllib3
import os
from dotenv import load_dotenv

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Load environment variables from .env file
load_dotenv()

## Getting data from Strava's API

In [None]:
####### CREDENTIALS #######

# The activity parsing routine below comes directly from this very useful code --> https://github.com/franchyze923/Code_From_Tutorials/tree/master/Strava_Api.
# Modified to avoid leaking credentials --> they are read from a .env file

####### What should I put in my .env file? ########

# CLIENT_ID="YourID"
# CLIENT_SECRET='YourSecretToken'
# REFRESH_TOKEN='YourRefreshToken'

####### How to get this ? ########

# Again, use the excellent work of the aforementioned author --> https://towardsdatascience.com/using-the-strava-api-and-pandas-to-explore-your-activity-data-d94901d9bfde/



auth_url = "https://www.strava.com/oauth/token"
activites_url = "https://www.strava.com/api/v3/athlete/activities"

# Seconds to wait for a response before giving up, so a stalled connection cannot hang the kernel.
REQUEST_TIMEOUT = 30
# Number of activities requested per page.
PER_PAGE = 200

# Fail early with a readable message when the .env file is incomplete, instead of
# sending a token request with missing fields.
missing_vars = [name for name in ('CLIENT_ID', 'CLIENT_SECRET', 'REFRESH_TOKEN') if not os.getenv(name)]
if missing_vars:
    raise RuntimeError(f"Missing environment variable(s): {', '.join(missing_vars)}. Check your .env file.")

payload = {
    'client_id': os.getenv('CLIENT_ID'),
    'client_secret': os.getenv('CLIENT_SECRET'),
    'refresh_token': os.getenv('REFRESH_TOKEN'),
    'grant_type': "refresh_token",
    'f': 'json'
}

print("Requesting Token...\n")
# Certificate verification stays enabled (the requests default): this call carries the
# client secret and the refresh token, so it must not be sent over an unverified connection.
res = requests.post(auth_url, data=payload, timeout=REQUEST_TIMEOUT)
# Surface authentication failures here rather than as a KeyError on 'access_token'.
res.raise_for_status()
access_token = res.json()['access_token']
print("Access Token Granted!\n")

header = {'Authorization': 'Bearer ' + access_token}

# Pages are requested one after another, starting at page 1, until an empty page comes back.
request_page_num = 1
all_activities = []

while True:
    param = {'per_page': PER_PAGE, 'page': request_page_num}
    response = requests.get(activites_url, headers=header, params=param, timeout=REQUEST_TIMEOUT)
    # An error response carries a JSON object rather than a list of activities;
    # raise instead of mixing it into the collected results.
    response.raise_for_status()
    my_dataset = response.json()

    # An empty page means there is no more data left, e.g. with 1000 activities
    # the 6th request returns nothing.
    if not my_dataset:
        print("breaking out of while loop because the response is zero, which means there must be no more activities")
        break

    # extend() also covers the first page, since all_activities starts as an empty list.
    all_activities.extend(my_dataset)
    request_page_num += 1



## Storing data

In [None]:
# Wrap the collected activity records in a DataFrame so later cells can work column-wise
all_activities = pd.DataFrame(data=all_activities)

In [None]:
# Collect the distinct values found in the sport_type column
sport_types = all_activities['sport_type'].unique()
sport_types

## Exploring data

In [None]:
### Let's plot


import plotly.graph_objects as go
import plotly.express as px
import polyline

# One palette colour per sport type; refuse to plot if the palette is too small
# rather than silently leaving some sport types without a colour.
colors = px.colors.qualitative.Alphabet
if len(sport_types) > len(colors):
    raise ValueError("Not enough colors in the chosen palette for the number of sport types.")

custom_color_mapping = dict(zip(sport_types, colors))


fig = go.Figure()

# Only the 'map' and 'sport_type' columns are needed, so iterate over those two
# directly instead of materialising every row with iterrows().
for activity_map, sport_type in zip(all_activities['map'], all_activities['sport_type']):
    # Activities without a recorded route have no usable polyline (missing map entry,
    # None, or empty string): skip them instead of failing in the decoder or
    # adding an empty trace.
    polyline_code = activity_map.get('summary_polyline') if isinstance(activity_map, dict) else None
    if not polyline_code:
        continue

    # decode() yields (latitude, longitude) pairs at precision 5.
    coordinates = polyline.decode(polyline_code, 5)
    if not coordinates:
        continue
    latitudes = [coordinate[0] for coordinate in coordinates]
    longitudes = [coordinate[1] for coordinate in coordinates]

    color = custom_color_mapping[sport_type]

    # Add trace for each polyline
    fig.add_trace(go.Scattermap(
        lon=longitudes,
        lat=latitudes,
        mode="lines",
        line=dict(width=1, color=color),
        opacity=0.7,
        showlegend=False
    ))


# Update layout to enable the map display
fig.update_layout(
    height=800,
    margin=dict(l=0, r=0, t=0, b=0),
    map_style='carto-darkmatter',
)

# Show figure
fig.show()




In [None]:
# Preview the first rows only: displaying the whole frame stores the full personal
# activity table in the notebook output.
all_activities.head()

In [None]:
import plotly.graph_objects as go
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.pyplot as plt


# Derive calendar fields from the activity start timestamp.
all_activities['time'] = pd.to_datetime(all_activities.start_date)

all_activities['year'] = all_activities['time'].dt.year
all_activities['day'] = all_activities['time'].dt.day


sports = all_activities['type'].unique()
years = all_activities['year'].unique()


# One colour per year. Years are sorted so the mapping does not depend on row order,
# and the index wraps around the palette: an out-of-range integer index would
# otherwise clip to the last colour, making every extra year look the same.
colormap = plt.get_cmap('tab10')
year_colors = {year: colormap(i % colormap.N) for i, year in enumerate(sorted(years))}


# squeeze=False always returns a 2-D array of axes, so a single sport needs no special case.
fig, axes = plt.subplots(len(sports), 1, figsize=(10, 5 * len(sports)), sharex=True, squeeze=False)
axes = axes[:, 0]

for ax, sport in zip(axes, sports):
    sport_activities = all_activities[all_activities['type'] == sport]
    for year, year_data in sport_activities.groupby('year'):
        year_data = year_data.sort_values('time')
        # assign() builds the derived columns on a new frame, leaving the grouped data untouched.
        # NOTE(review): 'distance' is plotted as-is, with no unit on the axis label;
        # confirm the unit delivered by the API and label it.
        year_data = year_data.assign(
            cumulative_distance=year_data['distance'].cumsum(),
            day_of_year=year_data['time'].dt.dayofyear,
        )

        ax.plot(year_data['day_of_year'], year_data['cumulative_distance'],
                label=f"{year}", color=year_colors[year], marker='o')

    ax.set_title(f"Cumulative Distance for {sport}")
    ax.set_ylabel("Cumulative Distance")
    ax.legend()
    ax.grid(True)

# Set common x-label on the bottom subplot (the x-axis is shared by all subplots)
axes[-1].set_xlabel("Day of the Year")
fig.suptitle("Cumulative Distance per Sport per Year")
plt.show()


In [None]:
# List the column labels available on the activities DataFrame
all_activities.columns