In [25]:
import jupyter_black
from datetime import datetime, timedelta
import pandas
import altair as alt
import numpy
import requests

jupyter_black.load()

MY_STRAVA_CLIENT_ID = 125463
MY_STRAVA_CLIENT_SECRET = "017fafbf5c5067490f1382fd3454c30cdb61f4b0"
DAYS_BACK = 180

In [38]:
def get_token():

    url = f"https://www.strava.com/oauth/authorize?client_id={MY_STRAVA_CLIENT_ID}&redirect_uri=http://127.0.0.1:5000/authorization&approval_prompt=auto&scope=read,activity:read&response_type=code"
    print(f"Go to {url}")
    response = input(f"Enter the full URL:")
    code = response.split("code=")[1].split("&")[0]

    params = {
        "client_id": MY_STRAVA_CLIENT_ID,
        "client_secret": MY_STRAVA_CLIENT_SECRET,
        "code": code,
        "grant_type": "authorization_code",
    }
    print(params)

    response = requests.post("https://www.strava.com/oauth/token", params=params)
    print(response.json())
    token = response.json()["access_token"]

    return token


token = get_token()
print(f"{token=}")

Go to https://www.strava.com/oauth/authorize?client_id=125463&redirect_uri=http://127.0.0.1:5000/authorization&approval_prompt=auto&scope=read,activity:read&response_type=code
{'client_id': 125463, 'client_secret': '017fafbf5c5067490f1382fd3454c30cdb61f4b0', 'code': 'b51369e750b30fb97d750065c806334c39482c26', 'grant_type': 'authorization_code'}
{'token_type': 'Bearer', 'expires_at': 1719613597, 'expires_in': 20388, 'refresh_token': '4f6bcdaaa996c6d058e6750cd3cc846f8fb8fe19', 'access_token': '6145a153a1f4201f89abda18d47f2a0f3aad99af', 'athlete': {'id': 44717295, 'username': None, 'resource_state': 2, 'firstname': 'Duarte', 'lastname': 'Carmo', 'bio': 'duarteocarmo.com', 'city': 'Copenhagen ', 'state': 'DK', 'country': None, 'sex': 'M', 'premium': False, 'summit': False, 'created_at': '2019-07-27T19:20:59Z', 'updated_at': '2024-06-19T21:11:01Z', 'badge_type_id': 0, 'weight': 72.0, 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/44717295/13646951/6/medium.jpg', '

In [39]:
def fetch_activities(access_token: str) -> list:
    url = f"https://www.strava.com/api/v3/athlete/activities"
    headers = {"Authorization": f"Bearer {access_token}"}
    after_timestamp = int((datetime.now() - timedelta(days=DAYS_BACK)).timestamp())

    per_page = 200
    page = 1
    all_activities = []

    while True:
        params = {"after": after_timestamp, "page": page, "per_page": per_page}
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()  # This will raise an HTTPError if the HTTP request returned an unsuccessful status code
        activities = response.json()

        if not activities:
            break

        all_activities.extend(activities)
        if len(activities) < per_page:
            break

        page += 1

    return all_activities


activities = fetch_activities(token)

In [40]:
running_activities = [act for act in activities if act["type"] == "Run"]

In [41]:
data = {
    "start_date": [act["start_date"] for act in running_activities],
    "distance_meters": [float(act["distance"]) for act in running_activities],
    "time_seconds": [act["moving_time"] for act in running_activities],
}


df = pandas.DataFrame(data)
df["start_date"] = pandas.to_datetime(df["start_date"])
df.set_index("start_date", inplace=True)
weekly_data = df.resample("W").sum()
weekly_data["distance_km"] = round(weekly_data["distance_meters"] / 1000, 1)


def get_tanda_value(km_per_week: int, pace_sec_per_km: int) -> float:
    marathon_distance = 42.195
    marathon_pace_sec_per_km = (
        17.1 + 140.0 * numpy.exp(-0.0053 * km_per_week) + 0.55 * pace_sec_per_km
    )
    total_marathon_time_secs = marathon_distance * marathon_pace_sec_per_km
    total_marathon_time_hours = total_marathon_time_secs / 3600
    return total_marathon_time_hours


def get_pace_for_distance(km_per_week: int, total_marathon_time_hours: float) -> float:
    marathon_distance = 42.195
    marathon_pace_sec_per_km = total_marathon_time_hours * 3600 / marathon_distance
    pace_sec_per_km = (
        marathon_pace_sec_per_km - 17.1 - 140.0 * numpy.exp(-0.0053 * km_per_week)
    ) / 0.55
    return pace_sec_per_km


def pretty_marathon_time(total_marathon_time_hours: float) -> str:
    hours = int(total_marathon_time_hours)
    minutes = int((total_marathon_time_hours - hours) * 60)
    seconds = int(((total_marathon_time_hours - hours) * 60 - minutes) * 60)
    if seconds >= 30:
        minutes += 1

    return f"{hours} hours {minutes} minutes"

In [42]:
daily_df = df.groupby(df.index.date).sum()
daily_df.index = pandas.to_datetime(daily_df.index)
daily_df.index.name = "date"


daily_df["tanda_day"] = get_tanda_value(
    daily_df["distance_meters"] / 1000 * 7,
    daily_df["time_seconds"] / (daily_df["distance_meters"] / 1000),
)
daily_df["tanda_day_pretty"] = pandas.to_datetime(daily_df["tanda_day"], unit="h")


daily_df = daily_df.reset_index()
daily_df["date"] = pandas.to_datetime(daily_df["date"])
daily_df.set_index("date", inplace=True)

num_weeks = 8
num_days = num_weeks * 7
rolling = f"{num_days}d"


daily_df["rolling_distance_meters"] = (
    daily_df["distance_meters"].rolling(window=rolling).sum()
)
daily_df["rolling_time_seconds"] = (
    daily_df["time_seconds"].rolling(window=rolling).sum()
)

daily_df["rolling_km_per_week"] = daily_df["rolling_distance_meters"] / 1000 / num_weeks
daily_df["rolling_pace_sec_per_km"] = (
    daily_df["rolling_time_seconds"] / daily_df["rolling_distance_meters"] * 1000
)

daily_df["rolling_tanda_day"] = get_tanda_value(
    daily_df["rolling_km_per_week"], daily_df["rolling_pace_sec_per_km"]
)

daily_df["rolling_tanda_day_pretty"] = pandas.to_datetime(
    daily_df["rolling_tanda_day"], unit="h"
)

daily_df["type_rolling"] = "Tanda (8 weeks)"
daily_df["type_daily"] = "Tanda (daily)"

daily_df["pace_sec_per_km"] = daily_df["time_seconds"] / (
    daily_df["distance_meters"] / 1000
)
daily_df["distance_km"] = daily_df["distance_meters"] / 1000

daily_df = daily_df.sort_values(by="date", ascending=True)
daily_df["date_factor"] = numpy.exp(numpy.linspace(0, 15, len(daily_df)))


daily_df["daily_pace_pretty"] = daily_df["pace_sec_per_km"].apply(
    lambda x: f"{int(x//60)}:{int(x%60):02d}"
)
daily_df["rolling_pace_pretty"] = daily_df["rolling_pace_sec_per_km"].apply(
    lambda x: f"{int(x//60)}:{int(x%60):02d}"
)

daily_df["rolling_km_per_week_daily_distance"] = daily_df["rolling_km_per_week"] / 7
daily_df["Latest run"] = "Latest run"

daily_df["pretty_rolling_tanda_day"] = daily_df["rolling_tanda_day"].apply(
    pretty_marathon_time
)

# current_form_marathon_time = pretty_marathon_time(
#     daily_df.loc[start_date:last_date]
#     .reset_index()
#     .sort_values("date")
#     .tail(1)["rolling_tanda_day"]
#     .item()
# )


def pace_tick_formatter(value):
    minutes = int(value // 60)
    seconds = int(value % 60)
    return f"{minutes}:{seconds:02d}"

In [43]:
x_scale = alt.Scale(padding=20)
upper_limit = weekly_data["distance_km"].max()
lower_limit = 0

x = alt.X("start_date:T", scale=alt.Scale(padding=20), title="Week")
y = alt.Y(
    "distance_km:Q",
    axis=alt.Axis(title="Kilometers"),
    scale=alt.Scale(domain=[lower_limit, upper_limit]),
)

line_chart = (
    alt.Chart(weekly_data.reset_index())
    .mark_area(
        line={"color": "#ff561b"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="white", offset=0),
                alt.GradientStop(color="#ff561b", offset=1),
            ],
            x1=1,
            x2=1,
            y1=1,
            y2=0,
        ),
    )
    .encode(
        x=x,
        y=y,
        tooltip=["start_date:T", "distance_km:Q"],
    )
    .properties(width=800, height=500, title="Running distance per week (km)")
)

points = (
    alt.Chart(weekly_data.reset_index())
    .mark_point(
        filled=True,
        fill="white",
        stroke="#ff561b",
        strokeWidth=2,
        size=50,
        shape="circle",
    )
    .encode(
        x=x,
        y=y,
        tooltip=["start_date:T", "distance_km:Q"],
    )
)


chart = line_chart + points

chart.show()

In [44]:
x = alt.X("date:T", title="Date", scale=alt.Scale(padding=20))

daily_line = (
    alt.Chart(daily_df.reset_index())
    .mark_point(shape="square", filled=True, opacity=0.5)
    .encode(
        x=x,
        y=alt.Y("hoursminutes(tanda_day_pretty):O", title="Tanda day"),
        color=alt.value("#d65de0"),
        tooltip=[
            alt.Tooltip("tanda_day_pretty", timeUnit="hoursminutes"),
            alt.Tooltip("date", timeUnit="yearmonthdate"),
        ],
    )
)
rolling_line = (
    alt.Chart(daily_df.reset_index())
    .mark_line(interpolate="basis")
    .encode(
        x=x,
        y=alt.Y(
            "hoursminutes(rolling_tanda_day_pretty):O", title="Tanda trend (8 weeks)"
        ),
        color=alt.value("#d65de0"),
        tooltip=[
            alt.Tooltip("rolling_tanda_day_pretty", timeUnit="hoursminutes"),
            alt.Tooltip("date", timeUnit="yearmonthdate"),
        ],
    )
)


(daily_line + rolling_line).properties(width=800, height=500, title="Tanda").show()

In [45]:
pace_ticks_values = list(range(240, 60 * 8, 15))
last_date = max(daily_df.index)
start_date = last_date - timedelta(days=56)


daily_df["shape"] = daily_df.index.to_series().apply(
    lambda x: "square" if x == last_date else "circle"
)


daily_line = (
    alt.Chart(daily_df.loc[start_date:last_date].reset_index().sort_values("date"))
    .mark_point(
        filled=True,
        # shape="triangle",
        size=90,
    )
    .encode(
        x=alt.X(
            "distance_km:Q",
            title="Daily Distance (km)",
            axis=alt.Axis(tickCount=int(25 // 5)),
        ),
        y=alt.Y(
            "pace_sec_per_km:Q",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(min(pace_ticks_values), max(pace_ticks_values)),
            ),
            title="Pace (mm:ss)",
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        tooltip=[
            alt.Tooltip("distance_km:Q", title="Distance (km)", format=".1f"),
            alt.Tooltip("date:T", title="Date"),
            alt.Tooltip("daily_pace_pretty:N", title="Pace (mm:ss/km)"),
        ],
        color=alt.Color(
            "date_factor:Q", scale=alt.Scale(scheme="lightgreyred"), legend=None
        ),
        shape=alt.Shape(
            "shape:N", scale=alt.Scale(range=["square", "circle"]), legend=None
        ),
    )
)


(daily_line).properties(
    width=800, height=500, title="Pace and daily distance"
).interactive().show()

In [46]:
marathon_times = []


for marathon_time in numpy.arange(2.5, 4.5, 0.25):
    for km_day in range(0, 50, 1):
        km_week = km_day * 7
        pace = get_pace_for_distance(km_week, marathon_time)
        formatted_pace = pace_tick_formatter(pace)
        marathon_times.append(
            {
                "marathon_time": marathon_time,
                "km_day": km_day,
                "km_week": km_week,
                "pace": pace,
                "formatted_pace": formatted_pace,
            }
        )

times_df = pandas.DataFrame(marathon_times)

In [47]:
daily_df.loc[start_date:last_date].distance_km.max()

np.float64(14.549299999999999)

In [48]:
marathon_times = (
    alt.Chart(times_df)
    .mark_line(interpolate="basis")
    .encode(
        x=alt.X(
            "km_day:Q",
            title="Daily Distance (km)",
            scale=alt.Scale(
                domain=[0, daily_df.loc[start_date:last_date].distance_km.max()]
            ),
        ),
        y=alt.Y(
            "pace:Q",
            title="Pace (mm:ss)",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(300, 405),
            ),
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        color=alt.Color(
            "marathon_time:N",
            title="Marathon Time",
            scale=alt.Scale(scheme="turbo"),
            legend=alt.Legend(
                labelExpr="floor(datum.value) + ':' + (floor((datum.value % 1) * 60) < 10 ? '0' : '') + floor((datum.value % 1) * 60)"
            ),
        ),
        tooltip=[
            alt.Tooltip("km_day:Q", title="Distance (km)"),
            alt.Tooltip("formatted_pace:N", title="Pace (mm:ss)"),
            alt.Tooltip("marathon_time:Q", title="Marathon time (hours)"),
        ],
    )
    .properties(width=800, height=500, title="Pace and daily distance")
    .interactive()
)

marathon_times.show()

In [49]:
daily_df["Legend"] = "Tanda Progression line"


tooltip = [
    alt.Tooltip(
        "rolling_km_per_week_daily_distance:Q",
        title="Distance (km)",
        format=".1f",
    ),
    alt.Tooltip(
        "rolling_pace_pretty:N",
        title="Pace (s/km)",
    ),
    alt.Tooltip("date:T", title="Date"),
    alt.Tooltip("pretty_rolling_tanda_day:N", title="Marathon Form"),
]


tanda_progression = (
    alt.Chart(daily_df.loc[start_date:last_date].reset_index().sort_values("date"))
    .mark_line(point=True, strokeWidth=2)
    .encode(
        x=alt.X(
            "rolling_km_per_week_daily_distance:Q",
            title="Daily Distance (km)",
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y(
            "rolling_pace_sec_per_km:Q",
            title="Pace (mm:ss)",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(min(pace_ticks_values), max(pace_ticks_values)),
            ),
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        tooltip=tooltip,
        order="date",
        color=alt.Color(
            "Legend:N",
            legend=alt.Legend(title=None),
            scale=alt.Scale(domain=["Tanda Progression line"], range=["#87f94c"]),
        ),
    )
    .properties(width=800, height=500, title="Pace and daily distance")
    .interactive()
)


current_form = (
    alt.Chart(
        daily_df.loc[start_date:last_date].reset_index().sort_values("date").tail(1)
    )
    .mark_point(filled=True, size=70)
    .encode(
        x=alt.X(
            "rolling_km_per_week_daily_distance:Q",
            title="Daily Distance (km)",
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y(
            "rolling_pace_sec_per_km:Q",
            title="Pace (mm:ss)",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(min(pace_ticks_values), max(pace_ticks_values)),
            ),
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        color=alt.value("#142ef5"),
        tooltip=tooltip,
    )
    .properties(width=800, height=500, title="Pace and daily distance")
    .interactive()
)

(tanda_progression + current_form).show()

In [50]:
(marathon_times + daily_line + tanda_progression + current_form).properties(
    title="Tanda and marathon pace",
).interactive().show()

In [51]:
df

Unnamed: 0_level_0,distance_meters,time_seconds
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01 13:29:04+00:00,5000.0,1802
2024-01-03 07:52:38+00:00,4943.9,1805
2024-01-04 07:51:24+00:00,6031.7,2037
2024-01-05 16:45:20+00:00,6051.1,2303
2024-01-07 11:47:59+00:00,7013.8,2629
...,...,...
2024-06-24 05:17:39+00:00,5056.8,1683
2024-06-25 05:31:22+00:00,8535.4,2862
2024-06-26 06:01:43+00:00,5041.1,1682
2024-06-27 08:12:31+00:00,5073.2,1676


In [53]:
pandas.DataFrame(running_activities)

Unnamed: 0,resource_state,athlete,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,...,display_hide_heartrate_option,elev_high,elev_low,upload_id,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed
0,2,"{'id': 44717295, 'resource_state': 1}",Afternoon Run,5000.0,1802,1809,0.0,Run,Run,,...,True,0.0,0.0,11208156238,11208156238,garmin_ping_312805347463,False,0,0,False
1,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,4943.9,1805,1813,0.0,Run,Run,,...,True,0.0,0.0,11219025750,11219025750,garmin_ping_313169555917,False,0,0,False
2,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,6031.7,2037,2043,55.0,Run,Run,,...,True,56.2,27.2,11225853869,11225853869,garmin_ping_313366553724,False,0,0,False
3,2,"{'id': 44717295, 'resource_state': 1}",Afternoon Run,6051.1,2303,2387,63.0,Run,Run,,...,True,57.8,29.6,11235506622,11235506622,garmin_ping_313631426522,False,0,0,False
4,2,"{'id': 44717295, 'resource_state': 1}",Lunch Run,7013.8,2629,2636,65.0,Run,Run,,...,True,59.8,30.4,11247788872,11247788872,garmin_ping_313955679705,False,0,0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,5056.8,1683,1687,7.0,Run,Run,,...,True,3.6,-1.8,12508608186,12508608186,garmin_ping_350025546569,False,0,0,False
120,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,8535.4,2862,2910,14.0,Run,Run,,...,True,4.0,-8.6,12517016955,12517016955,garmin_ping_350277996182,False,0,0,False
121,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,5041.1,1682,1682,2.0,Run,Run,,...,True,3.4,-1.8,12525664717,12525664717,garmin_ping_350518902319,False,0,0,False
122,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,5073.2,1676,1684,5.0,Run,Run,,...,True,4.0,-2.0,12534498280,12534498280,garmin_ping_350773260386,False,0,0,False


In [56]:
running_activities[0].keys()

dict_keys(['resource_state', 'athlete', 'name', 'distance', 'moving_time', 'elapsed_time', 'total_elevation_gain', 'type', 'sport_type', 'workout_type', 'id', 'start_date', 'start_date_local', 'timezone', 'utc_offset', 'location_city', 'location_state', 'location_country', 'achievement_count', 'kudos_count', 'comment_count', 'athlete_count', 'photo_count', 'map', 'trainer', 'commute', 'manual', 'private', 'visibility', 'flagged', 'gear_id', 'start_latlng', 'end_latlng', 'average_speed', 'max_speed', 'average_cadence', 'average_watts', 'max_watts', 'weighted_average_watts', 'kilojoules', 'device_watts', 'has_heartrate', 'average_heartrate', 'max_heartrate', 'heartrate_opt_out', 'display_hide_heartrate_option', 'elev_high', 'elev_low', 'upload_id', 'upload_id_str', 'external_id', 'from_accepted_tag', 'pr_count', 'total_photo_count', 'has_kudoed'])

In [68]:
cols = [
    # "resource_state",
    # "athlete",
    "name",
    "distance",
    "moving_time",
    # "elapsed_time",
    # "total_elevation_gain",
    # "type",
    "sport_type",
    # "workout_type",
    # "id",
    # "start_date",
    "start_date_local",
    # "timezone",
    # "utc_offset",
    # "location_city",
    # "location_state",
    # "location_country",
    # "achievement_count",
    # "kudos_count",
    # "comment_count",
    # "athlete_count",
    # "photo_count",
    # "map",
    # "trainer",
    # "commute",
    # "manual",
    # "private",
    # "visibility",
    # "flagged",
    # "gear_id",
    # "start_latlng",
    # "end_latlng",
    "average_speed",
    # "max_speed",
    # "average_cadence",
    # "average_watts",
    # "max_watts",
    # "weighted_average_watts",
    # "kilojoules",
    # "device_watts",
    # "has_heartrate",
    "average_heartrate",
    "max_heartrate",
    # "heartrate_opt_out",
    # "display_hide_heartrate_option",
    # "elev_high",
    # "elev_low",
    # "upload_id",
    # "upload_id_str",
    # "external_id",
    # "from_accepted_tag",
    # "pr_count",
    # "total_photo_count",
    # "has_kudoed",
]
pandas.DataFrame(running_activities)[cols]

Unnamed: 0,name,distance,moving_time,sport_type,start_date_local,average_speed,average_heartrate,max_heartrate
0,Afternoon Run,5000.0,1802,Run,2024-01-01T13:29:04Z,2.775,141.9,151.0
1,Morning Run,4943.9,1805,Run,2024-01-03T07:52:38Z,2.739,127.4,139.0
2,Morning Run,6031.7,2037,Run,2024-01-04T07:51:24Z,2.961,161.1,182.0
3,Afternoon Run,6051.1,2303,Run,2024-01-05T16:45:20Z,2.627,148.5,168.0
4,Lunch Run,7013.8,2629,Run,2024-01-07T11:47:59Z,2.668,151.3,166.0
...,...,...,...,...,...,...,...,...
119,Morning Run,5056.8,1683,Run,2024-06-24T07:17:39Z,3.005,140.0,149.0
120,Morning Run,8535.4,2862,Run,2024-06-25T07:31:22Z,2.982,144.7,171.0
121,Morning Run,5041.1,1682,Run,2024-06-26T08:01:43Z,2.997,145.1,156.0
122,Morning Run,5073.2,1676,Run,2024-06-27T10:12:31Z,3.027,148.9,162.0


In [69]:
pandas.DataFrame(running_activities)[cols].to_csv(
    "../static/dummy/running_activities.csv", index=False
)