In [1]:
import jupyter_black
from datetime import datetime, timedelta
import pandas
import altair as alt
import numpy
import requests

jupyter_black.load()

MY_STRAVA_CLIENT_ID = 125463
MY_STRAVA_CLIENT_SECRET = "017fafbf5c5067490f1382fd3454c30cdb61f4b0"
DAYS_BACK = 180

In [2]:
def get_token():

    url = f"https://www.strava.com/oauth/authorize?client_id={MY_STRAVA_CLIENT_ID}&redirect_uri=http://127.0.0.1:5000/authorization&approval_prompt=auto&scope=read,activity:read&response_type=code"
    print(f"Go to {url}")
    response = input(f"Enter the full URL:")
    code = response.split("code=")[1].split("&")[0]

    params = {
        "client_id": MY_STRAVA_CLIENT_ID,
        "client_secret": MY_STRAVA_CLIENT_SECRET,
        "code": code,
        "grant_type": "authorization_code",
    }
    print(params)

    response = requests.post("https://www.strava.com/oauth/token", params=params)
    print(response.json())
    token = response.json()["access_token"]

    return token


token = get_token()
print(f"{token=}")

Go to https://www.strava.com/oauth/authorize?client_id=125463&redirect_uri=http://127.0.0.1:5000/authorization&approval_prompt=auto&scope=read,activity:read&response_type=code
{'client_id': 125463, 'client_secret': '017fafbf5c5067490f1382fd3454c30cdb61f4b0', 'code': '06c24dcda080dc4b24bd5521b09f359a320bea1f', 'grant_type': 'authorization_code'}
{'token_type': 'Bearer', 'expires_at': 1720994488, 'expires_in': 20670, 'refresh_token': '4f6bcdaaa996c6d058e6750cd3cc846f8fb8fe19', 'access_token': '513b9f942a9280389c96d79c98523f22233329fa', 'athlete': {'id': 44717295, 'username': None, 'resource_state': 2, 'firstname': 'Duarte', 'lastname': 'Carmo', 'bio': 'duarteocarmo.com', 'city': 'Copenhagen ', 'state': 'DK', 'country': None, 'sex': 'M', 'premium': False, 'summit': False, 'created_at': '2019-07-27T19:20:59Z', 'updated_at': '2024-07-14T16:01:27Z', 'badge_type_id': 0, 'weight': 72.0, 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/44717295/13646951/6/medium.jpg', '

In [32]:
def fetch_activities(access_token: str) -> list:
    url = f"https://www.strava.com/api/v3/athlete/activities"
    headers = {"Authorization": f"Bearer {access_token}"}
    after_timestamp = int((datetime.now() - timedelta(days=DAYS_BACK)).timestamp())

    per_page = 200
    page = 1
    all_activities = []

    while True:
        params = {"after": after_timestamp, "page": page, "per_page": per_page}
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()  # This will raise an HTTPError if the HTTP request returned an unsuccessful status code
        activities = response.json()

        if not activities:
            break

        all_activities.extend(activities)
        if len(activities) < per_page:
            break

        page += 1

    return all_activities


activities = fetch_activities(token)

In [33]:
running_activities = [act for act in activities if act["type"] == "Run"]

In [35]:
pandas.DataFrame(running_activities)

Unnamed: 0,resource_state,athlete,name,distance,moving_time,elapsed_time,total_elevation_gain,type,sport_type,workout_type,...,display_hide_heartrate_option,elev_high,elev_low,upload_id,upload_id_str,external_id,from_accepted_tag,pr_count,total_photo_count,has_kudoed
0,2,"{'id': 44717295, 'resource_state': 1}",Come back to CPH they said 🧊,8505.7,2850,2868,42.0,Run,Run,0.0,...,True,6.0,-2.2,11313425789,11313425789,garmin_ping_315775982513,False,0,1,False
1,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,5079.3,1640,1651,1.0,Run,Run,,...,True,3.8,-0.8,11319844319,11319844319,garmin_ping_315956956970,False,0,0,False
2,2,"{'id': 44717295, 'resource_state': 1}",Afternoon Run,5620.0,1773,1773,0.0,Run,Run,,...,True,0.0,0.0,11327903967,11327903967,garmin_ping_316185594144,False,0,0,False
3,2,"{'id': 44717295, 'resource_state': 1}",With Vitto,5092.3,1849,1852,1.0,Run,Run,0.0,...,True,3.8,-1.0,11335511656,11335511656,garmin_ping_316389359056,False,0,0,False
4,2,"{'id': 44717295, 'resource_state': 1}",Morning Run,16926.3,5103,5391,72.0,Run,Run,,...,True,10.6,-2.4,11340168790,11340168790,garmin_ping_316529408008,False,2,0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,2,"{'id': 44717295, 'resource_state': 1}",Evening Run,10179.4,3182,3244,17.0,Run,Run,,...,True,3.8,-2.6,12644020110,12644020110,garmin_ping_353957443886,False,0,0,False
123,2,"{'id': 44717295, 'resource_state': 1}",Evening Run,6722.1,2358,2370,11.0,Run,Run,,...,True,4.6,-0.2,12651795370,12651795370,garmin_ping_354184399141,False,0,0,False
124,2,"{'id': 44717295, 'resource_state': 1}",Evening Run,6581.4,2082,2136,13.0,Run,Run,,...,True,2.8,-3.0,12660179923,12660179923,garmin_ping_354432054602,False,0,0,False
125,2,"{'id': 44717295, 'resource_state': 1}",Afternoon Run,8241.3,2710,2710,0.0,Run,Run,,...,True,0.0,0.0,12666230100,12666230100,garmin_ping_354615604806,False,0,0,False


In [18]:
data = {
    "start_date": [act["start_date"] for act in running_activities],
    "distance_meters": [float(act["distance"]) for act in running_activities],
    "time_seconds": [act["moving_time"] for act in running_activities],
}


df = pandas.DataFrame(data)
df["start_date"] = pandas.to_datetime(df["start_date"])
df.set_index("start_date", inplace=True)
weekly_data = df.resample("W").sum()
weekly_data["distance_km"] = round(weekly_data["distance_meters"] / 1000, 1)


def get_tanda_value(km_per_week: int, pace_sec_per_km: int) -> float:
    marathon_distance = 42.195
    marathon_pace_sec_per_km = (
        17.1 + 140.0 * numpy.exp(-0.0053 * km_per_week) + 0.55 * pace_sec_per_km
    )
    total_marathon_time_secs = marathon_distance * marathon_pace_sec_per_km
    total_marathon_time_hours = total_marathon_time_secs / 3600
    return total_marathon_time_hours


def get_pace_for_distance(km_per_week: int, total_marathon_time_hours: float) -> float:
    marathon_distance = 42.195
    marathon_pace_sec_per_km = total_marathon_time_hours * 3600 / marathon_distance
    pace_sec_per_km = (
        marathon_pace_sec_per_km - 17.1 - 140.0 * numpy.exp(-0.0053 * km_per_week)
    ) / 0.55
    return pace_sec_per_km


def pretty_marathon_time(total_marathon_time_hours: float) -> str:
    hours = int(total_marathon_time_hours)
    minutes = int((total_marathon_time_hours - hours) * 60)
    seconds = int(((total_marathon_time_hours - hours) * 60 - minutes) * 60)
    if seconds >= 30:
        minutes += 1

    return f"{hours} hours {minutes} minutes"

In [31]:
df

Unnamed: 0_level_0,distance_meters,time_seconds
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-17 06:17:52+00:00,8505.7,2850
2024-01-18 06:35:21+00:00,5079.3,1640
2024-01-19 12:49:06+00:00,5620.0,1773
2024-01-20 15:35:05+00:00,5092.3,1849
2024-01-21 09:40:50+00:00,16926.3,5103
...,...,...
2024-07-10 17:09:24+00:00,10179.4,3182
2024-07-11 16:12:56+00:00,6722.1,2358
2024-07-12 17:43:15+00:00,6581.4,2082
2024-07-13 12:35:09+00:00,8241.3,2710


In [19]:
daily_df = df.groupby(df.index.date).sum()
daily_df.index = pandas.to_datetime(daily_df.index)
daily_df.index.name = "date"


daily_df["tanda_day"] = get_tanda_value(
    daily_df["distance_meters"] / 1000 * 7,
    daily_df["time_seconds"] / (daily_df["distance_meters"] / 1000),
)
daily_df["tanda_day_pretty"] = pandas.to_datetime(daily_df["tanda_day"], unit="h")


daily_df = daily_df.reset_index()
daily_df["date"] = pandas.to_datetime(daily_df["date"])
daily_df.set_index("date", inplace=True)

num_weeks = 8
num_days = num_weeks * 7
rolling = f"{num_days}d"


daily_df["rolling_distance_meters"] = (
    daily_df["distance_meters"].rolling(window=rolling).sum()
)
daily_df["rolling_time_seconds"] = (
    daily_df["time_seconds"].rolling(window=rolling).sum()
)

daily_df["rolling_km_per_week"] = daily_df["rolling_distance_meters"] / 1000 / num_weeks
daily_df["rolling_pace_sec_per_km"] = (
    daily_df["rolling_time_seconds"] / daily_df["rolling_distance_meters"] * 1000
)

daily_df["rolling_tanda_day"] = get_tanda_value(
    daily_df["rolling_km_per_week"], daily_df["rolling_pace_sec_per_km"]
)

daily_df["rolling_tanda_day_pretty"] = pandas.to_datetime(
    daily_df["rolling_tanda_day"], unit="h"
)

daily_df["type_rolling"] = "Tanda (8 weeks)"
daily_df["type_daily"] = "Tanda (daily)"

daily_df["pace_sec_per_km"] = daily_df["time_seconds"] / (
    daily_df["distance_meters"] / 1000
)
daily_df["distance_km"] = daily_df["distance_meters"] / 1000

daily_df = daily_df.sort_values(by="date", ascending=True)
daily_df["date_factor"] = numpy.exp(numpy.linspace(0, 15, len(daily_df)))


daily_df["daily_pace_pretty"] = daily_df["pace_sec_per_km"].apply(
    lambda x: f"{int(x//60)}:{int(x%60):02d}"
)
daily_df["rolling_pace_pretty"] = daily_df["rolling_pace_sec_per_km"].apply(
    lambda x: f"{int(x//60)}:{int(x%60):02d}"
)

daily_df["rolling_km_per_week_daily_distance"] = daily_df["rolling_km_per_week"] / 7
daily_df["Latest run"] = "Latest run"

daily_df["pretty_rolling_tanda_day"] = daily_df["rolling_tanda_day"].apply(
    pretty_marathon_time
)

# current_form_marathon_time = pretty_marathon_time(
#     daily_df.loc[start_date:last_date]
#     .reset_index()
#     .sort_values("date")
#     .tail(1)["rolling_tanda_day"]
#     .item()
# )


def pace_tick_formatter(value):
    minutes = int(value // 60)
    seconds = int(value % 60)
    return f"{minutes}:{seconds:02d}"

In [20]:
x_scale = alt.Scale(padding=20)
upper_limit = weekly_data["distance_km"].max()
lower_limit = 0

x = alt.X("start_date:T", scale=alt.Scale(padding=20), title="Week")
y = alt.Y(
    "distance_km:Q",
    axis=alt.Axis(title="Kilometers"),
    scale=alt.Scale(domain=[lower_limit, upper_limit]),
)

line_chart = (
    alt.Chart(weekly_data.reset_index())
    .mark_area(
        line={"color": "#ff561b"},
        color=alt.Gradient(
            gradient="linear",
            stops=[
                alt.GradientStop(color="white", offset=0),
                alt.GradientStop(color="#ff561b", offset=1),
            ],
            x1=1,
            x2=1,
            y1=1,
            y2=0,
        ),
    )
    .encode(
        x=x,
        y=y,
        tooltip=["start_date:T", "distance_km:Q"],
    )
    .properties(width=800, height=500, title="Running distance per week (km)")
)

points = (
    alt.Chart(weekly_data.reset_index())
    .mark_point(
        filled=True,
        fill="white",
        stroke="#ff561b",
        strokeWidth=2,
        size=50,
        shape="circle",
    )
    .encode(
        x=x,
        y=y,
        tooltip=["start_date:T", "distance_km:Q"],
    )
)


chart = line_chart + points

chart.show()

In [21]:
x = alt.X("date:T", title="Date", scale=alt.Scale(padding=20))

daily_line = (
    alt.Chart(daily_df.reset_index())
    .mark_point(shape="square", filled=True, opacity=0.5)
    .encode(
        x=x,
        y=alt.Y("hoursminutes(tanda_day_pretty):O", title="Tanda day"),
        color=alt.value("#d65de0"),
        tooltip=[
            alt.Tooltip("tanda_day_pretty", timeUnit="hoursminutes"),
            alt.Tooltip("date", timeUnit="yearmonthdate"),
        ],
    )
)
rolling_line = (
    alt.Chart(daily_df.reset_index())
    .mark_line(interpolate="basis")
    .encode(
        x=x,
        y=alt.Y(
            "hoursminutes(rolling_tanda_day_pretty):O", title="Tanda trend (8 weeks)"
        ),
        color=alt.value("#d65de0"),
        tooltip=[
            alt.Tooltip("rolling_tanda_day_pretty", timeUnit="hoursminutes"),
            alt.Tooltip("date", timeUnit="yearmonthdate"),
        ],
    )
)


(daily_line + rolling_line).properties(width=800, height=500, title="Tanda").show()

In [22]:
pace_ticks_values = list(range(240, 60 * 8, 15))
last_date = max(daily_df.index)
start_date = last_date - timedelta(days=56)


daily_df["shape"] = daily_df.index.to_series().apply(
    lambda x: "square" if x == last_date else "circle"
)
min_pace, max_pace = (
    daily_df["pace_sec_per_km"].min(),
    daily_df["pace_sec_per_km"].max(),
)


daily_line = (
    alt.Chart(daily_df.loc[start_date:last_date].reset_index().sort_values("date"))
    .mark_point(
        filled=True,
        size=90,
    )
    .encode(
        x=alt.X(
            "distance_km:Q",
            title="Daily Distance (km)",
            axis=alt.Axis(tickCount=int(25 // 5)),
        ),
        y=alt.Y(
            "pace_sec_per_km:Q",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(min_pace, max_pace),
            ),
            title="Pace (mm:ss)",
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        tooltip=[
            alt.Tooltip("distance_km:Q", title="Distance (km)", format=".1f"),
            alt.Tooltip("date:T", title="Date"),
            alt.Tooltip("daily_pace_pretty:N", title="Pace (mm:ss/km)"),
        ],
        color=alt.Color(
            "date_factor:Q", scale=alt.Scale(scheme="lightgreyred"), legend=None
        ),
        shape=alt.Shape(
            "shape:N", scale=alt.Scale(range=["square", "circle"]), legend=None
        ),
    )
)


(daily_line).properties(
    width=800, height=500, title="Pace and daily distance"
).interactive().show()

In [23]:
marathon_times = []


for marathon_time in numpy.arange(2.5, 4.5, 0.25):
    for km_day in range(0, 50, 1):
        km_week = km_day * 7
        pace = get_pace_for_distance(km_week, marathon_time)
        formatted_pace = pace_tick_formatter(pace)
        marathon_times.append(
            {
                "marathon_time": marathon_time,
                "km_day": km_day,
                "km_week": km_week,
                "pace": pace,
                "formatted_pace": formatted_pace,
            }
        )

times_df = pandas.DataFrame(marathon_times)

In [24]:
daily_df.loc[start_date:last_date].distance_km.max()

16.6094

In [25]:
marathon_times = (
    alt.Chart(times_df)
    .mark_line(interpolate="basis")
    .encode(
        x=alt.X(
            "km_day:Q",
            title="Daily Distance (km)",
            scale=alt.Scale(
                domain=[0, daily_df.loc[start_date:last_date].distance_km.max()]
            ),
        ),
        y=alt.Y(
            "pace:Q",
            title="Pace (mm:ss)",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(300, 405),
            ),
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        color=alt.Color(
            "marathon_time:N",
            title="Marathon Time",
            scale=alt.Scale(scheme="turbo"),
            legend=alt.Legend(
                labelExpr="floor(datum.value) + ':' + (floor((datum.value % 1) * 60) < 10 ? '0' : '') + floor((datum.value % 1) * 60)"
            ),
        ),
        tooltip=[
            alt.Tooltip("km_day:Q", title="Distance (km)"),
            alt.Tooltip("formatted_pace:N", title="Pace (mm:ss)"),
            alt.Tooltip("marathon_time:Q", title="Marathon time (hours)"),
        ],
    )
    .properties(width=800, height=500, title="Pace and daily distance")
    .interactive()
)

marathon_times.show()

In [26]:
daily_df["Legend"] = "Tanda Progression line"


tooltip = [
    alt.Tooltip(
        "rolling_km_per_week_daily_distance:Q",
        title="Distance (km)",
        format=".1f",
    ),
    alt.Tooltip(
        "rolling_pace_pretty:N",
        title="Pace (s/km)",
    ),
    alt.Tooltip("date:T", title="Date"),
    alt.Tooltip("pretty_rolling_tanda_day:N", title="Marathon Form"),
]


tanda_progression = (
    alt.Chart(daily_df.loc[start_date:last_date].reset_index().sort_values("date"))
    .mark_line(point=True, strokeWidth=2)
    .encode(
        x=alt.X(
            "rolling_km_per_week_daily_distance:Q",
            title="Daily Distance (km)",
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y(
            "rolling_pace_sec_per_km:Q",
            title="Pace (mm:ss)",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(min(pace_ticks_values), max(pace_ticks_values)),
            ),
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        tooltip=tooltip,
        order="date",
        color=alt.Color(
            "Legend:N",
            legend=alt.Legend(title=None),
            scale=alt.Scale(domain=["Tanda Progression line"], range=["#87f94c"]),
        ),
    )
    .properties(width=800, height=500, title="Pace and daily distance")
    .interactive()
)


current_form = (
    alt.Chart(
        daily_df.loc[start_date:last_date].reset_index().sort_values("date").tail(1)
    )
    .mark_point(filled=True, size=70)
    .encode(
        x=alt.X(
            "rolling_km_per_week_daily_distance:Q",
            title="Daily Distance (km)",
            scale=alt.Scale(zero=False),
        ),
        y=alt.Y(
            "rolling_pace_sec_per_km:Q",
            title="Pace (mm:ss)",
            scale=alt.Scale(
                reverse=True,
                zero=False,
                domain=(min(pace_ticks_values), max(pace_ticks_values)),
            ),
            axis=alt.Axis(
                values=pace_ticks_values,
                labelExpr="datum.value > 0 ? timeFormat(datum.value * 1000, '%M:%S') : ''",
            ),
        ),
        color=alt.value("#142ef5"),
        tooltip=tooltip,
    )
    .properties(width=800, height=500, title="Pace and daily distance")
    .interactive()
)

(tanda_progression + current_form).show()

In [27]:
(marathon_times + daily_line + tanda_progression + current_form).properties(
    title="Tanda and marathon pace",
).interactive().show()

In [29]:
daily_df

Unnamed: 0_level_0,distance_meters,time_seconds,tanda_day,tanda_day_pretty,rolling_distance_meters,rolling_time_seconds,rolling_km_per_week,rolling_pace_sec_per_km,rolling_tanda_day,rolling_tanda_day_pretty,...,date_factor,daily_pace_pretty,rolling_pace_pretty,rolling_km_per_week_daily_distance,Latest run,pretty_rolling_tanda_day,shape,Legend,week_number,day_of_the_week_name
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-17,8505.7,2850,3.557288,1970-01-01 03:33:26.236344160,8505.7,2850.0,1.063213,335.069424,3.992133,1970-01-01 03:59:31.680213198,...,1.000000e+00,5:35,5:35,0.151888,Latest run,3 hours 60 minutes,circle,Tanda Progression line,3,Wed
2024-01-18,5079.3,1640,3.640940,1970-01-01 03:38:27.383805848,13585.0,4490.0,1.698125,330.511594,3.957270,1970-01-01 03:57:26.172145814,...,1.136786e+00,5:22,5:30,0.242589,Latest run,3 hours 57 minutes,circle,Tanda Progression line,3,Thu
2024-01-19,5620.0,1773,3.566253,1970-01-01 03:33:58.510301774,19205.0,6263.0,2.400625,326.112991,3.922871,1970-01-01 03:55:22.335919287,...,1.292283e+00,5:15,5:26,0.342946,Latest run,3 hours 55 minutes,circle,Tanda Progression line,3,Fri
2024-01-20,5092.3,1849,3.899549,1970-01-01 03:53:58.375850071,24297.3,8112.0,3.037163,333.864257,3.967383,1970-01-01 03:58:02.577424616,...,1.469049e+00,6:03,5:33,0.433880,Latest run,3 hours 58 minutes,circle,Tanda Progression line,3,Sat
2024-01-21,16926.3,5103,3.019645,1970-01-01 03:01:10.723466722,41223.6,13215.0,5.152950,320.568800,3.863668,1970-01-01 03:51:49.205800796,...,1.669995e+00,5:01,5:20,0.736136,Latest run,3 hours 52 minutes,circle,Tanda Progression line,3,Sun
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-10,10179.4,3182,3.340331,1970-01-01 03:20:25.193354377,302848.5,98016.0,37.856063,323.646972,3.629416,1970-01-01 03:37:45.898372311,...,1.957502e+06,5:12,5:23,5.408009,Latest run,3 hours 38 minutes,circle,Tanda Progression line,28,Wed
2024-07-11,6722.1,2358,3.740463,1970-01-01 03:44:25.666293994,309570.6,100374.0,38.696325,324.236216,3.627249,1970-01-01 03:37:38.095913643,...,2.225261e+06,5:50,5:24,5.528046,Latest run,3 hours 38 minutes,circle,Tanda Progression line,28,Thu
2024-07-12,6581.4,2082,3.525157,1970-01-01 03:31:30.566780508,308079.8,99987.0,38.509975,324.549029,3.630586,1970-01-01 03:37:50.110316889,...,2.529646e+06,5:16,5:24,5.501425,Latest run,3 hours 38 minutes,circle,Tanda Progression line,28,Fri
2024-07-13,8241.3,2710,3.528874,1970-01-01 03:31:43.946653224,306700.4,99535.0,38.337550,324.534953,3.631719,1970-01-01 03:37:54.187406932,...,2.875666e+06,5:28,5:24,5.476793,Latest run,3 hours 38 minutes,circle,Tanda Progression line,28,Sat


In [28]:
daily_df["week_number"] = daily_df.index.isocalendar().week
daily_df["day_of_the_week_name"] = daily_df.index.strftime("%a")
heatmap_data = daily_df[
    [
        "week_number",
        "day_of_the_week_num",
        "distance_km",
    ]
].sort_values(["week_number", "day_of_the_week_num"])

all_days = pandas.date_range(
    heatmap_data.index.min(), heatmap_data.index.max(), freq="D"
)
heatmap_data = heatmap_data.reindex(all_days).fillna(0.0)
heatmap_data["week_number"] = heatmap_data.index.isocalendar().week
heatmap_data["day_of_the_week_name"] = heatmap_data.index.strftime("%a")
heatmap_data["month"] = heatmap_data.index.strftime("%b")
heatmap_data["day_of_the_month"] = heatmap_data.index.day

upper_limit = (
    heatmap_data["distance_km"].mean() + heatmap_data["distance_km"].std() * 1.5
)

day_order = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]


heatmap = (
    alt.Chart(heatmap_data)
    .mark_rect(cornerRadius=2)
    .encode(
        x=alt.X(
            "week_number:O",
            axis=alt.Axis(title=None, domain=False, ticks=False, labels=False),
        ),
        y=alt.Y(
            "day_of_the_week_name:O",
            sort=day_order,
            axis=alt.Axis(
                title=None,
                domain=False,
                ticks=False,
            ),
        ),
        color=alt.Color(
            "distance_km:Q",
            scale=alt.Scale(
                domain=[0, upper_limit],
                scheme="lightorange",
            ),
            legend=None,
        ),
        tooltip=[
            alt.Tooltip("distance_km:Q", title="Distance (km)", format=".1f"),
            alt.Tooltip("month:O", title="Month"),
            alt.Tooltip("day_of_the_month:O", title="Day of the month"),
        ],
    )
    .configure_scale(bandPaddingInner=0.20)
    .properties(title="Running heatmap", width=600, height=150)
    .interactive()
    .configure_view(stroke=None)
)

heatmap.show()

KeyError: "['day_of_the_week_num'] not in index"