In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns

from dotenv import load_dotenv
from google.cloud import bigquery
from google.oauth2 import service_account

import matplotlib.pyplot as plt

# Load variables from a .env file (python-dotenv) before reading the settings below.
load_dotenv()

# Project and dataset that host the marts tables queried later in the notebook.
# Either value is None when the corresponding environment variable is unset.
_GCP_PROJECT_ID = os.environ.get('GCP_PROJECT_ID')
_BQ_DATASET_MARTS = os.environ.get('BIGQUERY_DATASET_MARTS')

In [None]:
# Create the BigQuery client from the ambient Google credentials
# (local dev requires GOOGLE_APPLICATION_CREDENTIALS).
#
# NOTE: this cell used to try `st.secrets['gcp_service_account']` first, but
# `st` is never imported in this notebook, so that branch always raised
# NameError and the broad `except Exception` silently fell through to the
# default client. Constructing the default client directly keeps the behaviour
# that actually ran, and lets real credential errors surface instead of being
# swallowed.
client = bigquery.Client()

In [None]:
# Get activities

# Fail fast on missing configuration: without this check the f-string below
# would render the table path as "None.None.fct_activities" and the error
# would only show up as an opaque BigQuery failure.
if not _GCP_PROJECT_ID or not _BQ_DATASET_MARTS:
    raise ValueError(
        "GCP_PROJECT_ID and BIGQUERY_DATASET_MARTS must be set "
        "(environment variables or .env file)"
    )

# Backtick-quote the fully qualified table path so project / dataset names
# containing characters such as '-' are always parsed as one identifier.
query_activities = (
    f"SELECT * FROM `{_GCP_PROJECT_ID}.{_BQ_DATASET_MARTS}.fct_activities`"
)
df_activities = client.query(query_activities).to_dataframe()

print(f'Count activities: {len(df_activities)}')

In [None]:
# Keep only activities whose discipline is exactly 'Run'; the copy makes
# df_runs independent of df_activities.
is_run = df_activities['discipline'].eq('Run')
df_runs = df_activities.loc[is_run].copy()
print(f'Len relevant activities: {len(df_runs)}')

In [None]:
# Work on a copy so df_runs stays untouched and this cell can be re-run.
df = df_runs.copy()

# Required columns check
# TODO: Add more cols
req = ["start_date_local", "distance_km", "moving_time_s"]
missing = [c for c in req if c not in df.columns]
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# Chronological order, fresh 0..n-1 index.
df["start_date_local"] = pd.to_datetime(df["start_date_local"])
df = df.sort_values("start_date_local").reset_index(drop=True)

# Basic sanity filtering (tune)
# Drop non-positive (or missing) distance / time BEFORE deriving pace, so the
# division below never sees a zero distance.
df = df[(df["distance_km"] > 0) & (df["moving_time_s"] > 0)].copy()

# Compute pace_sec_per_km if missing
if "pace_sec_per_km" not in df.columns:
    df["pace_sec_per_km"] = df["moving_time_s"] / df["distance_km"]

# Plausible running pace window in seconds per km (2:00/km .. 15:00/km);
# both bounds are inclusive, rows with missing pace are dropped.
PACE_MIN_SEC_PER_KM = 120
PACE_MAX_SEC_PER_KM = 900
df = df[df["pace_sec_per_km"].between(PACE_MIN_SEC_PER_KM, PACE_MAX_SEC_PER_KM)].copy()

df.head()

In [None]:
# Calendar day (local time) of each run; used as the grouping key below.
df["date"] = df["start_date_local"].dt.date

# Per-day totals. Elevation is only aggregated when the source column exists;
# the earlier fallback summed moving_time_s into elev_gain_m, which silently
# reported seconds as metres.
agg_spec = {
    "runs": ("distance_km", "size"),
    "distance_km": ("distance_km", "sum"),
    "time_s": ("moving_time_s", "sum"),
}
if "elevation_gain_m" in df.columns:
    agg_spec["elev_gain_m"] = ("elevation_gain_m", "sum")

daily = df.groupby("date", as_index=False).agg(**agg_spec)

if "elev_gain_m" not in daily.columns:
    # Unknown elevation: keep the column so later cells still find it.
    daily["elev_gain_m"] = np.nan

# Time-weighted pace: total_time / total_distance
daily["pace_sec_per_km"] = daily["time_s"] / daily["distance_km"]

# HR time-weighted mean (only if available):
#   hr_mean = sum(avg_heartrate * moving_time_s) / sum(moving_time_s)
# Only runs that actually carry a heart-rate value contribute, to BOTH the
# numerator and the denominator. Previously a run flagged has_heartrate but
# with a missing avg_heartrate was skipped in the numerator yet still counted
# in the denominator, which pulled hr_mean towards zero.
if "avg_heartrate" in df.columns:
    hr_ok = df["avg_heartrate"].notna()
    if "has_heartrate" in df.columns:
        hr_ok = hr_ok & df["has_heartrate"].fillna(False).astype(bool)
    dhr = df[hr_ok].copy()

    if len(dhr) > 0:
        # Vectorised weighted mean: two grouped sums instead of a per-group apply.
        dhr["hr_x_time_s"] = dhr["avg_heartrate"] * dhr["moving_time_s"]
        hr_daily = dhr.groupby("date", as_index=False).agg(
            hr_x_time_s=("hr_x_time_s", "sum"),
            hr_time_s=("moving_time_s", "sum"),
        )
        hr_daily["hr_mean"] = hr_daily["hr_x_time_s"] / hr_daily["hr_time_s"]
        hr_daily = hr_daily[["date", "hr_mean"]]
        # Left merge: days without any HR data keep hr_mean = NaN.
        daily = daily.merge(hr_daily, on="date", how="left")
    else:
        daily["hr_mean"] = np.nan
else:
    daily["hr_mean"] = np.nan

daily.head()

In [None]:
# Put the daily table on a gap-free calendar: one row per day between the
# first and the last day that has a run.
daily = (
    daily.assign(date=pd.to_datetime(daily["date"]))
         .sort_values("date")
         .reset_index(drop=True)
)

full_dates = pd.date_range(start=daily["date"].min(), end=daily["date"].max(), freq="D")
daily = (
    daily.set_index("date")
         .reindex(full_dates)      # days without a run appear as all-NaN rows
         .rename_axis("date")
         .reset_index()
)

# Days without training carry zero volume.
daily = daily.fillna({"runs": 0, "distance_km": 0.0, "time_s": 0.0, "elev_gain_m": 0.0})
daily["runs"] = daily["runs"].astype(int)

# Pace and heart rate have no meaning on a rest day, so they stay missing.
rest_day = daily["distance_km"] == 0
daily.loc[rest_day, ["pace_sec_per_km", "hr_mean"]] = np.nan

daily.tail()

In [None]:
# --- Baseline pace -----------------------------------------------------------
# The personal reference pace is the median daily pace (sec/km). The median is
# used because it is robust to outliers such as races or very easy days, and
# rest days (NaN pace) are skipped.
base_pace = daily["pace_sec_per_km"].median(skipna=True)  # sec/km

# --- Relative intensity ------------------------------------------------------
# intensity = base_pace / actual_pace
#   1.0  -> ran at baseline pace
#   >1.0 -> faster than baseline (harder day)
#   <1.0 -> slower than baseline (easier day)
# Days without a pace (rest days) get intensity 0.0.
pace = daily["pace_sec_per_km"]
daily["intensity"] = (base_pace / pace).where(pace.notna(), 0.0)

# --- Daily training load ("effective hours") ---------------------------------
# load = hours trained x intensity ** P_INTENSITY
# The exponent makes days faster than baseline weigh more than proportionally.
P_INTENSITY = 2

hours = daily["time_s"] / 3600.0
daily["load"] = hours * daily["intensity"].pow(P_INTENSITY)  # "effective hours"
daily.sort_values(by='load').tail(10)

In [None]:
def ewma_load(load: pd.Series, tau_days: float) -> pd.Series:
    """
    Smooth a daily load series with an exponentially weighted moving average.

    The smoothing factor is derived from a decay time constant:

        alpha = 1 - exp(-1 / tau_days)

    and the average is computed recursively (``adjust=False``), i.e.
    ``y[0] = load[0]`` and ``y[t] = (1 - alpha) * y[t-1] + alpha * load[t]``.
    With this choice the contribution of a single day's load has decayed to
    exp(-1), roughly 37 %, of its initial weight after ``tau_days`` days.

    Parameters
    ----------
    load : pd.Series
        Load values, one entry per consecutive day.
    tau_days : float
        Decay time constant in days; larger values forget more slowly.

    Returns
    -------
    pd.Series
        The smoothed series, with the same index as ``load``.
    """
    # Fraction of yesterday's state that survives one day.
    daily_retention = np.exp(-1 / tau_days)
    smoothing = 1 - daily_retention
    smoother = load.ewm(alpha=smoothing, adjust=False)
    return smoother.mean()

# Time constants in days (sport science defaults):
#   fitness builds and decays slowly  -> long time constant
#   fatigue builds and decays quickly -> short time constant
TAU_FITNESS = 42
TAU_FATIGUE = 7

# Latent training states: the same daily load smoothed on two time scales.
daily = daily.assign(
    fitness=ewma_load(daily["load"], TAU_FITNESS),
    fatigue=ewma_load(daily["load"], TAU_FATIGUE),
)

# Readiness = fitness that is still "available" once fatigue is subtracted.
daily["readiness"] = daily["fitness"].sub(daily["fatigue"])

daily.tail(10)

In [None]:
# Plot daily load
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(daily["date"], daily["load"])
ax.set_title("Daily Training Load")
ax.set_xlabel("Date")
ax.set_ylabel("Load")
plt.show()

# Plot fitness, fatigue, readiness on one shared axis; each line is labelled
# with its column name.
fig, ax = plt.subplots(figsize=(12, 6))
for column in ("fitness", "fatigue", "readiness"):
    ax.plot(daily["date"], daily[column], label=column)
ax.set_title("Fitness / Fatigue / Readiness (EWMA)")
ax.set_xlabel("Date")
ax.legend()
plt.show()