<a href="https://colab.research.google.com/github/baptonx7/enduraw-case-study/blob/main/GAP_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install gpxpy


Collecting gpxpy
  Downloading gpxpy-1.6.2-py3-none-any.whl.metadata (5.9 kB)
Downloading gpxpy-1.6.2-py3-none-any.whl (42 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gpxpy
Successfully installed gpxpy-1.6.2


In [18]:
import gpxpy
import pandas as pd
import numpy as np
import math

# --- 1) Load GPX geometry (no timestamps) ---
with open("Lahti_run.gpx", "r") as f:   # replace with your GPX filename
    gpx = gpxpy.parse(f)

points = []
for track in gpx.tracks:
    for seg in track.segments:
        for p in seg.points:
            points.append({'lat': p.latitude, 'lon': p.longitude, 'ele': p.elevation})
df_gpx = pd.DataFrame(points)

# --- 2) Load your split times from CSV ---
df_splits = pd.read_csv("Amelia Watkinson - Run-splits CSV.csv")

# Expecting columns like: "km","split_time"
# Make sure split_time is in seconds, otherwise convert
# Example if format is "mm:ss":
def time_to_sec(x):
    parts = str(x).split(":")
    if len(parts) == 2:
        return int(parts[0])*60 + int(parts[1])
    elif len(parts) == 3:
        return int(parts[0])*3600 + int(parts[1])*60 + int(parts[2])
    else:
        return float(x)

df_splits['time_s'] = df_splits['split_time'].apply(time_to_sec)

# --- 3) Compute cumulative distance from GPX ---
def haversine_m(lat1, lon1, lat2, lon2):
    R = 6371000.0
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi/2.0)**2 + math.cos(phi1)*math.cos(phi2)*math.sin(dlambda/2.0)**2
    return R * 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))

distances = [0.0]
for i in range(1, len(df_gpx)):
    d = haversine_m(df_gpx.loc[i-1,'lat'], df_gpx.loc[i-1,'lon'],
                    df_gpx.loc[i,'lat'], df_gpx.loc[i,'lon'])
    distances.append(d)
df_gpx['d_m'] = distances
df_gpx['cum_m'] = df_gpx['d_m'].cumsum()

# --- 4) Compute grade per km ---
km_bins = np.arange(1000, df_gpx['cum_m'].iloc[-1]+1000, 1000)
elev_changes, grades = [], []
for km in km_bins:
    mask = df_gpx['cum_m'] <= km
    elev = df_gpx.loc[mask, 'ele'].iloc[-1]
    if len(elev_changes)==0:
        elev_changes.append(0)
    else:
        elev_changes.append(elev - prev_elev)
    prev_elev = elev
grades = [ec/1000 for ec in elev_changes]  # grade = elev gain / horiz dist

# --- 5) Merge geometry + splits ---
df_splits['elev_change_m'] = elev_changes[:len(df_splits)]
df_splits['grade_frac'] = df_splits['elev_change_m']/1000.0
df_splits['pace_s_per_km'] = df_splits['time_s']  # since each split = 1 km

# --- 6) Apply simple GAP model ---
def gap_multiplier_simple(grade_percent, k_up=10.0, k_down=4.0):
    if grade_percent >= 0:
        return 1.0 + (k_up * grade_percent / 100.0)
    else:
        return max(0.7, 1.0 + (-k_down * abs(grade_percent) / 100.0))

df_splits['gap_pace_s_per_km'] = df_splits.apply(
    lambda r: r['pace_s_per_km'] / gap_multiplier_simple(r['grade_frac']*100),
    axis=1
)

# --- 7) Output results ---
df_splits['pace_min_per_km'] = df_splits['pace_s_per_km']/60
df_splits['gap_min_per_km'] = df_splits['gap_pace_s_per_km']/60

print(df_splits[['km','pace_min_per_km','gap_min_per_km','grade_frac']].head())

# Save to CSV
df_splits.to_csv("gpx_gap_splits.csv", index=False)


    km  pace_min_per_km  gap_min_per_km  grade_frac
0  0.4         1.783333        1.783333      0.0000
1  1.0         3.716667        2.845840      0.0306
2  1.2         5.116667        4.520024      0.0132
3  1.4         5.350000        5.397498     -0.0022
4  1.3         4.566667        5.161242     -0.0288
