In [2]:
import pandas as pd
import os

# ------- SETTINGS --------
data_folder = "Data"   # folder inside repo containing all runner CSVs

# ------- HELPERS --------

def seconds_to_mmss(value):
    value = float(value)
    minutes = int(value) // 60
    seconds = int(value) % 60
    return f"{minutes}:{seconds:02d}"

def mph_to_pace(mph):
    if pd.isna(mph) or mph <= 0:
        return None
    pace = 60 / mph
    minutes = int(pace)
    seconds = int(round((pace - minutes) * 60))
    if seconds == 60:
        minutes += 1
        seconds = 0
    return f"{minutes}:{seconds:02d}"

# ------- READ & PROCESS ALL CSV FILES -------

all_dataframes = []

for filename in os.listdir(data_folder):
    if filename.endswith(".csv"):
        
        # Extract person name by removing extension (e.g., "Alex.csv" → "Alex")
        person_name = os.path.splitext(filename)[0]

        # Read the file
        file_path = os.path.join(data_folder, filename)
        df = pd.read_csv(file_path)

        # Add person column
        df["Person"] = person_name

        # Keep only the needed columns
        columns_to_keep = [
            "Activity Date", "Activity Type", "Elapsed Time", "Distance", "Moving Time",
            "Max Speed", "Average Speed", "Elevation Gain", "Elevation Loss",
            "Elevation Low", "Elevation High", "Max Grade", "Average Grade",
            "Average Grade Adjusted Pace", "Person"
        ]
        df = df[columns_to_keep]

        # Filter to runs only
        df = df[df["Activity Type"] == "Run"]

        # Format date
        df["Activity Date"] = pd.to_datetime(df["Activity Date"])
        df["Activity Date"] = df["Activity Date"].dt.strftime("%-m/%-d/%y")  # use %#m/%#d/%y on Windows

        # Convert time columns
        df["Elapsed Time"] = df["Elapsed Time"].apply(seconds_to_mmss)
        df["Moving Time"] = df["Moving Time"].apply(seconds_to_mmss)

        # Convert distance (km → miles)
        df["Distance"] = (df["Distance"] * 0.621371).round(2)

        # Convert speeds (m/s → mph)
        df["Max Speed"] = (df["Max Speed"] * 2.23694).round(2)
        df["Average Speed"] = (df["Average Speed"] * 2.23694).round(2)
        df["Average Grade Adjusted Pace"] = (df["Average Grade Adjusted Pace"] * 2.23694).round(2)

        # Convert elevation (m → ft)
        meters_to_feet = 3.28084
        elevation_cols = ["Elevation Gain", "Elevation Loss", "Elevation Low", "Elevation High"]
        df[elevation_cols] = (df[elevation_cols] * meters_to_feet).round(1)

        # Add new pace columns
        df["Pace"] = df["Average Speed"].apply(mph_to_pace)
        df["Grade Adjusted Pace"] = df["Average Grade Adjusted Pace"].apply(mph_to_pace)

        # Store processed dataframe
        all_dataframes.append(df)

# ------- MERGE ALL RUNNERS INTO ONE DF -------

runs = pd.concat(all_dataframes, ignore_index=True)

# ------- OPTIONAL: ORDER COLUMNS -------

runs = runs[
    [
        "Activity Date", "Activity Type", "Elapsed Time", "Distance", "Pace",
        "Moving Time", "Max Speed", "Average Speed", "Elevation Gain",
        "Elevation Loss", "Elevation Low", "Elevation High", "Max Grade",
        "Average Grade", "Average Grade Adjusted Pace", "Grade Adjusted Pace",
        "Person"
    ]
]

runs

  df["Activity Date"] = pd.to_datetime(df["Activity Date"])
  df["Activity Date"] = pd.to_datetime(df["Activity Date"])


Unnamed: 0,Activity Date,Activity Type,Elapsed Time,Distance,Pace,Moving Time,Max Speed,Average Speed,Elevation Gain,Elevation Loss,Elevation Low,Elevation High,Max Grade,Average Grade,Average Grade Adjusted Pace,Grade Adjusted Pace,Person
0,9/5/23,Run,24:16,2.04,10:56,22:18,9.92,5.49,42.0,42.0,33.5,59.4,7.7,0.0,,,Karina
1,11/19/24,Run,18:49,1.07,9:10,9:50,11.36,6.55,9.8,149.9,282.8,429.1,14.5,-2.6,6.38,9:24,Karina
2,11/21/24,Run,29:14,1.62,17:45,28:38,10.36,3.38,197.8,197.8,196.5,403.5,35.7,0.0,3.76,15:57,Karina
3,11/24/24,Run,119:21,6.15,13:22,82:14,9.91,4.49,418.6,417.7,192.9,444.6,46.8,0.0,4.64,12:56,Karina
4,12/17/24,Run,73:39,2.97,9:17,27:38,11.05,6.46,93.8,93.8,27.9,58.7,28.8,0.0,6.53,9:11,Karina
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,11/8/25,Run,60:42,5.48,7:29,41:03,17.72,8.02,278.2,277.6,229.3,364.5,12.2,0.0,8.02,7:29,Alex_Activities
153,11/12/25,Run,50:08,5.33,7:33,40:17,11.05,7.94,351.0,352.4,273.0,388.1,26.3,0.0,8.10,7:24,Alex_Activities
154,11/20/25,Run,44:52,4.16,7:29,31:09,11.63,8.02,150.6,150.9,298.9,367.1,11.5,0.0,8.10,7:24,Alex_Activities
155,11/21/25,Run,63:16,6.46,7:44,49:58,13.24,7.76,285.8,270.0,327.1,444.6,12.6,0.0,7.85,7:39,Alex_Activities
