In [5]:
import pandas as pd
import os

# Folder holding one CSV per runner, resolved relative to the current working
# directory. Each file's base name (without ".csv") is used as that runner's
# "Person" label further down.
data_folder = "Data"

# ------- HELPERS --------

def seconds_to_mmss(value):
    """Format a duration given in seconds as an ``M:SS`` string.

    Minutes are not wrapped into hours, so 7161 seconds becomes ``"119:21"``.
    Fractional seconds are truncated, not rounded.

    Args:
        value: Duration in seconds. Anything ``float()`` accepts works
            (int, float, numeric string).

    Returns:
        The formatted string, or ``None`` when ``value`` is missing
        (``None`` / NaN), matching how ``mph_to_pace`` reports missing input.

    Raises:
        ValueError: If ``value`` is a string that is not numeric.
    """
    # A missing duration would otherwise crash in int(float("nan")).
    if pd.isna(value):
        return None
    minutes, seconds = divmod(int(float(value)), 60)
    return f"{minutes}:{seconds:02d}"

def mph_to_pace(mph):
    """Turn a speed in miles per hour into an ``M:SS`` minutes-per-mile string.

    Args:
        mph: Speed in miles per hour.

    Returns:
        The pace as ``"M:SS"`` with seconds rounded to the nearest whole
        second, or ``None`` when ``mph`` is missing, zero or negative.
    """
    if pd.isna(mph):
        return None
    if mph <= 0:
        return None

    minutes_per_mile = 60 / mph
    whole_minutes = int(minutes_per_mile)
    leftover_seconds = int(round((minutes_per_mile - whole_minutes) * 60))

    # Rounding can push the seconds up to a full minute; carry it over so
    # the result never reads like "7:60".
    if leftover_seconds == 60:
        whole_minutes, leftover_seconds = whole_minutes + 1, 0

    return "{}:{:02d}".format(whole_minutes, leftover_seconds)

# ------- READ & PROCESS ALL CSV FILES -------

# Columns retained from each export, plus the "Person" label added per file.
columns_to_keep = [
    "Activity Date", "Activity Type", "Elapsed Time", "Distance", "Moving Time",
    "Average Speed", "Elevation Gain", "Elevation Loss",
    "Average Grade Adjusted Pace", "Person"
]
elevation_cols = ["Elevation Gain", "Elevation Loss"]

# Unit conversion factors, defined once instead of on every loop iteration.
# NOTE(review): these assume the export stores Distance in kilometres, the two
# speed columns in metres/second and elevation in metres - confirm against the
# data source.
km_to_miles = 0.621371
mps_to_mph = 2.23694
meters_to_feet = 3.28084


def _format_short_date(timestamp):
    """Format a timestamp as ``M/D/YY`` without zero-padding month or day.

    Built by hand because the non-padded strftime directives differ between
    platforms ("%-m" on Linux/macOS, "%#m" on Windows). Missing timestamps
    (NaT) are returned as ``None``.
    """
    if pd.isna(timestamp):
        return None
    return f"{timestamp.month}/{timestamp.day}/{timestamp.year % 100:02d}"


all_dataframes = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the merged output reproducible between runs and machines.
for filename in sorted(os.listdir(data_folder)):
    if not filename.endswith(".csv"):
        continue

    # The file's base name identifies the runner.
    person_name = os.path.splitext(filename)[0]
    file_path = os.path.join(data_folder, filename)
    df = pd.read_csv(file_path)
    df["Person"] = person_name

    # Fail with a message that names the offending file rather than a bare
    # KeyError from the column selection.
    missing_columns = [col for col in columns_to_keep if col not in df.columns]
    if missing_columns:
        raise KeyError(f"{file_path} is missing expected columns: {missing_columns}")

    # Keep only runs and the columns of interest. The explicit copy makes the
    # result independent of the raw frame, so the assignments below do not
    # trigger SettingWithCopyWarning.
    df = df.loc[df["Activity Type"] == "Run", columns_to_keep].copy()

    activity_dates = pd.to_datetime(df["Activity Date"], format="mixed")
    df["Activity Date"] = activity_dates.apply(_format_short_date)

    # Durations arrive as seconds; show them as M:SS.
    df["Elapsed Time"] = df["Elapsed Time"].apply(seconds_to_mmss)
    df["Moving Time"] = df["Moving Time"].apply(seconds_to_mmss)

    df["Distance"] = (df["Distance"] * km_to_miles).round(2)
    df["Average Speed"] = (df["Average Speed"] * mps_to_mph).round(2)

    # The source column is named "...Pace" but is converted like a speed, so it
    # is stored under a speed name and the original column is dropped.
    df["Average Grade Adjusted Speed"] = (
        df["Average Grade Adjusted Pace"] * mps_to_mph
    ).round(2)
    df = df.drop(columns=["Average Grade Adjusted Pace"])

    df[elevation_cols] = (df[elevation_cols] * meters_to_feet).round(1)

    # Derive M:SS pace strings from the mph columns.
    df["Pace"] = df["Average Speed"].apply(mph_to_pace)
    df["Grade Adjusted Pace"] = df["Average Grade Adjusted Speed"].apply(mph_to_pace)

    all_dataframes.append(df)

# ------- MERGE ALL RUNNERS INTO ONE DF -------

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# raise the same exception type with a message that points at the real cause.
if not all_dataframes:
    raise ValueError(
        f"No .csv files were found in {data_folder!r}; there is nothing to merge."
    )

runs = pd.concat(all_dataframes, ignore_index=True)

# Fix the column order of the exported file.
runs = runs[
    [
        "Activity Date", "Activity Type", "Elapsed Time", "Distance", "Pace",
        "Moving Time", "Average Speed", "Elevation Gain", "Elevation Loss",
        "Average Grade Adjusted Speed", "Grade Adjusted Pace", "Person"
    ]
]

# Written relative to the current working directory, without the index column.
output_path = "runs_processed.csv"
runs.to_csv(output_path, index=False)
print(f"Saved {output_path} successfully!")

Saved runs_processed.csv successfully!


In [6]:
# Show the merged frame as the cell output.
runs

Unnamed: 0,Activity Date,Activity Type,Elapsed Time,Distance,Pace,Moving Time,Average Speed,Elevation Gain,Elevation Loss,Average Grade Adjusted Speed,Grade Adjusted Pace,Person
0,9/5/23,Run,24:16,2.04,10:56,22:18,5.49,42.0,42.0,,,Karina
1,11/19/24,Run,18:49,1.07,9:10,9:50,6.55,9.8,149.9,6.38,9:24,Karina
2,11/21/24,Run,29:14,1.62,17:45,28:38,3.38,197.8,197.8,3.76,15:57,Karina
3,11/24/24,Run,119:21,6.15,13:22,82:14,4.49,418.6,417.7,4.64,12:56,Karina
4,12/17/24,Run,73:39,2.97,9:17,27:38,6.46,93.8,93.8,6.53,9:11,Karina
...,...,...,...,...,...,...,...,...,...,...,...,...
419,10/30/25,Run,33:39,4.14,7:14,30:00,8.29,91.5,96.8,8.33,7:12,Zubin
420,11/2/25,Run,173:16,10.13,7:50,79:24,7.66,376.6,749.3,7.65,7:51,Zubin
421,11/14/25,Run,85:10,10.21,7:40,78:13,7.83,237.9,238.5,7.87,7:37,Zubin
422,11/20/25,Run,48:31,5.31,9:06,48:25,6.59,55.4,53.1,6.60,9:05,Zubin
