## Race Time Predictor

This machine learning model aims to predict the duration of a run (jog) based on gender, distance, elevation gain, heart rate zone and time of day. Simply enter the information for your run and click "Get Prediction". Note that this model was built using a dataset found on Kaggle [here](https://www.kaggle.com/datasets/olegoaer/running-races-strava). Also, to see the full model-building process, head to the project's [GitHub page](https://github.com/jackkq/Race-Time-Predictor).
***

In [29]:
# Make model

from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
import numpy as np

def load_data(path):
    return pd.read_csv(path, sep=";", header=0, names=["athlete_id", "gender", "timestamp", "distance", "elapsed_time", "elev_gain", "bpm"])

raw_data = load_data("./data.csv")

# Remove rows with 0 distance as these are not actually activities
data = raw_data[raw_data["distance"] != 0.0]
data = data.drop(columns=["athlete_id"])
data["timestamp"] = data.apply(lambda x: x['timestamp'][11:13], axis = 1)
data = data.reset_index(drop=True)

data["distance_cat"] = pd.cut(data["distance"], 
                                bins = [0., 5000., 10000., 15000., 20000., np.inf],
                                labels=[1, 2, 3, 4, 5])

split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
for train_index, test_index in split.split(data, data["distance_cat"]):
    train_set = data.loc[train_index]
    test_set = data.loc[test_index]

for set_ in (train_set, test_set):
    set_.drop("distance_cat", axis=1, inplace=True)

explore = train_set.copy()
explore["gender"] = explore["gender"].fillna("M")
explore = explore.dropna(subset=["bpm"])
explore.loc[explore['gender'] == 'M', 'gender'] = 0
explore.loc[explore['gender'] == 'F', 'gender'] = 1

def map_to_zone(bpm):
    if bpm <= 104: 
        return 1
    elif bpm > 104 and bpm <= 121:
        return 2
    elif bpm > 121 and bpm <= 139:
        return 3
    elif bpm > 139 and bpm <= 156:
        return 4
    else:
        return 5 

explore["bpm"] = explore["bpm"].apply(map_to_zone)

explore["timestamp"] = explore["timestamp"].astype("int")
explore["time_of_day"] = (explore['timestamp'] % 24) // 4 
explore = explore.drop(columns=["timestamp"])
prepared = explore.drop("elapsed_time", axis=1) 
labels = explore["elapsed_time"].copy()

scaler = StandardScaler().set_output(transform="pandas")
prepared = scaler.fit_transform(prepared)

model = ElasticNet(alpha=0.1, l1_ratio=0.93)
model.fit(prepared, labels)
pass

In [13]:
import datetime

def map_time_to_int(time):
    hour = time.hour
    if 0 <= hour < 4:
        return 0
    elif 4 <= hour < 8:
        return 1
    elif 8 <= hour < 12:
        return 2
    elif 12 <= hour < 16:
        return 3
    elif 16 <= hour < 20:
        return 4
    else:
        return 5
    
def format_time(seconds):
    hours = seconds // 3600
    remaining_seconds = seconds % 3600
    minutes = remaining_seconds // 60
    remaining_seconds %= 60
    return hours, minutes, round(remaining_seconds)

In [14]:
import ipywidgets as widgets

layout = widgets.Layout(width='auto')

def get_prediction(gender, distance, elev_gain, bpm, time_of_day):
    gender = (0 if gender=='M' else 1)
    distance = distance*1000 # Use m instead of km
    time_of_day = map_time_to_int(time_of_day)
    predict_data = pd.DataFrame(np.array([[gender, distance, elev_gain, bpm, time_of_day]]), columns = ['gender','distance','elev_gain', 'bpm','time_of_day'])
    make_predict = scaler.transform(predict_data)
    prediction = model.predict(make_predict)[0]
    hours, minutes, seconds = format_time(prediction)
    print("The model predicts that your run will take {} hours, {} minutes and {} seconds.".format(hours, minutes, seconds))
im = widgets.interact_manual(get_prediction,gender=widgets.Dropdown(options=['M', 'F'], value='M', description='Gender: ', style= {'description_width': 'initial'}),
    distance = widgets.FloatSlider(min=0, max=100, step=0.1, description='Distance (km): ', style= {'description_width': 'initial'}),
    elev_gain = widgets.IntSlider(min=0, max=1000, step=1, description='Elevation gain (m): ', style= {'description_width': 'initial'}),
    bpm = widgets.Dropdown(options=[1,2,3,4,5], value=1, description='Heart Rate Zone: ', style= {'description_width': 'initial'}),
    time_of_day = widgets.TimePicker(description='Time of run: ', style= {'description_width': 'initial'})
)
im.widget.children[-2].description = 'Get Prediction'
display(im)
pass

interactive(children=(Dropdown(description='Gender: ', options=('M', 'F'), style=DescriptionStyle(description_…

<function __main__.get_prediction(gender, distance, elev_gain, bpm, time_of_day)>