In [17]:
import numpy as np
import pandas as pd
from utils import get_data, get_table, get_predictions, plot_rmse, other_stats, add_intervals_to_test, plot_interval_checks, plot_finish_groups, plot_finish_age_gender
# Seed NumPy's legacy global RNG before any data is requested.
# NOTE(review): assumes get_data draws its samples through np.random — confirm in utils.
np.random.seed(2025)

size = 125
save_val = True
train_yr, test_yr = [2021, 2022, 2023], [2024]

# Test-sample size requested for each race. Boston asks for 1000 rows while the
# other races reuse `size`.
# NOTE(review): presumably because Boston is the race analysed below — confirm.
test_sizes = {"bos": 1000, "nyc": size, "chi": size}

# Races are fetched in insertion order (bos, nyc, chi); each value is a
# (train, test) pair keyed by race code.
data = {
    code: tuple(
        get_data(
            racename=code,
            size_train=size,
            size_test=n_test,
            train_lis=train_yr,
            test_lis=test_yr,
            save=save_val,
        )
    )
    for code, n_test in test_sizes.items()
}

# Expose the individual frames under their original names for later cells.
(train_bos, test_bos), (train_nyc, test_nyc), (train_chi, test_chi) = data["bos"], data["nyc"], data["chi"]

# NOTE(review): a previously saved split can apparently be reloaded instead of resampled,
# e.g. pd.read_csv("processed_data/test_nyc.csv") — confirm the path get_data writes to.

In [18]:
# Race under evaluation. This one key drives both the held-out sample and the
# parameter files below, so they always refer to the same race.
race = "bos"
test = data[race][1]  # data maps race code -> (train, test)

# One entry per fitted model: (label, parameter CSV path, names passed as feats_lis).
model_info = [
    ("model1", f"stan_results/model1/params_{race}.csv", ["alpha", "total_pace"]),
    ("model2", f"stan_results/model2/params_{race}.csv", ["alpha", "total_pace", "curr_pace"]),
    ("model3", f"stan_results/model3/params_{race}.csv", ["alpha", "total_pace", "curr_pace", "male", "age"]),
]

# Model labels are derived from model_info so the two lists cannot drift apart.
models = [name for name, _path, _feats in model_info]

# Predictions for every model on the selected test sample, keyed by model label.
mpreds = {name: get_predictions(test, path, feats_lis=feats, full=False) for (name, path, feats) in model_info}

# Build the comparison table from the same `test` frame the predictions used.
# (This used to pass test_bos directly, which ignored `race` whenever it was changed.)
test2 = get_table(test, mpreds)

In [19]:
# Compare every model column with the "extrap" column. Kept as the cell's last
# expression so the value returned by plot_rmse is rendered below the cell.
# The save name is a plain literal: it has no placeholders, so no f-string is needed.
plot_rmse(test2, models + ["extrap"], save_name="-", bar=True)

File saved: analysis/-_rmse_bar.png


Unnamed: 0_level_0,extrap,model1,model2,model3,pcnt_model1,pcnt_model2,pcnt_model3
dist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5K,31.144609,22.104028,22.08788,21.71801,0.290278,0.290796,0.302672
10K,28.89976,19.759995,18.464104,18.239917,0.316257,0.361098,0.368856
15K,26.514288,18.212254,17.359618,17.199871,0.313115,0.345273,0.351298
20K,22.954247,15.935229,13.906636,13.787909,0.305783,0.394158,0.399331
25K,19.192217,13.402563,12.202356,12.152836,0.301667,0.364203,0.366783
30K,13.946247,10.495328,7.915663,7.88067,0.247444,0.432416,0.434925
35K,7.878522,6.707869,4.863553,4.839919,0.148588,0.382682,0.385682
40K,2.140023,1.959993,1.423421,1.398897,0.084125,0.334857,0.346317


In [20]:
# Grouped comparison of the "extrap" column against model2.
plot_finish_groups(
    test2,
    label_pair=["extrap", "model2"],
    num=4,
    overall=True,
    save_name="--",
    palette="inferno",
)

# Same label pair, grouped by age.
plot_finish_age_gender(
    test2,
    label_pair=["extrap", "model2"],
    num=4,
    overall=True,
    save_name="=",
    palette="crest",
    grouping="age",
)

# Summary statistics of each prediction column against the "finish" column;
# left as the last expression so the returned table is displayed.
other_stats(test2[models + ["extrap"]], test2["finish"])

File saved: analysis/--_rmse_groups.png
4 [0.0, 25.0, 50.0, 75.0] [18. 35. 44. 53.]
File saved: analysis/=_rmse_gender_age.png


Unnamed: 0,model1,model2,model3,extrap
0,15.013664,13.967748,13.808904,21.402621
1,0.898744,0.91236,0.914342,0.79423


In [21]:
# Predictions requested with full=True for each model, rescaled by (42195 / 60).
# NOTE(review): 42195 is presumably the marathon distance in metres and the /60 a
# seconds-to-minutes conversion, i.e. predicted pace (m/s) -> finish time in minutes —
# confirm against what get_predictions returns.
mpreds2 = {
    name: (42195 / 60) / get_predictions(test, path, feats_lis=feats, full=True)
    for (name, path, feats) in model_info
}

# Attach per-model intervals to the comparison table, then plot the interval checks.
intervals_tbl = add_intervals_to_test(test2, mpreds2, models)
# The save name is a plain literal: it has no placeholders, so no f-string is needed.
i_check, i_sizes = plot_interval_checks(intervals_tbl, models, save_name="---")

File saved: analysis/---_intervals.png
