In [1]:
import numpy as np
from utils import get_data, get_table, get_predictions, other_stats, add_intervals_to_test
from plots import plot_rmse, plot_finish_groups, plot_interval_checks, plot_finish_age_gender
# Fix the seed of NumPy's global random state for this session.
np.random.seed(2025)

# Settings forwarded unchanged to every get_data call below.
size1, size2 = 250, 4000  # passed as size_train / size_test
save_val = True  # passed as save
train_yr, test_yr = [2021, 2022, 2023], [2024]  # passed as train_lis / test_lis

# Load each race in the fixed order bos -> nyc -> chi and keep the
# (train, test) pair under its race code.
data = {}
for racename in ("bos", "nyc", "chi"):
    train_df, test_df = get_data(
        racename=racename,
        size_train=size1,
        size_test=size2,
        train_lis=train_yr,
        test_lis=test_yr,
        save=save_val,
    )
    data[racename] = (train_df, test_df)

# Per-race names, kept so that other cells can still refer to them directly.
train_bos, test_bos = data["bos"]
train_nyc, test_nyc = data["nyc"]
train_chi, test_chi = data["chi"]

# Disabled alternative: reload a previously saved test split instead of
# rebuilding it (pandas is not imported as `pd` in this cell).
# test_nyc = pd.read_csv("processed_data/test_nyc.csv")

In [2]:
# Race evaluated by this cell and the ones after it.
race = "nyc"
test = data[race][1]  # second element of the (train, test) pair

# One row per model: (label, parameter CSV for this race, feature names).
model_info = [
    (
        "M1",
        f"stan_results/model1/params_{race}.csv",
        ["alpha", "total_pace"],
    ),
    (
        "M2",
        f"stan_results/model2/params_{race}.csv",
        ["alpha", "total_pace", "curr_pace"],
    ),
    (
        "M3",
        f"stan_results/model3/params_{race}.csv",
        ["alpha", "total_pace", "curr_pace", "male", "age"],
    ),
]

# Predictions for the test split, keyed by model label (full=False variant).
mpreds = {}
for label, params_csv, feature_names in model_info:
    mpreds[label] = get_predictions(test, params_csv, feats_lis=feature_names, full=False)

# Model labels in the same order as model_info, plus the baseline column name.
models = [label for label, _, _ in model_info]
baseline = "BL"

# Combine the test split with the per-model predictions and display the result.
test2 = get_table(test, mpreds, baseline_name=baseline)
test2

Unnamed: 0,id,dist,curr_pace,total_pace,finish,age,gender,year,prop,propleft,male,propxcurr,malexage,alpha,lvl,BL,M1,M2,M3
0,161315,5K,2.311604,2.311604,2.336379,38,W,2024,0.118497,0.881503,0,0.273919,0,1,1,3.225950,39.156196,39.220071,34.533020
1,165765,5K,2.239140,2.239140,2.190696,38,M,2024,0.118497,0.881503,1,0.265332,38,1,1,-6.945217,30.776305,30.842508,35.944004
2,159788,5K,2.472799,2.472799,2.376246,37,W,2024,0.118497,0.881503,0,0.293020,0,1,1,-11.555700,20.892179,20.951417,16.456977
3,142012,5K,3.285151,3.285151,2.857191,26,M,2024,0.118497,0.881503,1,0.389282,26,1,1,-32.064033,-10.634253,-10.590892,-10.426367
4,125569,5K,3.477051,3.477051,3.562563,46,M,2024,0.118497,0.881503,1,0.412022,46,1,1,4.854700,24.633421,24.674198,26.379966
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31995,138845,40K,2.988643,2.942908,2.953384,34,W,2024,0.947980,0.052020,0,2.833173,0,1,8,0.847683,1.889895,0.662701,0.557553
31996,129560,40K,2.707093,3.321707,3.295969,45,M,2024,0.947980,0.052020,1,2.566269,45,1,8,-1.653254,-0.615513,0.125385,0.154894
31997,139810,40K,2.757860,2.934488,2.930208,45,M,2024,0.947980,0.052020,1,2.614395,45,1,8,-0.349981,0.691965,0.231739,0.349111
31998,170500,40K,1.992032,1.994217,1.999668,37,W,2024,0.947980,0.052020,0,1.888406,0,1,8,0.961379,1.706816,0.055384,-0.115124


In [3]:
# RMSE plot for the baseline and the models (bar=True); the return value is
# kept in `tbl`.
tbl = plot_rmse(
    test2,
    models,
    baseline,
    save_name=race,
    bar=True,
)

# Summary statistics for the baseline + model columns against the "finish"
# column; this call is the cell's displayed result.
other_stats(test2[[baseline, *models]], test2["finish"], save_name=race)
# Disabled debugging probe (note: the table shown above has no "model1" column):
# test2["model1"].argmax()

File saved: analysis/plots/nyc_rmse_bar.png
File saved: analysis/tables/nyc_rmse.csv
File saved: analysis/tables/nyc_rmse2.csv


Unnamed: 0,BL,M1,M2,M3,pcnt_BL,pcnt_M1,pcnt_M2,pcnt_M3
Overall RMSE,21.199937,15.657409,14.8282,14.745852,-,-,-,-
Overall R-squared,0.87206,0.930213,0.937409,0.938102,-,-,-,-


In [4]:
# Model compared against the baseline in both group plots.
c_model = "M2"

# Keyword arguments that are identical for the two plotting calls.
group_plot_kwargs = dict(
    model=c_model,
    baseline=baseline,
    num=4,
    overall=True,
    save_name=race,
)

# Plot by finish group; the return value is kept in `a`.
a = plot_finish_groups(test2, palette="inferno", **group_plot_kwargs)

# Plot by age/gender, using the "age" grouping.
plot_finish_age_gender(test2, palette="crest", grouping="age", **group_plot_kwargs)

File saved: analysis/plots/nyc_rmse_groups.png
File saved: analysis/plots/nyc_rmse_gender_age.png


In [6]:
# Constant numerator applied to every full prediction set.
# NOTE(review): 42195 looks like the marathon distance in metres and 60 a
# seconds-to-minutes factor, which would turn a pace in m/s into a finish
# time in minutes - confirm against get_predictions.
scale = 42195 / 60

# Full (full=True) predictions per model, transformed as scale / prediction.
mpreds2 = {
    label: scale / get_predictions(test, params_csv, feats_lis=feature_names, full=True)
    for label, params_csv, feature_names in model_info
}

# Attach the intervals to the comparison table, then plot the interval checks.
intervals_tbl = add_intervals_to_test(test2, mpreds2, models)
i_check, i_sizes = plot_interval_checks(
    intervals_tbl,
    models,
    save_name=race,
)

File saved: analysis/plots/nyc_intervals
analysis/tables/nyc_intsizes.csv
analysis/tables/nyc_intcheck.csv
