In [6]:
import numpy as np
from utils import get_data, get_table, get_predictions, plot_rmse, other_stats, add_intervals_to_test, plot_interval_checks, plot_finish_groups, plot_finish_age_gender
# Seed NumPy's global RNG before any data is drawn so reruns are repeatable
# (assumes get_data samples through np.random — TODO confirm in utils).
np.random.seed(2025)

# Sampling configuration shared by every race.
size = 125
size_test = 1000
save_val = True
train_yr, test_yr = [2021, 2022, 2023], [2024]

# Build one (train, test) pair per race. The races are requested in the
# order bos -> nyc -> chi, which matters if get_data consumes the seeded RNG.
data = {
    race_code: tuple(
        get_data(
            racename=race_code,
            size_train=size,
            size_test=size_test,
            train_lis=train_yr,
            test_lis=test_yr,
            save=save_val,
        )
    )
    for race_code in ("bos", "nyc", "chi")
}

# Keep the individual frames available under their flat names as well.
train_bos, test_bos = data["bos"]
train_nyc, test_nyc = data["nyc"]
train_chi, test_chi = data["chi"]
# NOTE(review): a previously saved split could presumably be reloaded from
# processed_data/test_nyc.csv instead of resampling — confirm against get_data.

In [7]:
# Race analysed by the remaining cells; `data` maps race -> (train, test).
race = "nyc"
test = data[race][1]

# One entry per fitted model: (label, parameter CSV, feature columns).
model_info = [
    ("model1", f"stan_results/model1/params_{race}.csv", ["alpha", "total_pace"]),
    ("model2", f"stan_results/model2/params_{race}.csv", ["alpha", "total_pace", "curr_pace"]),
    ("model3", f"stan_results/model3/params_{race}.csv", ["alpha", "total_pace", "curr_pace", "male", "age"]),
]

# Predictions keyed by model label, computed on the held-out test frame.
mpreds = dict(
    (name, get_predictions(test, path, feats_lis=feats, full=False))
    for name, path, feats in model_info
)

# Combine the test frame with the per-model predictions; the bare name on
# the last line makes the notebook render the resulting table.
test2 = get_table(test, mpreds)
test2

Unnamed: 0,id,dist,curr_pace,total_pace,finish,age,gender,year,prop,propleft,male,propxcurr,malexage,alpha,lvl,extrap,model1,model2,model3
0,161315,5K,2.311604,2.311604,2.336379,38,W,2024,0.118497,0.881503,0,0.273919,0,1,1,3.225950,42.727564,42.770290,38.703891
1,165765,5K,2.239140,2.239140,2.190696,38,M,2024,0.118497,0.881503,1,0.265332,38,1,1,-6.945217,34.886190,34.939410,47.476884
2,159788,5K,2.472799,2.472799,2.376246,37,W,2024,0.118497,0.881503,0,0.293020,0,1,1,-11.555700,23.470145,23.493758,19.381125
3,142012,5K,3.285151,3.285151,2.857191,26,M,2024,0.118497,0.881503,1,0.389282,26,1,1,-32.064033,-10.639308,-10.662851,-6.916016
4,125569,5K,3.477051,3.477051,3.562563,46,M,2024,0.118497,0.881503,1,0.412022,46,1,1,4.854700,24.331630,24.303141,25.352262
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7995,122995,40K,3.676471,3.945941,3.924751,31,M,2024,0.947980,0.052020,1,3.485219,31,1,8,-0.962202,0.035940,-0.395154,-0.449341
7996,130342,40K,2.951594,3.272787,3.254029,52,M,2024,0.947980,0.052020,1,2.798051,52,1,8,-1.238629,-0.158821,-0.322005,-0.182322
7997,133143,40K,2.727769,3.140950,3.132284,55,M,2024,0.947980,0.052020,1,2.585869,55,1,8,-0.619448,0.474010,0.734981,0.925156
7998,142224,40K,2.767017,2.861026,2.850821,40,W,2024,0.947980,0.052020,0,2.623076,0,1,8,-0.879877,0.236150,-0.892943,-0.878948


In [8]:
# Model columns to evaluate; "extrap" is added as an extra comparison column.
models = ["model1", "model2", "model3"]
rmse_labels = [*models, "extrap"]

# Kept as the final expression so the returned table is displayed; the
# recorded run also reports a file saved as analysis/nyc_rmse_bar.png.
plot_rmse(test2, rmse_labels, save_name=race, bar=True)

File saved: analysis/nyc_rmse_bar.png


Unnamed: 0_level_0,extrap,model1,model2,model3,pcnt_model1,pcnt_model2,pcnt_model3
dist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5K,29.018639,24.909457,24.921521,25.981397,0.141605,0.141189,0.104665
10K,28.185188,21.840241,21.000448,21.80733,0.225116,0.254912,0.226284
15K,26.434595,19.140733,17.29079,17.257587,0.275921,0.345903,0.347159
20K,22.896961,16.132399,13.558487,13.787064,0.295435,0.407848,0.397865
25K,17.30165,12.852564,10.308957,10.685641,0.257148,0.404163,0.382392
30K,13.336283,9.756698,7.860225,7.882412,0.26841,0.410614,0.40895
35K,8.011997,6.456665,5.508506,5.532562,0.194125,0.312468,0.309465
40K,1.901339,1.81342,1.147388,1.146627,0.046241,0.396537,0.396937


In [9]:
# Both breakdown plots compare the same two columns with the same options.
compare_cols = ("extrap", "model2")
shared_opts = {"num": 4, "overall": True, "save_name": race}

# A fresh list is passed to each call so neither plot can affect the other.
plot_finish_groups(test2, label_pair=list(compare_cols), **shared_opts, palette="inferno")
plot_finish_age_gender(test2, label_pair=list(compare_cols), **shared_opts, palette="crest", grouping="age")

# Final expression: its return value is what the notebook renders below.
other_stats(test2[[*models, "extrap"]], test2["finish"])

File saved: analysis/nyc_rmse_groups.png
4 [0.0, 25.0, 50.0, 75.0] [19. 30. 39. 47.]
File saved: analysis/nyc_rmse_gender_age.png


Unnamed: 0,model1,model2,model3,extrap
0,15.927404,14.76746,15.193499,20.629335
1,0.922054,0.932994,0.929072,0.86924


In [10]:
# Scale factor applied to the full predictions: 42195 / 60.
# NOTE(review): presumably marathon distance in metres over 60, i.e. turning
# a pace in m/s into a finish time in minutes — confirm units against utils.
finish_scale = 42195 / 60

# Re-run each model with full=True and convert its output by the scale above.
mpreds2 = {
    name: finish_scale / get_predictions(test, path, feats_lis=feats, full=True)
    for name, path, feats in model_info
}

# Attach the intervals to the predictions table, then plot the checks.
intervals_tbl = add_intervals_to_test(test2, mpreds2, models)
i_check, i_sizes = plot_interval_checks(intervals_tbl, models, save_name=race)

File saved: analysis/nyc_intervals.png
