In [29]:
import ipywidgets as widgets
from IPython.display import display

# Race IDs offered in the picker. The selected value is read later through
# `race_id_dropdown.value`, so downstream cells must be re-run after changing it.
race_id_options = [35, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]

race_id_dropdown = widgets.Dropdown(description='Race ID:', options=race_id_options)
display(race_id_dropdown)

Dropdown(description='Race ID:', options=(35, 36, 37, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50), value=35)

In [30]:
import pandas as pd
import sys
from pathlib import Path

# Make the parent folder importable (presumably where `config` lives — verify).
parent_folder = Path.cwd().parent

# Only add the entry once: without this guard every re-run of the cell appends
# another copy of the same path to sys.path.
if str(parent_folder) not in sys.path:
    sys.path.append(str(parent_folder))

# import config
from config import DATA_DIR

# Name of the column holding the actual result that predictions are compared to.
target_col = "finishing_position"

# The CSV path is relative, so it is resolved against the current working directory.
# NOTE(review): DATA_DIR is imported above but not used here — confirm whether the
# file is meant to be read from DATA_DIR instead.
df = pd.read_csv("arca_lr_analysis_ready_finishing.csv")

In [31]:
# Per-race report: compare the predicted rank with the actual finish for the race
# currently selected in the dropdown. Re-run this cell after changing the selection.
race_id_to_check = race_id_dropdown.value

# Maximum number of rows of the per-driver table to print.
max_rows_to_show = 42

race_df = df[df["race_id"] == race_id_to_check].copy()

# Absolute error between predicted rank and actual finish, one value per row.
race_df["abs_error_weighted"] = (race_df["pred_rank_lr"] - race_df[target_col]).abs()

# Summary metrics (both are NaN when the selection is empty).
mean_error = race_df["abs_error_weighted"].mean()
median_error = race_df["abs_error_weighted"].median()

# Sort by predicted rank (best at the top).
race_df = race_df.sort_values("pred_rank_lr")

# Columns shown in the printed table.
cols_to_show = [
    "race_id",
    "driver_fullname",
    # "team_name",
    # "weighted_score_lr_flipped",
    "pred_rank_lr",
    target_col  # actual for comparison
]

if race_df.empty:
    # Without this guard an unknown race id silently prints NaN metrics and an
    # empty table, which is easy to misread as a model failure.
    print(f"No rows found for race {race_id_to_check}; nothing to evaluate.")
else:
    print(f"Weighted model for race {race_id_to_check}:")
    print(f"Mean absolute error: {mean_error:.2f}")
    print(f"Median absolute error: {median_error:.2f}")
    print(race_df[cols_to_show].head(max_rows_to_show))

Weighted model for race 36:
Mean absolute error: 6.47
Median absolute error: 7.00
      race_id    driver_fullname  pred_rank_lr  finishing_position
1422       36       Lawless Alan           1.0                 4.0
743        36        Brent Crews           2.0                 1.0
1134       36        Lavar Scott           3.0                 5.0
1035       36      Brenden Queen           4.0                 2.0
631        36  Trevor Huddleston           5.0                12.0
1282       36         Tyler Reif           6.0                19.0
1539       36   Isabella Robusto           7.0                31.0
247        36        Tanner Reif           8.0                13.0
831        36        Kyle Keller           9.0                15.0
1486       36  Patrick Staropoli          10.0                 8.0
225        36      Adrian Ferrer          11.0                18.0
1385       36  Treyten Lapcevich          12.0                 3.0
43         36           Kole Raz          13.0 

In [32]:
# Restrict to the 2025 season; work on a copy so `df` itself is left untouched.
is_2025 = df["race_season"] == 2025
df_2025 = df.loc[is_2025].copy()

# Row-wise absolute gap between predicted rank and the actual result.
df_2025["abs_error_my_model"] = df_2025["pred_rank_lr"].sub(df_2025[target_col]).abs()

print("\nMean Absolute Error (2025):")
mae_2025 = df_2025["abs_error_my_model"].mean()
print("My model:", mae_2025.round(2))


Mean Absolute Error (2025):
My model: 4.43


In [33]:
from scipy.stats import spearmanr
import numpy as np
import pandas as pd

def race_spearman_corrs(g, target=None, pred_col="pred_rank_lr"):
    """Spearman correlation between predicted rank and actual result for one group.

    Parameters
    ----------
    g : pandas.DataFrame
        Rows of a single group (here: one race from ``groupby("race_id")``).
    target : str, optional
        Name of the actual-result column. When omitted, the notebook-level
        ``target_col`` is used.
    pred_col : str, optional
        Name of the predicted-rank column.

    Returns
    -------
    pandas.Series
        Always contains exactly one entry, ``"my_model_corr"``. The value is NaN
        when either column has fewer than two distinct values, because a rank
        correlation is undefined for a constant input.
    """
    if target is None:
        target = target_col

    # Default to NaN so that every group yields the same index; returning an empty
    # Series for some groups makes groupby.apply produce ragged results and can
    # leave "my_model_corr" missing for downstream cells.
    corr = np.nan

    has_spread = g[target].nunique() > 1 and g[pred_col].nunique() > 1
    if has_spread:
        corr = spearmanr(g[pred_col], g[target]).correlation

    return pd.Series({"my_model_corr": corr})

# Per-race Spearman correlation for the 2025 rows that have an actual result.
# Only the two columns the helper reads are selected before apply, so the
# grouping key is not part of the frames passed to it; newer pandas versions
# warn when apply operates on the grouping column.
race_corrs_2025 = (
    df_2025.dropna(subset=[target_col])
    .groupby("race_id")[["pred_rank_lr", target_col]]
    .apply(race_spearman_corrs)
)

  .apply(race_spearman_corrs)


In [34]:
# Per-race table first, then the mean of the per-race correlations.
print("Race-by-race Spearman Correlations (2025):")
per_race_table = race_corrs_2025.round(3)
print(per_race_table)

print("\nAverage correlations across 2025:")
season_avg_corr = race_corrs_2025["my_model_corr"].mean()
print("My model:", season_avg_corr.round(3))

Race-by-race Spearman Correlations (2025):
         my_model_corr
race_id               
35               0.502
36               0.674
37               0.302
38               0.725
39               0.625
40               0.837
41               0.905
42               0.913
43               0.869
44               0.563
45               0.676
46               0.864
47               0.708
48               0.349
49               0.794
50               0.546
51               0.952
52               0.739
53               0.665
54               0.794
55               0.853
56               0.690
57               0.594
58               0.636
59               0.455
60               0.698
61               0.762
62               0.307
63               0.698
64               0.762
65               0.787
66               0.569
67               0.576
68               0.679

Average correlations across 2025:
My model: 0.679
