In [1]:
import pandas as pd
from sklearn.metrics import log_loss, brier_score_loss

import matplotlib.pyplot as plt
import seaborn as sns

# Set seaborn's global plotting defaults: "darkgrid" style with the "paper" context.
sns.set_theme(style="darkgrid", context="paper")

In [2]:
def _implied_probability(odds):
    """Convert one quoted price into a raw implied win probability.

    Positive prices map to ``100 / (odds + 100)``; every other value maps to
    ``-odds / (-odds + 100)``. This looks like the American moneyline
    convention -- TODO confirm against the odds source.
    """
    if odds > 0:
        return 100 / (odds + 100)
    return -odds / (-odds + 100)


# Read the odds CSV, convert both corners' prices to raw implied
# probabilities, and keep only red's share of the pair as the "Bovada" column.
odds_df = (
    pd.read_csv(
        "../../data/backtesting/backtest_odds_case_study.csv",
        parse_dates=["date"],
    )
    .drop(columns=["sportsbook"])
    .assign(
        red_odds=lambda frame: frame["red_odds"].apply(_implied_probability),
        blue_odds=lambda frame: frame["blue_odds"].apply(_implied_probability),
    )
    # Dividing by the sum of both sides rescales the pair so it adds up to 1.
    .assign(
        Bovada=lambda frame: frame["red_odds"] / (frame["red_odds"] + frame["blue_odds"])
    )
    .drop(columns=["red_odds", "blue_odds"])
)

# Attach the model's predictions by bout_id (merge defaults to an inner join,
# so only bouts present in both files are kept) and label them "Model".
preds_df = pd.read_csv("../../model_files/lr/case_study/predictions.csv")
df = odds_df.merge(preds_df, on=["bout_id"])
df = df.rename(columns={"y_pred": "Model"})
df

Unnamed: 0,bout_id,event_id,date,red_win,Bovada,Model
0,c1356395d6b055d7,46effbd1135423c5,2017-01-15,0.0,0.489879,0.508947
1,ae803440d778a12b,46effbd1135423c5,2017-01-15,0.0,0.328185,0.296975
2,3f7684492c9df05e,46effbd1135423c5,2017-01-15,1.0,0.585987,0.551018
3,8156479490877d08,46effbd1135423c5,2017-01-15,1.0,0.601770,0.784083
4,1a81573425c585fb,46effbd1135423c5,2017-01-15,1.0,0.559946,0.470315
...,...,...,...,...,...,...
2617,5238f6470d0557fb,72c9c2eadfc3277e,2024-12-14,0.0,0.305344,0.469556
2618,7b1bc4ff776f12c1,72c9c2eadfc3277e,2024-12-14,0.0,0.735516,0.843433
2619,1a635a5e4551e7d5,72c9c2eadfc3277e,2024-12-14,1.0,0.772329,0.735444
2620,7521015554088962,72c9c2eadfc3277e,2024-12-14,1.0,0.351079,0.355399


In [3]:
# Score only the bouts that have a recorded red_win outcome.
subset = df[df["red_win"].notna()].copy()

# Evaluate both probability columns against the same outcomes, one metric at
# a time; the generator variable does not leak into the notebook namespace.
model_log_loss, bovada_log_loss = (
    log_loss(subset["red_win"], subset[source]) for source in ("Model", "Bovada")
)
model_brier, bovada_brier = (
    brier_score_loss(subset["red_win"], subset[source]) for source in ("Model", "Bovada")
)

# Each delta is model minus Bovada, so a negative value means the model's
# loss is the smaller of the two.
print(
    f"Model Log Loss: {model_log_loss:.6f}",
    f"Bovada Log Loss: {bovada_log_loss:.6f}",
    f"Delta: {model_log_loss - bovada_log_loss:.6f}",
    sep="\n",
)

print(
    f"Model Brier Score: {model_brier:.6f}",
    f"Bovada Brier Score: {bovada_brier:.6f}",
    f"Delta: {model_brier - bovada_brier:.6f}",
    sep="\n",
)

Model Log Loss: 0.611250
Bovada Log Loss: 0.616538
Delta: -0.005288
Model Brier Score: 0.211724
Bovada Brier Score: 0.214093
Delta: -0.002369


In [11]:
# Same comparison as the overall one, limited to bouts dated in calendar year
# 2024 that have a recorded red_win outcome.
temp = df[(df["date"].dt.year == 2024) & df["red_win"].notna()].copy()

# Evaluate both probability columns against the same outcomes.
bovada_log_loss, model_log_loss = (
    log_loss(temp["red_win"], temp[source]) for source in ("Bovada", "Model")
)
bovada_brier_score, model_brier_score = (
    brier_score_loss(temp["red_win"], temp[source]) for source in ("Bovada", "Model")
)

# Each delta is model minus Bovada, so a negative value means the model's
# loss is the smaller of the two.
print(
    f"Model Log Loss: {model_log_loss:.6f}",
    f"Bovada Log Loss: {bovada_log_loss:.6f}",
    f"Delta: {model_log_loss - bovada_log_loss:.6f}",
    sep="\n",
)

print(
    f"Model Brier Score: {model_brier_score:.6f}",
    f"Bovada Brier Score: {bovada_brier_score:.6f}",
    f"Delta: {model_brier_score - bovada_brier_score:.6f}",
    sep="\n",
)

Model Log Loss: 0.583331
Bovada Log Loss: 0.583660
Delta: -0.000329
Model Brier Score: 0.199085
Bovada Brier Score: 0.199324
Delta: -0.000238


In [12]:
# Load the pickled case-study feature table (one row per bout "id").
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted source.
features = pd.read_pickle("../../data/features_case_study.pkl.xz")
# Bare last expression so the notebook renders the frame as the cell output.
features

Unnamed: 0,id,avg_knockdowns_scored_diff,cumulative_knockdowns_scored_diff,avg_knockdowns_scored_per_second_diff,cumulative_knockdowns_scored_per_second_diff,avg_knockdowns_scored_per_strike_landed_diff,cumulative_knockdowns_scored_per_strike_landed_diff,avg_knockdowns_scored_per_strike_attempted_diff,cumulative_knockdowns_scored_per_strike_attempted_diff,avg_knockdowns_scored_per_significant_strike_landed_diff,...,avg_opp_event_attendance_change_diff,avg_opp_event_attendance_change_diff_diff,avg_opp_avg_event_attendance_change_diff,avg_opp_avg_event_attendance_change_diff_diff,avg_opp_avg_event_occupancy_pct_diff,avg_opp_avg_event_occupancy_pct_diff_diff,avg_opp_event_occupancy_pct_change_diff,avg_opp_event_occupancy_pct_change_diff_diff,avg_opp_avg_event_occupancy_pct_change_diff,avg_opp_avg_event_occupancy_pct_change_diff_diff
0,be38ed9ccfe2ee03,0.000000,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,-3611.000000,6580.500000,206.916667,7790.250000,-0.001338,-0.316345,-0.167808,0.596338,-0.008228,0.201749
1,219bd976b8ca745d,0.000000,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,5791.250000,0.000000,2043.861111,-10063.500000,-0.097682,-0.516345,-0.102990,-0.649139,0.073671,-0.509056
2,af178adff964d854,0.200000,1,0.000313,0.000430,0.002247,0.004386,0.001639,0.003115,0.004651,...,-5569.000000,0.000000,0.000000,0.000000,-0.237066,0.000000,0.000000,0.000000,0.000000,0.000000
3,96da5813683649f5,-0.200000,-1,-0.000222,-0.000269,-0.002817,-0.006024,-0.001600,-0.003846,-0.007692,...,1661.466667,-1201.000000,23.208333,5164.013889,-0.216629,0.107524,0.202652,-0.021123,0.002736,0.031342
4,df3b75809b7fe252,0.000000,0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,-0.310681,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4834,5238f6470d0557fb,0.250000,4,0.001888,0.001181,0.015227,0.010821,0.006283,0.003462,0.015179,...,-4734.666667,11243.000000,722.184292,-1511.853896,-0.202461,-0.584005,0.000000,0.000000,0.007550,0.173006
4835,7b1bc4ff776f12c1,-0.085714,-3,-0.001594,-0.000143,-0.004453,0.000881,-0.003147,0.000704,0.006262,...,-18745.000000,0.000000,82.594036,4943.723440,0.039104,0.251747,-0.224510,0.421001,0.035729,0.306498
4836,1a635a5e4551e7d5,-0.142857,-1,-0.001952,-0.000514,-0.023014,-0.007278,-0.010774,-0.004009,-0.028357,...,-8795.333333,-1100.500000,-3269.043056,20082.425000,0.167921,-0.258702,-0.320822,0.585316,-0.161427,0.579691
4837,7521015554088962,0.175000,7,0.000810,0.000208,0.010541,0.003426,0.005460,0.001763,0.010358,...,3649.085714,-4911.654135,1349.485978,1308.150105,0.138067,0.222074,-0.297118,0.372520,0.084494,0.042074


In [14]:
# Collect the ids of the bouts whose is_ppv feature equals 0.
# NOTE(review): the list is called ppv_bout_ids, yet the filter keeps rows
# where is_ppv == 0 -- confirm which cohort this cell is meant to score.
ppv_bout_ids = features.loc[features["is_ppv"] == 0, "id"].tolist()

# Keep only those bouts that also have a recorded red_win outcome.
temp = df[df["bout_id"].isin(ppv_bout_ids) & df["red_win"].notna()].copy()

# Evaluate both probability columns against the same outcomes.
model_log_loss, bovada_log_loss = (
    log_loss(temp["red_win"], temp[source]) for source in ("Model", "Bovada")
)
model_brier_score, bovada_brier_score = (
    brier_score_loss(temp["red_win"], temp[source]) for source in ("Model", "Bovada")
)

# Each delta is model minus Bovada, so a negative value means the model's
# loss is the smaller of the two.
print(
    f"Model Log Loss: {model_log_loss:.6f}",
    f"Bovada Log Loss: {bovada_log_loss:.6f}",
    f"Delta: {model_log_loss - bovada_log_loss:.6f}",
    f"Model Brier Score: {model_brier_score:.6f}",
    f"Bovada Brier Score: {bovada_brier_score:.6f}",
    f"Delta: {model_brier_score - bovada_brier_score:.6f}",
    sep="\n",
)

Model Log Loss: 0.621889
Bovada Log Loss: 0.625981
Delta: -0.004092
Model Brier Score: 0.216614
Bovada Brier Score: 0.218438
Delta: -0.001824


In [15]:
import os
import sqlite3

# Path to the SQLite database, relative to the notebook's working directory.
# The earlier os.path.dirname("__file__") prefix operated on a string literal
# and always evaluated to "", so it never anchored the path to any file; it is
# dropped here and the resulting value is unchanged.
db_path = os.path.join("..", "..", "data", "ufc.db")

In [24]:
# Select the ids of every bout in ufcstats_bouts with the chosen weight class.
query = """
SELECT id FROM ufcstats_bouts WHERE weight_class = 'Catch Weight'
"""

# sqlite3's connection context manager only commits or rolls back the open
# transaction; it does not close the connection. Close it explicitly so the
# database handle is released even if the query raises.
conn = sqlite3.connect(db_path)
try:
    query_res = pd.read_sql_query(query, conn)
finally:
    conn.close()
weight_class_bout_ids = query_res["id"].tolist()

# Keep only those bouts that also have a recorded red_win outcome.
temp = df.loc[(df["bout_id"].isin(weight_class_bout_ids)) & (df["red_win"].notnull())].copy()

# Evaluate both probability columns against the same outcomes.
model_log_loss = log_loss(temp["red_win"], temp["Model"])
bovada_log_loss = log_loss(temp["red_win"], temp["Bovada"])
model_brier_score = brier_score_loss(temp["red_win"], temp["Model"])
bovada_brier_score = brier_score_loss(temp["red_win"], temp["Bovada"])

# Each delta is model minus Bovada, so a negative value means the model's
# loss is the smaller of the two.
print(f"Model Log Loss: {model_log_loss:.6f}")
print(f"Bovada Log Loss: {bovada_log_loss:.6f}")
print(f"Delta: {model_log_loss - bovada_log_loss:.6f}")
print(f"Model Brier Score: {model_brier_score:.6f}")
print(f"Bovada Brier Score: {bovada_brier_score:.6f}")
print(f"Delta: {model_brier_score - bovada_brier_score:.6f}")

Model Log Loss: 0.608679
Bovada Log Loss: 0.613781
Delta: -0.005103
Model Brier Score: 0.212001
Bovada Brier Score: 0.215196
Delta: -0.003195
