In [1]:
import pandas as pd
import numpy as np
from modules import constants
from modules.functions import convert_box_score_dataframe_to_regression_format
from sklearn.linear_model import Ridge
from dataclasses import dataclass

# Inverted lookup of constants.TEAM_NAME_ID_DICT: its values become the keys
# and its keys become the values (used below to turn a team id into a name).
team_id_name_dict = {
    team_id: team_name
    for team_name, team_id in constants.TEAM_NAME_ID_DICT.items()
}
@dataclass
class RegressionHub:
    """Bundle of the three Ridge estimators used in this notebook.

    The field names match the ``"<target>_regression"`` keys that the fitting
    cell writes into ``reg_dict`` for the ``ortg``, ``drtg`` and ``pace``
    targets.
    """
    # Ridge estimator for the 'ortg' target.
    ortg_regression: Ridge
    # Ridge estimator for the 'drtg' target.
    drtg_regression: Ridge
    # Ridge estimator for the 'pace' target.
    pace_regression: Ridge

Read in the full data and filter to D1 games only

In [2]:
# Load the raw box scores from parquet, then reshape them with the project
# helper into the table the regressions are fitted on.
# NOTE(review): the D1-only filter mentioned above is not visible in this
# cell - presumably it happens inside the helper; confirm.
BOX_SCORE_PARQUET = 'parquet_files/box_scores_sports_reference_2024.gzip'

full_box_df = pd.read_parquet(BOX_SCORE_PARQUET)
reg_df = convert_box_score_dataframe_to_regression_format(full_box_df)
reg_df

Unnamed: 0,TM_0,TM_1,TM_2,TM_3,TM_4,TM_5,TM_6,TM_7,TM_8,TM_9,...,OPP_358,OPP_359,OPP_620,OPP_631,home,ortg,drtg,pace,to_pct,opp_to_pct
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,-1.0,91.135635,82.517483,70.225,21.359915,16.783217
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,-1.0,95.772540,122.807018,66.825,16.460905,10.233918
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1.0,98.963557,111.033029,74.775,18.722835,19.676739
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.0,111.031002,111.459969,69.350,7.209805,9.419152
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.0,89.292471,67.189250,66.075,13.620885,28.368794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8359,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,-1.0,119.646499,106.280193,73.550,14.955812,17.943409
8360,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,-1.0,99.079073,113.259669,78.725,15.242934,11.049724
8361,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1.0,110.825478,101.040119,77.800,11.425307,15.453195
8362,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1.0,116.246499,119.957158,71.400,7.002801,14.280614


In [3]:
# Design matrix: every column of reg_df except the five outcome columns.
games_df = reg_df.drop(
    columns=['ortg', 'drtg', 'pace', 'to_pct', 'opp_to_pct']
).copy()

# Regression targets: one column per model fitted below.
y_df = reg_df.loc[:, ['ortg', 'drtg', 'pace']].copy()

# Ridge template (L2 penalty strength 0.5, with an intercept term).
reg = Ridge(fit_intercept=True, alpha=0.5)

In [12]:
# Fit one ridge regression per target column of y_df on the game matrix and
# collect the team ("TM_*") and "home" coefficients into one ratings table.
#
# Produces:
#   reg_dict  - {"<target>_regression": fitted estimator}, one per target
#   master_df - one row per TM_* / home feature with, for every target,
#               constant_<target> (intercept rounded to 2 dp),
#               coef_<target> and adj_<target> (coef + unrounded intercept),
#               plus adj_nrtg = adj_ortg - adj_drtg.
reg_dict = {}
coef_frames = []
feature_names = games_df.columns

for column in y_df.columns:
    # Fit a *fresh* estimator for every target, configured like `reg`.
    # Re-fitting and storing the shared `reg` object would make every entry of
    # reg_dict alias the same model, which ends up fitted to the last target.
    model = type(reg)(**reg.get_params(deep=False))
    model.fit(X=games_df, y=y_df[column].to_numpy())
    reg_dict[f"{column}_regression"] = model

    coef_frames.append(
        pd.DataFrame(
            {
                # Scalar is broadcast to every row; rounded for display only.
                f'constant_{column}': round(model.intercept_, 2),
                f'coef_{column}': model.coef_,
                f'adj_{column}': model.coef_ + model.intercept_,
            },
            index=feature_names,
        )
    )

# Every per-target frame shares the same feature-name index, so placing them
# side by side keeps exactly one row per feature. (Merging on team_name could
# duplicate rows, because team_name is not guaranteed to be unique.)
coef_df = pd.concat(coef_frames, axis=1)

# Keep only the team-offense indicators and the home-court term.
is_team_or_home = feature_names.str.startswith("TM") | (feature_names == "home")
master_df = coef_df.loc[is_team_or_home].copy()

# "TM_<id>" -> integer id; the home-court row gets the sentinel id -1.
tm_ids = [
    int(name.split("_")[1]) if name != 'home' else -1
    for name in master_df.index
]
# Ids without a name fall back to "HOME_COURT_ADVANTAGE"; this is how the
# sentinel -1 is labelled, and any TM id missing from team_id_name_dict
# receives the same label.
master_df.insert(
    0,
    'team_name',
    [team_id_name_dict.get(tm_id, "HOME_COURT_ADVANTAGE") for tm_id in tm_ids],
)
master_df = master_df.reset_index(drop=True)

master_df['adj_nrtg'] = master_df.adj_ortg - master_df.adj_drtg
master_df.sort_values('adj_nrtg', ascending=False)

Unnamed: 0,team_name,constant_ortg,coef_ortg,adj_ortg,constant_drtg,coef_drtg,adj_drtg,constant_pace,coef_pace,adj_pace,adj_nrtg
335,HOUSTON,103.33,13.486063,116.812043,103.33,-22.279116,81.046864,69.75,-4.281965,65.468769,35.765179
66,PURDUE,103.33,20.347915,123.673896,103.33,-13.314748,90.011232,69.75,0.032196,69.782930,33.662664
31,ARIZONA,103.33,17.411369,120.737350,103.33,-14.077877,89.248104,69.75,5.023320,74.774054,31.489246
144,UCONN,103.33,19.469401,122.795382,103.33,-10.739814,92.586167,69.75,-3.076504,66.674230,30.209215
136,ALABAMA,103.33,21.900074,125.226054,103.33,-7.572636,95.753345,69.75,5.015723,74.766456,29.472709
...,...,...,...,...,...,...,...,...,...,...,...
129,PACIFIC,103.33,-12.443708,90.882273,103.33,11.809068,115.135049,69.75,0.344927,70.095661,-24.252776
174,SIENA,103.33,-17.536694,85.789287,103.33,6.972974,110.298955,69.75,-0.608213,69.142521,-24.509668
15,COPPIN STATE,103.33,-22.268101,81.057880,103.33,3.775157,107.101138,69.75,-1.167220,68.583514,-26.043258
186,IUPUI,103.33,-13.992371,89.333609,103.33,14.216744,117.542725,69.75,-0.518362,69.232371,-28.209115
