# Imports

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression
from scipy.stats import norm

# Setup

In [2]:
def prob_nd1_elo(elo1, elo2, r=0, var=5, t=1): # 8 looks best
    elo1+=100
    if elo2>elo1:
        temp=elo1
        elo1=elo2
        elo2=temp
        return 1-norm.cdf((np.log(elo1/elo2)+(r+var/2)*(t/365)) / ((var*t/365)**.5))
    return norm.cdf((np.log(elo1/elo2)+(r+var/2)*(t/365)) / ((var*t/365)**.5))

In [3]:
NBA_data= pd.read_csv("nbaallelo.csv")
NBA_data

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,...,win_equiv,opp_id,opp_fran,opp_pts,opp_elo_i,opp_elo_n,game_location,game_result,forecast,notes
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,...,40.294830,NYK,Knicks,68,1300.0000,1306.7233,H,L,0.640065,
1,1,194611010TRH,NBA,1,1947,11/1/1946,1,0,NYK,Knicks,...,41.705170,TRH,Huskies,66,1300.0000,1293.2767,A,W,0.359935,
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,...,42.012257,NYK,Knicks,47,1306.7233,1297.0712,H,W,0.631101,
3,2,194611020CHS,NBA,1,1947,11/2/1946,2,0,NYK,Knicks,...,40.692783,CHS,Stags,63,1300.0000,1309.6521,A,L,0.368899,
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,...,38.864048,WSC,Capitols,50,1300.0000,1320.3811,H,L,0.640065,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126309,63155,201506110CLE,NBA,0,2015,6/11/2015,100,1,CLE,Cavaliers,...,60.309792,GSW,Warriors,103,1790.9591,1809.9791,H,L,0.546572,
126310,63156,201506140GSW,NBA,0,2015,6/14/2015,102,1,GSW,Warriors,...,68.013329,CLE,Cavaliers,91,1704.3949,1700.7391,H,W,0.765565,
126311,63156,201506140GSW,NBA,1,2015,6/14/2015,101,1,CLE,Cavaliers,...,60.010067,GSW,Warriors,104,1809.9791,1813.6349,A,L,0.234435,
126312,63157,201506170CLE,NBA,0,2015,6/16/2015,102,1,CLE,Cavaliers,...,59.290245,GSW,Warriors,105,1813.6349,1822.2881,H,L,0.481450,


In [4]:
NBA_data.columns

Index(['gameorder', 'game_id', 'lg_id', '_iscopy', 'year_id', 'date_game',
       'seasongame', 'is_playoffs', 'team_id', 'fran_id', 'pts', 'elo_i',
       'elo_n', 'win_equiv', 'opp_id', 'opp_fran', 'opp_pts', 'opp_elo_i',
       'opp_elo_n', 'game_location', 'game_result', 'forecast', 'notes'],
      dtype='object')

In [5]:
NBA_data['nd1_forecast'] = NBA_data.apply(lambda x: prob_nd1_elo(x['elo_i'], x['opp_elo_i']), axis=1)
NBA_data['results'] = (NBA_data['game_result'] == 'W')*1

In [6]:
NBA_var=np.var(NBA_data['elo_i'])
NBA_mean=np.mean(NBA_data[NBA_data['team_id']=='NYK']['elo_i'])

print(NBA_mean, NBA_var/(126314**.5))

1497.6156040908302 35.382742825274356


In [7]:
NBA_data[['elo_i', 'opp_elo_i', 'forecast', 'nd1_forecast', 'results']]

Unnamed: 0,elo_i,opp_elo_i,forecast,nd1_forecast,results
0,1300.0000,1300.0000,0.640065,0.755437,0
1,1300.0000,1300.0000,0.359935,0.755437,1
2,1300.0000,1306.7233,0.631101,0.741386,1
3,1306.7233,1300.0000,0.368899,0.768109,0
4,1300.0000,1300.0000,0.640065,0.755437,0
...,...,...,...,...,...
126309,1723.4149,1790.9591,0.546572,0.583934,0
126310,1809.9791,1704.3949,0.765565,0.848854,1
126311,1704.3949,1809.9791,0.234435,0.466162,0
126312,1700.7391,1813.6349,0.481450,0.452444,0


In [8]:
regdf1=NBA_data[['game_id','forecast', 'nd1_forecast']]
model1 = smf.ols(formula='nd1_forecast ~ forecast', data=regdf1).fit()
model1.summary()

0,1,2,3
Dep. Variable:,nd1_forecast,R-squared:,0.632
Model:,OLS,Adj. R-squared:,0.632
Method:,Least Squares,F-statistic:,216500.0
Date:,"Mon, 04 Jan 2021",Prob (F-statistic):,0.0
Time:,14:56:37,Log-Likelihood:,58957.0
No. Observations:,126314,AIC:,-117900.0
Df Residuals:,126312,BIC:,-117900.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2095,0.001,194.080,0.000,0.207,0.212
forecast,0.9229,0.002,465.347,0.000,0.919,0.927

0,1,2,3
Omnibus:,25202.167,Durbin-Watson:,2.689
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5358.687
Skew:,-0.138,Prob(JB):,0.0
Kurtosis:,2.03,Cond. No.,5.85


# 538 Model Test Results

In [9]:
regdf2=NBA_data[['game_id','forecast', 'results']]
model2 = smf.ols(formula='forecast ~ results', data=regdf2).fit()
model2.summary()

0,1,2,3
Dep. Variable:,forecast,R-squared:,0.185
Model:,OLS,Adj. R-squared:,0.185
Method:,Least Squares,F-statistic:,28640.0
Date:,"Mon, 04 Jan 2021",Prob (F-statistic):,0.0
Time:,14:56:37,Log-Likelihood:,27686.0
No. Observations:,126314,AIC:,-55370.0
Df Residuals:,126312,BIC:,-55350.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4075,0.001,526.892,0.000,0.406,0.409
results,0.1851,0.001,169.227,0.000,0.183,0.187

0,1,2,3
Omnibus:,6278.891,Durbin-Watson:,3.0
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2485.259
Skew:,-0.0,Prob(JB):,0.0
Kurtosis:,2.313,Cond. No.,2.62


In [10]:
log_reg1 = sm.Logit(regdf2['results'], regdf2['forecast']).fit() 
log_reg1.summary()

Optimization terminated successfully.
         Current function value: 0.678589
         Iterations 4


0,1,2,3
Dep. Variable:,results,No. Observations:,126314.0
Model:,Logit,Df Residuals:,126313.0
Method:,MLE,Df Model:,0.0
Date:,"Mon, 04 Jan 2021",Pseudo R-squ.:,0.021
Time:,14:56:37,Log-Likelihood:,-85715.0
converged:,True,LL-Null:,-87554.0
Covariance Type:,nonrobust,LLR p-value:,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
forecast,0.6341,0.011,59.958,0.000,0.613,0.655


# ND1 Test Results

In [11]:
regdf3=NBA_data[['game_id','nd1_forecast', 'results']]
model3 = smf.ols(formula='nd1_forecast ~ results', data=regdf3).fit()
model3.summary()

0,1,2,3
Dep. Variable:,nd1_forecast,R-squared:,0.115
Model:,OLS,Adj. R-squared:,0.115
Method:,Least Squares,F-statistic:,16490.0
Date:,"Mon, 04 Jan 2021",Prob (F-statistic):,0.0
Time:,14:56:37,Log-Likelihood:,3641.5
No. Observations:,126314,AIC:,-7279.0
Df Residuals:,126312,BIC:,-7260.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5860,0.001,626.464,0.000,0.584,0.588
results,0.1699,0.001,128.425,0.000,0.167,0.172

0,1,2,3
Omnibus:,8455.275,Durbin-Watson:,2.87
Prob(Omnibus):,0.0,Jarque-Bera (JB):,9054.413
Skew:,-0.621,Prob(JB):,0.0
Kurtosis:,2.577,Cond. No.,2.62


In [12]:
log_reg2 = sm.Logit(regdf3['results'], regdf3['nd1_forecast']).fit() 
log_reg2.summary()

Optimization terminated successfully.
         Current function value: 0.686087
         Iterations 4


0,1,2,3
Dep. Variable:,results,No. Observations:,126314.0
Model:,Logit,Df Residuals:,126313.0
Method:,MLE,Df Model:,0.0
Date:,"Mon, 04 Jan 2021",Pseudo R-squ.:,0.01019
Time:,14:56:37,Log-Likelihood:,-86662.0
converged:,True,LL-Null:,-87554.0
Covariance Type:,nonrobust,LLR p-value:,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
nd1_forecast,0.3335,0.008,42.032,0.000,0.318,0.349


# Weighted Mix

In [13]:
a=.25
NBA_data['w_forecast']= NBA_data['nd1_forecast']*a+ NBA_data['forecast']*(1-a)

In [14]:
regdf4=NBA_data[['game_id','w_forecast', 'results']]
model4 = smf.ols(formula='w_forecast ~ results', data=regdf4).fit()
model4.summary()

0,1,2,3
Dep. Variable:,w_forecast,R-squared:,0.179
Model:,OLS,Adj. R-squared:,0.179
Method:,Least Squares,F-statistic:,27460.0
Date:,"Mon, 04 Jan 2021",Prob (F-statistic):,0.0
Time:,14:56:37,Log-Likelihood:,27656.0
No. Observations:,126314,AIC:,-55310.0
Df Residuals:,126312,BIC:,-55290.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4521,0.001,584.486,0.000,0.451,0.454
results,0.1813,0.001,165.719,0.000,0.179,0.183

0,1,2,3
Omnibus:,4908.891,Durbin-Watson:,2.988
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2486.861
Skew:,-0.153,Prob(JB):,0.0
Kurtosis:,2.385,Cond. No.,2.62


In [15]:
log_reg3 = sm.Logit(regdf3['results'], regdf4['w_forecast']).fit() 
log_reg3.summary()

Optimization terminated successfully.
         Current function value: 0.681014
         Iterations 4


0,1,2,3
Dep. Variable:,results,No. Observations:,126314.0
Model:,Logit,Df Residuals:,126313.0
Method:,MLE,Df Model:,0.0
Date:,"Mon, 04 Jan 2021",Pseudo R-squ.:,0.0175
Time:,14:56:37,Log-Likelihood:,-86022.0
converged:,True,LL-Null:,-87554.0
Covariance Type:,nonrobust,LLR p-value:,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
w_forecast,0.5386,0.010,54.873,0.000,0.519,0.558
