# Imports

In [147]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression
from scipy.stats import norm

# Setup

In [148]:
def prob_nd1_elo(elo1, elo2, r=0, var=22, t=1, home_adv=100):
    """Win probability for team 1 from an N(d1)-style formula on Elo ratings.

    The statistic has the same form as the Black-Scholes d1 term, with the
    rating ratio in place of the price/strike ratio:

        d1 = (ln(hi / lo) + (r + var/2) * (t/365)) / sqrt(var * t/365)

    where ``hi``/``lo`` are the higher/lower of the two ratings *after* the
    home bonus has been added to ``elo1``.  If team 1 is the higher-rated
    side the result is ``norm.cdf(d1)``; otherwise it is ``1 - norm.cdf(d1)``.

    Parameters
    ----------
    elo1 : float or array-like
        Rating of the team that receives the home bonus.
    elo2 : float or array-like
        Rating of the opposing team.
    r : float, default 0
        Drift term added to ``var/2``.
    var : float, default 22
        Variance parameter (annualised; scaled by ``t/365``).
    t : float, default 1
        Horizon in days.
    home_adv : float, default 100
        Rating points added to ``elo1`` before comparing.  Pass 0 for a
        neutral-site game.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Probability that team 1 wins; a scalar for scalar inputs, otherwise
        an array with the broadcast shape of ``elo1`` and ``elo2``.
    """
    # Build new float arrays so a caller's array is never modified in place.
    rating1 = np.asarray(elo1, dtype=float) + home_adv
    rating2 = np.asarray(elo2, dtype=float)

    drift = (r + var/2) * (t/365)
    spread = (var*t/365) ** .5

    # The formula is always evaluated from the stronger side's point of view;
    # when that side is team 2 the complement is returned for team 1.
    team2_stronger = rating2 > rating1
    hi = np.where(team2_stronger, rating2, rating1)
    lo = np.where(team2_stronger, rating1, rating2)
    p_stronger = norm.cdf((np.log(hi / lo) + drift) / spread)

    # [()] unwraps a 0-d result into a NumPy scalar; n-d arrays pass through.
    return np.where(team2_stronger, 1 - p_stronger, p_stronger)[()]

In [149]:
# Load the game-level Elo table; the path is relative, so the CSV must sit in
# the notebook's working directory.
NBA_data= pd.read_csv("nbaallelo.csv")
# Bare expression as the last line so the notebook renders the frame.
NBA_data

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,...,win_equiv,opp_id,opp_fran,opp_pts,opp_elo_i,opp_elo_n,game_location,game_result,forecast,notes
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,...,40.294830,NYK,Knicks,68,1300.0000,1306.7233,H,L,0.640065,
1,1,194611010TRH,NBA,1,1947,11/1/1946,1,0,NYK,Knicks,...,41.705170,TRH,Huskies,66,1300.0000,1293.2767,A,W,0.359935,
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,...,42.012257,NYK,Knicks,47,1306.7233,1297.0712,H,W,0.631101,
3,2,194611020CHS,NBA,1,1947,11/2/1946,2,0,NYK,Knicks,...,40.692783,CHS,Stags,63,1300.0000,1309.6521,A,L,0.368899,
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,...,38.864048,WSC,Capitols,50,1300.0000,1320.3811,H,L,0.640065,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126309,63155,201506110CLE,NBA,0,2015,6/11/2015,100,1,CLE,Cavaliers,...,60.309792,GSW,Warriors,103,1790.9591,1809.9791,H,L,0.546572,
126310,63156,201506140GSW,NBA,0,2015,6/14/2015,102,1,GSW,Warriors,...,68.013329,CLE,Cavaliers,91,1704.3949,1700.7391,H,W,0.765565,
126311,63156,201506140GSW,NBA,1,2015,6/14/2015,101,1,CLE,Cavaliers,...,60.010067,GSW,Warriors,104,1809.9791,1813.6349,A,L,0.234435,
126312,63157,201506170CLE,NBA,0,2015,6/16/2015,102,1,CLE,Cavaliers,...,59.290245,GSW,Warriors,105,1813.6349,1822.2881,H,L,0.481450,


In [150]:
# List the column names available for the analysis cells below.
NBA_data.columns

Index(['gameorder', 'game_id', 'lg_id', '_iscopy', 'year_id', 'date_game',
       'seasongame', 'is_playoffs', 'team_id', 'fran_id', 'pts', 'elo_i',
       'elo_n', 'win_equiv', 'opp_id', 'opp_fran', 'opp_pts', 'opp_elo_i',
       'opp_elo_n', 'game_location', 'game_result', 'forecast', 'notes'],
      dtype='object')

In [None]:
# prob_nd1_elo() always credits the home-court bonus to its FIRST argument, but
# NBA_data holds one row per team per game, so `team_id` is the home side only
# when game_location == 'H'.  Dispatch on location so the ND1 forecast is from
# the same perspective as the 538 `forecast` column it is compared against.
HOME_ADV = 100  # must equal the bonus prob_nd1_elo() adds to its first argument


def nd1_row_forecast(row):
    """Return the ND1 win probability for the row's `team_id`.

    'H' rows score the game directly, 'A' rows score it from the (home)
    opponent's side and take the complement, and 'N' rows cancel the built-in
    home bonus so neither side is favoured by location.
    """
    team_elo, opp_elo = row['elo_i'], row['opp_elo_i']
    location = row['game_location']
    if location == 'A':
        return 1 - prob_nd1_elo(opp_elo, team_elo)
    if location == 'N':
        return prob_nd1_elo(team_elo - HOME_ADV, opp_elo)
    return prob_nd1_elo(team_elo, opp_elo)


NBA_data['nd1_forecast'] = NBA_data.apply(nd1_row_forecast, axis=1)

# Binary outcome label: 1 if `team_id` won the game, 0 otherwise.
NBA_data['results'] = (NBA_data['game_result'] == 'W').astype(int)

In [None]:
# Variance of the pre-game Elo over every row, and the mean pre-game Elo of
# the rows whose team_id is 'NYK'.
NBA_var = np.var(NBA_data['elo_i'])
NBA_mean = np.mean(NBA_data[NBA_data['team_id'] == 'NYK']['elo_i'])

# Row count is taken from the frame instead of the hard-coded 126314, so the
# figure stays right if the CSV is filtered or refreshed.
# NOTE(review): this prints variance / sqrt(n), and the mean is NYK-only while
# the variance is league-wide. If a standard error was intended it would be
# sqrt(variance / n) over the same rows -- confirm the intended statistic.
print(NBA_mean, NBA_var/(len(NBA_data)**.5))

In [None]:
# Show the two ratings next to both forecasts and the outcome label.
NBA_data[['elo_i', 'opp_elo_i', 'forecast', 'nd1_forecast', 'results']]

In [None]:
# OLS of the ND1 forecast on the 538 forecast: how closely do the two
# probability estimates track each other?
regdf1 = NBA_data.loc[:, ['game_id', 'forecast', 'nd1_forecast']]
model1 = smf.ols('nd1_forecast ~ forecast', data=regdf1).fit()
model1.summary()

# 538 Model Test Results

In [None]:
# OLS with the 538 forecast as the dependent variable and the 0/1 outcome as
# the regressor.
# NOTE(review): confirm this direction is intended rather than
# `results ~ forecast`.
regdf2 = NBA_data.loc[:, ['game_id', 'forecast', 'results']]
model2 = smf.ols('forecast ~ results', data=regdf2).fit()
model2.summary()

In [None]:
# Logistic regression of the game outcome on the 538 forecast.
# sm.Logit does not add an intercept by itself, so the constant is added
# explicitly; without it the fit is forced through the origin.
log_reg1 = sm.Logit(regdf2['results'], sm.add_constant(regdf2['forecast'])).fit()
log_reg1.summary()

# ND1 Test Results

In [None]:
# OLS with the ND1 forecast as the dependent variable and the 0/1 outcome as
# the regressor.
# NOTE(review): confirm this direction is intended rather than
# `results ~ nd1_forecast`.
regdf3 = NBA_data.loc[:, ['game_id', 'nd1_forecast', 'results']]
model3 = smf.ols('nd1_forecast ~ results', data=regdf3).fit()
model3.summary()

In [None]:
# Logistic regression of the game outcome on the ND1 forecast.
# sm.Logit does not add an intercept by itself, so the constant is added
# explicitly; without it the fit is forced through the origin.
log_reg2 = sm.Logit(regdf3['results'], sm.add_constant(regdf3['nd1_forecast'])).fit()
log_reg2.summary()

# Weighted Mix

In [None]:
# Blend the two forecasts: weight `a` on ND1, the remainder on 538.
a = .25
NBA_data['w_forecast'] = a*NBA_data['nd1_forecast'] + (1 - a)*NBA_data['forecast']

In [None]:
# OLS with the blended forecast as the dependent variable and the 0/1 outcome
# as the regressor.
# NOTE(review): confirm this direction is intended rather than
# `results ~ w_forecast`.
regdf4 = NBA_data.loc[:, ['game_id', 'w_forecast', 'results']]
model4 = smf.ols('w_forecast ~ results', data=regdf4).fit()
model4.summary()

In [None]:
# Logistic regression of the game outcome on the blended forecast.
# Both columns are taken from regdf4, so this cell no longer depends on regdf3
# (the ND1 section's frame).
# sm.Logit does not add an intercept by itself, so the constant is added
# explicitly; without it the fit is forced through the origin.
log_reg3 = sm.Logit(regdf4['results'], sm.add_constant(regdf4['w_forecast'])).fit()
log_reg3.summary()