In [47]:
import pandas as pd
pd.set_option('display.max_rows', 15)
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [16]:
data = pd.read_csv('./nba_data.csv')
data.drop('Unnamed: 0', axis=1, inplace=True)
data_train = data[data.Year < 2017]
data_test = data[data.Year == 2017]

In [23]:
data_test.columns

Index(['Team', 'Year', 'Wins', 'Champion', 'MVP', 'Scoring Leader',
       'Rebound Leader', 'Assist Leader', 'WS Leader', 'DPOY', 'MIP', '6MOY',
       'Coach of Year', 'All-Stars', 'All-Defensive', 'All-NBA', 'FG%',
       '3P Attempts', '3P%', '2P Attempts', '2P%', 'FT%', 'RPG', 'APG', 'STL',
       'BLK', 'TOVPG', 'PPG'],
      dtype='object')

In [42]:
model = LogisticRegression()
inputs = data.columns.tolist()
inputs.remove('Team')
inputs.remove('Champion')
inputs.remove('Year')
print(inputs)

model.fit(data_train[inputs], data_train['Champion'])
model.coef_

['Wins', 'MVP', 'Scoring Leader', 'Rebound Leader', 'Assist Leader', 'WS Leader', 'DPOY', 'MIP', '6MOY', 'Coach of Year', 'All-Stars', 'All-Defensive', 'All-NBA', 'FG%', '3P Attempts', '3P%', '2P Attempts', '2P%', 'FT%', 'RPG', 'APG', 'STL', 'BLK', 'TOVPG', 'PPG']


array([[  2.22431763e-01,   2.60804822e-01,   9.40934114e-02,
         -4.59806307e-02,  -1.56373938e-01,   4.43408033e-02,
          2.20786059e-01,  -2.06649499e-02,   8.12684444e-02,
          9.32133433e-02,  -7.61082513e-02,   1.43229923e-01,
          4.21319147e-01,  -2.71053919e-02,  -1.71377393e-03,
         -1.07038608e-01,  -3.26850608e-04,  -2.18814197e-02,
         -7.90860844e-02,   3.36264004e-02,  -4.89398720e-02,
         -2.64006713e-01,  -1.91689018e-01,   1.38098192e-01,
         -1.10787829e-01]])

It looks like having the scoring leader, the sixth man of the year, and coach of the year greatly increase your chance of winning the finals. 

In [52]:
data['log_pred'] = model.predict(data[inputs])

In [53]:
data[data.Champion]

Unnamed: 0,Team,Year,Wins,Champion,MVP,Scoring Leader,Rebound Leader,Assist Leader,WS Leader,DPOY,...,2P%,FT%,RPG,APG,STL,BLK,TOVPG,PPG,rf_pred,log_pred
0,Boston Celtics,1960.0,59.0,True,0.0,0.0,0.0,1.0,0.0,0.0,...,0.417,0.734,80.2,24.7,0.0,0.0,0.0,124.5,True,True
8,Boston Celtics,1961.0,57.0,True,1.0,0.0,0.0,0.0,0.0,0.0,...,0.398,0.735,77.6,23.7,0.0,0.0,0.0,119.7,True,True
16,Boston Celtics,1962.0,60.0,True,1.0,0.0,0.0,0.0,0.0,0.0,...,0.423,0.728,76.0,25.6,0.0,0.0,0.0,121.1,True,True
25,Boston Celtics,1963.0,58.0,True,1.0,0.0,0.0,0.0,0.0,0.0,...,0.427,0.725,72.7,24.5,0.0,0.0,0.0,118.8,True,True
34,Boston Celtics,1964.0,59.0,True,0.0,0.0,1.0,0.0,0.0,0.0,...,0.413,0.725,71.7,22.0,0.0,0.0,0.0,113.0,True,True
43,Boston Celtics,1965.0,62.0,True,1.0,0.0,1.0,0.0,0.0,0.0,...,0.414,0.731,71.9,22.2,0.0,0.0,0.0,112.8,True,True
53,Boston Celtics,1966.0,54.0,True,0.0,0.0,0.0,0.0,0.0,0.0,...,0.417,0.739,69.9,22.4,0.0,0.0,0.0,112.7,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1105,Los Angeles Lakers,2010.0,57.0,True,0.0,0.0,0.0,0.0,0.0,0.0,...,0.492,0.765,44.3,21.1,7.5,4.9,13.4,101.7,True,True
1141,Dallas Mavericks,2011.0,57.0,True,0.0,0.0,0.0,0.0,0.0,1.0,...,0.516,0.777,41.4,23.8,6.8,4.3,14.0,100.2,True,True


In [78]:
model = RandomForestClassifier()

model.fit(data[inputs], data['Champion'])

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [79]:
data['rf_pred'] = model.predict_proba(data[inputs])

In [80]:
data

Unnamed: 0,Team,Year,Wins,Champion,MVP,Scoring Leader,Rebound Leader,Assist Leader,WS Leader,DPOY,...,FT%,RPG,APG,STL,BLK,TOVPG,PPG,rf_pred,log_pred,pred_wins
0,Boston Celtics,1960.0,59.0,True,0.0,0.0,0.0,1.0,0.0,0.0,...,0.734,80.2,24.7,0.0,0.0,0.0,124.5,0.3,True,55.3
1,Golden State Warriors,1960.0,49.0,False,1.0,1.0,1.0,0.0,1.0,0.0,...,0.669,78.9,23.9,0.0,0.0,0.0,118.6,0.9,False,44.3
2,Philadelphia 76ers,1960.0,45.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.791,72.1,22.3,0.0,0.0,0.0,118.9,1.0,False,43.0
3,New York Knicks,1960.0,27.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.765,70.0,22.2,0.0,0.0,0.0,117.3,1.0,False,32.4
4,Atlanta Hawks,1960.0,46.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.745,71.2,25.1,0.0,0.0,0.0,113.4,1.0,False,39.8
5,Detroit Pistons,1960.0,30.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.729,73.2,19.6,0.0,0.0,0.0,111.6,0.9,False,27.1
6,Los Angeles Lakers,1960.0,25.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.730,72.4,19.3,0.0,0.0,0.0,107.3,1.0,False,24.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1318,Portland Trail Blazers,2017.0,41.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.780,43.7,21.1,7.0,5.0,13.7,107.9,1.0,False,41.8
1319,Denver Nuggets,2017.0,40.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.774,46.4,25.3,6.9,4.0,14.9,111.7,1.0,False,40.4


In [54]:
data[data.Team == 'Cleveland Cavaliers']

Unnamed: 0,Team,Year,Wins,Champion,MVP,Scoring Leader,Rebound Leader,Assist Leader,WS Leader,DPOY,...,2P%,FT%,RPG,APG,STL,BLK,TOVPG,PPG,rf_pred,log_pred
118,Cleveland Cavaliers,1971.0,15.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.424,0.746,48.6,25.2,0.0,0.0,0.0,102.1,False,False
135,Cleveland Cavaliers,1972.0,23.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.428,0.736,50.0,25.1,0.0,0.0,0.0,105.8,False,False
152,Cleveland Cavaliers,1973.0,32.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.435,0.747,49.5,25.7,0.0,0.0,0.0,102.7,False,False
169,Cleveland Cavaliers,1974.0,29.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.439,0.772,45.9,25.0,7.3,3.6,18.8,100.3,False,False
185,Cleveland Cavaliers,1975.0,40.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.462,0.742,43.4,23.2,7.3,4.2,17.8,99.0,False,False
201,Cleveland Cavaliers,1976.0,49.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.454,0.737,46.1,22.5,7.8,4.8,16.2,101.7,False,False
223,Cleveland Cavaliers,1977.0,43.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.449,0.737,47.3,22.5,7.1,5.8,16.5,102.1,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1124,Cleveland Cavaliers,2011.0,19.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.461,0.745,40.3,21.0,6.6,4.2,14.2,95.5,False,False
1154,Cleveland Cavaliers,2012.0,21.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.446,0.716,42.3,19.8,7.1,4.0,15.4,93.0,False,False


In [55]:
data[data.Team == 'Golden State Warriors']

Unnamed: 0,Team,Year,Wins,Champion,MVP,Scoring Leader,Rebound Leader,Assist Leader,WS Leader,DPOY,...,2P%,FT%,RPG,APG,STL,BLK,TOVPG,PPG,rf_pred,log_pred
1,Golden State Warriors,1960.0,49.0,False,1.0,1.0,1.0,0.0,1.0,0.0,...,0.409,0.669,78.9,23.9,0.0,0.0,0.0,118.6,False,False
9,Golden State Warriors,1961.0,46.0,False,0.0,1.0,1.0,0.0,1.0,0.0,...,0.424,0.651,75.2,24.8,0.0,0.0,0.0,121.0,False,False
17,Golden State Warriors,1962.0,49.0,False,0.0,1.0,1.0,0.0,1.0,0.0,...,0.439,0.686,74.2,25.9,0.0,0.0,0.0,125.4,False,False
32,Golden State Warriors,1963.0,31.0,False,0.0,1.0,1.0,1.0,1.0,0.0,...,0.450,0.669,67.0,23.8,0.0,0.0,0.0,118.5,False,False
38,Golden State Warriors,1964.0,48.0,False,0.0,1.0,0.0,0.0,1.0,0.0,...,0.438,0.638,68.7,23.7,0.0,0.0,0.0,107.7,False,False
51,Golden State Warriors,1965.0,17.0,False,0.0,1.0,0.0,0.0,0.0,0.0,...,0.403,0.640,71.4,20.7,0.0,0.0,0.0,105.8,False,False
59,Golden State Warriors,1966.0,35.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.418,0.739,71.6,23.4,0.0,0.0,0.0,115.5,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1137,Golden State Warriors,2011.0,36.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.484,0.761,40.5,22.5,9.0,5.0,14.6,103.4,False,False
1168,Golden State Warriors,2012.0,23.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.480,0.770,39.2,22.3,8.0,5.5,13.9,97.8,False,False


In [56]:
model.predict_proba(data.loc[1311][inputs])



array([[ 0.9,  0.1]])

In [57]:
model.predict_proba(data.loc[1297][inputs])



array([[ 1.,  0.]])

In [58]:
data.loc[1297][inputs]

Wins                 51
MVP                   0
Scoring Leader        0
Rebound Leader        0
Assist Leader         0
WS Leader             0
DPOY                  0
                  ...  
FT%               0.748
RPG                43.8
APG                22.7
STL                 6.6
BLK                   4
TOVPG              13.7
PPG               110.3
Name: 1297, dtype: object

In [71]:
inputs.remove('Wins')

ValueError: list.remove(x): x not in list

In [66]:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()

model.fit(data[inputs], data['Wins'])

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [67]:
data['pred_wins'] = model.predict(data[inputs])

In [68]:
data

Unnamed: 0,Team,Year,Wins,Champion,MVP,Scoring Leader,Rebound Leader,Assist Leader,WS Leader,DPOY,...,FT%,RPG,APG,STL,BLK,TOVPG,PPG,rf_pred,log_pred,pred_wins
0,Boston Celtics,1960.0,59.0,True,0.0,0.0,0.0,1.0,0.0,0.0,...,0.734,80.2,24.7,0.0,0.0,0.0,124.5,True,True,55.3
1,Golden State Warriors,1960.0,49.0,False,1.0,1.0,1.0,0.0,1.0,0.0,...,0.669,78.9,23.9,0.0,0.0,0.0,118.6,False,False,44.3
2,Philadelphia 76ers,1960.0,45.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.791,72.1,22.3,0.0,0.0,0.0,118.9,False,False,43.0
3,New York Knicks,1960.0,27.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.765,70.0,22.2,0.0,0.0,0.0,117.3,False,False,32.4
4,Atlanta Hawks,1960.0,46.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.745,71.2,25.1,0.0,0.0,0.0,113.4,False,False,39.8
5,Detroit Pistons,1960.0,30.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.729,73.2,19.6,0.0,0.0,0.0,111.6,False,False,27.1
6,Los Angeles Lakers,1960.0,25.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.730,72.4,19.3,0.0,0.0,0.0,107.3,False,False,24.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1318,Portland Trail Blazers,2017.0,41.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.780,43.7,21.1,7.0,5.0,13.7,107.9,False,False,41.8
1319,Denver Nuggets,2017.0,40.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.774,46.4,25.3,6.9,4.0,14.9,111.7,False,False,40.4


In [69]:
print(-cross_val_score(model, data[inputs], data['Wins'], cv= 10,
                       scoring='neg_mean_squared_error').mean())

69.9928624402


In [75]:
(data['pred_wins'] - data['Wins']).mean()

-0.053660377358490705

In [76]:
(data['pred_wins'] - data['Wins']).std()

3.322710322551529

In [81]:
data[data.Team == 'Golden State Warriors']

Unnamed: 0,Team,Year,Wins,Champion,MVP,Scoring Leader,Rebound Leader,Assist Leader,WS Leader,DPOY,...,FT%,RPG,APG,STL,BLK,TOVPG,PPG,rf_pred,log_pred,pred_wins
1,Golden State Warriors,1960.0,49.0,False,1.0,1.0,1.0,0.0,1.0,0.0,...,0.669,78.9,23.9,0.0,0.0,0.0,118.6,0.9,False,44.3
9,Golden State Warriors,1961.0,46.0,False,0.0,1.0,1.0,0.0,1.0,0.0,...,0.651,75.2,24.8,0.0,0.0,0.0,121.0,1.0,False,47.5
17,Golden State Warriors,1962.0,49.0,False,0.0,1.0,1.0,0.0,1.0,0.0,...,0.686,74.2,25.9,0.0,0.0,0.0,125.4,0.9,False,49.4
32,Golden State Warriors,1963.0,31.0,False,0.0,1.0,1.0,1.0,1.0,0.0,...,0.669,67.0,23.8,0.0,0.0,0.0,118.5,1.0,False,33.9
38,Golden State Warriors,1964.0,48.0,False,0.0,1.0,0.0,0.0,1.0,0.0,...,0.638,68.7,23.7,0.0,0.0,0.0,107.7,1.0,False,45.9
51,Golden State Warriors,1965.0,17.0,False,0.0,1.0,0.0,0.0,0.0,0.0,...,0.640,71.4,20.7,0.0,0.0,0.0,105.8,0.9,False,25.6
59,Golden State Warriors,1966.0,35.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.739,71.6,23.4,0.0,0.0,0.0,115.5,1.0,False,35.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1137,Golden State Warriors,2011.0,36.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.761,40.5,22.5,9.0,5.0,14.6,103.4,1.0,False,36.4
1168,Golden State Warriors,2012.0,23.0,False,0.0,0.0,0.0,0.0,0.0,0.0,...,0.770,39.2,22.3,8.0,5.5,13.9,97.8,1.0,False,26.6


In [82]:
elo_data = pd.read_csv('./nbaallelo.csv')

In [87]:
elo_data

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,...,win_equiv,opp_id,opp_fran,opp_pts,opp_elo_i,opp_elo_n,game_location,game_result,forecast,notes
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,...,40.294830,NYK,Knicks,68,1300.0000,1306.7233,H,L,0.640065,
1,1,194611010TRH,NBA,1,1947,11/1/1946,1,0,NYK,Knicks,...,41.705170,TRH,Huskies,66,1300.0000,1293.2767,A,W,0.359935,
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,...,42.012257,NYK,Knicks,47,1306.7233,1297.0712,H,W,0.631101,
3,2,194611020CHS,NBA,1,1947,11/2/1946,2,0,NYK,Knicks,...,40.692783,CHS,Stags,63,1300.0000,1309.6521,A,L,0.368899,
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,...,38.864048,WSC,Capitols,50,1300.0000,1320.3811,H,L,0.640065,
5,3,194611020DTF,NBA,1,1947,11/2/1946,1,0,WSC,Capitols,...,43.135952,DTF,Falcons,33,1300.0000,1279.6189,A,W,0.359935,
6,4,194611020PRO,NBA,1,1947,11/2/1946,1,0,BOS,Celtics,...,40.459381,PRO,Steamrollers,59,1300.0000,1305.1542,A,L,0.359935,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126307,63154,201506090CLE,NBA,0,2015,6/9/2015,99,1,CLE,Cavaliers,...,61.826408,GSW,Warriors,91,1797.5032,1790.9591,H,W,0.527843,
126308,63155,201506110CLE,NBA,1,2015,6/11/2015,101,1,GSW,Warriors,...,67.794983,CLE,Cavaliers,82,1723.4149,1704.3949,A,W,0.453428,
