In [1]:
# !pip install nba-api
# !pip install Keras

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# dependencies

# basic
from datetime import datetime
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import ExcelFile
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pandas.plotting import scatter_matrix
import plotly.express as px
import requests
import seaborn as sns
import time

# nba api
import nba_api
from nba_api.stats.endpoints import BoxScoreDefensive, BoxScoreMiscV2, BoxScorePlayerTrackV2, BoxScoreUsageV2
from nba_api.stats.static import teams
from nba_api.stats.endpoints import LeagueGameFinder, LeagueGameLog, PlayerGameLog

# sklearn
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler, Normalizer, StandardScaler
from sklearn.svm import SVC

# tensorflow
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

In [4]:
plt.style.use('seaborn')

-------
# csv dataset
-------

In [5]:
# df_final = pd.read_csv('/content/drive/My Drive/mo_bucks_ml/current/gamestatsfinal_version_3.csv')
df_final = pd.read_csv('../resources/gamestatsfinal_version_3.csv')

In [6]:
df_final

Unnamed: 0,AST_PCT,AST_RATIO,AST_TOV,AST,BLK,BLKA,BLK.1,CFGA,CFGM,CFG_PCT,...,TS_TEAM_AVG,UFGA,UFGM,UFG_PCT,USG_PCT,VIDEO_AVAILABLE,WL,YEAR BUILT,ARENA,ATTENDANCE
0,0.214,27.3,3.00,3,0,1,0,2,0,0.000,...,0.480333,5,3,0.600,0.163,1,W,1995,TD Garden,18624
1,0.091,11.1,1.00,2,0,2,0,6,2,0.333,...,0.480333,7,3,0.429,0.229,1,W,1995,TD Garden,18624
2,0.000,0.0,0.00,0,0,1,0,5,1,0.200,...,0.480333,7,3,0.429,0.206,1,W,1995,TD Garden,18624
3,0.095,16.7,0.67,2,4,0,4,3,3,1.000,...,0.480333,4,1,0.250,0.137,1,W,1995,TD Garden,18624
4,0.292,28.0,2.33,7,0,1,0,2,1,0.500,...,0.480333,12,1,0.083,0.247,1,W,1995,TD Garden,18624
5,0.000,0.0,0.00,0,0,0,0,1,1,1.000,...,0.480333,11,6,0.545,0.241,1,W,1995,TD Garden,18624
6,0.000,0.0,0.00,0,0,0,0,0,0,0.000,...,0.480333,0,0,0.000,0.000,1,W,1995,TD Garden,18624
7,0.048,8.3,1.00,1,1,0,1,3,0,0.000,...,0.480333,7,5,0.714,0.167,1,W,1995,TD Garden,18624
8,0.176,30.0,1.50,3,0,0,0,0,0,0.000,...,0.480333,4,2,0.500,0.109,1,W,1995,TD Garden,18624
9,0.250,13.0,3.00,3,0,0,0,5,2,0.400,...,0.480333,12,7,0.583,0.290,1,W,1995,TD Garden,18624


In [7]:
# fix gameid column w/ leading zeros
df_final['GAME_ID'] = df_final['GAME_ID'].map(lambda x: f'{x:0>10}')

In [8]:
df_final.head()

Unnamed: 0,AST_PCT,AST_RATIO,AST_TOV,AST,BLK,BLKA,BLK.1,CFGA,CFGM,CFG_PCT,...,TS_TEAM_AVG,UFGA,UFGM,UFG_PCT,USG_PCT,VIDEO_AVAILABLE,WL,YEAR BUILT,ARENA,ATTENDANCE
0,0.214,27.3,3.0,3,0,1,0,2,0,0.0,...,0.480333,5,3,0.6,0.163,1,W,1995,TD Garden,18624
1,0.091,11.1,1.0,2,0,2,0,6,2,0.333,...,0.480333,7,3,0.429,0.229,1,W,1995,TD Garden,18624
2,0.0,0.0,0.0,0,0,1,0,5,1,0.2,...,0.480333,7,3,0.429,0.206,1,W,1995,TD Garden,18624
3,0.095,16.7,0.67,2,4,0,4,3,3,1.0,...,0.480333,4,1,0.25,0.137,1,W,1995,TD Garden,18624
4,0.292,28.0,2.33,7,0,1,0,2,1,0.5,...,0.480333,12,1,0.083,0.247,1,W,1995,TD Garden,18624


# player selection

In [9]:
# dfplayerawards = pd.read_csv('/content/drive/My Drive/mo_bucks_ml/current/2019_nba_awards.csv')
dfplayerawards = pd.read_csv('../resources/2019_nba_awards.csv')

In [10]:
players_awarded = dfplayerawards['Player'].unique()

In [11]:
df_final_awarded = df_final.loc[df_final['PLAYER_NAME'].isin(players_awarded)]

# feature selection

In [12]:
list(df_final.keys())

['AST_PCT',
 'AST_RATIO',
 'AST_TOV',
 'AST',
 'BLK',
 'BLKA',
 'BLK.1',
 'CFGA',
 'CFGM',
 'CFG_PCT',
 'COMMENT',
 'Capacity',
 'Capacity_pct',
 'City',
 'DEF_RATING',
 'DFGA',
 'DFGM',
 'DFG_PCT',
 'DIST',
 'DRBC',
 'DREB_PCT',
 'DREB',
 'EFG_EVAL',
 'EFG_PCT',
 'EFG_TEAM_AVG',
 'E_DEF_RATING',
 'E_NET_RATING',
 'E_OFF_RATING',
 'E_PACE',
 'E_USG_PCT',
 'FG3A',
 'FG3M',
 'FG3_PCT',
 'FGA',
 'FGM',
 'FG_PCT',
 'FTA',
 'FTAST',
 'FTM',
 'FT_PCT',
 'FT_missed',
 'GAME_DATE',
 'GAME_ID',
 'GAMESCORE',
 'Home/Away',
 'Hometeam',
 'MATCHUP',
 'MATCHUP_AST',
 'MATCHUP_FG3A',
 'MATCHUP_FG3M',
 'MATCHUP_FG3_PCT',
 'MATCHUP_FGA',
 'MATCHUP_FGM',
 'MATCHUP_FG_PCT',
 'MATCHUP_MIN',
 'MATCHUP_TOV',
 'MIN',
 'MIN_FLAT',
 'NET_RATING',
 'OFF_RATING',
 'OPP_PTS_2ND_CHANCE',
 'OPP_PTS_FB',
 'OPP_PTS_OFF_TOV',
 'OPP_PTS_PAINT',
 'ORBC',
 'OREB',
 'OREB_PCT',
 'PACE',
 'PACE_PER40',
 'PARTIAL_POSS',
 'PASS',
 'PCT_AST',
 'PCT_BLK',
 'PCT_BLKA',
 'PCT_DREB',
 'PCT_FG3A',
 'PCT_FG3M',
 'PCT_FGA',
 'PCT_F

In [13]:
features = [
    
#   target
    'WL',
    
#   traditional
    'PTS',
    'AST',
    'PF',
#   'DEF_RATING',
#   'OFF_RATING',
    'FG_PCT',
    'FG3M',
    'FTM',
    'FGM',
    'STL',
    'TOV',
    'Home/Away',
    
#   minutes/usage/possession
    'MIN_FLAT',
    'USG_PCT',
    'POSS',
    
#   point types
    'PTS_2ND_CHANCE',
    'PTS_FB',
    'PTS_OFF_TOV',
    'PTS_PAINT',

#   advanced
    'GAMESCORE',
    'EFG_PCT',
    'TS_PCT',
#   eval => pct above or below team average
    'EFG_EVAL',
    'TS_EVAL',
#   team average
    'EFG_TEAM_AVG',
    'TS_TEAM_AVG',
    
#   granular
    'SPD', # speed
    'DIST', # distance
    'ORBC', # oreb chances
    'DRBC', # dreb chances
    'RBC', # reb chances
    'TCHS', # touches
    'SAST', # secondary assists
    'FTAST', # free throw assists
    'PFD', # personal fouls drawn
    'PASS', # passes
    'CFGM', # contested fgm
    'CFGA', # contested fga
    'CFG_PCT', # contested fg pct
    'UFGM', # uncontested fgm
    'UFGA', # uncontested fga
    'UFG_PCT', # uncontested fg pct
    'DFGM', # defended fgm
    'DFGA', # defended fga
    'DFG_PCT', # defended df pct
    
#   percent of team stats
    'PCT_FGM',
    'PCT_FGA',
    'PCT_FG3M',
    'PCT_FG3A',
    'PCT_FTM',
    'PCT_FTA',
    'PCT_OREB',
    'PCT_DREB',
    'PCT_REB',
    'PCT_AST',
    'PCT_TOV',
    'PCT_STL',
    'PCT_BLK',
    'PCT_BLKA',
    'PCT_PF',
    'PCT_PFD',
    'PCT_PTS',
    
#     arena
#    'Capacity_pct',
#    'Capacity',
    
]

In [14]:
len(features)

61

--------
# svc model
--------

In [15]:
df_svc = df_final[features].copy()

In [16]:
df_svc.head()

Unnamed: 0,WL,PTS,AST,PF,FG_PCT,FG3M,FTM,FGM,STL,TOV,...,PCT_DREB,PCT_REB,PCT_AST,PCT_TOV,PCT_STL,PCT_BLK,PCT_BLKA,PCT_PF,PCT_PFD,PCT_PTS
0,W,8,3,3,0.429,2,0,3,0,1,...,0.077,0.211,0.375,0.167,0.0,0.0,0.5,0.375,0.444,0.19
1,W,12,2,4,0.385,1,1,5,0,2,...,0.083,0.172,0.105,0.286,0.0,0.0,0.667,0.364,0.077,0.171
2,W,10,0,1,0.333,1,1,4,4,0,...,0.143,0.172,0.0,0.0,0.667,0.0,0.5,0.111,0.385,0.192
3,W,9,2,1,0.571,0,1,4,0,3,...,0.138,0.121,0.118,0.3,0.0,1.0,0.0,0.091,0.143,0.134
4,W,7,7,1,0.143,1,2,2,0,3,...,0.154,0.129,0.389,0.333,0.0,0.0,0.25,0.077,0.143,0.103


In [17]:
encoder = LabelEncoder()
encoder.fit(df_svc['WL'])
y = encoder.transform(df_svc.WL)
df_svc['y'] = y

In [18]:
encoder = LabelEncoder()
encoder.fit(df_svc['Home/Away'])
home = encoder.transform(df_svc['Home/Away'])
df_svc['HOME'] = home

In [19]:
df_svc

Unnamed: 0,WL,PTS,AST,PF,FG_PCT,FG3M,FTM,FGM,STL,TOV,...,PCT_AST,PCT_TOV,PCT_STL,PCT_BLK,PCT_BLKA,PCT_PF,PCT_PFD,PCT_PTS,y,HOME
0,W,8,3,3,0.429,2,0,3,0,1,...,0.375,0.167,0.000,0.000,0.500,0.375,0.444,0.190,1,1
1,W,12,2,4,0.385,1,1,5,0,2,...,0.105,0.286,0.000,0.000,0.667,0.364,0.077,0.171,1,1
2,W,10,0,1,0.333,1,1,4,4,0,...,0.000,0.000,0.667,0.000,0.500,0.111,0.385,0.192,1,1
3,W,9,2,1,0.571,0,1,4,0,3,...,0.118,0.300,0.000,1.000,0.000,0.091,0.143,0.134,1,1
4,W,7,7,1,0.143,1,2,2,0,3,...,0.389,0.333,0.000,0.000,0.250,0.077,0.143,0.103,1,1
5,W,16,0,5,0.583,2,0,7,2,1,...,0.000,0.200,0.667,0.000,0.000,0.500,0.143,0.327,1,1
6,W,0,0,0,0.000,0,0,0,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1,1
7,W,11,1,0,0.500,1,0,5,0,1,...,0.143,0.167,0.000,0.333,0.000,0.000,0.111,0.180,1,1
8,W,7,3,2,0.500,2,1,2,0,2,...,0.429,0.200,0.000,0.000,0.000,0.182,0.125,0.152,1,1
9,W,23,3,2,0.529,1,4,9,1,1,...,0.300,0.091,0.250,0.000,0.000,0.167,0.231,0.426,1,1


In [20]:
target = df_svc['y']
data = df_svc.drop(['WL', 'y', 'Home/Away'], axis=1)
feature_names = data.columns

In [21]:
data

Unnamed: 0,PTS,AST,PF,FG_PCT,FG3M,FTM,FGM,STL,TOV,MIN_FLAT,...,PCT_REB,PCT_AST,PCT_TOV,PCT_STL,PCT_BLK,PCT_BLKA,PCT_PF,PCT_PFD,PCT_PTS,HOME
0,8,3,3,0.429,2,0,3,0,1,19,...,0.211,0.375,0.167,0.000,0.000,0.500,0.375,0.444,0.190,1
1,12,2,4,0.385,1,1,5,0,2,28,...,0.172,0.105,0.286,0.000,0.000,0.667,0.364,0.077,0.171,1
2,10,0,1,0.333,1,1,4,4,0,25,...,0.172,0.000,0.000,0.667,0.000,0.500,0.111,0.385,0.192,1
3,9,2,1,0.571,0,1,4,0,3,30,...,0.121,0.118,0.300,0.000,1.000,0.000,0.091,0.143,0.134,1
4,7,7,1,0.143,1,2,2,0,3,29,...,0.129,0.389,0.333,0.000,0.000,0.250,0.077,0.143,0.103,1
5,16,0,5,0.583,2,0,7,2,1,21,...,0.455,0.000,0.200,0.667,0.000,0.000,0.500,0.143,0.327,1
6,0,0,0,0.000,0,0,0,0,0,1,...,0.250,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1
7,11,1,0,0.500,1,0,5,0,1,27,...,0.250,0.143,0.167,0.000,0.333,0.000,0.000,0.111,0.180,1
8,7,3,2,0.500,2,1,2,0,2,25,...,0.091,0.429,0.200,0.000,0.000,0.000,0.182,0.125,0.152,1
9,23,3,2,0.529,1,4,9,1,1,29,...,0.300,0.300,0.091,0.250,0.000,0.000,0.167,0.231,0.426,1


In [22]:
# X_train, X_test, y_train, y_test = train_test_split(data[:5000], target[:5000], random_state=42)

X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [23]:
y_train.head()

13443    1
6311     0
12709    1
14408    1
11633    0
Name: y, dtype: int64

In [24]:
y_test.head()

20252    0
20634    0
516      1
2250     0
5354     1
Name: y, dtype: int64

In [25]:
X_test

Unnamed: 0,PTS,AST,PF,FG_PCT,FG3M,FTM,FGM,STL,TOV,MIN_FLAT,...,PCT_REB,PCT_AST,PCT_TOV,PCT_STL,PCT_BLK,PCT_BLKA,PCT_PF,PCT_PFD,PCT_PTS,HOME
20252,25,3,3,0.643,2,5,9,0,2,37,...,0.100,0.214,0.286,0.000,0.000,0.500,0.136,0.200,0.298,0
20634,7,1,0,0.182,1,2,2,0,2,20,...,0.179,0.250,0.250,0.000,0.000,1.000,0.000,0.375,0.233,1
516,10,2,3,0.500,2,0,4,0,1,23,...,0.042,0.182,0.143,0.000,0.000,0.000,0.231,0.000,0.204,0
2250,19,9,3,0.615,2,1,8,1,7,34,...,0.125,0.450,0.467,0.200,0.000,0.000,0.200,0.059,0.218,1
5354,4,1,0,0.400,0,0,2,0,2,21,...,0.182,0.091,0.400,0.000,0.000,0.333,0.000,0.000,0.078,0
12136,0,0,0,0.000,0,0,0,0,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1
4903,12,1,1,0.800,3,1,4,1,3,17,...,0.333,0.100,0.333,1.000,0.500,0.000,0.250,0.125,0.267,1
2992,14,0,4,0.600,2,0,6,0,3,28,...,0.438,0.000,0.188,0.000,1.000,0.500,0.364,0.154,0.212,0
6239,4,0,0,1.000,0,0,2,0,0,4,...,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0
16791,10,0,1,0.400,0,2,4,0,3,21,...,0.381,0.000,0.429,0.000,0.000,0.200,0.111,0.125,0.270,0


In [26]:
model = SVC(C= 5, 
        gamma= 0.01, 
        kernel= 'rbf',
        verbose=True)

In [27]:
model.fit(X_train, y_train)

[LibSVM]

SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=True)

In [28]:
predictions_svc = model.predict(X_test)

In [29]:
X_test['predictions_svc'] = predictions_svc
X_test['target'] = target



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [30]:
X_test

Unnamed: 0,PTS,AST,PF,FG_PCT,FG3M,FTM,FGM,STL,TOV,MIN_FLAT,...,PCT_TOV,PCT_STL,PCT_BLK,PCT_BLKA,PCT_PF,PCT_PFD,PCT_PTS,HOME,predictions_svc,target
20252,25,3,3,0.643,2,5,9,0,2,37,...,0.286,0.000,0.000,0.500,0.136,0.200,0.298,0,1,0
20634,7,1,0,0.182,1,2,2,0,2,20,...,0.250,0.000,0.000,1.000,0.000,0.375,0.233,1,0,0
516,10,2,3,0.500,2,0,4,0,1,23,...,0.143,0.000,0.000,0.000,0.231,0.000,0.204,0,1,1
2250,19,9,3,0.615,2,1,8,1,7,34,...,0.467,0.200,0.000,0.000,0.200,0.059,0.218,1,0,0
5354,4,1,0,0.400,0,0,2,0,2,21,...,0.400,0.000,0.000,0.333,0.000,0.000,0.078,0,1,1
12136,0,0,0,0.000,0,0,0,0,0,0,...,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1,1,1
4903,12,1,1,0.800,3,1,4,1,3,17,...,0.333,1.000,0.500,0.000,0.250,0.125,0.267,1,0,0
2992,14,0,4,0.600,2,0,6,0,3,28,...,0.188,0.000,1.000,0.500,0.364,0.154,0.212,0,0,1
6239,4,0,0,1.000,0,0,2,0,0,4,...,0.000,0.000,0.000,0.000,0.000,0.000,1.000,0,0,1
16791,10,0,1,0.400,0,2,4,0,3,21,...,0.429,0.000,0.000,0.200,0.111,0.125,0.270,0,1,0


In [31]:
print(classification_report(y_test, predictions_svc, target_names=['Loss','Win']))

              precision    recall  f1-score   support

        Loss       0.57      0.58      0.57      2725
         Win       0.60      0.59      0.59      2891

    accuracy                           0.58      5616
   macro avg       0.58      0.58      0.58      5616
weighted avg       0.58      0.58      0.58      5616



In [32]:
list(features)

['WL',
 'PTS',
 'AST',
 'PF',
 'FG_PCT',
 'FG3M',
 'FTM',
 'FGM',
 'STL',
 'TOV',
 'Home/Away',
 'MIN_FLAT',
 'USG_PCT',
 'POSS',
 'PTS_2ND_CHANCE',
 'PTS_FB',
 'PTS_OFF_TOV',
 'PTS_PAINT',
 'GAMESCORE',
 'EFG_PCT',
 'TS_PCT',
 'EFG_EVAL',
 'TS_EVAL',
 'EFG_TEAM_AVG',
 'TS_TEAM_AVG',
 'SPD',
 'DIST',
 'ORBC',
 'DRBC',
 'RBC',
 'TCHS',
 'SAST',
 'FTAST',
 'PFD',
 'PASS',
 'CFGM',
 'CFGA',
 'CFG_PCT',
 'UFGM',
 'UFGA',
 'UFG_PCT',
 'DFGM',
 'DFGA',
 'DFG_PCT',
 'PCT_FGM',
 'PCT_FGA',
 'PCT_FG3M',
 'PCT_FG3A',
 'PCT_FTM',
 'PCT_FTA',
 'PCT_OREB',
 'PCT_DREB',
 'PCT_REB',
 'PCT_AST',
 'PCT_TOV',
 'PCT_STL',
 'PCT_BLK',
 'PCT_BLKA',
 'PCT_PF',
 'PCT_PFD',
 'PCT_PTS']

In [33]:
dfpred = pd.concat([df_final, X_test], axis=1)

In [34]:
dfplayerpred = dfpred[['PLAYER_NAME', 'TEAM_NICKNAME', 'MATCHUP', 'predictions_svc', 'target', 'WL']].copy().dropna()

In [35]:
dfplayerpred

Unnamed: 0,PLAYER_NAME,TEAM_NICKNAME,MATCHUP,predictions_svc,target,WL
3,Al Horford,Celtics,BOS vs. PHI,1.0,1.0,W
6,Semi Ojeleye,Celtics,BOS vs. PHI,1.0,1.0,W
17,T.J. McConnell,76ers,PHI @ BOS,0.0,0.0,L
19,Dario Saric,76ers,PHI @ BOS,0.0,0.0,L
31,Klay Thompson,Warriors,GSW vs. OKC,0.0,1.0,W
34,Terrance Ferguson,Thunder,OKC @ GSW,1.0,0.0,L
35,Paul George,Thunder,OKC @ GSW,0.0,0.0,L
41,Eric Bledsoe,Bucks,MIL @ CHA,0.0,1.0,W
42,Malcolm Brogdon,Bucks,MIL @ CHA,1.0,1.0,W
44,Donte DiVincenzo,Bucks,MIL @ CHA,1.0,1.0,W


In [36]:
dfplayerpred['correct_svc'] = 0

In [37]:
for index, row in dfplayerpred.iterrows():
    
    if row['predictions_svc'] == row['target']:
        dfplayerpred.loc[index, 'correct_svc'] = 'correct'
    if row['predictions_svc'] != row['target']:
        dfplayerpred.loc[index, 'correct_svc'] = 'wrong'

In [38]:
dfplayerpred.loc[(dfplayerpred['correct_svc'] == 'correct')]

Unnamed: 0,PLAYER_NAME,TEAM_NICKNAME,MATCHUP,predictions_svc,target,WL,correct_svc
3,Al Horford,Celtics,BOS vs. PHI,1.0,1.0,W,correct
6,Semi Ojeleye,Celtics,BOS vs. PHI,1.0,1.0,W,correct
17,T.J. McConnell,76ers,PHI @ BOS,0.0,0.0,L,correct
19,Dario Saric,76ers,PHI @ BOS,0.0,0.0,L,correct
35,Paul George,Thunder,OKC @ GSW,0.0,0.0,L,correct
42,Malcolm Brogdon,Bucks,MIL @ CHA,1.0,1.0,W,correct
44,Donte DiVincenzo,Bucks,MIL @ CHA,1.0,1.0,W,correct
46,Ersan Ilyasova,Bucks,MIL @ CHA,1.0,1.0,W,correct
47,Brook Lopez,Bucks,MIL @ CHA,1.0,1.0,W,correct
72,Blake Griffin,Pistons,DET vs. BKN,1.0,1.0,W,correct


## grid

In [39]:
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [40]:
model = SVC()

# param_grid = {'C': [0.001, 0.1, 1, 5, 10, 100],
#               'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
#               'kernel':['rbf']}

param_grid = {'C': [0.1, 1, 5],
              'gamma': [0.001, 0.01, 0.1, 1],
              'kernel':['rbf']}

grid = GridSearchCV(model, param_grid, verbose=3)

In [None]:
grid.fit(X_train, y_train)



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................
[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.580, total=  12.9s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   12.9s remaining:    0.0s


[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.583, total=  13.3s
[CV] C=0.1, gamma=0.001, kernel=rbf ..................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   26.3s remaining:    0.0s


[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.583, total=  13.6s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.552, total=  15.7s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.547, total=  13.2s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.549, total=  14.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.505, total=  14.5s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.505, total=  15.6s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.505, total=  16.4s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .

In [None]:
grid.best_params_

In [None]:
grid.best_score_

In [None]:
predictions_grid = grid.predict(X_test)

In [None]:
predictions_grid

In [None]:
X_test['predictions_grid'] = predictions_grid
X_test['target'] = target

In [None]:
print(classification_report(y_test, predictions_grid,
                            target_names=["loss", "win"]))

In [None]:
matrix = confusion_matrix(y_test, predictions_grid)
sns.heatmap(matrix.T, annot=True, fmt='d', cbar=False,
            xticklabels=['Loss', 'Win'],
            yticklabels=['Loss', 'Win'])
plt.xlabel('target label')
plt.ylabel('predicted label');

In [None]:
matrix.T

In [None]:
dfpred = pd.concat([df_final, X_test], axis=1)

In [None]:
dfpred

In [None]:
dfplayerpred = dfpred[['PLAYER_NAME', 'TEAM_NICKNAME', 'MATCHUP', 'predictions_grid', 'target', 'WL']].copy().dropna()

In [None]:
dfplayerpred

In [None]:
dfplayerpred['correct_grid'] = 0
dfplayerpred['wrong_grid'] = 0

In [None]:
for index, row in dfplayerpred.iterrows():
    
    if row[3] == row['target']:
        dfplayerpred.loc[index, 'correct_grid'] = 1
        
    if row[3] != row['target']:
        dfplayerpred.loc[index, 'wrong_grid'] = 1

    print(index, row[6])

In [None]:
dfplayerpred

In [None]:
groupbygrid = dfplayerpred.groupby(by='PLAYER_NAME').sum()[['correct_grid', 'wrong_grid']].copy()

In [None]:
df = pd.DataFrame(groupbygrid)
df['differential_grid'] = 0
df['correct_grid_pct'] = 0
df['wrong_grid_pct'] = 0

In [None]:
for index, row in df.iterrows():
    differential = (row['correct_grid'] - row['wrong_grid']) / (row['correct_grid'] + row['wrong_grid'])
    df.loc[index, 'differential_grid'] = differential
    
    correct_pct = (row['correct_grid']) / (row['correct_grid'] + row['wrong_grid'])
    df.loc[index, 'correct_grid_pct'] = correct_pct

    wrong_pct = (row['wrong_grid']) / (row['correct_grid'] + row['wrong_grid'])
    df.loc[index, 'wrong_grid_pct'] = wrong_pct


In [None]:
df.sort_values(by='correct_grid_pct').tail(20)

In [None]:
df.sort_values(by='wrong_grid_pct').head(20)

----------
# neural net
----------

In [None]:
df_nnet = df_final[features]

In [None]:
data = df_nnet.copy()

In [None]:
data.head()

In [None]:
encoder = LabelEncoder()
encoder.fit(data['WL'])
encoder.classes_
y = encoder.transform(data.WL)
y

In [None]:
encoder = LabelEncoder()
encoder.fit(data['Home/Away'])
home = encoder.transform(data['Home/Away'])
data['HOME'] = home

In [None]:
data.drop(columns=['WL','Home/Away'], inplace=True)
# X = data.values
X = data
X

In [None]:
len(y), len(X)

In [None]:
y_categorical = to_categorical(y)
y_categorical

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, random_state=42)

In [None]:
X_scaler = MinMaxScaler().fit(X_train)
y_scaler = MinMaxScaler().fit(y_train)

# X_scaler = StandardScaler().fit(X_train)
# y_scaler = StandardScaler().fit(y_train)

In [None]:
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

In [None]:
X_train_scaled[0]

In [None]:
y_train_scaled[0]

In [None]:
X_train_scaled.shape, y_train_scaled.shape

In [None]:
model = Sequential(name='NBA_Model')

# input layer
model.add(Dense(units=50, activation='relu', input_dim=X_train_scaled.shape[1], kernel_initializer='he_normal'))

# hidden layers
model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=75, activation='relu'))
model.add(Dense(units=50, activation='relu'))
model.add(Dense(units=25, activation='relu'))

# output layer
# model.add(Dense(units=y_train.shape[1], activation='sigmoid'))
model.add(Dense(units=y_train.shape[1], activation='softmax'))

In [None]:
model.summary()

In [None]:
optimizer_sgd = SGD(lr = 0.01, 
                    momentum = 0.9)

In [None]:
model.compile(
#     optimizer='adam',
    optimizer=optimizer_sgd,
    loss='binary_crossentropy',
#     loss='categorical_crossentropy',
    # loss='categorical_hinge',
    metrics=['accuracy']
)

In [None]:
model.fit(
    X_train_scaled,
    y_train_scaled,
    epochs=50,
    shuffle=True,
    verbose=2,
    validation_data=[X_test_scaled, y_test_scaled]
)

In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_scaled, verbose=2)
print(f"Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
pd.DataFrame(model.history.history).plot(alpha=.5)
plt.gca().set_ylim(0,1)
plt.show()

In [None]:
predictions_nnet = model.predict(X_test_scaled)
predictions_nnet

In [None]:
# add predictions to df here and merge with grid pred df
X_test['nnet_pred_loss_0'] = predictions_nnet[:,0]
X_test['nnet_pred_win_1'] = predictions_nnet[:,1]
X_test['WL_TARGET'] = y_test[:,1]

In [None]:
X_test

In [None]:
dfprednnet = pd.concat(
    [df_final, X_test], axis=1)[['PLAYER_NAME', 
                                         'TEAM_NICKNAME', 
                                         'MATCHUP',
                                         'GAME_DATE',
                                         'nnet_pred_loss_0', 
                                         'nnet_pred_win_1', 
                                         'WL_TARGET', 
                                         'WL']].copy().dropna()

In [None]:
dfprednnet

In [None]:
dfprednnet['nnet_eval'] = 0
dfprednnet['nnet_total'] = 1

In [None]:
dfprednnet

In [None]:
for index, row in dfprednnet.iterrows():
    
    if dfprednnet.loc[index, 'nnet_pred_loss_0'] > 0.5 and row['WL_TARGET'] == 0.0:
        dfprednnet.loc[index, 'nnet_eval'] = 1
    
    if dfprednnet.loc[index, 'nnet_pred_win_1'] > 0.5 and row['WL_TARGET'] == 1.0:
        dfprednnet.loc[index, 'nnet_eval'] = 1


In [None]:
dfprednnet

In [None]:
dfprednnet['nnet_eval'].value_counts() / len(dfprednnet['nnet_eval'])

In [None]:
len(dfprednnet['nnet_eval'])

In [None]:
groupbynnet = dfprednnet.groupby(by='PLAYER_NAME').sum()[['nnet_eval','nnet_total']].copy()

In [None]:
groupbynnet

In [None]:
dfnnet = pd.DataFrame(groupbynnet)
dfnnet['differential_nnet'] = 0
dfnnet['nnet_wrong'] = 0
dfnnet['nnet_wrong_pct'] = 0
dfnnet['nnet_correct_pct'] = 0

for index, row in dfnnet.iterrows():
    
    total_nnet = row['nnet_total']
    
    correct_nnet = row['nnet_eval']
    
    wrong_nnet = total_nnet - correct_nnet
    dfnnet.loc[index, 'nnet_wrong'] = wrong_nnet
    
    differential_nnet = (correct_nnet - wrong_nnet) / total_nnet
    dfnnet.loc[index, 'differential_nnet'] = differential_nnet
    
    correct_pct = correct_nnet / total_nnet
    dfnnet.loc[index, 'nnet_correct_pct'] = correct_pct

    wrong_pct = wrong_nnet / total_nnet
    dfnnet.loc[index, 'nnet_wrong_pct'] = wrong_pct

In [None]:
# dfnnet.sort_values(by='differential_nnet').head(20)
dfnnet

## predictions df

In [None]:
df

In [None]:
dfnnet

In [None]:
df_prediction_eval = pd.concat([df, dfnnet], axis=1)[['nnet_correct_pct',
                                                      'nnet_wrong_pct',
                                                      'correct_grid_pct',
                                                      'wrong_grid_pct']]

In [None]:
df_prediction_eval

In [None]:
data = df_prediction_eval[['nnet_correct_pct', 
                           'nnet_wrong_pct',
                           'correct_grid_pct',
                           'wrong_grid_pct']].sort_values(by='nnet_correct_pct', ascending=False)[:]

# cannot match order of both subplots properly with name col instead of indexed
# data = df_prediction_eval[['nnet_correct_pct', 
#                            'nnet_wrong_pct',
#                            'correct_grid_pct',
#                            'wrong_grid_pct',
#                           'PLAYER_NAME',
#                           ]]

# data.sort_values(by='PLAYER_NAME', ascending=False, inplace=True)


# create subplot
fig = make_subplots(
    rows=1, cols=2,
#     subplot_titles=('Neural Net', 'SVM'),
    shared_yaxes=True,
    shared_xaxes=True
)

# correct nnet trace
fig.add_trace(go.Bar(
        y=data.index.values,
#         y=data['PLAYER_NAME'],
        x=data['nnet_correct_pct'],
        name='Net Correct %',
        marker_color='teal',
        orientation='h',
        hovertext=data['nnet_correct_pct']),
    row=1, col=1,
)

# wrong nnet trace
fig.add_trace(go.Bar(
        y=data.index.values,
#         y=data['PLAYER_NAME'],
        x=data['nnet_wrong_pct'],
        name='Net: Wrong %',
        marker_color='goldenrod',
        orientation='h',
        hovertext=data['nnet_wrong_pct']),
    row=1, col=1,
)

# correct svm trace
fig.add_trace(go.Bar(
        y=data.index.values,
        x=data['correct_grid_pct'],
        name='SVM: Correct %',
        marker_color='teal',
        orientation='h',
#         hovertext=data['PLAYER_NAME']
    ),
    row=1, col=2,
)

# wrong svm trace
fig.add_trace(go.Bar(
        y=data.index.values,
        x=data['wrong_grid_pct'],
        name='SVM: Wrong %',
        marker_color='goldenrod',
        orientation='h',
#         hovertext=data['PLAYER_NAME']
    ),
    row=1, col=2,
)

# axes, title, etc.
fig.update_layout(
    xaxis=dict(
        showgrid=True,
        showline=True,
        showticklabels=True,
        zeroline=False,
#         domain=[0, 1],
#         title_text='Net',
        titlefont=dict(size=20),
    ),
    
    xaxis2=dict(
        showgrid=True,
        showline=True,
        showticklabels=True,
        zeroline=False,
#         domain=[0, 1],
#         title_text='SVM',
        titlefont=dict(size=20),
    ),
    
    yaxis=dict(
        showgrid=True,
        showline=True,
        showticklabels=True,
        zeroline=True,
#         title_text='Players',
        ticktext=data.index.values,
        tickmode='array',
        titlefont=dict(size=20),
    ),
    title={
        'text': 'Win/Loss Predictions (NBA Awards)',
        'y':.995,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font':{
            'size':30
        }
    },
    
    annotations=[
        go.layout.Annotation(
            x=.5,
            y=-.0125,
            showarrow=False,
            text='Prediction Accuracy per Player',
            xref="paper",
            yref="paper",
            font=dict(
                size=20,
            )
        ),
        
        go.layout.Annotation(
            x=.15,
            y=1.005,
            showarrow=False,
            text='Neural Net',
            xref="paper",
            yref="paper",
            font=dict(
                size=18,
            )
        ),
        
        go.layout.Annotation(
            x=.80,
            y=1.005,
            showarrow=False,
            text='SVM',
            xref="paper",
            yref="paper",
            font=dict(
                size=18,
            )
        ),
    ],
    barmode='stack',
    xaxis_tickformat='%',
    xaxis2_tickformat='%',
    paper_bgcolor='rgb(248, 248, 248)',
    plot_bgcolor='rgb(248, 248, 248)',
    margin=dict(l=75, r=75, t=100, b=75),
    showlegend=True,
    autosize=False,
    width=900, height=5000,
)

fig.show()

In [None]:
print('Neural Net Worst:')
df_prediction_eval.sort_values(by='nnet_correct_pct')

In [None]:
print('SVM Worst:')
df_prediction_eval.sort_values(by='correct_grid_pct')

In [None]:
print('Neural Net Best:')
df_prediction_eval.sort_values(by='nnet_correct_pct', ascending=False)

In [None]:
print('SVM Best:')
df_prediction_eval.sort_values(by='correct_grid_pct', ascending=False)

# random forest

In [None]:
df_forest = df_final[features].copy()

In [None]:
target = df_forest["WL"]
target_names = ["W", "L"]

In [None]:
encoder = LabelEncoder()
encoder.fit(df_forest['Home/Away'])
home = encoder.transform(df_forest['Home/Away'])
df_forest['HOME'] = home

In [None]:
data = df_forest.drop(["WL",'Home/Away'], axis=1)
feature_names = data.columns
data.head()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(data, target, random_state=42)

In [None]:
clf = tree.DecisionTreeClassifier()
clf = clf.fit(X_train, y_train)
clf.score(X_test, y_test)

In [None]:
rf = RandomForestClassifier(n_estimators=200)
rf = rf.fit(X_train, y_train)
rf.score(X_test, y_test)

In [None]:
features = sorted(zip(rf.feature_importances_, feature_names), reverse=True)

In [None]:
df_feature_importance = pd.DataFrame(features)
df_feature_importance.rename(columns={0:'pct', 1:'feature'}, inplace=True)
df_feature_importance.sort_values(by='pct', ascending=False, inplace=True)
df_feature_importance

In [None]:
names = df_feature_importance['feature'][:20]

values = df_feature_importance['pct'][:20]

fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(1,1,1)
yvals = range(len(names))
ax.barh(yvals, values, align='center', alpha=0.4)
ax.set_xlim((0,.06))
plt.yticks(yvals,names)
plt.title('RF Feature Importance: Win/Loss Classification (Top 20)')
plt.tight_layout()

plt.savefig('feature_importance_wl_top20.png', dpi=600)

plt.show()

In [None]:
names = df_feature_importance['feature'].tail(20)

values = df_feature_importance['pct'].tail(20)

fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(1,1,1)
yvals = range(len(names))
ax.barh(yvals, values, align='center', alpha=0.4)
ax.set_xlim((0,.06))
plt.yticks(yvals,names)
plt.title('RF Feature Importance: Win/Loss Classification (Bottom 20)')
plt.tight_layout()

plt.savefig('feature_importance_wl_bottom20.png', dpi=600)

plt.show()