In [2]:
# Import libraries
import pandas as pd
import xgboost as xgb
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from nba_api.stats.endpoints import leaguegamefinder

In [4]:
# Get NBA dataframe from LeagueGameFinder library
lgf = leaguegamefinder.LeagueGameFinder(date_from_nullable='01/31/2020' , league_id_nullable='00')
games = lgf.get_data_frames()[0]
# Select requite columns
games = games[['TEAM_NAME', 'GAME_ID','GAME_DATE', 'MATCHUP', 'WL' ,'PLUS_MINUS' ]]

In [5]:
# Cast and sort by GAME_DATE to date type
games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
games = games.sort_values('GAME_DATE')

In [6]:
games

Unnamed: 0,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PLUS_MINUS
9422,Detroit Pistons,0021900720,2020-01-31,DET vs. TOR,L,-13.0
9409,New Orleans Pelicans,0021900723,2020-01-31,NOP vs. MEM,W,28.0
9410,Milwaukee Bucks,0021900724,2020-01-31,MIL vs. DEN,L,-12.0
9411,Dallas Mavericks,0021900722,2020-01-31,DAL @ HOU,L,-7.0
9412,Toronto Raptors,0021900720,2020-01-31,TOR @ DET,W,13.0
...,...,...,...,...,...,...
11,Sacramento Kings,0022300198,2023-11-15,SAC @ LAL,W,15.0
3,Oklahoma City Thunder,0022300201,2023-11-16,OKC @ GSW,W,19.0
2,Golden State Warriors,0022300201,2023-11-16,GSW vs. OKC,L,-19.0
1,Miami Heat,0022300200,2023-11-16,MIA vs. BKN,W,7.0


In [7]:
# calculate for each team the mean of plus minus score for the last 30 games
# closed = 'left' that mean without counting the current game 
games['avg_30_plus_minus'] = games.groupby('TEAM_NAME')['PLUS_MINUS'].transform(lambda x:  x.rolling(30 , closed='left').mean())

In [8]:
games

Unnamed: 0,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PLUS_MINUS,avg_30_plus_minus
9422,Detroit Pistons,0021900720,2020-01-31,DET vs. TOR,L,-13.0,
9409,New Orleans Pelicans,0021900723,2020-01-31,NOP vs. MEM,W,28.0,
9410,Milwaukee Bucks,0021900724,2020-01-31,MIL vs. DEN,L,-12.0,
9411,Dallas Mavericks,0021900722,2020-01-31,DAL @ HOU,L,-7.0,
9412,Toronto Raptors,0021900720,2020-01-31,TOR @ DET,W,13.0,
...,...,...,...,...,...,...,...
11,Sacramento Kings,0022300198,2023-11-15,SAC @ LAL,W,15.0,-0.366667
3,Oklahoma City Thunder,0022300201,2023-11-16,OKC @ GSW,W,19.0,1.246667
2,Golden State Warriors,0022300201,2023-11-16,GSW vs. OKC,L,-19.0,1.473333
1,Miami Heat,0022300200,2023-11-16,MIA vs. BKN,W,7.0,-1.133333


In [9]:
msk = games['MATCHUP'].str.contains('@')

In [10]:
games_away = games[msk]
games_home = games[~msk]

In [11]:
games_home

Unnamed: 0,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PLUS_MINUS,avg_30_plus_minus
9422,Detroit Pistons,0021900720,2020-01-31,DET vs. TOR,L,-13.0,
9409,New Orleans Pelicans,0021900723,2020-01-31,NOP vs. MEM,W,28.0,
9410,Milwaukee Bucks,0021900724,2020-01-31,MIL vs. DEN,L,-12.0,
9414,Phoenix Suns,0021900725,2020-01-31,PHX vs. OKC,L,-4.0,
9421,Houston Rockets,0021900722,2020-01-31,HOU vs. DAL,W,7.0,
...,...,...,...,...,...,...,...
17,Toronto Raptors,0022300195,2023-11-15,TOR vs. MIL,L,-16.0,5.900000
18,Chicago Bulls,0022300196,2023-11-15,CHI vs. ORL,L,-2.0,-1.066667
19,Washington Wizards,0022300192,2023-11-15,WAS vs. DAL,L,-13.0,-3.500000
2,Golden State Warriors,0022300201,2023-11-16,GSW vs. OKC,L,-19.0,1.473333


In [12]:
games_away

Unnamed: 0,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PLUS_MINUS,avg_30_plus_minus
9411,Dallas Mavericks,0021900722,2020-01-31,DAL @ HOU,L,-7.0,
9412,Toronto Raptors,0021900720,2020-01-31,TOR @ DET,W,13.0,
9413,Denver Nuggets,0021900724,2020-01-31,DEN @ MIL,W,12.0,
9416,Memphis Grizzlies,0021900723,2020-01-31,MEM @ NOP,L,-28.0,
9418,Oklahoma City Thunder,0021900725,2020-01-31,OKC @ PHX,W,4.0,
...,...,...,...,...,...,...,...
13,Minnesota Timberwolves,0022300197,2023-11-15,MIN @ PHX,L,-18.0,4.866667
15,Orlando Magic,0022300196,2023-11-15,ORL @ CHI,W,2.0,1.266667
11,Sacramento Kings,0022300198,2023-11-15,SAC @ LAL,W,15.0,-0.366667
3,Oklahoma City Thunder,0022300201,2023-11-16,OKC @ GSW,W,19.0,1.246667


In [13]:
games_merged = pd.merge(games_home , games_away , on='GAME_ID' , suffixes=('_home' , '_away'))

In [14]:
games_merged

Unnamed: 0,TEAM_NAME_home,GAME_ID,GAME_DATE_home,MATCHUP_home,WL_home,PLUS_MINUS_home,avg_30_plus_minus_home,TEAM_NAME_away,GAME_DATE_away,MATCHUP_away,WL_away,PLUS_MINUS_away,avg_30_plus_minus_away
0,Detroit Pistons,0021900720,2020-01-31,DET vs. TOR,L,-13.0,,Toronto Raptors,2020-01-31,TOR @ DET,W,13.0,
1,New Orleans Pelicans,0021900723,2020-01-31,NOP vs. MEM,W,28.0,,Memphis Grizzlies,2020-01-31,MEM @ NOP,L,-28.0,
2,Milwaukee Bucks,0021900724,2020-01-31,MIL vs. DEN,L,-12.0,,Denver Nuggets,2020-01-31,DEN @ MIL,W,12.0,
3,Phoenix Suns,0021900725,2020-01-31,PHX vs. OKC,L,-4.0,,Oklahoma City Thunder,2020-01-31,OKC @ PHX,W,4.0,
4,Houston Rockets,0021900722,2020-01-31,HOU vs. DAL,W,7.0,,Dallas Mavericks,2020-01-31,DAL @ HOU,L,-7.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4707,Toronto Raptors,0022300195,2023-11-15,TOR vs. MIL,L,-16.0,5.900000,Milwaukee Bucks,2023-11-15,MIL @ TOR,W,16.0,-2.400000
4708,Chicago Bulls,0022300196,2023-11-15,CHI vs. ORL,L,-2.0,-1.066667,Orlando Magic,2023-11-15,ORL @ CHI,W,2.0,1.266667
4709,Washington Wizards,0022300192,2023-11-15,WAS vs. DAL,L,-13.0,-3.500000,Dallas Mavericks,2023-11-15,DAL @ WAS,W,13.0,-0.466667
4710,Golden State Warriors,0022300201,2023-11-16,GSW vs. OKC,L,-19.0,1.473333,Oklahoma City Thunder,2023-11-16,OKC @ GSW,W,19.0,1.246667


In [15]:
games_merged['avg_30_plus_minus_diff'] = games_merged['avg_30_plus_minus_home']-games_merged['avg_30_plus_minus_away']

In [16]:
games_merged

Unnamed: 0,TEAM_NAME_home,GAME_ID,GAME_DATE_home,MATCHUP_home,WL_home,PLUS_MINUS_home,avg_30_plus_minus_home,TEAM_NAME_away,GAME_DATE_away,MATCHUP_away,WL_away,PLUS_MINUS_away,avg_30_plus_minus_away,avg_30_plus_minus_diff
0,Detroit Pistons,0021900720,2020-01-31,DET vs. TOR,L,-13.0,,Toronto Raptors,2020-01-31,TOR @ DET,W,13.0,,
1,New Orleans Pelicans,0021900723,2020-01-31,NOP vs. MEM,W,28.0,,Memphis Grizzlies,2020-01-31,MEM @ NOP,L,-28.0,,
2,Milwaukee Bucks,0021900724,2020-01-31,MIL vs. DEN,L,-12.0,,Denver Nuggets,2020-01-31,DEN @ MIL,W,12.0,,
3,Phoenix Suns,0021900725,2020-01-31,PHX vs. OKC,L,-4.0,,Oklahoma City Thunder,2020-01-31,OKC @ PHX,W,4.0,,
4,Houston Rockets,0021900722,2020-01-31,HOU vs. DAL,W,7.0,,Dallas Mavericks,2020-01-31,DAL @ HOU,L,-7.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4707,Toronto Raptors,0022300195,2023-11-15,TOR vs. MIL,L,-16.0,5.900000,Milwaukee Bucks,2023-11-15,MIL @ TOR,W,16.0,-2.400000,8.300000
4708,Chicago Bulls,0022300196,2023-11-15,CHI vs. ORL,L,-2.0,-1.066667,Orlando Magic,2023-11-15,ORL @ CHI,W,2.0,1.266667,-2.333333
4709,Washington Wizards,0022300192,2023-11-15,WAS vs. DAL,L,-13.0,-3.500000,Dallas Mavericks,2023-11-15,DAL @ WAS,W,13.0,-0.466667,-3.033333
4710,Golden State Warriors,0022300201,2023-11-16,GSW vs. OKC,L,-19.0,1.473333,Oklahoma City Thunder,2023-11-16,OKC @ GSW,W,19.0,1.246667,0.226667


In [17]:
games_model = games_merged[['WL_home' , 'avg_30_plus_minus_diff']].dropna()

In [18]:
games_model

Unnamed: 0,WL_home,avg_30_plus_minus_diff
399,W,1.933333
400,L,-5.366667
401,L,-6.233333
402,W,-4.333333
403,L,-8.066667
...,...,...
4707,L,8.300000
4708,L,-2.333333
4709,L,-3.033333
4710,L,0.226667


In [19]:
games_model['WL_home'] = games_model['WL_home'].map({'W':1 , 'L':0})

In [20]:
games_model

Unnamed: 0,WL_home,avg_30_plus_minus_diff
399,1,1.933333
400,0,-5.366667
401,0,-6.233333
402,1,-4.333333
403,0,-8.066667
...,...,...
4707,0,8.300000
4708,0,-2.333333
4709,0,-3.033333
4710,0,0.226667


In [21]:
df_train , df_test = train_test_split(games_model , stratify=games_model['WL_home'] , test_size=0.3 , random_state=7)

In [22]:
df_train.shape

(2928, 2)

In [23]:
df_test.shape

(1256, 2)

In [24]:
target = 'WL_home'

x_train  =df_train.drop(columns=target)
y_train = df_train[target]

x_test  =df_train.drop(columns=target)
y_test = df_train[target]

In [25]:
y_test

1010    1
2250    0
1941    1
1640    0
1111    1
       ..
2538    1
2488    1
4158    0
724     1
1512    1
Name: WL_home, Length: 2928, dtype: int64

## XGBClassifier

In [26]:
clf = xgb.XGBClassifier(random_state=7)
clf.fit(x_train , y_train)

In [27]:
y_pred = clf.predict(x_test)

In [28]:
accuracy_score(y_test , y_pred)

0.6693989071038251

In [30]:
clf.save_model('XGBClassifier.joblib')



## LogisticRegression

In [45]:
clf = LogisticRegression()
clf.fit(x_train, y_train)

In [46]:
y_pred = clf.predict(x_test)

In [47]:
accuracy_score(y_test , y_pred)

In [None]:
clf.save_model('LogisticRegression.joblib')

## SVC

In [48]:
clf = SVC(probability=True)
clf.fit(x_train, y_train)

In [49]:
y_pred = clf.predict(x_test)

In [50]:
accuracy_score(y_test , y_pred)

0.6308060109289617

In [None]:
clf.save_model('SVC.joblib')

## RandomForestClassifier

In [153]:
clf = RandomForestClassifier()
clf.fit(x_train, y_train)

In [154]:
y_pred = clf.predict(x_test)

In [155]:
accuracy_score(y_test , y_pred)

0.7937158469945356

In [None]:
clf.save_model('RandomForestClassifier.joblib')

# Deploy the Model

In [51]:
from joblib import dump , load
dump(clf , 'model_nba.joblib')

['model_nba.joblib']

In [52]:
model_saved = load('model_nba.joblib')

In [53]:
accuracy_score(y_test , model_saved.predict(x_test))

0.6308060109289617

In [41]:
import numpy as np
def predict_games(team_home, team_away):
    gamefinder = leaguegamefinder.LeagueGameFinder(
        date_from_nullable='01/01/2021',
        league_id_nullable='00')
    games = gamefinder.get_data_frames()[0]
    games = games[['TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'PLUS_MINUS']]
    games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])

    msk_home = (games['TEAM_NAME'] == team_home)
    games_30_home = games[msk_home].sort_values('GAME_DATE').tail(30)
    home_plus_minus = games_30_home['PLUS_MINUS'].mean()

    msk_away = (games['TEAM_NAME'] == team_away)
    games_30_away = games[msk_away].sort_values('GAME_DATE').tail(30)
    away_plus_minus = games_30_away['PLUS_MINUS'].mean()

    games_diff = home_plus_minus - away_plus_minus

    predict_home_win = model_saved.predict(np.array([games_diff]))[0]
    predict_winning_probability = model_saved.predict_proba(np.array([games_diff]))[0][1]
    return predict_home_win, predict_winning_probability

In [42]:
predict_games('Boston Celtics','San Antonio Spurs')

(1, 0.91441864)

In [43]:
predict_games('San Antonio Spurs', 'Boston Celtics')

(0, 0.36161807)

In [44]:
predict_games('San Antonio Spurs', 'Dallas Mavericks')

(0, 0.23543288)

In [45]:
predict_games('San Antonio Spurs', 'Boston Celtics')

(0, 0.36161807)

In [46]:
predict_games('Dallas Mavericks', 'Boston Celtics')

(1, 0.5104026)

In [47]:
predict_games("Chicago Bulls", "Memphis Grizzlies")

(1, 0.6563108)

In [48]:
predict_games("Minnesota Timberwolves", "Memphis Grizzlies")

(1, 0.5569656)