In [55]:
#%pip install nba_api
#!pip install "fastapi[all]"
#!pip install uvicorn[standard]

In [56]:
from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd
import numpy as np

In [57]:
from fastapi import FastAPI

In [58]:
from nba_api.stats.endpoints import leaguegamefinder
gamefinder = leaguegamefinder.LeagueGameFinder(date_from_nullable='01/31/2022', league_id_nullable='00')
games = gamefinder.get_data_frames()[0]
games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22022,1610612760,OKC,Oklahoma City Thunder,22200611,2023-01-10,OKC @ MIA,L,241,111,...,0.667,14,30,44,27,8,3,18,27,-1.0
1,22022,1610612744,GSW,Golden State Warriors,22200615,2023-01-10,GSW vs. PHX,L,241,113,...,0.778,16,29,45,24,13,2,15,26,-12.0
2,22022,1610612739,CLE,Cleveland Cavaliers,22200614,2023-01-10,CLE @ UTA,L,240,114,...,0.833,7,30,37,18,7,7,5,25,-2.0
3,22022,1610612766,CHA,Charlotte Hornets,22200613,2023-01-10,CHA @ TOR,L,240,120,...,0.733,6,28,34,33,6,4,9,19,-12.0
4,22022,1610612748,MIA,Miami Heat,22200611,2023-01-10,MIA vs. OKC,W,239,112,...,1.0,13,28,41,18,10,4,19,19,1.0


In [59]:
games = games[['TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'PLUS_MINUS']]

In [60]:
games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2530 entries, 0 to 2529
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   TEAM_NAME   2530 non-null   object 
 1   GAME_ID     2530 non-null   object 
 2   GAME_DATE   2530 non-null   object 
 3   MATCHUP     2530 non-null   object 
 4   WL          2530 non-null   object 
 5   PLUS_MINUS  2530 non-null   float64
dtypes: float64(1), object(5)
memory usage: 118.7+ KB


In [61]:
#convert game date to app. format
import pandas as pd
games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])

In [62]:
#feature engineering to create a new variable for last 30 game performance
games['avg_30_plus_minus'] = games.groupby('TEAM_NAME')['PLUS_MINUS'].transform(lambda x: x.rolling(30, closed='left').mean())

In [63]:
#Toronto Raptors games
games[games['TEAM_NAME']=='Toronto Raptors'].head(35)

Unnamed: 0,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,PLUS_MINUS,avg_30_plus_minus
10,Toronto Raptors,22200613,2023-01-10,TOR vs. CHA,W,12.0,
41,Toronto Raptors,22200597,2023-01-08,TOR vs. POR,W,12.0,
61,Toronto Raptors,22200582,2023-01-06,TOR vs. NYK,L,-4.0,
102,Toronto Raptors,22200569,2023-01-04,TOR vs. MIL,L,-3.0,
126,Toronto Raptors,22200553,2023-01-02,TOR @ IND,L,-8.0,
174,Toronto Raptors,22200531,2022-12-30,TOR vs. PHX,W,12.0,
183,Toronto Raptors,22200526,2022-12-29,TOR vs. MEM,L,-13.0,
215,Toronto Raptors,22200509,2022-12-27,TOR vs. LAC,L,-11.0,
264,Toronto Raptors,22200484,2022-12-23,TOR @ CLE,W,11.0,
299,Toronto Raptors,22200471,2022-12-21,TOR @ NYK,W,7.0,


In [64]:
#feature engeering to create variable for home & away games
msk = games['MATCHUP'].str.contains('@')
games_away = games[msk]
games_home = games[~msk]

In [65]:
#merging duplicate entries 
games_merged = pd.merge(games_home, games_away, on='GAME_ID', suffixes=('_home', '_away'))

In [66]:
#feature engineering to create a new variable
games_merged['avg_30_plus_minus_diff'] = games_merged['avg_30_plus_minus_home'] - games_merged['avg_30_plus_minus_away']

In [67]:
games_merged[['WL_home', 'avg_30_plus_minus_diff']]

Unnamed: 0,WL_home,avg_30_plus_minus_diff
0,L,
1,W,
2,W,
3,L,
4,W,
...,...,...
1260,W,7.900000
1261,L,-8.500000
1262,W,-8.733333
1263,W,-6.300000


In [68]:
#mapping 1 and 0 for the variables
games_model = games_merged[['WL_home', 'avg_30_plus_minus_diff']].dropna()
games_model['WL_home'] = games_model['WL_home'].map({'W': 1, 'L': 0})

## Developing hypertuned prediction model

In [69]:
#importing library and creating train-test split
from sklearn.model_selection import train_test_split
df_train, df_test = train_test_split(games_model, stratify=games_model['WL_home'], test_size=0.2, random_state=7)

In [70]:
#assigning values in train & test dataset
target = 'WL_home'
X_train = df_train.drop(columns=target)
y_train = df_train[target]

X_test = df_test.drop(columns=target)
y_test= df_test[target]

In [71]:
#using Xgboost for calcuation
import xgboost as xgb
clf = xgb.XGBClassifier(use_label_encoder=False, random_state=7)
clf.fit(X_train, y_train)

In [72]:
#predicting values based on test values
from sklearn.metrics import accuracy_score
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.559748427672956

In [73]:
#hyperparam tuning to improve performance
from sklearn.model_selection import RandomizedSearchCV
from sklearn.utils.fixes import loguniform

hyp_params = {'learning_rate': loguniform(0.0001, 1),
              'max_depth': [2, 3, 4, 5, 6, 7, 8, 9],
              'subsample': [0.7, 0.8, 0.9, 1.0],
              'n_estimators': [50, 100, 150, 200]}


random_hyp = RandomizedSearchCV(estimator=clf, 
                                param_distributions=hyp_params, 
                                n_iter=20, 
                                cv=7,
                                scoring='accuracy',
                                random_state=7)

In [74]:
#fiting the model on test data
random_hyp.fit(X_train, y_train)

In [75]:
#finding best params
random_hyp.best_params_

{'learning_rate': 0.025246179740589205,
 'max_depth': 2,
 'n_estimators': 150,
 'subsample': 0.7}

In [76]:
#fitting the new model on test data
model_hyp = random_hyp.best_estimator_
y_pred_hyp = model_hyp.predict(X_test)
accuracy_score(y_test, y_pred_hyp)

0.5408805031446541

## Saving the predictive model 

In [77]:
#creating model file
from joblib import dump, load
dump(model_hyp, 'model_nba.joblib') 
model_saved = load('model_nba.joblib') 

In [78]:
#checking the accuracy
accuracy_score(y_test, model_saved.predict(X_test))

0.5408805031446541

In [79]:
team_home='Toronto Raptors'
team_away='Brooklyn Net'

import numpy as np
gamefinder = leaguegamefinder.LeagueGameFinder(date_from_nullable='01/01/2021',
                                           league_id_nullable='00')
games = gamefinder.get_data_frames()[0]
games = games[['TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'PLUS_MINUS']]
games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])

msk_home = (games['TEAM_NAME'] == team_home)
games_30_home = games[msk_home].sort_values('GAME_DATE').tail(30)
home_plus_minus = games_30_home['PLUS_MINUS'].mean()

msk_away = (games['TEAM_NAME'] == team_away)
games_30_away = games[msk_away].sort_values('GAME_DATE').tail(30)
away_plus_minus = games_30_away['PLUS_MINUS'].mean()

games_diff=home_plus_minus - away_plus_minus

predict_home_win=model_saved.predict(np.array([games_diff]))[0]
predict_winning_probability=model_saved.predict_proba(np.array([games_diff]))[0][1]

In [80]:
#creating a function to intake team names & predicting the win probability
## 1 is home team win, 0 is away team win. 
## 

def predict_games(team_home, team_away):
    gamefinder = leaguegamefinder.LeagueGameFinder(
        date_from_nullable='01/01/2021',
        league_id_nullable='00')
    games = gamefinder.get_data_frames()[0]
    games = games[
        ['TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'PLUS_MINUS']]
    games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])

    msk_home = (games['TEAM_NAME'] == team_home)
    games_30_home = games[msk_home].sort_values('GAME_DATE').tail(30)
    home_plus_minus = games_30_home['PLUS_MINUS'].mean()

    msk_away = (games['TEAM_NAME'] == team_away)
    games_30_away = games[msk_away].sort_values('GAME_DATE').tail(30)
    away_plus_minus = games_30_away['PLUS_MINUS'].mean()

    games_diff = home_plus_minus - away_plus_minus

    predict_home_win = model_saved.predict(np.array([games_diff]))[0]
    predict_winning_probability = model_saved.predict_proba(np.array([games_diff]))[0][1]
    return predict_home_win, predict_winning_probability

In [81]:
predict_games('Toronto Raptors','Brooklyn Nets')

(0, 0.42692325)