In [1]:
import pandas as pd
import numpy as np


%pip install nba_api

Note: you may need to restart the kernel to use updated packages.


In [2]:
from nba_api.stats.static import teams

# get_teams returns a list of 30 dictionaries, each an NBA team.
nba_teams = teams.get_teams()

# Select the entry whose full name is exactly "Chicago Bulls"; taking element
# [0] raises IndexError if no team matches.
bulls_matches = list(filter(lambda team: team["full_name"] == "Chicago Bulls", nba_teams))
bulls = bulls_matches[0]
bulls_id = bulls["id"]

# Display the numeric team id used by the game-log queries below.
bulls_id

1610612741

In [3]:
from nba_api.stats.endpoints import teamgamelog

# Seasons to download, listed newest first.
seasons = ['2023-24', '2022-23', '2021-22']

# Fetch one game-log frame per season and collect them in a list. A single
# concat at the end avoids re-copying the accumulated frame on every iteration
# and avoids concatenating onto an empty DataFrame, whose handling in
# pd.concat is deprecated in recent pandas.
season_frames = []
for season in seasons:
    season_log = teamgamelog.TeamGameLog(team_id=bulls_id, season=season)
    season_frames.append(season_log.get_data_frames()[0])

bulls_games = pd.concat(season_frames, ignore_index=True)

# Keep only the last three characters of MATCHUP; the displayed output shows
# these are team abbreviations such as NYK or PHI.
# NOTE(review): anything earlier in the original MATCHUP string (presumably a
# home/away marker) is discarded here -- confirm that is intended.
bulls_games['MATCHUP'] = bulls_games['MATCHUP'].str[-3:]
bulls_games

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1610612741,0022300468,"JAN 03, 2024",NYK,L,15,21,0.417,240,37,...,0.813,11,28,39,24,9,7,11,17,100
1,1610612741,0022300459,"JAN 02, 2024",PHI,L,15,20,0.429,240,37,...,0.696,19,32,51,17,9,7,18,21,97
2,1610612741,0022300442,"DEC 30, 2023",PHI,W,15,19,0.441,240,40,...,0.600,12,41,53,21,5,5,16,15,105
3,1610612741,0022300422,"DEC 28, 2023",IND,L,14,19,0.424,240,40,...,0.773,13,34,47,19,7,4,7,20,104
4,1610612741,0022300408,"DEC 26, 2023",ATL,W,14,18,0.438,240,42,...,0.871,18,30,48,23,10,2,8,17,118
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,1610612741,0022100067,"OCT 28, 2021",NYK,L,4,1,0.800,240,36,...,0.909,5,32,37,25,6,5,7,18,103
196,1610612741,0022100046,"OCT 25, 2021",TOR,W,4,0,1.000,240,39,...,0.955,5,24,29,24,11,3,13,16,111
197,1610612741,0022100030,"OCT 23, 2021",DET,W,3,0,1.000,240,35,...,0.842,3,39,42,24,13,11,12,17,97
198,1610612741,0022100022,"OCT 22, 2021",NOP,W,2,0,1.000,240,47,...,0.800,11,35,46,32,9,7,14,14,128


In [4]:
# Exploratory data analysis (EDA)

import seaborn as sns



In [5]:
# Parse the 'MON DD, YYYY' date strings, then keep only the calendar year.
# NOTE: GAME_DATE is overwritten in place, so this cell expects the column to
# still hold the original date strings when it runs.
game_dates = pd.to_datetime(bulls_games['GAME_DATE'], format='%b %d, %Y')
bulls_games['GAME_DATE'] = game_dates.dt.year

In [6]:


# Encode the game result as a numeric target: 1 for a win ('W'), 0 for a loss ('L').
# .map builds the numeric column directly instead of relying on the silent
# object -> int downcasting that Series.replace performs (deprecated in recent
# pandas), so downstream estimators always receive a numeric target.
# Any value other than 'W'/'L' becomes NaN rather than passing through unchanged.
bulls_games['WL'] = bulls_games['WL'].map({'W': 1, 'L': 0})

# One-hot encode the three-letter MATCHUP code: one indicator column per
# distinct value, named after the value itself.
dummy_variables = pd.get_dummies(bulls_games['MATCHUP'])

# Append the indicator columns, then drop the categorical column they replace.
bulls_games = pd.concat([bulls_games, dummy_variables], axis=1)

bulls_games = bulls_games.drop('MATCHUP', axis=1)

bulls_games

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,WL,W,L,W_PCT,MIN,FGM,FGA,...,OKC,ORL,PHI,PHX,POR,SAC,SAS,TOR,UTA,WAS
0,1610612741,0022300468,2024,0,15,21,0.417,240,37,89,...,0,0,0,0,0,0,0,0,0,0
1,1610612741,0022300459,2024,0,15,20,0.429,240,37,96,...,0,0,1,0,0,0,0,0,0,0
2,1610612741,0022300442,2023,1,15,19,0.441,240,40,79,...,0,0,1,0,0,0,0,0,0,0
3,1610612741,0022300422,2023,0,14,19,0.424,240,40,97,...,0,0,0,0,0,0,0,0,0,0
4,1610612741,0022300408,2023,1,14,18,0.438,240,42,92,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,1610612741,0022100067,2021,0,4,1,0.800,240,36,84,...,0,0,0,0,0,0,0,0,0,0
196,1610612741,0022100046,2021,1,4,0,1.000,240,39,81,...,0,0,0,0,0,0,0,1,0,0
197,1610612741,0022100030,2021,1,3,0,1.000,240,35,84,...,0,0,0,0,0,0,0,0,0,0
198,1610612741,0022100022,2021,1,2,0,1.000,240,47,89,...,0,0,0,0,0,0,0,0,0,0


In [11]:
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn import metrics
from sklearn.linear_model import LinearRegression, LogisticRegression
# Bind the pyplot interface (not the top-level matplotlib package) to `plt`,
# so the conventional plt.plot / plt.subplots calls work.
import matplotlib.pyplot as plt

# Feature matrix: every column except the target (WL), the identifier columns,
# the game year and MIN.
# NOTE(review): X still contains W, L and W_PCT plus box-score stats such as
# PTS and REB. The displayed rows suggest W/L are the running record including
# the game itself, which would leak the outcome -- confirm before reading the
# scores below as predictive.
X = bulls_games.drop(['WL', 'Team_ID', 'Game_ID', 'GAME_DATE', 'MIN'], axis=1)
# Target: the WL column.
y = bulls_games['WL']

# Hold out 20% of the games for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

In [12]:
# Logistic Regression Model

# Fit on the training split (fit returns the estimator itself) and predict the
# held-out games.
log_reg = LogisticRegression(max_iter=2000, solver='lbfgs').fit(X_train, y_train)
y_pred = log_reg.predict(X_test)

print('Logistic Regression Confusion Matrix:')
print(metrics.confusion_matrix(y_true=y_test, y_pred=y_pred))

# Per-class precision/recall/F1 on the held-out split.
class_report = metrics.classification_report(y_true=y_test, y_pred=y_pred)

# 8-fold cross-validated accuracy on the training split, folds taken in row
# order (no shuffling).
kfold = KFold(n_splits=8, shuffle=False)
cv_results = cross_val_score(
    estimator=log_reg,
    X=X_train,
    y=y_train,
    scoring='accuracy',
    cv=kfold,
)

print("Cross Validation Score:", cv_results.mean())
print("\nClassification Report:\n", class_report)

Logistic Regression Confusion Matrix:
[[17  0]
 [ 7 16]]
Cross Validation Score: 0.85

Classification Report:
               precision    recall  f1-score   support

           0       0.71      1.00      0.83        17
           1       1.00      0.70      0.82        23

    accuracy                           0.82        40
   macro avg       0.85      0.85      0.82        40
weighted avg       0.88      0.82      0.82        40



In [16]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Random forest of 100 trees with a fixed seed, fitted on the training split.
# NOTE(review): y_train is the 0/1 WL column, so this regressor predicts a
# continuous score rather than a class label -- confirm a regressor (not
# RandomForestClassifier) is intended.
rf = RandomForestRegressor(random_state=7, n_estimators=100)

rf.fit(X=X_train, y=y_train)

In [18]:
# Predict the held-out games with the fitted forest (this rebinds y_pred) and
# report the mean squared error against the true 0/1 results.
y_pred = rf.predict(X_test)
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)

print(f'\nMean Squared Error: {mse}')


Mean Squared Error: 0.198265


In [20]:
# Importance score of each feature, taken from the fitted forest.
importances = rf.feature_importances_

# Pair each feature name with its importance and rank from most to least
# important. The original positional index is kept, so each row still shows
# the feature's column position in X.
importances_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

# Show the ranking, followed by summary statistics of the WL target.
print(importances_df)
print(bulls_games['WL'].describe())

    Feature  Importance
20      PTS    0.323255
14      REB    0.104022
13     DREB    0.088591
2     W_PCT    0.053600
4       FGA    0.050344
16      STL    0.042107
5    FG_PCT    0.042025
19       PF    0.038419
8   FG3_PCT    0.030284
3       FGM    0.026587
18      TOV    0.020336
17      BLK    0.020299
1         L    0.018709
11   FT_PCT    0.018424
9       FTM    0.014850
10      FTA    0.014239
46      TOR    0.013161
15      AST    0.012731
6      FG3M    0.012379
7      FG3A    0.009137
12     OREB    0.007617
0         W    0.007531
42      PHX    0.004138
28      DET    0.003935
45      SAS    0.003704
22      BKN    0.003233
21      ATL    0.002531
31      IND    0.002124
41      PHI    0.001816
36      MIL    0.001636
32      LAC    0.001435
30      HOU    0.001361
25      CLE    0.001042
34      MEM    0.000896
24      CHA    0.000802
39      OKC    0.000580
23      BOS    0.000571
47      UTA    0.000506
33      LAL    0.000237
40      ORL    0.000234
38      NYK    0