# Game Data 
The purpose of this notebook is to gather NBA game data (here, for the Philadelphia 76ers) and prepare it for predicting game outcomes (win/loss) with a logistic regression model.


In [1]:
from nba_api.stats.static import teams

nba_teams = teams.get_teams()
# Select the dictionary for the Philadelphia 76ers (abbreviation 'PHI'), which contains their team ID.
# The trailing [0] takes the first match and raises IndexError if no team has that abbreviation.
process = [team for team in nba_teams if team['abbreviation'] == 'PHI'][0]
process_id = process['id']

In [2]:
from nba_api.stats.endpoints import leaguegamefinder

# Query for games where the 76ers (the team identified by process_id) were playing
gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=process_id)
# The first DataFrame of those returned is what we want.
games = gamefinder.get_data_frames()[0]


In [3]:
# Preview the raw game log returned by the game finder.
games

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22021,1610612755,PHI,Philadelphia 76ers,0022100869,2022-02-15,PHI vs. BOS,L,239,87,...,0.846,12,28,40,17,7.0,2,9,17,-48.0
1,22021,1610612755,PHI,Philadelphia 76ers,0022100849,2022-02-12,PHI vs. CLE,W,242,103,...,0.810,8,32,40,24,13.0,5,11,21,10.0
2,22021,1610612755,PHI,Philadelphia 76ers,0022100840,2022-02-11,PHI vs. OKC,W,239,100,...,0.808,15,40,55,19,7.0,10,11,16,13.0
3,22021,1610612755,PHI,Philadelphia 76ers,0022100815,2022-02-08,PHI vs. PHX,L,241,109,...,0.944,6,38,44,24,7.0,4,14,22,-5.0
4,22021,1610612755,PHI,Philadelphia 76ers,0022100802,2022-02-06,PHI @ CHI,W,241,119,...,0.810,4,34,38,29,5.0,5,9,17,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3473,21983,1610612755,PHL,Philadelphia 76ers,0028300050,1983-11-05,PHL @ NJN,W,240,119,...,0.730,19,26,45,24,12.0,9,20,25,
3474,21983,1610612755,PHL,Philadelphia 76ers,0028300042,1983-11-04,PHL vs. MIL,L,240,94,...,0.815,17,27,44,21,2.0,4,21,27,
3475,21983,1610612755,PHL,Philadelphia 76ers,0028300025,1983-11-01,PHL vs. CHI,W,240,99,...,0.700,12,39,51,27,9.0,19,22,19,
3476,21983,1610612755,PHL,Philadelphia 76ers,0028300017,1983-10-29,PHL @ IND,W,265,124,...,0.745,18,31,49,27,14.0,5,27,32,


In [4]:
# List the available columns before deciding which ones to keep for the model.
games.columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS'],
      dtype='object')

In [5]:
# Number of game rows returned for the team.
len(games)

3478

In [6]:
import pandas as pd
# `games` is already a DataFrame, so take an explicit copy to work on instead of
# passing it through `DataFrame.from_dict` (a constructor intended for dict input).
# The copy keeps the raw `games` frame intact while `model` is reshaped below.
model = games.copy()

In [7]:
# Drop the identifier/descriptive columns, keeping the outcome (WL) and the
# per-game statistics. Dropping from `games` rather than from `model` itself keeps
# this cell idempotent: re-running it does not raise KeyError for columns that
# were already removed.
model = games.drop(
    columns=[
        'TEAM_NAME', 'TEAM_ABBREVIATION', 'GAME_DATE', 'TEAM_ID',
        'MATCHUP', 'GAME_ID', 'SEASON_ID',
    ]
)

In [8]:
# Inspect the frame after the identifier/descriptive columns have been dropped.
model

Unnamed: 0,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,L,239,87,23,80,0.288,8,32,0.250,33,...,0.846,12,28,40,17,7.0,2,9,17,-48.0
1,W,242,103,38,79,0.481,10,22,0.455,17,...,0.810,8,32,40,24,13.0,5,11,21,10.0
2,W,239,100,36,93,0.387,7,26,0.269,21,...,0.808,15,40,55,19,7.0,10,11,16,13.0
3,L,241,109,41,89,0.461,10,32,0.313,17,...,0.944,6,38,44,24,7.0,4,14,22,-5.0
4,W,241,119,45,84,0.536,12,24,0.500,17,...,0.810,4,34,38,29,5.0,5,9,17,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3473,W,240,119,46,90,0.511,0,0,,27,...,0.730,19,26,45,24,12.0,9,20,25,
3474,L,240,94,35,73,0.479,2,3,0.667,22,...,0.815,17,27,44,21,2.0,4,21,27,
3475,W,240,99,39,80,0.488,0,0,,21,...,0.700,12,39,51,27,9.0,19,22,19,
3476,W,265,124,44,83,0.530,1,1,1.000,35,...,0.745,18,31,49,27,14.0,5,27,32,


In [9]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Apply the seaborn theme first: sns.set() rewrites matplotlib's rcParams
# (font sizes included), so the font override must come after it to take effect.
sns.set(style="whitegrid", color_codes=True)
plt.rc("font", size=14)

In [11]:
# Encode the outcome as a numeric target: win -> 1, loss -> 0.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the selected Series: that chained in-place form
# triggers a warning on recent pandas and, with Copy-on-Write enabled, leaves
# `model` unchanged.
model["WL"] = model["WL"].replace({"W": 1, "L": 0})
print(model)

       WL  MIN  PTS  FGM  FGA  FG_PCT  FG3M  FG3A  FG3_PCT  FTM  ...  FT_PCT  \
0     0.0  239   87   23   80   0.288     8    32    0.250   33  ...   0.846   
1     1.0  242  103   38   79   0.481    10    22    0.455   17  ...   0.810   
2     1.0  239  100   36   93   0.387     7    26    0.269   21  ...   0.808   
3     0.0  241  109   41   89   0.461    10    32    0.313   17  ...   0.944   
4     1.0  241  119   45   84   0.536    12    24    0.500   17  ...   0.810   
...   ...  ...  ...  ...  ...     ...   ...   ...      ...  ...  ...     ...   
3473  1.0  240  119   46   90   0.511     0     0      NaN   27  ...   0.730   
3474  0.0  240   94   35   73   0.479     2     3    0.667   22  ...   0.815   
3475  1.0  240   99   39   80   0.488     0     0      NaN   21  ...   0.700   
3476  1.0  265  124   44   83   0.530     1     1    1.000   35  ...   0.745   
3477  1.0  240  117   42   82   0.512     0     0      NaN   33  ...   0.786   

      OREB  DREB  REB  AST   STL  BLK  

In [13]:
# With wins encoded as 1 and losses as 0, the column sum is the number of wins.
model["WL"].sum()

1631.0