In [1]:
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.static import players, teams
import pandas as pd
import time

In [2]:
# Getting information on NBA teams from NBA API

nba_teams = teams.get_teams()
print(f'Number of teams fetched: {len(nba_teams)}')

Number of teams fetched: 30


In [3]:
# Setting the team information to a dataframe

teams_df = pd.DataFrame(nba_teams)

In [4]:
teams_df.head()

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Atlanta,1949
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966


In [5]:
# I'm going to start by looking for the full information from one team using (my favorite team) the Boston Celtics

celtics = [team for team in nba_teams if team['abbreviation'] == 'BOS'][0]
celtics_id = celtics['id']

In [6]:
# Using the leaguegamefinder endpoint to find information on all celtics games using the team ID from the NBA teaams dataframe
games1 = leaguegamefinder.LeagueGameFinder(team_id_nullable=celtics_id)

# The first dataframe returned is the one that I need
games1df = games1.get_data_frames()[0]

In [7]:
# Copying the dataframe in case of a mistake in the future

games = games1df.copy()

In [8]:
type(games)

pandas.core.frame.DataFrame

In [9]:
# Getting a look at the data returned

games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,42020,1610612738,BOS,Boston Celtics,42000115,2021-06-01,BOS @ BKN,L,240,109,...,0.783,13,31,44,18,3,12,11,21,-14.0
1,42020,1610612738,BOS,Boston Celtics,42000114,2021-05-30,BOS vs. BKN,L,241,126,...,0.905,11,23,34,24,8,4,10,21,-15.0
2,42020,1610612738,BOS,Boston Celtics,42000113,2021-05-28,BOS vs. BKN,W,240,125,...,0.826,13,33,46,21,5,3,13,25,6.0
3,42020,1610612738,BOS,Boston Celtics,42000112,2021-05-25,BOS @ BKN,L,239,108,...,0.783,15,28,43,23,10,4,15,20,-22.0
4,42020,1610612738,BOS,Boston Celtics,42000111,2021-05-22,BOS @ BKN,L,243,93,...,0.769,11,29,40,19,6,12,12,18,-11.0


In [10]:
# Checking total number of games returned

games.shape

(3575, 28)

In [11]:
# Looping through the rest of the teams to get all the game information and adding it to the dataframe with the Celtics game data
# I skipped over the Celtics in this loop to avoid adding duplicate data
# Also added some wait time to avoid being timed out after rapid requests

for team in nba_teams:
    team_id = team['id']
    if team_id == celtics_id:
        continue
    games_find = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
    time.sleep(1)
    temp = games_find.get_data_frames()[0]
    games = pd.concat([games,temp], copy=False)
    time.sleep(2)

In [12]:
# Getting a look at the full game data
 
games

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,42020,1610612738,BOS,Boston Celtics,0042000115,2021-06-01,BOS @ BKN,L,240,109,...,0.783,13.0,31.0,44.0,18,3.0,12,11,21,-14.0
1,42020,1610612738,BOS,Boston Celtics,0042000114,2021-05-30,BOS vs. BKN,L,241,126,...,0.905,11.0,23.0,34.0,24,8.0,4,10,21,-15.0
2,42020,1610612738,BOS,Boston Celtics,0042000113,2021-05-28,BOS vs. BKN,W,240,125,...,0.826,13.0,33.0,46.0,21,5.0,3,13,25,6.0
3,42020,1610612738,BOS,Boston Celtics,0042000112,2021-05-25,BOS @ BKN,L,239,108,...,0.783,15.0,28.0,43.0,23,10.0,4,15,20,-22.0
4,42020,1610612738,BOS,Boston Celtics,0042000111,2021-05-22,BOS @ BKN,L,243,93,...,0.769,11.0,29.0,40.0,19,6.0,12,12,18,-11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2690,21988,1610612766,CHH,Charlotte Hornets,0028800062,1988-11-12,CHH @ ATL,L,238,111,...,0.813,14.0,21.0,35.0,30,12.0,2,19,26,
2691,21988,1610612766,CHH,Charlotte Hornets,0028800052,1988-11-11,CHH @ WAS,L,240,87,...,0.760,11.0,32.0,43.0,22,9.0,1,23,26,
2692,21988,1610612766,CHH,Charlotte Hornets,0028800024,1988-11-08,CHH vs. LAC,W,240,117,...,0.738,17.0,38.0,55.0,28,9.0,1,17,31,
2693,21988,1610612766,CHH,Charlotte Hornets,0028800015,1988-11-05,CHH @ DET,L,240,85,...,1.000,21.0,19.0,40.0,18,8.0,6,11,21,


In [13]:
# Resetting the index to avoid duplicates
games.reset_index(inplace=True)

In [14]:
# Just in case I change something...

games_backup = games.copy()

In [15]:
# Checking for any null values in the dataframe

games.isnull().sum()

index                    0
SEASON_ID                0
TEAM_ID                  0
TEAM_ABBREVIATION        0
TEAM_NAME                0
GAME_ID                  0
GAME_DATE                0
MATCHUP                  0
WL                      20
MIN                      0
PTS                      0
FGM                      0
FGA                      0
FG_PCT                   6
FG3M                     0
FG3A                     2
FG3_PCT               1043
FTM                      0
FTA                      0
FT_PCT                   7
OREB                     4
DREB                     4
REB                      5
AST                      0
STL                      2
BLK                      0
TOV                      0
PF                       0
PLUS_MINUS           29704
dtype: int64

In [16]:
# There are some null values showing up in the data, best to get a look at that

games[games.isnull().any(axis=1)]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
179,179,22019,1610612738,BOS,Boston Celtics,1521900035,2019-07-08,BOS vs. CLE,,13,...,,2.0,6.0,8.0,0,1.0,0,3,2,0.0
1434,1434,12007,1610612738,BOS,Boston Celtics,0010700072,2007-10-19,BOS vs. NJN,,120,...,0.750,3.0,15.0,18.0,8,5.0,2,12,20,3.0
1438,1438,22007,1610612738,BOS,Boston Celtics,1520700043,2007-07-14,BOS vs. DAL,L,199,...,0.643,10.0,28.0,38.0,8,3.0,9,20,30,
1439,1439,22007,1610612738,BOS,Boston Celtics,1520700039,2007-07-13,BOS @ MIL,W,200,...,0.500,15.0,23.0,38.0,14,4.0,4,18,20,
1440,1440,22007,1610612738,BOS,Boston Celtics,1520700028,2007-07-11,BOS @ CHN,W,199,...,0.692,20.0,23.0,43.0,11,10.0,4,17,34,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96903,2690,21988,1610612766,CHH,Charlotte Hornets,0028800062,1988-11-12,CHH @ ATL,L,238,...,0.813,14.0,21.0,35.0,30,12.0,2,19,26,
96904,2691,21988,1610612766,CHH,Charlotte Hornets,0028800052,1988-11-11,CHH @ WAS,L,240,...,0.760,11.0,32.0,43.0,22,9.0,1,23,26,
96905,2692,21988,1610612766,CHH,Charlotte Hornets,0028800024,1988-11-08,CHH vs. LAC,W,240,...,0.738,17.0,38.0,55.0,28,9.0,1,17,31,
96906,2693,21988,1610612766,CHH,Charlotte Hornets,0028800015,1988-11-05,CHH @ DET,L,240,...,1.000,21.0,19.0,40.0,18,8.0,6,11,21,


In [17]:
games.columns

Index(['index', 'SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME',
       'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA',
       'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB',
       'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS'],
      dtype='object')

In [18]:
# Dropping the plus/minus column as there are a lot of missing values and it won't be useful

games.drop('PLUS_MINUS', axis=1, inplace=True)

In [19]:
games.isnull().sum()

index                   0
SEASON_ID               0
TEAM_ID                 0
TEAM_ABBREVIATION       0
TEAM_NAME               0
GAME_ID                 0
GAME_DATE               0
MATCHUP                 0
WL                     20
MIN                     0
PTS                     0
FGM                     0
FGA                     0
FG_PCT                  6
FG3M                    0
FG3A                    2
FG3_PCT              1043
FTM                     0
FTA                     0
FT_PCT                  7
OREB                    4
DREB                    4
REB                     5
AST                     0
STL                     2
BLK                     0
TOV                     0
PF                      0
dtype: int64

In [20]:
# Replacing the null values in FG_PCT with the percentage of FGM/FGA

games['FG_PCT'].fillna(games['FGM'] / games['FGA'], inplace=True)

In [21]:
games.isnull().sum()

index                   0
SEASON_ID               0
TEAM_ID                 0
TEAM_ABBREVIATION       0
TEAM_NAME               0
GAME_ID                 0
GAME_DATE               0
MATCHUP                 0
WL                     20
MIN                     0
PTS                     0
FGM                     0
FGA                     0
FG_PCT                  6
FG3M                    0
FG3A                    2
FG3_PCT              1043
FTM                     0
FTA                     0
FT_PCT                  7
OREB                    4
DREB                    4
REB                     5
AST                     0
STL                     2
BLK                     0
TOV                     0
PF                      0
dtype: int64

In [22]:
games[games['FG_PCT'].isnull()==True]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
12467,330,12017,1610612741,CHI,Chicago Bulls,11700012,2017-10-03,CHI @ NOP,,0,...,0,,0.0,0.0,0.0,0,0.0,0,0,0
20421,1349,22007,1610612743,DEN,Denver Nuggets,1520700012,2007-07-08,DEN @ LAC,,0,...,0,,0.0,1.0,1.0,0,0.0,0,0,0
23758,1293,22008,1610612744,GSW,Golden State Warriors,1520800001,2008-07-11,GSW vs. PHI,,0,...,0,,0.0,0.0,,0,0.0,0,0,0
37561,1243,12009,1610612748,MIA,Miami Heat,10900007,2009-10-05,MIA @ DET,,5,...,6,0.667,0.0,2.0,2.0,0,0.0,0,0,2
63497,1499,12005,1610612756,PHX,Phoenix Suns,10500109,2005-10-27,DET @ DAL,O,24,...,0,,0.0,0.0,0.0,0,0.0,0,0,0
87575,163,22019,1610612764,WAS,Washington Wizards,1521900060,2019-07-11,WAS vs. ATL,,0,...,0,,0.0,0.0,0.0,0,0.0,0,0,0


In [23]:
null_threes = games[games['FG3_PCT'].isnull()==True]['FG3A']

In [24]:
null_threes.value_counts()

0.0    1041
Name: FG3A, dtype: int64

In [25]:
# It looks like the nulls in the FG3_PCT column are coming from games where teams attempted no three point shots

games['FG3_PCT'].fillna(0.0, inplace=True)

In [26]:
games.isnull().sum()

index                 0
SEASON_ID             0
TEAM_ID               0
TEAM_ABBREVIATION     0
TEAM_NAME             0
GAME_ID               0
GAME_DATE             0
MATCHUP               0
WL                   20
MIN                   0
PTS                   0
FGM                   0
FGA                   0
FG_PCT                6
FG3M                  0
FG3A                  2
FG3_PCT               0
FTM                   0
FTA                   0
FT_PCT                7
OREB                  4
DREB                  4
REB                   5
AST                   0
STL                   2
BLK                   0
TOV                   0
PF                    0
dtype: int64

In [27]:
games[games['FG_PCT'].isnull()==True]['FGA']

12467    0
20421    0
23758    0
37561    0
63497    0
87575    0
Name: FGA, dtype: int64

In [28]:
# Same with the FG_PCT, games where teams attempted no field goals

games['FG_PCT'].fillna(0.0, inplace=True)

In [29]:
# Checking where FG3A is null

games[games['FG3A'].isnull()==True]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
25690,3225,21984,1610612744,GOS,Golden State Warriors,28400895,1985-04-06,GOS vs. KCK,W,240,...,52,0.904,,,,27,4.0,7,5,23
72039,3143,21984,1610612758,KCK,Kansas City Kings,28400895,1985-04-06,KCK @ GOS,L,198,...,25,0.84,,,,28,6.0,1,8,31


In [30]:
# The null information comes from the same game, on April 6, 1985 between the Golden State Warriors and the Kansas City Kings
# In that game the kings shot 0 for 2 from the three point line, and the Warriors shot 2 for 6

# Warriors
games.at[25690, 'FG3A'] = 6
games.at[25690, 'FG3_PCT'] = 0.333

# Kings
games.at[72039, 'FG3A'] = 2
games.at[72039, 'FG3_PCT'] = 0.0

In [31]:
games.isnull().sum()

index                 0
SEASON_ID             0
TEAM_ID               0
TEAM_ABBREVIATION     0
TEAM_NAME             0
GAME_ID               0
GAME_DATE             0
MATCHUP               0
WL                   20
MIN                   0
PTS                   0
FGM                   0
FGA                   0
FG_PCT                0
FG3M                  0
FG3A                  0
FG3_PCT               0
FTM                   0
FTA                   0
FT_PCT                7
OREB                  4
DREB                  4
REB                   5
AST                   0
STL                   2
BLK                   0
TOV                   0
PF                    0
dtype: int64

In [32]:
# Checking nulls in win/loss

games[games['WL'].isnull()==True]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
179,179,22019,1610612738,BOS,Boston Celtics,1521900035,2019-07-08,BOS vs. CLE,,13,...,0,,2.0,6.0,8.0,0,1.0,0,3,2
1434,1434,12007,1610612738,BOS,Boston Celtics,10700072,2007-10-19,BOS vs. NJN,,120,...,12,0.75,3.0,15.0,18.0,8,5.0,2,12,20
3575,0,42020,1610612737,ATL,Atlanta Hawks,42000204,2021-06-14,ATL vs. PHI,,24,...,2,0.0,2.0,8.0,10.0,4,2.0,0,0,3
3735,160,22019,1610612737,ATL,Atlanta Hawks,1521900023,2019-07-07,ATL vs. MIN,,21,...,2,0.5,1.0,2.0,3.0,1,0.0,1,4,2
10555,159,22019,1610612740,NOP,New Orleans Pelicans,1521900008,2019-07-05,NOP vs. NYK,,17,...,2,1.0,2.0,8.0,10.0,1,0.0,2,0,1
12467,330,12017,1610612741,CHI,Chicago Bulls,11700012,2017-10-03,CHI @ NOP,,0,...,0,,0.0,0.0,0.0,0,0.0,0,0,0
16775,1152,22009,1610612742,DAL,Dallas Mavericks,1520900004,2009-07-10,DAL vs. MIL,,22,...,4,0.5,1.0,3.0,4.0,1,3.0,2,2,0
20317,1245,12008,1610612743,DEN,Denver Nuggets,10800035,2008-10-11,DEN @ PHX,,240,...,38,0.474,15.0,36.0,51.0,7,11.0,5,13,26
20421,1349,22007,1610612743,DEN,Denver Nuggets,1520700012,2007-07-08,DEN @ LAC,,0,...,0,,0.0,1.0,1.0,0,0.0,0,0,0
23758,1293,22008,1610612744,GSW,Golden State Warriors,1520800001,2008-07-11,GSW vs. PHI,,0,...,0,,0.0,0.0,,0,0.0,0,0,0


In [33]:
games[games['GAME_ID']=='1521900035']

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
179,179,22019,1610612738,BOS,Boston Celtics,1521900035,2019-07-08,BOS vs. CLE,,13,...,0,,2.0,6.0,8.0,0,1.0,0,3,2
180,180,22019,1610612738,BOS,Boston Celtics,1521900035,2019-07-08,BOS vs. CLE,W,187,...,10,0.9,11.0,25.0,36.0,23,13.0,8,14,15
7119,147,22019,1610612739,CLE,Cleveland Cavaliers,1521900035,2019-07-08,CLE @ BOS,L,199,...,20,0.75,10.0,21.0,31.0,15,6.0,3,20,11


In [34]:
# Looks like a duplicate game with more info on index 180, dropping row 179

games.drop(179, axis=0, inplace=True)

In [35]:
games[games['GAME_ID']=='0010700072']

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
1434,1434,12007,1610612738,BOS,Boston Celtics,10700072,2007-10-19,BOS vs. NJN,,120,...,12,0.75,3.0,15.0,18.0,8,5.0,2,12,20
46841,1281,12007,1610612751,NJN,New Jersey Nets,10700072,2007-10-19,NJN @ BOS,,120,...,18,0.722,7.0,12.0,19.0,7,3.0,1,11,13


In [36]:
# Checking points
games[games['GAME_ID']=='0010700072']['PTS']

1434     36
46841    33
Name: PTS, dtype: int64

In [37]:
# Upon looking this game up it sems it was cencelled at halftime due to condensation on the court but counted as a win for the Celtics

# Celtics
games.at[1434, 'WL'] = 'W'

# Nets
games.at[46841, 'WL'] = 'L'

In [38]:
games[games['GAME_ID']=='0010700072']

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
1434,1434,12007,1610612738,BOS,Boston Celtics,10700072,2007-10-19,BOS vs. NJN,W,120,...,12,0.75,3.0,15.0,18.0,8,5.0,2,12,20
46841,1281,12007,1610612751,NJN,New Jersey Nets,10700072,2007-10-19,NJN @ BOS,L,120,...,18,0.722,7.0,12.0,19.0,7,3.0,1,11,13


In [39]:
games[games['GAME_ID']=='0011700012']

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
10743,347,12017,1610612740,NOP,New Orleans Pelicans,11700012,2017-10-03,NOP vs. CHI,L,230,...,32,0.781,6.0,38.0,44.0,25,10.0,6,15,17
12467,330,12017,1610612741,CHI,Chicago Bulls,11700012,2017-10-03,CHI @ NOP,,0,...,0,,0.0,0.0,0.0,0,0.0,0,0,0
12468,331,12017,1610612741,CHI,Chicago Bulls,11700012,2017-10-03,CHI @ NOP,W,240,...,13,0.692,6.0,39.0,45.0,33,9.0,3,15,26


In [40]:
# This game was a win for the bulls, dropping the row with bad information

games.drop(12466, axis=0, inplace=True)

In [41]:
games[games['GAME_ID']=='0010800035']

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
20317,1245,12008,1610612743,DEN,Denver Nuggets,10800035,2008-10-11,DEN @ PHX,,240,...,38,0.474,15.0,36.0,51.0,7,11.0,5,13,26
63187,1189,12008,1610612756,PHX,Phoenix Suns,10800035,2008-10-11,PHX vs. DEN,,239,...,29,0.69,22.0,30.0,52.0,8,5.0,11,19,27


In [42]:
# This game was a win for Denver

# Nuggets
games.at[20317, 'WL'] = 'W'

# Suns
games.at[63187, 'WL'] = 'L'

In [43]:
games[games['GAME_ID']=='1521900007']

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
22616,151,22019,1610612744,GSW,Golden State Warriors,1521900007,2019-07-05,GSW vs. CHA,L,200,...,9,0.889,8.0,28.0,36.0,19,9.0,3,13,15
94364,151,22019,1610612766,CHA,Charlotte Hornets,1521900007,2019-07-05,CHA @ GSW,,17,...,4,0.5,0.0,2.0,2.0,0,0.0,0,0,2
94365,152,22019,1610612766,CHA,Charlotte Hornets,1521900007,2019-07-05,CHA @ GSW,W,183,...,16,0.875,11.0,30.0,41.0,14,7.0,6,16,13


In [44]:
# The remaining null entries in WL are duplicate rows with bad data

games.dropna(subset=['WL'], axis=0, inplace=True)

In [45]:
games.isnull().sum()

index                0
SEASON_ID            0
TEAM_ID              0
TEAM_ABBREVIATION    0
TEAM_NAME            0
GAME_ID              0
GAME_DATE            0
MATCHUP              0
WL                   0
MIN                  0
PTS                  0
FGM                  0
FGA                  0
FG_PCT               0
FG3M                 0
FG3A                 0
FG3_PCT              0
FTM                  0
FTA                  0
FT_PCT               1
OREB                 4
DREB                 4
REB                  4
AST                  0
STL                  2
BLK                  0
TOV                  0
PF                   0
dtype: int64

In [46]:
# Checking nulls in FT_PCT

games[games['FT_PCT'].isnull()==True]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
63497,1499,12005,1610612756,PHX,Phoenix Suns,10500109,2005-10-27,DET @ DAL,O,24,...,0,,0.0,0.0,0.0,0,0.0,0,0,0


In [47]:
# It looks like this is null because no free throws were attempted

games.at[63497, 'FT_PCT'] = 0.0

In [48]:
# Checking nulls in offensive rebounds

games[games['OREB'].isnull()==True]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
25690,3225,21984,1610612744,GOS,Golden State Warriors,28400895,1985-04-06,GOS vs. KCK,W,240,...,52,0.904,,,,27,4.0,7,5,23
25767,3302,21984,1610612744,GOS,Golden State Warriors,28400006,1984-10-26,GOS vs. PHX,L,235,...,39,0.718,,,,20,9.0,8,17,30
72039,3143,21984,1610612758,KCK,Kansas City Kings,28400895,1985-04-06,KCK @ GOS,L,198,...,25,0.84,,,,28,6.0,1,8,31
85068,3489,21983,1610612762,UTH,Utah Jazz,28300147,1983-11-23,UTH vs. CHI,L,240,...,41,0.902,,,,18,9.0,6,4,24


In [49]:
# Checking nulls in Defensive rebounds

games[games['DREB'].isnull()==True]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
25690,3225,21984,1610612744,GOS,Golden State Warriors,28400895,1985-04-06,GOS vs. KCK,W,240,...,52,0.904,,,,27,4.0,7,5,23
25767,3302,21984,1610612744,GOS,Golden State Warriors,28400006,1984-10-26,GOS vs. PHX,L,235,...,39,0.718,,,,20,9.0,8,17,30
72039,3143,21984,1610612758,KCK,Kansas City Kings,28400895,1985-04-06,KCK @ GOS,L,198,...,25,0.84,,,,28,6.0,1,8,31
85068,3489,21983,1610612762,UTH,Utah Jazz,28300147,1983-11-23,UTH vs. CHI,L,240,...,41,0.902,,,,18,9.0,6,4,24


In [52]:
# In the first game the Warriors had 14 Offensive rebounds, 33 Defensive rebounds, and 47 total rebounds
games.at[25690, 'OREB'] = 14
games.at[25690, 'DREB'] = 33
games.at[25690, 'REB'] = 47

# In the second game the Warriors had 9 Offensive rebounds, 24 Defensive rebounds, and 33 total rebounds
games.at[25767, 'OREB'] = 9
games.at[25767, 'DREB'] = 24
games.at[25767, 'REB'] = 33

# In the third(Same game as first) game the Kings had 7 Offensive rebounds, 21 Defensive rebounds, and 28 total rebounds
games.at[72039, 'OREB'] = 7
games.at[72039, 'DREB'] = 21
games.at[72039, 'REB'] = 28

# In the fourth game the Jazz had 23 Offensive rebounds, 32 Defensive rebounds, and 55 total rebounds
games.at[85068, 'OREB'] = 23
games.at[85068, 'DREB'] = 32
games.at[85068, 'REB'] = 55

In [53]:
games.isnull().sum()

index                0
SEASON_ID            0
TEAM_ID              0
TEAM_ABBREVIATION    0
TEAM_NAME            0
GAME_ID              0
GAME_DATE            0
MATCHUP              0
WL                   0
MIN                  0
PTS                  0
FGM                  0
FGA                  0
FG_PCT               0
FG3M                 0
FG3A                 0
FG3_PCT              0
FTM                  0
FTA                  0
FT_PCT               0
OREB                 0
DREB                 0
REB                  0
AST                  0
STL                  2
BLK                  0
TOV                  0
PF                   0
dtype: int64

In [54]:
# Checking nulls in steals

games[games['STL'].isnull()==True]

Unnamed: 0,index,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,...,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF
6924,3349,21983,1610612737,ATL,Atlanta Hawks,28300531,1984-02-04,ATL vs. PHL,W,240,...,16,0.875,15.0,22.0,37.0,28,,6,8,22
61952,3363,21983,1610612755,PHL,Philadelphia 76ers,28300531,1984-02-04,PHL @ ATL,L,240,...,27,0.815,14.0,20.0,34.0,18,,6,6,20


In [55]:
# No steals were recorded in this game, setting both to 0

# Hawks
games.at[6924, 'STL'] = 0

# Sixers
games.at[61952, 'STL'] = 0

In [56]:
games.isnull().sum()

index                0
SEASON_ID            0
TEAM_ID              0
TEAM_ABBREVIATION    0
TEAM_NAME            0
GAME_ID              0
GAME_DATE            0
MATCHUP              0
WL                   0
MIN                  0
PTS                  0
FGM                  0
FGA                  0
FG_PCT               0
FG3M                 0
FG3A                 0
FG3_PCT              0
FTM                  0
FTA                  0
FT_PCT               0
OREB                 0
DREB                 0
REB                  0
AST                  0
STL                  0
BLK                  0
TOV                  0
PF                   0
dtype: int64

In [58]:
# Dropping the redundant index column

games.drop('index', axis=1, inplace=True)

In [59]:
# Saving dataframe to CSV to work with

games.to_csv('nba_game_stats.csv', index=False)