In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from datetime import date
import requests

import seaborn as sns
sns.set()

import nba_api
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

# Data Gathering and Cleaning

In [2]:
#season boundaries as 'YYYY-MM-DD' strings; they are compared against
#nba_games['GAME_DATE'] when the per-season masks are built
#NOTE(review): assumes 'GAME_DATE' uses the same 'YYYY-MM-DD' format -- confirm

#2015-2016 regular season start and end dates
start_15_16 = '2015-10-27'
end_15_16 = '2016-04-13'

#2016-2017 regular season start and end dates
start_16_17 = '2016-10-25'
end_16_17 = '2017-04-12'

#2017-2018 regular season start and end dates
start_17_18 = '2017-10-17'
end_17_18 = '2018-04-11'

#2018-2019 regular season start and end dates
start_18_19 = '2018-10-16'
end_18_19 = '2019-04-10'

In [3]:
#team info df; its 'id' column is exposed as 'TEAM_ID' so that it can be
#matched against the 'TEAM_ID' column of the game data
teams_df = pd.DataFrame(teams.get_teams()).rename(columns={'id': 'TEAM_ID'})

# teams_df.head()

In [4]:
# Get **all** the games so we can filter to an individual GAME_ID
#LeagueGameFinder is called with no arguments, i.e. no filter is passed
#NOTE(review): what an unfiltered query returns presumably depends on the
#API's own limits and on when it is run -- confirm the seasons used below
#are fully covered
result = leaguegamefinder.LeagueGameFinder()
#only the first of the returned data frames is kept as the game table
all_games = result.get_data_frames()[0]

# all_games.head()

In [5]:
#match NBA teams from 'teams_df' with 'all_games' to remove
#all non NBA-games from 'all_games'
nba_games = pd.merge(teams_df, all_games, on='TEAM_ID', how='inner')

#remove columns in merged df coming from 'teams_df' (every column after its
#first one); the 'columns=' keyword is used because pandas 2.x no longer
#accepts the axis of drop() as a positional argument
nba_games = nba_games.drop(columns=list(teams_df.columns)[1:])

# nba_games.head(3)

In [6]:
#function searches rows in 'nba_games' df sharing same 'GAME_ID' and sums PTS
#to obtain sum of final score
def total_pts_game(id_game):
    """Sum the 'PTS' column over all rows of one game.

    Reads the module-level 'nba_games' df.

    Parameters
    ----------
    id_game
        Game identifier; it is converted with str() before being compared
        with the 'GAME_ID' column.

    Returns
    -------
    pandas.Series
        One-element Series labelled 'PTS' holding the summed points
        (0 when no row matches).
    """
    same_game = nba_games['GAME_ID'] == str(id_game)
    #explicit column-wise sum: np.sum() on a DataFrame is routed through
    #DataFrame.sum(axis=None), and recent pandas versions warn that this
    #form will stop returning a Series
    return nba_games.loc[same_game, ['PTS']].sum(axis=0)

In [7]:
#e.g. of applying above function
# total_pts_game(nba_games['GAME_ID'][0])

In [8]:
#df (single column 'PTS', same index as 'nba_games') holding, for every row,
#the sum of PTS over all rows sharing that row's 'GAME_ID'
#computed in one grouped pass; this yields the same per-row totals as calling
#'total_pts_game' once per row, without re-scanning the whole df for every row
total_points = nba_games.groupby('GAME_ID', sort=False)[['PTS']].transform('sum')

In [9]:
#adds the per-game totals to 'nba_games' df as column 'COMBINED_FINAL_SCORE'
nba_games = nba_games.assign(COMBINED_FINAL_SCORE=total_points)
# nba_games.head(4)

In [10]:
#'MATCHUP' entries containing 'vs' are encoded as 1 (playing at home),
#all other entries as 0 (playing away); the column is overwritten in place
Home_Away = nba_games['MATCHUP'].str.contains('vs').astype(int)
nba_games = nba_games.assign(MATCHUP=Home_Away)

In [11]:
#drops the listed columns from 'nba_games' df
nba_games = nba_games.drop(
    columns=['SEASON_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'WL', 'PLUS_MINUS']
)

In [12]:
#function searches rows in 'nba_games' df sharing same 'GAME_ID' and gets
#opponent's stats

def opponent_stats(id_game, id_team):
    """Return the rows of a game that belong to the other team(s).

    Reads the module-level 'nba_games' df.

    Parameters
    ----------
    id_game
        Value compared (as is) with the 'GAME_ID' column.
    id_team
        'TEAM_ID' of the team whose opponent is wanted; rows with this
        'TEAM_ID' are excluded from the result.

    Returns
    -------
    pandas.DataFrame
        Rows of the game not belonging to 'id_team', without 'GAME_DATE' and
        'COMBINED_FINAL_SCORE'. Every remaining column except 'GAME_ID' is
        renamed with an 'OPP_' prefix. Empty when nothing matches.
    """
    same_game = nba_games.loc[nba_games['GAME_ID'] == id_game]
    opp_stat = same_game.loc[same_game['TEAM_ID'] != id_team]
    #'columns=' keyword: pandas 2.x rejects the axis as a positional argument
    opp_stat = opp_stat.drop(columns=['GAME_DATE', 'COMBINED_FINAL_SCORE'])
    #prefix by name instead of assigning a fixed list of labels, so the result
    #does not depend on the number or order of the stat columns; 'GAME_ID'
    #keeps its name because it is the key used to join the stats back
    opp_stat = opp_stat.add_prefix('OPP_').rename(columns={'OPP_GAME_ID': 'GAME_ID'})
    return opp_stat
    

In [13]:
#calls 'opponent_stats' once per row of 'nba_games' df (with that row's
#'GAME_ID' and 'TEAM_ID') and stacks the results into one df with a fresh index
opp_stats = pd.concat(
    [
        opponent_stats(game, team)
        for game, team in zip(nba_games['GAME_ID'], nba_games['TEAM_ID'])
    ],
    ignore_index=True,
)


In [14]:
#joins 'opp_stats' onto 'nba_games' df by 'GAME_ID' so that the opponent's
#stats sit in the same row as a team's own stats for a given game
nba_games = nba_games.merge(opp_stats, how='inner', on='GAME_ID')

In [15]:
#the merge on 'GAME_ID' also pairs rows with themselves; keep only the rows
#where the team and the opponent differ
nba_games = nba_games.loc[nba_games['TEAM_ID'].ne(nba_games['OPP_TEAM_ID'])]

In [16]:
#rearranges the columns so that 'GAME_ID' comes first, followed by 'TEAM_ID'
nba_games = nba_games.loc[:, [
    'GAME_ID', 'TEAM_ID', 'GAME_DATE', 'MATCHUP', 'MIN', 'PTS',
    'FGM', 'FGA', 'FG_PCT',
    'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT',
    'OREB', 'DREB', 'REB',
    'AST', 'STL', 'BLK', 'TOV', 'PF',
    'COMBINED_FINAL_SCORE',
    'OPP_TEAM_ID', 'OPP_MATCHUP', 'OPP_MIN', 'OPP_PTS',
    'OPP_FGM', 'OPP_FGA', 'OPP_FG_PCT',
    'OPP_FG3M', 'OPP_FG3A', 'OPP_FG3_PCT',
    'OPP_FTM', 'OPP_FTA', 'OPP_FT_PCT',
    'OPP_OREB', 'OPP_DREB', 'OPP_REB',
    'OPP_AST', 'OPP_STL', 'OPP_BLK', 'OPP_TOV', 'OPP_PF',
]]

In [17]:
#orders the rows of 'nba_games' df by ascending 'GAME_ID'
nba_games = nba_games.sort_values(by='GAME_ID', ascending=True)

In [18]:
#reset index: renumber the sorted rows 0..n-1 and discard the old labels
#(reset_index(drop=True) replaces building the index by hand through
#set_index with a nested list of positions)
nba_games = nba_games.reset_index(drop=True)

In [19]:
# #flips df s.t. oldest games appear at the head and more recent games appear in the tail
# nba_games.iloc[:] = nba_games.iloc[::-1].values


In [20]:
#boolean masks marking the rows whose 'GAME_DATE' lies within each season's
#start and end dates (both bounds included)
game_dates = nba_games['GAME_DATE']
mask_15_16 = game_dates.between(start_15_16, end_15_16)
mask_16_17 = game_dates.between(start_16_17, end_16_17)
mask_17_18 = game_dates.between(start_17_18, end_17_18)
mask_18_19 = game_dates.between(start_18_19, end_18_19)

In [21]:
#one df per season: the rows of 'nba_games' selected by that season's mask
nba_games_15_16 = nba_games[mask_15_16]
nba_games_16_17 = nba_games[mask_16_17]
nba_games_17_18 = nba_games[mask_17_18]
nba_games_18_19 = nba_games[mask_18_19]

In [22]:
#nba_games_15_16.head()

In [23]:
#season dfs collected in one list, ordered from oldest to most recent season
nba_seasons = [
    nba_games_15_16,  #[0] -> 2015-16
    nba_games_16_17,  #[1] -> 2016-17
    nba_games_17_18,  #[2] -> 2017-18
    nba_games_18_19,  #[3] -> 2018-19
]


In [24]:
#display the first list element, i.e. the 2015-16 season df
nba_seasons[0]



Unnamed: 0,GAME_ID,TEAM_ID,GAME_DATE,MATCHUP,MIN,PTS,FGM,FGA,FG_PCT,FG3M,...,OPP_FTA,OPP_FT_PCT,OPP_OREB,OPP_DREB,OPP_REB,OPP_AST,OPP_STL,OPP_BLK,OPP_TOV,OPP_PF
4466,0021500001,1610612765,2015-10-27,0,239,106,37,96,0.385,12,...,15,0.800,7,33,40,22,9,4,15,25
4467,0021500001,1610612737,2015-10-27,1,239,94,37,82,0.451,8,...,26,0.769,23,36,59,23,5,3,15,15
4468,0021500002,1610612739,2015-10-27,0,240,95,38,94,0.404,9,...,23,0.696,7,40,47,13,6,10,13,22
4469,0021500002,1610612741,2015-10-27,1,240,97,37,87,0.425,7,...,17,0.588,11,39,50,26,5,7,10,21
4470,0021500003,1610612744,2015-10-27,1,241,111,41,96,0.427,9,...,27,0.704,8,25,33,21,9,3,18,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6921,0021501228,1610612762,2016-04-13,0,240,96,39,83,0.470,9,...,15,0.867,8,39,47,19,6,3,13,17
6922,0021501229,1610612756,2016-04-13,1,240,114,45,99,0.455,7,...,12,0.833,6,34,40,23,9,6,15,20
6923,0021501229,1610612746,2016-04-13,0,241,105,43,96,0.448,9,...,22,0.773,18,40,58,26,10,5,16,15
6924,0021501230,1610612757,2016-04-13,1,241,107,39,89,0.438,11,...,18,0.667,20,33,53,24,10,6,20,19
