In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from datetime import date
import requests

import nba_api
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

In [14]:
# Regular-season start and end dates, one (start, end) pair per season,
# written as 'YYYY-MM-DD' strings.

# 2015-2016 regular season
start_15_16, end_15_16 = '2015-10-27', '2016-04-13'

# 2016-2017 regular season
start_16_17, end_16_17 = '2016-10-25', '2017-04-12'

# 2017-2018 regular season
start_17_18, end_17_18 = '2017-10-17', '2018-04-11'

# 2018-2019 regular season
start_18_19, end_18_19 = '2018-10-16', '2019-04-10'

In [15]:
# Build the team table and expose its 'id' column under the name 'TEAM_ID',
# the key the later merge with the game log joins on.
teams_df = pd.DataFrame(teams.get_teams()).rename(columns={'id': 'TEAM_ID'})

#teams_df.head()

In [16]:
# Query the game finder with no search filters; the intent is to fetch all
# games so they can later be narrowed down to an individual GAME_ID.
result = leaguegamefinder.LeagueGameFinder()
# The game rows are taken from the first data frame of the response.
game_frames = result.get_data_frames()
all_games = game_frames[0]

#all_games.head()

In [17]:
# Match the NBA teams in 'teams_df' against 'all_games'. The inner join on
# TEAM_ID keeps only game rows whose team appears in 'teams_df', which removes
# all non-NBA games from 'all_games'.
nba_games = pd.merge(teams_df, all_games, on='TEAM_ID', how='inner')

# Remove the columns that came from 'teams_df', keeping only 'abbreviation'.
# They are selected by name, not by position: a positional slice of
# 'teams_df.columns' keeps a different column depending on the order in which
# pandas lays out the team columns. 'columns=' is used because passing the
# axis as a bare positional argument is rejected by current pandas.
team_only_columns = [col for col in teams_df.columns if col != 'abbreviation']
nba_games = nba_games.drop(columns=team_only_columns)

nba_games.head(3)

Unnamed: 0,abbreviation,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,ATL,22019,ATL,Atlanta Hawks,21900969,2020-03-11,ATL vs. NYK,L,265,131,...,0.808,15,38,53,26,6,3,17,25,0.0
1,ATL,22019,ATL,Atlanta Hawks,21900957,2020-03-09,ATL vs. CHA,W,290,143,...,0.593,12,41,53,33,1,5,15,26,5.0
2,ATL,22019,ATL,Atlanta Hawks,21900943,2020-03-07,ATL @ MEM,L,240,101,...,0.857,14,32,46,23,9,2,14,24,-17.0


In [18]:
def total_pts_game(id_game, games=None):
    """Return the combined points of all rows that share one GAME_ID.

    Parameters
    ----------
    id_game : object
        Game identifier. It is converted with ``str`` before being compared
        against the 'GAME_ID' column.
    games : pandas.DataFrame, optional
        Frame with 'GAME_ID' and 'PTS' columns to search. When omitted, the
        notebook-level 'nba_games' frame is used.

    Returns
    -------
    pandas.Series
        One-element Series labelled 'PTS' holding the summed points; the
        value is 0 when no row matches ``id_game``.
    """
    if games is None:
        games = nba_games
    same_game = games['GAME_ID'] == str(id_game)
    # The axis is spelled out so the result stays a per-column Series:
    # np.sum(frame) forwards axis=None, whose meaning for DataFrame.sum
    # pandas has deprecated.
    return games.loc[same_game, ['PTS']].sum(axis=0)

In [19]:
#e.g. of applying above function
#total_pts_game(nba_games['GAME_ID'][0])

In [20]:
# Combined final score for every row of 'nba_games': the 'PTS' values of all
# rows sharing a 'GAME_ID' are summed and that total is written back to each
# of those rows. One grouped pass replaces re-scanning the whole frame once
# per row. The result is kept as a one-column ('PTS') DataFrame indexed like
# 'nba_games'.
total_points = nba_games.groupby('GAME_ID')['PTS'].transform('sum').to_frame()

In [21]:
# Add the combined final score per game to 'nba_games' as a new
# 'COMBINED_FINAL_SCORE' column. 'total_points' holds a single column, which
# is taken explicitly here and aligned on the row index by the assignment.
combined_score = total_points.iloc[:, 0]
nba_games['COMBINED_FINAL_SCORE'] = combined_score
nba_games.head()

Unnamed: 0,abbreviation,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,COMBINED_FINAL_SCORE
0,ATL,22019,ATL,Atlanta Hawks,21900969,2020-03-11,ATL vs. NYK,L,265,131,...,15,38,53,26,6,3,17,25,0.0,267
1,ATL,22019,ATL,Atlanta Hawks,21900957,2020-03-09,ATL vs. CHA,W,290,143,...,12,41,53,33,1,5,15,26,5.0,281
2,ATL,22019,ATL,Atlanta Hawks,21900943,2020-03-07,ATL @ MEM,L,240,101,...,14,32,46,23,9,2,14,24,-17.0,219
3,ATL,22019,ATL,Atlanta Hawks,21900930,2020-03-06,ATL @ WAS,L,239,112,...,6,30,36,25,9,4,17,25,-6.0,230
4,ATL,22019,ATL,Atlanta Hawks,21900905,2020-03-02,ATL vs. MEM,L,239,88,...,16,27,43,20,6,8,17,21,-39.0,215


In [22]:
# Boolean masks, one per season, that are True for rows whose 'GAME_DATE'
# lies between that season's start and end date (both ends included).
game_dates = nba_games['GAME_DATE']
mask_15_16 = game_dates.between(start_15_16, end_15_16)
mask_16_17 = game_dates.between(start_16_17, end_16_17)
mask_17_18 = game_dates.between(start_17_18, end_17_18)
mask_18_19 = game_dates.between(start_18_19, end_18_19)

In [23]:
# One frame per season: the rows of 'nba_games' selected by that season's
# date mask, i.e. the regular-season games played in that window.
nba_games_15_16 = nba_games[mask_15_16]
nba_games_16_17 = nba_games[mask_16_17]
nba_games_17_18 = nba_games[mask_17_18]
nba_games_18_19 = nba_games[mask_18_19]

In [24]:
# Preview the first rows of the 2015-2016 regular-season subset.
nba_games_15_16.head()

Unnamed: 0,abbreviation,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,COMBINED_FINAL_SCORE
377,ATL,22015,ATL,Atlanta Hawks,21501221,2016-04-13,ATL @ WAS,L,240,98,...,9,38,47,22,13,5,22,21,-11.0,207
378,ATL,22015,ATL,Atlanta Hawks,21501203,2016-04-11,ATL @ CLE,L,240,94,...,10,32,42,23,8,6,15,18,-15.0,203
379,ATL,22015,ATL,Atlanta Hawks,21501188,2016-04-09,ATL vs. BOS,W,240,118,...,5,39,44,31,10,10,17,22,11.0,225
380,ATL,22015,ATL,Atlanta Hawks,21501173,2016-04-07,ATL vs. TOR,W,239,95,...,5,36,41,23,4,12,13,19,8.0,182
381,ATL,22015,ATL,Atlanta Hawks,21501157,2016-04-05,ATL vs. PHX,W,242,103,...,13,37,50,26,16,3,16,21,13.0,193


In [25]:
# First rows of the 2015-2016 subset whose team abbreviation is 'LAL'.
# The mask is built from the frame being filtered, so the selection does not
# rely on aligning a mask computed over the larger 'nba_games' frame.
nba_games_15_16.loc[nba_games_15_16['TEAM_ABBREVIATION'] == 'LAL'].head()

Unnamed: 0,abbreviation,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,COMBINED_FINAL_SCORE
6640,LAL,22015,LAL,Los Angeles Lakers,21501228,2016-04-13,LAL vs. UTA,W,239,101,...,8,39,47,19,6,3,13,17,5.0,197
6641,LAL,22015,LAL,Los Angeles Lakers,21501209,2016-04-11,LAL @ OKC,L,240,79,...,10,38,48,15,8,1,15,20,-33.0,191
6642,LAL,22015,LAL,Los Angeles Lakers,21501195,2016-04-10,LAL @ HOU,L,239,110,...,7,33,40,25,10,3,18,15,-20.0,240
6643,LAL,22015,LAL,Los Angeles Lakers,21501184,2016-04-08,LAL @ NOP,L,240,102,...,10,24,34,18,9,5,13,23,-8.0,212
6644,LAL,22015,LAL,Los Angeles Lakers,21501172,2016-04-06,LAL vs. LAC,L,241,81,...,16,33,49,12,6,1,12,16,-10.0,172


In [26]:
# Show the column labels currently present in 'nba_games'.
nba_games.columns

Index(['abbreviation', 'SEASON_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME',
       'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA',
       'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB',
       'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS',
       'COMBINED_FINAL_SCORE'],
      dtype='object')