In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from datetime import date
import requests

import nba_api
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder

## Define season dates

In [79]:
#2015-2016 regular season start and end dates
start_15_16 = '2015-10-27'
end_15_16 = '2016-04-13'

#2016-2017 regular season start and end dates
start_16_17 = '2016-10-25'
end_16_17 = '2017-04-12'

#2016-2017 regular season start and end dates
start_17_18 = '2017-10-17'
end_17_18 = '2018-04-11'

#2018-2019 regular season start and end dates
start_18_19 = '2018-10-16'
end_18_19 = '2019-04-10'

## Team Info

In [77]:
teams_df = pd.DataFrame(teams.get_teams())
teams_df.rename(columns={'id':'TEAM_ID'}, inplace=True)
teams_df.head()

Unnamed: 0,abbreviation,city,full_name,TEAM_ID,nickname,state,year_founded
0,ATL,Atlanta,Atlanta Hawks,1610612737,Hawks,Atlanta,1949
1,BOS,Boston,Boston Celtics,1610612738,Celtics,Massachusetts,1946
2,CLE,Cleveland,Cleveland Cavaliers,1610612739,Cavaliers,Ohio,1970
3,NOP,New Orleans,New Orleans Pelicans,1610612740,Pelicans,Louisiana,2002
4,CHI,Chicago,Chicago Bulls,1610612741,Bulls,Illinois,1966


## Get all the games in the LeagueGameFinder

In [81]:
# Get **all** the games so we can filter to an individual GAME_ID
result = leaguegamefinder.LeagueGameFinder()
all_games = result.get_data_frames()[0]
all_games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22019,1612709926,MHU,Memphis Hustle,2021900026,2020-03-15,MHU vs. IWA,,120,56,...,1.0,7,17,24,13,5,1,4,7,15.6
1,22019,1612709911,IWA,Iowa Wolves,2021900026,2020-03-15,IWA @ MHU,,107,41,...,0.833,2,15,17,8,1,7,11,8,-16.2
2,22019,1612709920,RAP,Raptors 905,2021900566,2020-03-11,RAP vs. CPS,W,240,129,...,0.731,14,43,57,23,9,10,21,26,12.6
3,22019,1612709911,IWA,Iowa Wolves,2021900567,2020-03-11,IWA @ RGV,W,240,128,...,1.0,8,36,44,34,11,4,17,22,8.6
4,22019,1612709929,CPS,College Park Skyhawks,2021900566,2020-03-11,CPS @ RAP,L,240,116,...,0.538,8,29,37,27,10,8,15,31,-12.2


In [82]:
#match NBA teams from 'teams_df' with 'all_games' to remove
#all non NBA-games from 'all_games' 
nba_games = pd.merge(teams_df, all_games, on = 'TEAM_ID', how = 'inner')

#remove columns in merged df coming from 'teams_df'
nba_games = nba_games.drop(list(teams_df.columns)[1:],1)

nba_games.head()

Unnamed: 0,abbreviation,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,ATL,22019,ATL,Atlanta Hawks,21900969,2020-03-11,ATL vs. NYK,L,265,131,...,0.808,15,38,53,26,6,3,17,25,0.0
1,ATL,22019,ATL,Atlanta Hawks,21900957,2020-03-09,ATL vs. CHA,W,290,143,...,0.593,12,41,53,33,1,5,15,26,5.0
2,ATL,22019,ATL,Atlanta Hawks,21900943,2020-03-07,ATL @ MEM,L,240,101,...,0.857,14,32,46,23,9,2,14,24,-17.0
3,ATL,22019,ATL,Atlanta Hawks,21900930,2020-03-06,ATL @ WAS,L,239,112,...,0.667,6,30,36,25,9,4,17,25,-6.0
4,ATL,22019,ATL,Atlanta Hawks,21900905,2020-03-02,ATL vs. MEM,L,239,88,...,0.938,16,27,43,20,6,8,17,21,-39.0


In [83]:
#mask to filter dates within range
mask_15_16 = (nba_games['GAME_DATE'] >= start_15_16) & (nba_games['GAME_DATE'] <= end_15_16)
mask_16_17 = (nba_games['GAME_DATE'] >= start_16_17) & (nba_games['GAME_DATE'] <= end_16_17)
mask_17_18 = (nba_games['GAME_DATE'] >= start_17_18) & (nba_games['GAME_DATE'] <= end_17_18)
mask_18_19 = (nba_games['GAME_DATE'] >= start_18_19) & (nba_games['GAME_DATE'] <= end_18_19)

In [84]:
#regular season games held between given seasons
nba_games_15_16 = nba_games.loc[mask_15_16]
nba_games_16_17 = nba_games.loc[mask_16_17]
nba_games_17_18 = nba_games.loc[mask_17_18]
nba_games_18_19 = nba_games.loc[mask_18_19]

In [85]:
nba_games_15_16.head()

Unnamed: 0,abbreviation,SEASON_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
377,ATL,22015,ATL,Atlanta Hawks,21501221,2016-04-13,ATL @ WAS,L,240,98,...,0.742,9,38,47,22,13,5,22,21,-11.0
378,ATL,22015,ATL,Atlanta Hawks,21501203,2016-04-11,ATL @ CLE,L,240,94,...,0.533,10,32,42,23,8,6,15,18,-15.0
379,ATL,22015,ATL,Atlanta Hawks,21501188,2016-04-09,ATL vs. BOS,W,240,118,...,0.818,5,39,44,31,10,10,17,22,11.0
380,ATL,22015,ATL,Atlanta Hawks,21501173,2016-04-07,ATL vs. TOR,W,239,95,...,0.81,5,36,41,23,4,12,13,19,8.0
381,ATL,22015,ATL,Atlanta Hawks,21501157,2016-04-05,ATL vs. PHX,W,242,103,...,0.737,13,37,50,26,16,3,16,21,13.0


In [86]:
nba_games_15_16.keys()

Index(['abbreviation', 'SEASON_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME',
       'GAME_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA',
       'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB',
       'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS'],
      dtype='object')

In [None]:
#so we want to add a column to the df which contains the total points scored by both teams
unique_games = nba_games_15_16.groupby('GAME_ID').sum()

In [91]:
#lets start to build our features


Unnamed: 0_level_0,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
TEAM_ABBREVIATION,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
ATL,19823,8433,3168,6923,37.607,815,2326,28.835,1282,1638,63.797,679,2772,3451,2100,747,486,1191,1570,295.8
BKN,19767,8089,3136,6920,37.227,531,1508,28.572,1286,1699,61.675,863,2614,3477,1829,627,332,1174,1476,-605.0
BOS,19793,8669,3216,7318,36.091,717,2142,27.495,1520,1929,64.673,950,2733,3683,1981,752,348,1104,1796,263.0
CHA,19871,8479,3036,6922,35.995,873,2410,29.647,1534,1941,65.206,734,2869,3603,1778,595,438,973,1487,223.0
CHI,19905,8335,3165,7170,36.344,651,1753,30.248,1354,1720,64.125,907,2889,3796,1870,495,470,1093,1545,-121.0
CLE,19859,8555,3171,6888,37.88,880,2428,29.721,1333,1783,61.43,873,2777,3650,1861,551,317,1056,1666,492.0
DAL,19990,8388,3064,6900,36.473,806,2342,28.127,1454,1831,65.025,751,2781,3532,1813,560,306,1011,1595,-25.2
DEN,19844,8355,3093,7003,36.273,656,1943,27.62,1513,1974,63.126,941,2718,3659,1858,609,395,1162,1723,-254.0
DET,19879,8361,3111,7087,36.11,740,2148,28.191,1399,2095,55.364,1021,2777,3798,1594,573,304,1063,1557,52.0
GSW,19873,9421,3489,7159,40.055,1077,2592,33.967,1366,1790,62.963,816,2972,3788,2373,689,498,1219,1701,891.0
