1. Extract teams by ID and abbreviation
2. Extract game data for each team
3. Split into seasons (SEASON_ID format: Txxxx, where T = game type and xxxx = year the season started)
    Note: SEASON_ID prefix 4 = playoffs, 2 = regular season, 1 = preseason; GAME_IDs of non-summer-league games start with 00

In [25]:
from nba_api.stats.endpoints import commonplayerinfo, playercareerstats, leaguegamefinder, commonteamroster
from nba_api.stats.static import players, teams
from os import path
import pandas as pd
import pickle

In [26]:
""" for saving and loading things to a pickle file. Don't need to add extension to the file name """
def save_obj(obj, filename, dirname='pickle_files'):
    """Pickle ``obj`` to ``<dirname>/<filename>.pkl``.

    Args:
        obj: Any picklable object.
        filename: Base file name; the ``.pkl`` extension is appended here,
            so do not include it.
        dirname: Directory to write into. It is created (including parents)
            when it does not exist yet.
    """
    # Local import: only ``path`` is imported from ``os`` at file level.
    from os import makedirs

    # An empty dirname means "current directory"; makedirs('') would raise.
    if dirname:
        makedirs(dirname, exist_ok=True)
    with open(path.join(dirname, filename + '.pkl'), 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(filename, dirname='pickle_files'):
    """Read back the object pickled at ``<dirname>/<filename>.pkl``.

    Args:
        filename: Base file name without the ``.pkl`` extension.
        dirname: Directory that holds the pickle file.

    Returns:
        The unpickled object.

    Note:
        Unpickling can execute arbitrary code; only load files you trust.
    """
    pkl_path = path.join(dirname, filename + '.pkl')
    with open(pkl_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

In [16]:
# Build a DataFrame from nba_api's static team list and display it.
teams_df = pd.DataFrame(teams.get_teams())
teams_df

Unnamed: 0,abbreviation,city,full_name,id,nickname,state,year_founded
0,ATL,Atlanta,Atlanta Hawks,1610612737,Hawks,Atlanta,1949
1,BOS,Boston,Boston Celtics,1610612738,Celtics,Massachusetts,1946
2,CLE,Cleveland,Cleveland Cavaliers,1610612739,Cavaliers,Ohio,1970
3,NOP,New Orleans,New Orleans Pelicans,1610612740,Pelicans,Louisiana,2002
4,CHI,Chicago,Chicago Bulls,1610612741,Bulls,Illinois,1966
5,DAL,Dallas,Dallas Mavericks,1610612742,Mavericks,Texas,1980
6,DEN,Denver,Denver Nuggets,1610612743,Nuggets,Colorado,1976
7,GSW,Golden State,Golden State Warriors,1610612744,Warriors,California,1946
8,HOU,Houston,Houston Rockets,1610612745,Rockets,Texas,1967
9,LAC,Los Angeles,Los Angeles Clippers,1610612746,Clippers,California,1970


In [4]:
# Keep only the two columns needed to look up each team's games.
# The column selector is a list rather than a tuple: a tuple reads as a
# single MultiIndex key, while a list unambiguously means "these columns".
team_ids = teams_df.loc[:, ["id", "abbreviation"]]
team_ids

Unnamed: 0,id,abbreviation
0,1610612737,ATL
1,1610612738,BOS
2,1610612739,CLE
3,1610612740,NOP
4,1610612741,CHI
5,1610612742,DAL
6,1610612743,DEN
7,1610612744,GSW
8,1610612745,HOU
9,1610612746,LAC


In [5]:
# Preview the abbreviations; the download cell uses them as dictionary keys.
team_ids["abbreviation"]

0     ATL
1     BOS
2     CLE
3     NOP
4     CHI
5     DAL
6     DEN
7     GSW
8     HOU
9     LAC
10    LAL
11    MIA
12    MIL
13    MIN
14    BKN
15    NYK
16    ORL
17    IND
18    PHI
19    PHX
20    POR
21    SAC
22    SAS
23    OKC
24    TOR
25    UTA
26    MEM
27    WAS
28    DET
29    CHA
Name: abbreviation, dtype: object

In [None]:
# *** should only have to run the loading cell below ***
# Download each team's game log through nba_api's LeagueGameFinder.
# team_names maps team abbreviation -> DataFrame with that team's game data.
import time

team_names = {}
# Walk the two columns in lockstep instead of indexing by position.
for temp_id, temp_team_name in zip(team_ids["id"], team_ids["abbreviation"]):
    print(f'getting team: {temp_team_name}...')
    finder = leaguegamefinder.LeagueGameFinder(team_id_nullable=temp_id)
    # Only the first frame returned by the endpoint is kept.
    team_names[temp_team_name] = finder.get_data_frames()[0]
    # Pause between requests (presumably to avoid being rate limited).
    time.sleep(1)

In [27]:
""" save dictionary to a file called team_names under pickle_files folder"""
# Writes pickle_files/team_names.pkl so the download cell need not be re-run.
save_obj(team_names, 'team_names')

In [28]:
""" load the team_names dictionary in pickle_files """
# Reads pickle_files/team_names.pkl; the file must already exist.
team_names = load_obj('team_names')

In [35]:
""" gets the season games given the season and abbreviation of the team """
""" Season is based on the year that the season STARTED in. I.E 2016-2017 has seasonID 2016"""
def get_season_team(season, team_abr, season_class=None):
    """Return the games one team played in a given season.

    Args:
        season: Year the season STARTED in (int or str); the 2016-2017
            season is 2016.
        team_abr: Team abbreviation used as a key into the module-level
            ``team_names`` dict, e.g. ``"ATL"``.
        season_class: Optional SEASON_ID prefix selecting the kind of game
            (1 = preseason, 2 = regular season, 4 = playoffs). ``None``
            (the default) keeps every kind.

    Returns:
        The rows of ``team_names[team_abr]`` whose SEASON_ID ends with
        ``season``, whose GAME_ID starts with ``"00"`` (non-summer-league
        games) and, when ``season_class`` is given, whose SEASON_ID starts
        with it.

    Raises:
        KeyError: if ``team_abr`` is not a key of ``team_names``.
    """
    games = team_names[team_abr]
    season_ids = games["SEASON_ID"]

    # Element-wise ``&`` is required here: Python's ``and`` asks for the
    # truth value of a whole Series and raises ValueError.
    # ``endswith`` matches the year literally instead of as a regex substring.
    mask = (season_ids.str.endswith(str(season))
            & games["GAME_ID"].str.startswith("00"))
    if season_class is not None:
        mask = mask & season_ids.str.startswith(str(season_class))
    return games.loc[mask]

In [76]:
""" 
get the 2018 season games for the Atlanta Hawks. 
None of the season IDs start with 4 cuz they suck and didn't make the playoffs sorry Trae
"""
# get_season_team declares a third parameter, season_class; pass 2 (the
# regular-season SEASON_ID prefix) so the call matches the signature and
# the variable name.
atl_reg_season = get_season_team(2018, "ATL", 2)
atl_reg_season
#len(atl_reg_season.index)
# atl_reg_season
# atl_reg_season.iloc[90]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
15,22018,1610612737,ATL,Atlanta Hawks,0021801220,2019-04-10,ATL vs. IND,L,240,134,...,0.816,22,39,61,29,5.0,7,17,25,-1.0
16,22018,1610612737,ATL,Atlanta Hawks,0021801202,2019-04-07,ATL @ MIL,L,240,107,...,0.526,9,39,48,25,2.0,3,11,28,-8.0
17,22018,1610612737,ATL,Atlanta Hawks,0021801181,2019-04-05,ATL @ ORL,L,240,113,...,0.677,10,28,38,21,16.0,4,14,21,-36.0
18,22018,1610612737,ATL,Atlanta Hawks,0021801168,2019-04-03,ATL vs. PHI,W,240,130,...,0.786,11,33,44,29,7.0,7,11,26,8.0
19,22018,1610612737,ATL,Atlanta Hawks,0021801162,2019-04-02,ATL @ SAS,L,240,111,...,0.650,11,32,43,26,13.0,2,11,18,-6.0
20,22018,1610612737,ATL,Atlanta Hawks,0021801145,2019-03-31,ATL vs. MIL,W,265,136,...,0.588,15,43,58,36,10.0,10,9,13,1.0
21,22018,1610612737,ATL,Atlanta Hawks,0021801131,2019-03-29,ATL vs. POR,L,239,98,...,0.929,8,33,41,20,5.0,7,10,22,-20.0
22,22018,1610612737,ATL,Atlanta Hawks,0021801113,2019-03-26,ATL @ NOP,W,239,130,...,0.885,9,32,41,30,13.0,4,16,26,10.0
23,22018,1610612737,ATL,Atlanta Hawks,0021801088,2019-03-23,ATL vs. PHI,W,240,129,...,0.762,15,34,49,28,8.0,7,12,25,2.0
24,22018,1610612737,ATL,Atlanta Hawks,0021801076,2019-03-21,ATL vs. UTA,W,239,117,...,0.696,9,34,43,26,8.0,5,16,23,3.0


In [30]:
# NOTE(review): `gamefinder` is not defined in any visible cell, so this line
# raises NameError (see the recorded output). Presumably left over from an
# earlier session; confirm, then either define it or remove this cell.
gamefinder.loc[gamefinder["SEASON_ID"].str.contains("2018")]

NameError: name 'gamefinder' is not defined

In [31]:
# List the column names of the ATL game-log DataFrame.
team_names["ATL"].columns

Index(['SEASON_ID', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT',
       'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB',
       'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PLUS_MINUS'],
      dtype='object')