In [3]:
import pickle
from os import path
import pandas as pd
from datetime import datetime

In [4]:
with open(path.join('game_data.pkl'), 'rb') as f:
    data = pickle.load(f)

In [5]:
# Filter regular season data
# All regular season games have game_ids starting with '002'
reg_season_data = {k: v for k, v in data.items() if str(k[:3]) == '002'}

In [6]:
# Sort games by season
games_by_season = {}
for year in range(1983, 2020):
    season = str(year) + '-' + str(year + 1)
    games_by_season[season] = []

for game in reg_season_data.values():
    game_date = datetime.strptime(game['game_date'], '%Y-%m-%d')
    if game_date.month <= 6:
        season = str(game_date.year - 1) + '-' + str(game_date.year)
    else:
        season = str(game_date.year) + '-' + str(game_date.year + 1)
    games_by_season[season].append(game)

In [7]:
def get_team_stats_for_game(game, team_name, team_status):
    # get regular season stats
    reg_stats = pd.DataFrame(dict(game[team_status + '_team_stats']), index=[0]).drop('MIN', axis=1)
    stats = reg_stats
    
    # check advanced stats exist
    if not game['advanced_team_stats'].empty:
        adv_stats = game['advanced_team_stats'][game['advanced_team_stats']['TEAM_ABBREVIATION'] == team_name].reset_index(drop=True).drop('MIN', axis=1)
        stats = pd.concat([reg_stats, adv_stats], axis=1)
        
    return stats

season_games_by_team = {}

for season in games_by_season:
    season_games_by_team[season] = {}
    
    for game in games_by_season[season]:
        home_team_name = game['home_team']
        home_team_stats = get_team_stats_for_game(game, home_team_name, 'home')
        if home_team_name in season_games_by_team[season]:
            season_games_by_team[season][home_team_name] = pd.concat([season_games_by_team[season][home_team_name], home_team_stats])
        else:
            season_games_by_team[season][home_team_name] = home_team_stats

        away_team_name = game['away_team']
        away_team_stats = get_team_stats_for_game(game, away_team_name, 'away')
        if away_team_name in season_games_by_team[season]:
            season_games_by_team[season][away_team_name] = pd.concat([season_games_by_team[season][away_team_name], away_team_stats])
        else:
            season_games_by_team[season][away_team_name] = away_team_stats

In [8]:
# season_averages_by_team = {}

# for season in season_games_by_team:
#     season_averages_by_team[season] = {}
#     for team in season_games_by_team[season]:
#         season_averages_by_team[season][team] = season_games_by_team[season][team].mean()

In [30]:
season_averages_by_team = pd.DataFrame()

avoid = ['GAME_ID', 'TEAM_ID', 'TEAM_NAME','TEAM_ABBREVIATION', 'TEAM_CITY']
for season in season_games_by_team:
    for team in season_games_by_team[season]:
        print(team, season)
        season_average = pd.DataFrame({'season': season, 'team': team}, index=[0])
        team_season_stats = season_games_by_team[season][team]

        for col in team_season_stats.columns:
            if col not in avoid:
                season_average[col] = team_season_stats[col].mean()
        
        season_averages_by_team = season_averages_by_team.append(season_average)

ATL 1983-1984
DET 1983-1984
NJN 1983-1984
IND 1983-1984
CHI 1983-1984
WAS 1983-1984
NYK 1983-1984
BOS 1983-1984
PHL 1983-1984
HOU 1983-1984
DAL 1983-1984
SAN 1983-1984
KCK 1983-1984
CLE 1983-1984
MIL 1983-1984
POR 1983-1984
LAL 1983-1984
DEN 1983-1984
UTH 1983-1984
SDC 1983-1984
PHX 1983-1984
GOS 1983-1984
SEA 1983-1984
ATL 1984-1985
IND 1984-1985
CHI 1984-1985
NYK 1984-1985
WAS 1984-1985
PHL 1984-1985
DET 1984-1985
MIL 1984-1985
NJN 1984-1985
CLE 1984-1985
KCK 1984-1985
POR 1984-1985
SEA 1984-1985
GOS 1984-1985
BOS 1984-1985
SAN 1984-1985
DEN 1984-1985
LAC 1984-1985
LAL 1984-1985
UTH 1984-1985
DAL 1984-1985
PHX 1984-1985
HOU 1984-1985
ATL 1985-1986
IND 1985-1986
NJN 1985-1986
CHI 1985-1986
WAS 1985-1986
CLE 1985-1986
PHL 1985-1986
MIL 1985-1986
DAL 1985-1986
SAN 1985-1986
NYK 1985-1986
BOS 1985-1986
DEN 1985-1986
DET 1985-1986
LAL 1985-1986
UTH 1985-1986
SEA 1985-1986
POR 1985-1986
SAC 1985-1986
HOU 1985-1986
GOS 1985-1986
LAC 1985-1986
PHX 1985-1986
BOS 1986-1987
ATL 1986-1987
CHI 19

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


 1996-1997
DET 1996-1997
CHH 1996-1997
SAC 1996-1997
LAC 1996-1997
POR 1996-1997
TOR 1996-1997
DAL 1996-1997
ORL 1996-1997
CHI 1996-1997
SEA 1996-1997
UTA 1996-1997
BOS 1996-1997
MIL 1996-1997
CLE 1996-1997
LAL 1996-1997
GSW 1996-1997
HOU 1996-1997
SAS 1996-1997
PHX 1996-1997
DEN 1996-1997
VAN 1996-1997
WAS 1996-1997
MIA 1996-1997
ATL 1997-1998
MIA 1997-1998
CHH 1997-1998
IND 1997-1998
PHI 1997-1998
WAS 1997-1998
NYK 1997-1998
MIN 1997-1998
HOU 1997-1998
TOR 1997-1998
NJN 1997-1998
DET 1997-1998
CHI 1997-1998
ORL 1997-1998
MIL 1997-1998
BOS 1997-1998
CLE 1997-1998
DEN 1997-1998
VAN 1997-1998
SEA 1997-1998
PHX 1997-1998
POR 1997-1998
SAS 1997-1998
GSW 1997-1998
DAL 1997-1998
UTA 1997-1998
LAL 1997-1998
SAC 1997-1998
LAC 1997-1998
ATL 1998-1999
WAS 1998-1999
MIA 1998-1999
CLE 1998-1999
IND 1998-1999
NYK 1998-1999
MIL 1998-1999
BOS 1998-1999
TOR 1998-1999
ORL 1998-1999
DET 1998-1999
CHH 1998-1999
VAN 1998-1999
NJN 1998-1999
PHI 1998-1999
CHI 1998-1999
PHX 1998-1999
LAC 1998-1999
HOU 1998-

IND 2016-2017
ATL 2016-2017
CHA 2016-2017
CLE 2016-2017
BOS 2016-2017
BKN 2016-2017
CHI 2016-2017
PHI 2016-2017
PHX 2016-2017
MIL 2016-2017
WAS 2016-2017
POR 2016-2017
MEM 2016-2017
SAS 2016-2017
TOR 2016-2017
GSW 2016-2017
DAL 2016-2017
ORL 2016-2017
MIA 2016-2017
LAC 2016-2017
SAC 2016-2017
DEN 2016-2017
UTA 2016-2017
HOU 2016-2017
NYK 2016-2017
DET 2016-2017
NOP 2016-2017
MIN 2016-2017
OKC 2016-2017
LAL 2016-2017
ATL 2017-2018
PHI 2017-2018
BOS 2017-2018
WAS 2017-2018
MIA 2017-2018
ORL 2017-2018
MIN 2017-2018
HOU 2017-2018
GSW 2017-2018
SAC 2017-2018
UTA 2017-2018
MIL 2017-2018
CHA 2017-2018
OKC 2017-2018
CHI 2017-2018
IND 2017-2018
TOR 2017-2018
PHX 2017-2018
LAL 2017-2018
DET 2017-2018
CLE 2017-2018
MEM 2017-2018
NYK 2017-2018
NOP 2017-2018
SAS 2017-2018
BKN 2017-2018
DEN 2017-2018
LAC 2017-2018
POR 2017-2018
DAL 2017-2018
ATL 2018-2019
IND 2018-2019
MIL 2018-2019
ORL 2018-2019
PHI 2018-2019
SAS 2018-2019
POR 2018-2019
NOP 2018-2019
UTA 2018-2019
HOU 2018-2019
BOS 2018-2019
MEM 20

In [36]:
season_averages_by_team.reset_index(drop=True).to_feather('season_averages_by_team.data')