## 1. Data Munging

### Preliminary setup

In [2]:
import json
import urllib
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [3]:
# Pandas display limits: up to 1600 rows and 50 columns are shown
pd.set_option('display.max_rows', 1600)
pd.set_option('display.max_columns', 50)

# Pyplot config: default font size for every figure in this notebook
plt.rcParams['font.size'] = 14

In [4]:
# NHL season data
# Seasons are identified by their two calendar years joined together, e.g. '19171918'.
start_year = 1917  # 1917 is the first year with data
end_year = 2022    # exclusive: the last season generated starts in end_year - 1
all_years = range(start_year, end_year)
seasons = [f'{first_year}{first_year + 1}' for first_year in all_years]
    
# Cup winners
#cup_winners = pd.read_csv('cup-winners.csv', names=['year', 'team', 'abbr'])
#cup_winners.loc[3] = ['2020', 'To Be Determined', 'TBD']
#cup_winners.index = seasons

In [5]:
# Fetch the current list of NHL teams and collect their full names.
# The body is read inside the `with` block so the HTTP connection is closed
# as soon as it has been consumed instead of lingering for the kernel's lifetime.
with urllib.request.urlopen('https://statsapi.web.nhl.com/api/v1/teams') as req:
    req_json = req.read().decode()

# The payload is a JSON object; its 'teams' entry is the list of team records
req_teams = json.loads(req_json)['teams']
req_names = [team['name'] for team in req_teams]

In [6]:
# NHL Team Abbreviation Index
#
# Maps each full team name (current and historical) to its abbreviation.
# The mapping is written as explicit name/abbreviation pairs so that every
# pairing can be checked at a glance; two parallel lists are easy to misalign.
#
# The team list from the API (req_teams / req_names) is already fetched by the
# previous cell, so it is not downloaded a second time here.

team_index = {
    'Anaheim Ducks': 'ANA',
    'Arizona Coyotes': 'ARI',
    'Atlanta Flames': 'AFM',
    'Atlanta Thrashers': 'ATL',
    'Boston Bruins': 'BOS',
    'Brooklyn Americans': 'BRK',
    'Buffalo Sabres': 'BUF',
    'Calgary Flames': 'CGY',
    'California Golden Seals': 'CGS',
    'Carolina Hurricanes': 'CAR',
    'Chicago Blackhawks': 'CHI',
    'Cleveland Barons': 'CLE',
    'Colorado Avalanche': 'COL',
    'Colorado Rockies': 'CLR',
    'Columbus Blue Jackets': 'CBJ',
    'Dallas Stars': 'DAL',
    'Detroit Cougars': 'DCG',
    'Detroit Falcons': 'DFL',
    'Detroit Red Wings': 'DET',
    'Edmonton Oilers': 'EDM',
    'Florida Panthers': 'FLA',
    'Hamilton Tigers': 'HAM',
    'Hartford Whalers': 'HFD',
    'Kansas City Scouts': 'KCS',
    'Los Angeles Kings': 'LAK',
    'Minnesota North Stars': 'MNS',
    'Minnesota Wild': 'MIN',
    'Montréal Canadiens': 'MTL',
    'Montreal Maroons': 'MMR',
    'Montreal Wanderers': 'MWN',
    'Nashville Predators': 'NSH',
    'New Jersey Devils': 'NJD',
    'New York Americans': 'NYA',
    'New York Islanders': 'NYI',
    'New York Rangers': 'NYR',
    'Oakland Seals': 'OAK',
    'Ottawa Senators': 'OTT',
    'Ottawa Senators (1917)': 'SEN',
    'Phoenix Coyotes': 'PHX',
    'Philadelphia Flyers': 'PHI',
    'Philadelphia Quakers': 'QUA',
    'Pittsburgh Penguins': 'PIT',
    # The original list gave the Pirates 'PIT', colliding with the Penguins.
    # NOTE(review): 'PIR' is believed to be the stats API's abbreviation for
    # the Pirates -- confirm against the API response.
    'Pittsburgh Pirates': 'PIR',
    'Quebec Bulldogs': 'QBD',
    'Quebec Nordiques': 'QUE',
    'San Jose Sharks': 'SJS',
    'Seattle Kraken': 'SEA',
    'St. Louis Blues': 'STL',
    'St. Louis Eagles': 'SLE',
    'Tampa Bay Lightning': 'TBL',
    'Toronto Arenas': 'TAN',
    'Toronto Maple Leafs': 'TOR',
    'Toronto St. Patricks': 'TSP',
    'Vancouver Canucks': 'VAN',
    'Vegas Golden Knights': 'VGK',
    'Washington Capitals': 'WSH',
    'Winnipeg Jets': 'WPG',
    'Winnipeg Jets (1979)': 'WIN',
}

# Parallel list views, kept (in the same order as before) for any cell that
# still reads the original list names.
teams_long = list(team_index)
teams_short = list(team_index.values())

# Every franchise must have its own abbreviation
assert len(set(teams_short)) == len(teams_short), 'duplicate team abbreviation'

### Clean build for current season

In [7]:
### THIS IS A CLEAN BUILD FOR THE CURRENT SEASON, JUST A TEMPLATE ###

# First get the team data into a dataframe.
# Each response is read inside a `with` block so its connection is closed
# immediately after the body has been consumed.
with urllib.request.urlopen('https://statsapi.web.nhl.com/api/v1/teams?expand=team.stats') as teams_req:
    teams_json = teams_req.read().decode()
teams_meta = json.loads(teams_json)

# One column per team (keyed by abbreviation); rows are the fields of the team record
teams_dict = {team['abbreviation']: team for team in teams_meta['teams']}
teams_df = pd.DataFrame.from_dict(teams_dict)

# Pull the first split's stat record out of each team's 'teamStats' entry,
# then lay the records out with one row per team
stats_dict = {team: teams_df[team]['teamStats'][0]['splits'][0]['stat'] for team in teams_df.columns}
stats_df = pd.DataFrame.from_dict(stats_dict, orient='index')

# Next we can get player rosters
with urllib.request.urlopen('https://statsapi.web.nhl.com/api/v1/teams?expand=team.roster') as players_req:
    players_json = players_req.read().decode()
players_meta = json.loads(players_json)

### Pulling and building for all seasons

#### Team data

In [8]:
# Open one team-stats request per season against the NHL API.
# The responses are left unread here; a later cell reads their bodies.
teams_reqs = [
    urllib.request.urlopen(
        f'https://statsapi.web.nhl.com/api/v1/teams?expand=team.stats&season={season}'
    )
    for season in seasons
]

In [9]:
# Decode each response body into JSON text.
# Every response is closed right after it is read, so the per-season
# connections opened above do not stay open for the life of the kernel.
teams_jsons = []
for team_stats_resp in teams_reqs:
    with team_stats_resp:
        teams_jsons.append(team_stats_resp.read().decode())

In [10]:
# Parse each season's JSON text and keep only its 'teams' list
teams_metas = [
    dict(json.loads(season_text))['teams']
    for season_text in teams_jsons
]

In [11]:
# For every season, index that season's team records by team abbreviation
teams_dicts = [
    {team['abbreviation']: team for team in season_teams}
    for season_teams in teams_metas
]

In [12]:
# Master dictionary: season string -> {team abbreviation -> team record}
season_dict = {
    season: season_teams
    for season, season_teams in zip(seasons, teams_dicts)
}

In [13]:
# Create stats dictionary: season -> {team abbreviation -> stat record}
stats_dict = {}

for season_key, season_teams in season_dict.items():
    season_stats = {}
    for abbr, team_record in season_teams.items():
        # A team with no stats for the season is missing part of the nested
        # path below, which surfaces as a KeyError
        try:
            season_stats[abbr] = team_record['teamStats'][0]['splits'][0]['stat']
        except KeyError:
            # Zero-filled record (rather than NaN), using the stat names of the
            # 2000-01 Colorado entry as the template
            template = season_dict['20002001']['COL']['teamStats'][0]['splits'][0]['stat']
            season_stats[abbr] = {stat_name: 0 for stat_name in template}
    stats_dict[season_key] = season_stats

In [14]:
# One stats dataframe per season (rows = teams, columns = stat names), keyed by season
stats_dfs = {
    season: pd.DataFrame.from_dict(team_stats, orient='index')
    for season, team_stats in stats_dict.items()
}

In [15]:
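# Append a boolean 'cupWin' column to each seasonal dataframe to mark the Stanley Cup winner.
# Disabled: this depends on `cup_winners`, whose loading code is commented out in the season setup cell.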
#for key in stats_dfs:
#    win_bool = stats_dfs[key].index == cup_winners.loc[key]['abbr']
#    stats_dfs[key]['cupWin'] = win_bool

#### Player data (via ID lookup)

In [16]:
# Open one team-roster request per season against the NHL API.
# The responses are left unread here; a later cell reads their bodies.
player_reqs = [
    urllib.request.urlopen(
        f'https://statsapi.web.nhl.com/api/v1/teams?expand=team.roster&season={season}'
    )
    for season in seasons
]

In [17]:
# Decode each roster response body into JSON text.
# Every response is closed right after it is read, so the per-season
# connections opened above do not stay open for the life of the kernel.
player_jsons = []
for roster_resp in player_reqs:
    with roster_resp:
        player_jsons.append(roster_resp.read().decode())

In [18]:
# Parse each season's roster JSON text and keep only its 'teams' list
player_metas = [
    dict(json.loads(season_text))['teams']
    for season_text in player_jsons
]

In [19]:
# Connect each season to the teams that played in it, and each of those teams
# to its roster (the list of player entries for that season).
#
# A team record without roster data gets an empty list, so every roster has
# the same type whether or not data was available (previously the fallback
# was an empty dict while real rosters are lists).
rosters = [
    {
        team['abbreviation']: team.get('roster', {}).get('roster', [])
        for team in season_teams
    }
    for season_teams in player_metas
]

season_dicts = dict(zip(seasons, rosters))

In [20]:
# Walk every roster of every team in every season once, collecting the
# 'person' record of each roster entry in season -> team -> roster order
roster_people = [
    entry['person']
    for season in seasons
    for roster in season_dicts[season].values()
    for entry in roster
]

# Player IDs and full names, in matching order (one item per roster entry)
all_ids = [person['id'] for person in roster_people]
all_names = [person['fullName'] for person in roster_people]

# Name -> ID lookup. When the same full name occurs more than once, the last
# occurrence determines the stored ID.
id_dict = dict(zip(all_names, all_ids))

In [34]:
# Create a function that returns a player's year-by-year statistics

def get_player_df(player):
    """Return a player's year-by-year NHL statistics as a DataFrame.

    Parameters
    ----------
    player : str
        Full name of the player; must be a key of the module-level ``id_dict``.

    Returns
    -------
    pandas.DataFrame
        One row per National Hockey League split returned by the API, indexed
        by the split's ``season`` value. The first column, ``team``, holds the
        team abbreviation looked up in ``team_index``; the remaining columns
        are the fields of the split's ``stat`` record.

    Raises
    ------
    KeyError
        If ``player`` is not in ``id_dict`` or a team name in the response is
        not in ``team_index``.
    """
    # Make API request and parse into python data structure; the `with` block
    # closes the connection as soon as the body has been read
    url = 'https://statsapi.web.nhl.com/api/v1/people/{}/stats?stats=yearByYear'.format(id_dict[player])
    with urllib.request.urlopen(url) as stat_req:
        stat_json = stat_req.read().decode()
    all_splits = json.loads(stat_json)['stats'][0]['splits']

    # The response also lists seasons played in other leagues; keep NHL ones only
    nhl_splits = [split for split in all_splits
                  if split['league']['name'] == 'National Hockey League']

    # Convert to pandas dataframe with team column
    player_df = pd.DataFrame([split['stat'] for split in nhl_splits],
                             index=[split['season'] for split in nhl_splits])
    teams_abb = [team_index[split['team']['name']] for split in nhl_splits]
    player_df.insert(loc=0, column='team', value=teams_abb)

    return player_df

In [41]:
# Example: build the year-by-year stats table for a single player.
# The name must match a key of id_dict exactly, because get_player_df looks the player's ID up there.
player_df = get_player_df('Jaromir Jagr')

#### END