## Testing out some code to hit the NHL API


Resources

* https://towardsdatascience.com/nhl-analytics-with-python-6390c5d3206d
* https://nhl-api-explorer.herokuapp.com/
* https://gitlab.com/dword4/nhlapi
* https://gitlab.com/dword4/nhlapi/-/blob/master/stats-api.md#game-ids
* https://github.com/dword4/nhlapi

Game IDs
* The first 4 digits identify the season of the game (ie. 2017 for the 2017-2018 season). 
* The next 2 digits give the type of game, where 01 = preseason, 02 = regular season, 03 = playoffs, 04 = all-star. 
* The final 4 digits identify the specific game number. 
* For regular season and preseason games, this ranges from 0001 to the number of games played. (1271 for seasons with 31 teams (2017 and onwards) and 1230 for seasons with 30 teams). 
* For playoff games, the 2nd digit of the specific number gives the round of the playoffs, the 3rd digit specifies the matchup, and the 4th digit specifies the game (out of 7).

In [336]:
# Load dependencies
import requests
import pickle
import pandas as pd
import json

In [337]:
# Set up the API call variables
year = '2021'
season_type = '02' 
max_game_id = 1290
boxscore_url = '/boxscore'
livefeed_url = '/feed/live'
game_url = 'http://statsapi.web.nhl.com/api/v1/game/'

In [365]:
# Test hitting the boxscore and live feed endpoints, save as JSON files
def request_get_data(url, 
                    year, 
                    season_type, 
                    max_game_id, 
                    endpoint,
                    empty_list = [],
                    zfill = 4, 
                    multiple_games = True):

    # If multiple_games param is true, the get data for n number of games (uses for loop)
    if multiple_games == True:    
        for i in range(1, max_game_id, 1):
            game_id = str(url) + str(year) + str(season_type) + str(i).zfill(zfill) + str(endpoint)
            r = requests.get(game_id)
            data = r.json()
            data['game_id'] = game_id
            empty_list.append(data)

    # If multiple_games param is not true, then get data for the 1 game (no for loop)
    else:
        game_id = str(url) + str(year) + str(season_type) + str(max_game_id).zfill(zfill) + str(endpoint)
        r = requests.get(game_id)
        data = r.json()
        data['game_id'] = game_id
        empty_list.append(data)

    # Rename to game_data_json
    game_data_list = empty_list

    return game_data_list

# Function that takes in a list, converts to json, then saves a copy of that json file in the local folder
def save_as_json(list, 
                 file_name = 'data'):

    # Save as json file in same folder the notebook is run (relative)
    with open(f'{file_name}.json', 'w') as f:
        json.dump(list, f)

    # Convert list to json
    j_data = json.dumps(list)
 
    
    return j_data

In [366]:
# For a single game, save json files for both the boxscore and livefeed api
single_game_id = 5

# Boxscore
boxscore_list = request_get_data(game_url, 
                                year, 
                                season_type, 
                                single_game_id, 
                                boxscore_url,
                                multiple_games = True)
boxscore_json = save_as_json(boxscore_list, 'test_data/boxscore')

# Livefeed
livefeed_list = request_get_data(game_url, 
                                year, 
                                season_type, 
                                single_game_id, 
                                livefeed_url,
                                multiple_games = True)
livefeed_json = save_as_json(livefeed_list, 'test_data/livefeed')

In [367]:
max_game_id = 1290

z_id = str(game_url) + str(year) + str(season_type) + str(max_game_id).zfill(4) + str(boxscore_url)
for i in range(0, max_game_id, 1):
    z_id = str(game_url) + str(year) + str(season_type) + str(i).zfill(4) + str(boxscore_url)
    print(f'game: {i}, game_id: {z_id}')

game: 0, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020000/boxscore
game: 1, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020001/boxscore
game: 2, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020002/boxscore
game: 3, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020003/boxscore
game: 4, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020004/boxscore
game: 5, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020005/boxscore
game: 6, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020006/boxscore
game: 7, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020007/boxscore
game: 8, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020008/boxscore
game: 9, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020009/boxscore
game: 10, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020010/boxscore
game: 11, game_id: http://statsapi.web.nhl.com/api/v1/game/2021020011/boxscore
game: 12, game_id: http://statsapi.web.nhl.com/api/v1/game/202

In [368]:
# Setup json object
json_object = json.loads(boxscore_json)

games = len(json_object)
for game in range(1, (games), 1):
    # HOME TEAM: For each game, get the homes team's player information
    print(game)

1
2
3


In [371]:
# Setup json object
json_object = json.loads(boxscore_json)

# Set up iterable variables
games = len(json_object)
#player_id = list(json_object[0]['teams']['home']['players'].keys())
raw = []

# Loop over every game
for game in range(0, (games), 1):

    # Loop over each home player in each team
    for player in list(json_object[game]['teams']['home']['players'].keys()):
        player_data = json_object[game]['teams']['home']['players'][player]['person']
        player_data ["home_team"] = 1
        player_data["pid"] = player
        raw.append(player_data)

    # Loop over each away player in each team
    for player in list(json_object[game]['teams']['away']['players'].keys()):
        player_data = json_object[game]['teams']['away']['players'][player]['person']
        player_data ["home_team"] = 0
        player_data["pid"] = player
        raw.append(player_data)

# Convert to pandas dataframe
df1 = pd.DataFrame.from_dict(raw)

# Flatten currentTeam
df2 = pd.concat([df1.drop(['currentTeam'], axis=1), df1['currentTeam'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"id": "team_id", "name": "team_name", "link": "team_link"})

# Flatten primaryPosition
df2 = pd.concat([df2.drop(['primaryPosition'], axis=1), df2['primaryPosition'].apply(pd.Series)], axis=1)
df2 = df2.rename(columns={"code": "pos_code", "name": "pos_name", "type": "pos_type", "abbreviation": "pos_abbr"})

# See the data
df2.head(3)

Unnamed: 0,team_id,fullName,team_link,firstName,lastName,primaryNumber,birthDate,currentAge,birthCity,birthCountry,...,home_team,pid,birthStateProvince,team_id.1,team_name,team_link.1,pos_code,pos_name,pos_type,pos_abbr
0,8480172,Jan Rutta,/api/v1/people/8480172,Jan,Rutta,44,1990-07-29,31,Pisek,CZE,...,1,ID8480172,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D
1,8478519,Anthony Cirelli,/api/v1/people/8478519,Anthony,Cirelli,71,1997-07-15,24,Etobicoke,CAN,...,1,ID8478519,ON,14,Tampa Bay Lightning,/api/v1/teams/14,C,Center,Forward,C
2,8478416,Erik Cernak,/api/v1/people/8478416,Erik,Cernak,81,1997-05-28,24,Kosice,SVK,...,1,ID8478416,,14,Tampa Bay Lightning,/api/v1/teams/14,D,Defenseman,Defenseman,D


In [372]:
df2.groupby('team_name').count()

Unnamed: 0_level_0,team_id,fullName,team_link,firstName,lastName,primaryNumber,birthDate,currentAge,birthCity,birthCountry,...,rosterStatus,home_team,pid,birthStateProvince,team_id,team_link,pos_code,pos_name,pos_type,pos_abbr
team_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Montréal Canadiens,21,21,21,21,21,21,21,21,21,21,...,21,21,21,18,21,21,21,21,21,21
New York Rangers,22,22,22,22,22,22,22,22,22,22,...,22,22,22,14,22,22,22,22,22,22
Pittsburgh Penguins,22,22,22,22,22,22,22,22,22,22,...,22,22,22,18,22,22,22,22,22,22
Seattle Kraken,22,22,22,22,22,22,22,22,22,22,...,22,22,22,17,22,22,22,22,22,22
Tampa Bay Lightning,21,21,21,21,21,21,21,21,21,21,...,21,21,21,13,21,21,21,21,21,21
Toronto Maple Leafs,21,21,21,21,21,21,21,21,21,21,...,21,21,21,16,21,21,21,21,21,21
Vegas Golden Knights,22,22,22,22,22,22,22,22,22,22,...,22,22,22,17,22,22,22,22,22,22
Washington Capitals,22,22,22,22,22,22,22,22,22,22,...,22,22,22,14,22,22,22,22,22,22


In [357]:
# Boxscore JSON
# Get the number of shots, goals and icetime by game for each skater
json_object = json.loads(boxscore_json)

# first layer = the game (number)
# second layerS

data = []
empty_pd = pd.DataFrame()
player_id = list(json_object[0]['teams']['home']['players'].keys())

for i in player_id:
    player_data = {
        'player_id': i,
        'player_name': json_object[0]['teams']['home']['players'][i]['person']['fullName'],
        'player_pos': json_object[0]['teams']['home']['players'][i]['person']['primaryPosition']['name'],
        #'goals': json_object[0]['teams']['home']['players'][i]['stats']['skaterStats'].keys()
        #'shots': json_object[0]['teams']['home']['players'][i]['stats']['skaterStats']['shots'],
        #'toi': json_object[0]['teams']['home']['players'][i]['stats']['skaterStats']['timeOnIce']
    }
    data.append(player_data)
        

data

[{'player_id': 'ID8480172',
  'player_name': 'Jan Rutta',
  'player_pos': 'Defenseman'},
 {'player_id': 'ID8478519',
  'player_name': 'Anthony Cirelli',
  'player_pos': 'Center'},
 {'player_id': 'ID8478416',
  'player_name': 'Erik Cernak',
  'player_pos': 'Defenseman'},
 {'player_id': 'ID8474567',
  'player_name': 'Zach Bogosian',
  'player_pos': 'Defenseman'},
 {'player_id': 'ID8470621',
  'player_name': 'Corey Perry',
  'player_pos': 'Right Wing'},
 {'player_id': 'ID8473986',
  'player_name': 'Alex Killorn',
  'player_pos': 'Left Wing'},
 {'player_id': 'ID8476453',
  'player_name': 'Nikita Kucherov',
  'player_pos': 'Right Wing'},
 {'player_id': 'ID8475167',
  'player_name': 'Victor Hedman',
  'player_pos': 'Defenseman'},
 {'player_id': 'ID8479525',
  'player_name': 'Ross Colton',
  'player_pos': 'Center'},
 {'player_id': 'ID8476883',
  'player_name': 'Andrei Vasilevskiy',
  'player_pos': 'Goalie'},
 {'player_id': 'ID8474151',
  'player_name': 'Ryan McDonagh',
  'player_pos': 'Defens

In [358]:
# Boxscore JSON
# Get the number of shots, goals and icetime by game for each skater
json_object = json.loads(boxscore_json)

# first layer = the game (number)
# second layerS

raw_list = []
player_id = list(json_object[0]['teams']['home']['players'].keys())

# Get the boxscore stats for each player
for i in player_id:
    #person = json_object[0]['teams']['home']['players'][i]['person']['fullName']
    #data["fullName"] = person
    data["pid"] = i
    data = json_object[0]['teams']['home']['players'][i]['stats']
    raw_list.append(data)

# Flatten the pandas dataframe
df= pd.json_normalize(raw_list, sep='_')

# Show first few rows
df.head(3)

TypeError: list indices must be integers or slices, not str

In [359]:
json_object[0]['teams']['home']['players'].keys()

dict_keys(['ID8480172', 'ID8478519', 'ID8478416', 'ID8474567', 'ID8470621', 'ID8473986', 'ID8476453', 'ID8475167', 'ID8479525', 'ID8476883', 'ID8474151', 'ID8477930', 'ID8479984', 'ID8474564', 'ID8479410', 'ID8474034', 'ID8476292', 'ID8478472', 'ID8478010', 'ID8479383', 'ID8479390'])

In [360]:
for key in json_object[0]['teams']['home']['players']:
    print(key)

ID8480172
ID8478519
ID8478416
ID8474567
ID8470621
ID8473986
ID8476453
ID8475167
ID8479525
ID8476883
ID8474151
ID8477930
ID8479984
ID8474564
ID8479410
ID8474034
ID8476292
ID8478472
ID8478010
ID8479383
ID8479390
