# NHL API Scraper v1.1 (Beta)

A hockey scraper for the new NHL API. The `game_id` parameter is the same one used to call the NHL API. This script tries to improve on the speed and efficiency of v1.0, which was largely smashed together just to get something to work. The `hockey_scraper` library now works again and is currently faster than this scraper, but I may as well keep using this scraper since I can get the exact data that I want from it. 

### Implemented Functions

- `get_away_roster(game_id)`: returns a dictionary with player names and IDs of the away roster.
- `get_home_roster(game_id)`: returns a dictionary with player names and IDs of the home roster.
- `get_game_roster(game_id)`: returns a dictionary with player names and IDs of both teams.

- `get_away_positions(game_id)`: returns a dictionary with player IDs and player positions of the away roster.
- `get_home_positions(game_id)`: returns a dictionary with player IDs and player positions of the home roster.
- `get_game_positions(game_id)`: returns a dictionary with player IDs and player positions of both teams.

- `get_play_by_play(game_id)`: returns a pandas DataFrame of the play-by-play. 
- `get_multi_play_by_play([list of game ids])`: returns a pandas DataFrame of the play-by-play of all the listed games. 


### Other Functions to Implement

- ?

<br>

### Methods of Improvement

- Faster for-loop methods (use 'for item in list' rather than 'for i in range(len(list))')
- Cutting out unnecessary loop run throughs
- Differentiation of strength state into how many skaters on ice

<br>

In [1]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Example game id; the commented-out get_play_by_play test call further down passes this variable.
game_id = 2023020172

## Player Names and IDs Dictionary

In [3]:
def get_away_roster(game_id):
    """Return the away team's roster for one game as ``{player_id: name}``.

    Downloads the game's play-by-play payload and keeps the ``rosterSpots``
    entries whose ``teamId`` equals the ``awayTeam`` id. Names are built as
    ``"FIRST LAST"`` in upper case.

    Parameters
    ----------
    game_id
        Game identifier inserted into the play-by-play URL (e.g. 2023020172).

    Returns
    -------
    dict or None
        Player id -> upper-case full name. ``None`` (after printing a message)
        when the payload cannot be fetched, is not JSON, or has no away team.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)

    try:
        # The timeout stops a stalled connection from hanging the notebook;
        # raise_for_status turns HTTP error responses into exceptions.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pbp_data = response.json()
        away_team_id = pbp_data['awayTeam']['id']
        roster_spots = pbp_data.get('rosterSpots') or []
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print('URL does not exist for Game_Id {}'.format(game_id))
        return None

    return {
        spot.get('playerId'): ' '.join([spot['firstName']['default'],
                                        spot['lastName']['default']]).upper()
        for spot in roster_spots
        if spot.get('teamId') == away_team_id
    }

In [4]:
# Testing
# get_away_roster(2023020200) 

In [5]:
def get_home_roster(game_id):
    """Return the home team's roster for one game as ``{player_id: name}``.

    Downloads the game's play-by-play payload and keeps the ``rosterSpots``
    entries whose ``teamId`` equals the ``homeTeam`` id. Names are built as
    ``"FIRST LAST"`` in upper case.

    Parameters
    ----------
    game_id
        Game identifier inserted into the play-by-play URL (e.g. 2023020172).

    Returns
    -------
    dict or None
        Player id -> upper-case full name. ``None`` (after printing a message)
        when the payload cannot be fetched, is not JSON, or has no home team.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)

    try:
        # The timeout stops a stalled connection from hanging the notebook;
        # raise_for_status turns HTTP error responses into exceptions.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pbp_data = response.json()
        home_team_id = pbp_data['homeTeam']['id']
        roster_spots = pbp_data.get('rosterSpots') or []
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print('URL does not exist for Game_Id {}'.format(game_id))
        return None

    return {
        spot.get('playerId'): ' '.join([spot['firstName']['default'],
                                        spot['lastName']['default']]).upper()
        for spot in roster_spots
        if spot.get('teamId') == home_team_id
    }

In [6]:
# Testing
# get_home_roster(2023020001)

In [7]:
def get_game_roster(game_id):
    """Return both teams' rosters for one game as ``{player_id: name}``.

    Downloads the game's play-by-play payload and maps every ``rosterSpots``
    entry to its ``"FIRST LAST"`` name in upper case.

    Parameters
    ----------
    game_id
        Game identifier inserted into the play-by-play URL (e.g. 2023020172).

    Returns
    -------
    dict or None
        Player id -> upper-case full name. ``None`` (after printing a message)
        when the payload cannot be fetched or is not JSON. The failure path
        previously fell through to a ``return`` of a variable that had never
        been assigned and raised ``UnboundLocalError``.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)

    try:
        # The timeout stops a stalled connection from hanging the notebook;
        # raise_for_status turns HTTP error responses into exceptions.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        roster_spots = response.json().get('rosterSpots') or []
    except (requests.exceptions.RequestException, ValueError, AttributeError):
        print('URL does not exist for Game_Id {}'.format(game_id))
        return None

    return {
        spot.get('playerId'): ' '.join([spot['firstName']['default'],
                                        spot['lastName']['default']]).upper()
        for spot in roster_spots
    }

In [8]:
# Testing
# get_game_roster(2023020001)

<br>

## Getting Player Positions

For the purposes of my model, skaters are classified as either forwards or defencemen with no differentiation between centers or wingers. 

In [9]:
def get_away_positions(game_id):
    """Return the away team's positions for one game as ``{player_id: code}``.

    Downloads the game's play-by-play payload and keeps the ``rosterSpots``
    entries whose ``teamId`` equals the ``awayTeam`` id. Position codes
    ``'C'``, ``'R'`` and ``'L'`` are collapsed to ``'F'``; any other code is
    kept as reported.

    Parameters
    ----------
    game_id
        Game identifier inserted into the play-by-play URL (e.g. 2023020172).

    Returns
    -------
    dict or None
        Player id -> position code. ``None`` (after printing a message) when
        the payload cannot be fetched, is not JSON, or has no away team.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)
    forwards = ('C', 'R', 'L')

    try:
        # The timeout stops a stalled connection from hanging the notebook;
        # raise_for_status turns HTTP error responses into exceptions.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pbp_data = response.json()
        away_team_id = pbp_data['awayTeam']['id']  # looked up once, not per roster spot
        roster_spots = pbp_data.get('rosterSpots') or []
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print('URL does not exist for Game_Id {}'.format(game_id))
        return None

    return {
        spot.get('playerId'): 'F' if spot.get('positionCode') in forwards else spot.get('positionCode')
        for spot in roster_spots
        if spot.get('teamId') == away_team_id
    }

In [10]:
# Testing
# get_away_positions(2023020001)

In [11]:
def get_home_positions(game_id):
    """Return the home team's positions for one game as ``{player_id: code}``.

    Downloads the game's play-by-play payload and keeps the ``rosterSpots``
    entries whose ``teamId`` equals the ``homeTeam`` id. Position codes
    ``'C'``, ``'R'`` and ``'L'`` are collapsed to ``'F'``; any other code is
    kept as reported.

    Parameters
    ----------
    game_id
        Game identifier inserted into the play-by-play URL (e.g. 2023020172).

    Returns
    -------
    dict or None
        Player id -> position code. ``None`` (after printing a message) when
        the payload cannot be fetched, is not JSON, or has no home team.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)
    forwards = ('C', 'R', 'L')

    try:
        # The timeout stops a stalled connection from hanging the notebook;
        # raise_for_status turns HTTP error responses into exceptions.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pbp_data = response.json()
        home_team_id = pbp_data['homeTeam']['id']  # looked up once, not per roster spot
        roster_spots = pbp_data.get('rosterSpots') or []
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print('URL does not exist for Game_Id {}'.format(game_id))
        return None

    return {
        spot.get('playerId'): 'F' if spot.get('positionCode') in forwards else spot.get('positionCode')
        for spot in roster_spots
        if spot.get('teamId') == home_team_id
    }

In [12]:
# Testing
# get_home_positions(2023020001)

In [13]:
def get_game_positions(game_id):
    """Return both teams' positions for one game as ``{player_id: code}``.

    Downloads the game's play-by-play payload and maps every ``rosterSpots``
    entry to its position. Position codes ``'C'``, ``'R'`` and ``'L'`` are
    collapsed to ``'F'``; any other code is kept as reported.

    Parameters
    ----------
    game_id
        Game identifier inserted into the play-by-play URL (e.g. 2023020172).

    Returns
    -------
    dict or None
        Player id -> position code. ``None`` (after printing a message) when
        the payload cannot be fetched or is not JSON.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)
    forwards = ('C', 'R', 'L')

    try:
        # The timeout stops a stalled connection from hanging the notebook;
        # raise_for_status turns HTTP error responses into exceptions.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        roster_spots = response.json().get('rosterSpots') or []
    except (requests.exceptions.RequestException, ValueError, AttributeError):
        print('URL does not exist for Game_Id {}'.format(game_id))
        return None

    return {
        spot.get('playerId'): 'F' if spot.get('positionCode') in forwards else spot.get('positionCode')
        for spot in roster_spots
    }

In [14]:
# Testing
# get_game_positions(2023020001)

<br>

### Getting Goalies (Helper Function)

In [15]:
def get_goalies_id(game_id):
    """List the ids of the players whose position code is ``'G'`` in a game.

    The positions come from ``get_game_positions(game_id)``.

    Parameters
    ----------
    game_id
        Game identifier forwarded to ``get_game_positions``.

    Returns
    -------
    list or None
        Player ids mapped to ``'G'``. ``None`` (after printing a message) when
        the list cannot be built, e.g. because the position lookup did not
        return a mapping.
    """
    positions_by_player = get_game_positions(game_id)

    try:
        goalie_ids = [player_id
                      for player_id, code in positions_by_player.items()
                      if code == 'G']
    except Exception:
        print('Unable to get goalie IDs for Game_Id {}'.format(game_id))
    else:
        return goalie_ids

In [16]:
# Tester
# get_goalies_id(2023020105)

<br>

### Known Event typeCodes

In [17]:
# Numeric play `typeCode` values mapped to the event label used in the 'Event' column.
typeCodes = {
    502: 'FACEOFF',
    503: 'HIT',
    504: 'GIVEAWAY',
    505: 'GOAL',
    506: 'SHOT_ON_GOAL',
    507: 'MISSED_SHOT',
    508: 'BLOCKED_SHOT',
    509: 'PENALTY',
    516: 'STOPPAGE',
    520: 'PERIOD_START',
    521: 'PERIOD_END',
    523: 'SHOOTOUT_COMPLETE',
    524: 'GAME_END',
    525: 'TAKEAWAY',
    535: 'DELAYED_PENALTY',
    537: 'FAILED_SHOT_ATTEMPT',
}

# Codes treated as shot events: GOAL, SHOT_ON_GOAL, MISSED_SHOT, BLOCKED_SHOT.
shotCodes = list(range(505, 509))

<br>

## Play-By-Play DataFrame

This combines the play-by-play and shift data. Most of the chosen columns were inspired by Harry Shomer's public hockey scraper that I had used previously. 

In [18]:
def parse_event(play_dict):
    """Flatten one play of the play-by-play payload into a row dictionary.

    Every row carries ``Period``, ``Event_tc``, ``Event``, ``Time_Elapsed``,
    ``Strength`` and ``sort_order``. When the play has a ``details`` mapping,
    the row may also get ``Ev_Zone``, ``xC``/``yC``, ``Ev_Team`` (the
    ``eventOwnerTeamId``), ``p1_ID``/``p2_ID``/``p3_ID``, ``Type`` and
    ``Home_Score``/``Away_Score``, depending on the event code. Keys that do
    not apply to a play are left out of the row.

    Fixes relative to the previous version:

    * ``Time_Elapsed`` is read from ``timeInPeriod`` (it used to hold
      ``timeRemaining``), matching what ``get_play_by_play`` stores there.
    * The shot type is looked up under ``shotType``; the old guard tested a
      ``type`` key, so ``Type`` was never filled for shots.
    * ``Ev_Team`` is set whenever the owner team id is present, so goals
      (which carry ``scoringPlayerId`` only) are no longer left without it.

    Parameters
    ----------
    play_dict : dict
        One element of the payload's ``plays`` list. Must contain ``period``,
        ``typeCode``, ``typeDescKey``, ``timeInPeriod`` and ``sortOrder``.

    Returns
    -------
    dict
        The flattened row.

    Raises
    ------
    KeyError
        If one of the required top-level keys is missing.
    """
    event_code = play_dict['typeCode']
    event_dict = {
        'Period': play_dict['period'],
        'Event_tc': event_code,
        'Event': play_dict['typeDescKey'].upper(),
        'Time_Elapsed': play_dict['timeInPeriod'],
        'Strength': play_dict.get('situationCode'),
        'sort_order': play_dict['sortOrder'],
    }

    details = play_dict.get('details')
    if not details:
        return event_dict

    if 'zoneCode' in details:
        event_dict['Ev_Zone'] = details['zoneCode']
    if 'xCoord' in details:
        event_dict['xC'] = details['xCoord']
        event_dict['yC'] = details.get('yCoord')

    # Event code -> (row slot, details key) pairs. When two keys feed the same
    # slot, the later one wins, as in the original if-chain: shooter over
    # scorer, blocker over first assist.
    shot_fields = (('p1_ID', 'scoringPlayerId'), ('p1_ID', 'shootingPlayerId'),
                   ('p2_ID', 'assist1PlayerId'), ('p3_ID', 'assist2PlayerId'),
                   ('p2_ID', 'blockingPlayerId'))
    player_fields = {
        502: (('p1_ID', 'winningPlayerId'), ('p2_ID', 'losingPlayerId')),        # faceoffs
        503: (('p1_ID', 'hittingPlayerId'), ('p2_ID', 'hitteePlayerId')),        # hits
        504: (('p1_ID', 'playerId'),),                                           # giveaways
        505: shot_fields,                                                        # goals
        506: shot_fields,                                                        # shots on goal
        507: shot_fields,                                                        # missed shots
        508: shot_fields,                                                        # blocked shots
        509: (('p1_ID', 'committedByPlayerId'), ('p2_ID', 'drawnByPlayerId')),   # penalties
        525: (('p1_ID', 'playerId'),),                                           # takeaways
        535: (),                                                                 # delayed penalties: team only
    }

    if event_code in player_fields:
        picked = {slot: details[key]
                  for slot, key in player_fields[event_code] if key in details}
        if 'p1_ID' in picked:
            event_dict['p1_ID'] = picked['p1_ID']
        if 'eventOwnerTeamId' in details:
            event_dict['Ev_Team'] = details['eventOwnerTeamId']
        for slot in ('p2_ID', 'p3_ID'):
            if slot in picked:
                event_dict[slot] = picked[slot]

    if event_code in (505, 506, 507, 508):
        if details.get('shotType') is not None:
            event_dict['Type'] = details['shotType'].upper()
        if 'homeScore' in details:
            event_dict['Home_Score'] = details['homeScore']
            event_dict['Away_Score'] = details.get('awayScore')
    elif event_code == 509:
        severity, infraction = details.get('typeCode'), details.get('descKey')
        if severity is not None and infraction is not None:
            event_dict['Type'] = severity + ' for ' + infraction.upper()

    return event_dict

In [19]:
def get_pbp_improvement_beta(game_id):
    """Build a play-by-play DataFrame for one game using ``parse_event`` (beta).

    Steps:

    1. Download the play-by-play payload and turn every play into a row with
       ``parse_event``.
    2. Add the team abbreviations, ``Game_Id``, ``Date`` and any column of the
       standard layout that ``parse_event`` did not produce.
    3. Carry the score forward: the first row starts at 0-0 and every later
       non-goal row (``Event_tc`` != 505) copies the previous row's score.
    4. Download the shift chart and, for each play, list up to six on-ice
       players per team: a player is on the ice when the shift's period equals
       the play's period and ``startTime <= Time_Elapsed < endTime``.
    5. Record an on-ice player whose roster position code is ``'G'`` as that
       team's goalie for the play.

    Changes relative to the previous version:

    * Score carry-forward and goalie detection ran inside the shift x play
      loop. They now run once, so scores are filled even without shift data
      and goalies are read from the final on-ice lists.
    * ``Game_Id`` and ``Date`` are filled in.
    * The roster and goalie ids come from the payload already downloaded
      instead of two further requests for the same URL.

    Parameters
    ----------
    game_id
        Game identifier inserted into both request URLs (e.g. 2023020172).

    Returns
    -------
    pandas.DataFrame or None
        One row per play; ``None`` (after printing a message) when the
        play-by-play payload cannot be fetched or lacks the expected keys.

    Raises
    ------
    requests.exceptions.RequestException, ValueError
        If the shift chart request fails or its body is not JSON.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pbp_data = response.json()
        plays = pbp_data['plays']
        away_team, home_team = pbp_data['awayTeam'], pbp_data['homeTeam']
        awayTeam_Id, homeTeam_Id = away_team['id'], home_team['id']
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print('Unable to get play-by-play for Game_Id {}'.format(game_id))
        return None

    pbp_df = pd.DataFrame([parse_event(play) for play in plays])

    # Roster names and goalie ids are read from the payload we already hold.
    roster_spots = pbp_data.get('rosterSpots') or []
    game_roster = {
        spot.get('playerId'): ' '.join([spot['firstName']['default'],
                                        spot['lastName']['default']]).upper()
        for spot in roster_spots
    }
    goalie_ids = {spot.get('playerId') for spot in roster_spots
                  if spot.get('positionCode') == 'G'}

    cols = ['Game_Id','Date','Period','Event_tc','Event','Time_Elapsed','Strength','Ev_Zone','Type','Ev_Team','Away_Team',
            'Home_Team','p1_name','p1_ID','p2_name','p2_ID','p3_name','p3_ID','awayPlayer1','awayPlayer1_id','awayPlayer2',
            'awayPlayer2_id','awayPlayer3','awayPlayer3_id','awayPlayer4','awayPlayer4_id','awayPlayer5','awayPlayer5_id',
            'awayPlayer6','awayPlayer6_id','homePlayer1','homePlayer1_id','homePlayer2','homePlayer2_id','homePlayer3',
            'homePlayer3_id','homePlayer4','homePlayer4_id','homePlayer5','homePlayer5_id','homePlayer6','homePlayer6_id',
            'Away_Score','Home_Score','Away_Goalie','Away_Goalie_Id','Home_Goalie','Home_Goalie_Id','xC','yC']

    pbp_df['Away_Team'] = away_team.get('abbrev')
    pbp_df['Home_Team'] = home_team.get('abbrev')
    for col in cols:
        if col not in pbp_df.columns:
            pbp_df[col] = None
    pbp_df['Game_Id'] = pbp_data.get('id')
    pbp_df['Date'] = pbp_data.get('gameDate')

    # Carry the score forward once; goal rows keep the score parse_event read.
    for j in range(len(pbp_df)):
        if j == 0:
            pbp_df.at[j, 'Away_Score'] = 0
            pbp_df.at[j, 'Home_Score'] = 0
        elif pbp_df.at[j, 'Event_tc'] != 505:
            pbp_df.at[j, 'Away_Score'] = pbp_df.at[j - 1, 'Away_Score']
            pbp_df.at[j, 'Home_Score'] = pbp_df.at[j - 1, 'Home_Score']

    # Adding Shift Data
    shifts = requests.get('https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId={}'.format(game_id),
                          timeout=30)
    shift_rows = shifts.json().get('data') or []

    homePlayerList = ['homePlayer1_id','homePlayer2_id','homePlayer3_id','homePlayer4_id','homePlayer5_id','homePlayer6_id']
    homePlayerList_names = ['homePlayer1','homePlayer2','homePlayer3','homePlayer4','homePlayer5','homePlayer6']
    awayPlayerList = ['awayPlayer1_id','awayPlayer2_id','awayPlayer3_id','awayPlayer4_id','awayPlayer5_id','awayPlayer6_id']
    awayPlayerList_names = ['awayPlayer1','awayPlayer2','awayPlayer3','awayPlayer4','awayPlayer5','awayPlayer6']

    # Parse each play's period and clock once instead of once per shift.
    play_periods = list(pbp_df['Period'])
    play_times = [datetime.strptime(clock, '%M:%S') for clock in pbp_df['Time_Elapsed']]

    # team id -> one list of on-ice player ids per play, in shift-chart order.
    on_ice = {awayTeam_Id: [[] for _ in play_times],
              homeTeam_Id: [[] for _ in play_times]}

    for shift in shift_rows:
        team_id, player_id = shift.get('teamId'), shift.get('playerId')
        start_text, end_text = shift.get('startTime'), shift.get('endTime')
        if team_id not in on_ice or player_id is None or not start_text or not end_text:
            continue  # not one of the two teams, or an incomplete shift row

        period = shift.get('period')
        shift_start = datetime.strptime(start_text, '%M:%S')
        shift_end = datetime.strptime(end_text, '%M:%S')

        for j, (play_period, play_time) in enumerate(zip(play_periods, play_times)):
            if play_period == period and shift_start <= play_time < shift_end:
                skaters = on_ice[team_id][j]
                # Only six slots exist per team; extra players are dropped.
                if player_id not in skaters and len(skaters) < 6:
                    skaters.append(player_id)

    team_columns = (
        (awayTeam_Id, awayPlayerList, awayPlayerList_names, 'Away_Goalie_Id', 'Away_Goalie'),
        (homeTeam_Id, homePlayerList, homePlayerList_names, 'Home_Goalie_Id', 'Home_Goalie'),
    )
    for team_id, id_cols, name_cols, goalie_id_col, goalie_col in team_columns:
        for j, skaters in enumerate(on_ice[team_id]):
            for k, player_id in enumerate(skaters):
                pbp_df.at[j, id_cols[k]] = player_id
                pbp_df.at[j, name_cols[k]] = game_roster.get(player_id)
                if player_id in goalie_ids:
                    pbp_df.at[j, goalie_id_col] = player_id
                    pbp_df.at[j, goalie_col] = game_roster.get(player_id)

    return pbp_df

<br>

### Best Current Scraper

In [20]:
def get_play_by_play(game_id):
    """Build the play-by-play DataFrame for one game, including on-ice players.

    Steps:

    1. Download the play-by-play payload and create one row per play with the
       fixed column layout listed in ``cols``.
    2. Fill the event columns from each play: period, type code and label,
       ``timeInPeriod`` (as ``Time_Elapsed``), situation code, coordinates,
       zone, event team abbreviation, the players involved, the goalie in net
       for shot events, and a running score that changes on goals (505).
    3. Download the shift chart and, for each play, list up to six on-ice
       players per team: a player is on the ice when the shift's period equals
       the play's period and ``startTime <= Time_Elapsed < endTime``.

    Changes relative to the previous version:

    * Player names come from the payload already downloaded; the extra
      requests made through ``get_game_roster`` and ``get_goalies_id`` (whose
      result was never used) are gone.
    * Plays without ``details``, penalties without ``typeCode``/``descKey``
      and stoppages without ``reason`` no longer raise.
    * ``Ev_Team`` is left empty when the owner team id matches neither team
      (it used to default to the home team).
    * Shift matching works on pre-parsed times held in lists instead of
      re-reading and re-parsing DataFrame cells for every shift/play pair.

    Parameters
    ----------
    game_id
        Game identifier inserted into both request URLs (e.g. 2023020172).

    Returns
    -------
    pandas.DataFrame or None
        One row per play; ``None`` (after printing a message) when the
        play-by-play payload cannot be fetched or lacks the team entries.

    Raises
    ------
    requests.exceptions.RequestException, ValueError
        If the shift chart request fails or its body is not JSON.
    """
    url = 'https://api-web.nhle.com/v1/gamecenter/{}/play-by-play'.format(game_id)

    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        pbp_data = response.json()
        away_team, home_team = pbp_data['awayTeam'], pbp_data['homeTeam']
        awayTeam_Id, homeTeam_Id = away_team['id'], home_team['id']
        plays = pbp_data.get('plays') or []
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print('Unable to get play-by-play for Game_Id {}'.format(game_id))
        return None

    print('Scraping Game Id',game_id) # Print status message

    game_roster = {
        spot.get('playerId'): ' '.join([spot['firstName']['default'],
                                        spot['lastName']['default']]).upper()
        for spot in (pbp_data.get('rosterSpots') or [])
    }
    team_abbrevs = {awayTeam_Id: away_team.get('abbrev'), homeTeam_Id: home_team.get('abbrev')}

    cols = ['Game_Id','Date','Period','Event_tc','Event','Time_Elapsed','Strength','Ev_Zone','Type','Ev_Team','Away_Team',
            'Home_Team','p1_name','p1_ID','p2_name','p2_ID','p3_name','p3_ID','awayPlayer1','awayPlayer1_id','awayPlayer2',
            'awayPlayer2_id','awayPlayer3','awayPlayer3_id','awayPlayer4','awayPlayer4_id','awayPlayer5','awayPlayer5_id',
            'awayPlayer6','awayPlayer6_id','homePlayer1','homePlayer1_id','homePlayer2','homePlayer2_id','homePlayer3',
            'homePlayer3_id','homePlayer4','homePlayer4_id','homePlayer5','homePlayer5_id','homePlayer6','homePlayer6_id',
            'Away_Score','Home_Score','Away_Goalie','Away_Goalie_Id','Home_Goalie','Home_Goalie_Id','xC','yC']

    pbp_df = pd.DataFrame(index=np.arange(len(plays)),columns=cols)

    pbp_df['Game_Id'] = pbp_data.get('id')
    pbp_df['Date'] = pbp_data.get('gameDate')
    pbp_df['Away_Team'] = away_team.get('abbrev')
    pbp_df['Home_Team'] = home_team.get('abbrev')

    # Events that carry coordinates, a zone and an owning team.
    located_codes = (502, 503, 504, 505, 506, 507, 508, 509, 525, 537)
    # Event code -> details keys that fill p1, p2, p3 (in that order).
    player_keys = {
        502: ('winningPlayerId', 'losingPlayerId'),                          # faceoff
        503: ('hittingPlayerId', 'hitteePlayerId'),                          # hit
        504: ('playerId',),                                                  # giveaway
        505: ('scoringPlayerId', 'assist1PlayerId', 'assist2PlayerId'),      # goal
        506: ('shootingPlayerId',),                                          # shot on goal
        507: ('shootingPlayerId',),                                          # missed shot
        508: ('blockingPlayerId', 'shootingPlayerId'),                       # blocked shot
        509: ('committedByPlayerId', 'drawnByPlayerId'),                     # penalty
        525: ('playerId',),                                                  # takeaway
        537: ('shootingPlayerId',),                                          # failed shot attempt
    }

    play_periods, play_times = [], []
    away_score = home_score = 0

    for i, this_play in enumerate(plays):
        event_code = this_play.get('typeCode')
        details = this_play.get('details') or {}
        time_in_period = this_play.get('timeInPeriod')

        pbp_df.at[i,'Period'] = this_play.get('period')
        pbp_df.at[i,'Event_tc'] = event_code
        pbp_df.at[i,'Event'] = typeCodes.get(event_code)
        pbp_df.at[i,'Time_Elapsed'] = time_in_period
        pbp_df.at[i,'Strength'] = str(this_play.get('situationCode'))

        # Kept for the shift matching below; plays without a clock never match.
        play_periods.append(this_play.get('period'))
        play_times.append(datetime.strptime(time_in_period, '%M:%S') if time_in_period else None)

        # Running score: starts at 0-0 and is replaced on every goal.
        if event_code == 505:
            away_score, home_score = details.get('awayScore'), details.get('homeScore')
        pbp_df.at[i,'Away_Score'] = away_score
        pbp_df.at[i,'Home_Score'] = home_score

        if event_code in located_codes:
            pbp_df.at[i,'xC'] = details.get('xCoord')
            pbp_df.at[i,'yC'] = details.get('yCoord')
            pbp_df.at[i,'Ev_Zone'] = details.get('zoneCode')
            pbp_df.at[i,'Ev_Team'] = team_abbrevs.get(details.get('eventOwnerTeamId'))

        for slot, key in zip(('p1', 'p2', 'p3'), player_keys.get(event_code, ())):
            player_id = details.get(key)
            pbp_df.at[i, slot + '_ID'] = player_id
            pbp_df.at[i, slot + '_name'] = game_roster.get(player_id)

        if event_code in shotCodes:
            # The goalie in net belongs to the team that is NOT shooting.
            goalie_id = details.get('goalieInNetId')
            shooting_team = details.get('eventOwnerTeamId')
            if shooting_team == awayTeam_Id:
                pbp_df.at[i,'Home_Goalie_Id'] = goalie_id
                pbp_df.at[i,'Home_Goalie'] = game_roster.get(goalie_id)
            elif shooting_team == homeTeam_Id:
                pbp_df.at[i,'Away_Goalie_Id'] = goalie_id
                pbp_df.at[i,'Away_Goalie'] = game_roster.get(goalie_id)

        if event_code in (505, 506, 507, 537):
            if details.get('shotType') is not None:
                pbp_df.at[i,'Type'] = details.get('shotType').upper()
        elif event_code == 509:
            severity, infraction = details.get('typeCode'), details.get('descKey')
            if severity is not None and infraction is not None:
                pbp_df.at[i,'Type'] = severity + ' for ' + infraction.upper()
        elif event_code == 516:
            if details.get('reason') is not None:
                pbp_df.at[i,'Type'] = details.get('reason').upper()

    # Adding Shift Data
    shifts = requests.get('https://api.nhle.com/stats/rest/en/shiftcharts?cayenneExp=gameId={}'.format(game_id),
                          timeout=30)
    shift_rows = shifts.json().get('data') or []

    homePlayerList = ['homePlayer1_id','homePlayer2_id','homePlayer3_id','homePlayer4_id','homePlayer5_id','homePlayer6_id']
    homePlayerList_names = ['homePlayer1','homePlayer2','homePlayer3','homePlayer4','homePlayer5','homePlayer6']
    awayPlayerList = ['awayPlayer1_id','awayPlayer2_id','awayPlayer3_id','awayPlayer4_id','awayPlayer5_id','awayPlayer6_id']
    awayPlayerList_names = ['awayPlayer1','awayPlayer2','awayPlayer3','awayPlayer4','awayPlayer5','awayPlayer6']

    # team id -> one list of on-ice player ids per play, in shift-chart order.
    on_ice = {awayTeam_Id: [[] for _ in plays],
              homeTeam_Id: [[] for _ in plays]}

    for shift in shift_rows:
        team_id, player_id = shift.get('teamId'), shift.get('playerId')
        start_text, end_text = shift.get('startTime'), shift.get('endTime')
        if team_id not in on_ice or player_id is None or not start_text or not end_text:
            continue  # not one of the two teams, or an incomplete shift row

        period = shift.get('period')
        shift_start = datetime.strptime(start_text, '%M:%S')
        shift_end = datetime.strptime(end_text, '%M:%S')

        for j, (play_period, play_time) in enumerate(zip(play_periods, play_times)):
            if play_time is None or play_period != period:
                continue
            if shift_start <= play_time < shift_end:
                skaters = on_ice[team_id][j]
                # Only six slots exist per team; extra players are dropped.
                if player_id not in skaters and len(skaters) < 6:
                    skaters.append(player_id)

    team_columns = (
        (awayTeam_Id, awayPlayerList, awayPlayerList_names),
        (homeTeam_Id, homePlayerList, homePlayerList_names),
    )
    for team_id, id_cols, name_cols in team_columns:
        for j, skaters in enumerate(on_ice[team_id]):
            for k, player_id in enumerate(skaters):
                pbp_df.at[j, id_cols[k]] = player_id
                pbp_df.at[j, name_cols[k]] = game_roster.get(player_id)

    return pbp_df

In [21]:
# %%time

# # Testing
# get_play_by_play(game_id)

<br>

## Getting Play-By-Play for Multiple Games

In [22]:
def get_multi_play_by_play(range_of_ids):
    """Scrape several games and stack their play-by-play rows.

    Each id is passed to ``get_play_by_play``. Games for which that call
    returns ``None`` are skipped, and the remaining frames are concatenated
    row-wise in the order given. Each game keeps its own row index.

    Parameters
    ----------
    range_of_ids : iterable
        Game identifiers to scrape.

    Returns
    -------
    pandas.DataFrame
        All scraped plays. An empty DataFrame when no ids are given or no game
        could be scraped (an empty list used to raise ``IndexError``).
    """
    frames = []
    for game_id in range_of_ids:
        frame = get_play_by_play(game_id)
        if frame is not None:
            frames.append(frame)

    if not frames:
        return pd.DataFrame()

    # One concat at the end avoids re-copying the accumulated rows per game.
    return pd.concat(frames, axis=0)

In [23]:
# Testing
# get_multi_play_by_play(game_ids)

<br>