In [2]:
import pandas as pd 
import requests
import os
import json
import numpy as np

In [3]:
#pull data from NHL API
#NOTE(review): confirm this endpoint is still reachable before relying on a fresh re-run
game_url = 'https://statsapi.web.nhl.com/api/v1/game/2023010071/feed/live'
#a timeout stops the cell from hanging forever on a stalled connection
response = requests.get(game_url, timeout=30)
#fail here on an HTTP error status rather than later with a confusing JSON/KeyError
response.raise_for_status()
game_info = response.json()

In [4]:
#render the parsed feed with IPython's JSON display to inspect its structure
from IPython.display import JSON
JSON(game_info)

<IPython.core.display.JSON object>

In [5]:
#one fresh, independent list per field pulled from the json, used later to create the dataframe
(team_id, home_team, period, event_id,
 event_index, event_type, x_coor, y_coor) = ([] for _ in range(8))

In [6]:
#pull data from JSON into lists - one entry per shot event
live_game_info = game_info['liveData']['plays']['allPlays']

#only want the data for shots (shot on goal, missed shots, blocked shots, goals)
shot_event_types = ('SHOT', 'MISSED_SHOT', 'BLOCKED_SHOT', 'GOAL')
shot_plays = [play for play in live_game_info
              if play['result']['eventTypeId'] in shot_event_types]

#rebuild every list from scratch so re-running this cell never appends duplicate rows
team_id = [play['team']['id'] for play in shot_plays]
period = [play['about']['period'] for play in shot_plays]
event_index = [play['about']['eventIdx'] for play in shot_plays]
event_id = [play['about']['eventId'] for play in shot_plays]
event_type = [play['result']['eventTypeId'] for play in shot_plays]

#a play with no recorded location gives None (NaN in the dataframe) instead of raising a KeyError,
#which keeps the row count and row order unchanged
x_coor = [play.get('coordinates', {}).get('x') for play in shot_plays]
y_coor = [play.get('coordinates', {}).get('y') for play in shot_plays]

In [7]:
#create dataframe - one row per shot event, built from the parallel lists
shot_columns = ['team_id', 'period', 'event_index', 'event_id', 'event_type', 'x_coor', 'y_coor']
shot_rows = list(zip(team_id, period, event_index, event_id, event_type, x_coor, y_coor))
game_data = pd.DataFrame(shot_rows, columns=shot_columns)

In [8]:
#change team for opponent-blocked shots as the team is recorded as the team that blocked the shot,
#not the one who shot the puck
montreal_id = 8
toronto_id = 10

#start from the team ids as originally pulled from the feed (the team_id list) rather than from the
#current column, so re-running this cell does not switch the blocked shots back again
#(a re-run does reset the manual row corrections made in the next cell, so re-run that cell too)
recorded_team = pd.Series(team_id, index=game_data.index)
is_blocked = game_data['event_type'] == 'BLOCKED_SHOT'

#list of conditions
conditions = [
    is_blocked & (recorded_team == montreal_id),
    is_blocked & (recorded_team == toronto_id),
]

#list of values to return - the opposing team, i.e. the one that took the shot
value = [toronto_id, montreal_id]

#overwrite team_id; rows matching no condition keep their recorded team
game_data['team_id'] = np.select(conditions, value, recorded_team)

In [10]:
#manual corrections to team_id, keyed by dataframe row label
manual_team_fixes = {
    #the 3 teammate blocks - they are now on the opposite team because all blocks were just switched
    #(teammate blocks were determined using the NHL's play-by-play info)
    26: 8,
    32: 8,
    39: 10,
    #play-by-play had this shot as MTL shooting blocked by TOR - it was actually TOR shooting blocked by MTL
    74: 10,
}

for row_label, corrected_team in manual_team_fixes.items():
    game_data.at[row_label, 'team_id'] = corrected_team

In [12]:
#add home team info to dataframe: 1 for the home team, 0 otherwise
#Toronto (10) is the home team, Montreal (8) is the away team
is_home_row = game_data['team_id'].eq(10)
game_data['home_team'] = is_home_row.astype(int)

In [13]:
#preview the first two rows to check the home_team column
game_data.head(2)

Unnamed: 0,team_id,period,event_index,event_id,event_type,x_coor,y_coor,home_team
0,8,1,4,54,SHOT,56.0,9.0,0
1,8,1,6,55,SHOT,95.0,21.0,0


In [14]:
#fresh lists for the per-period rink-side table (note: this rebinds the name `period`)
period, h_rinkside, a_rinkside = [], [], []

In [15]:
#pull data from JSON into lists - for rinksides
periods = game_info['liveData']['linescore']['periods']

#rebuild the lists from scratch so re-running this cell never appends duplicate periods
period = [period_info['num'] for period_info in periods]
#.get() leaves None where the feed gives no rinkSide for a period instead of raising a KeyError
h_rinkside = [period_info['home'].get('rinkSide') for period_info in periods]
a_rinkside = [period_info['away'].get('rinkSide') for period_info in periods]

In [16]:
#create dataframe - period location information (one row per period)
rinkside_columns = ['period', 'h_rinkside', 'a_rinkside']
rinkside_rows = list(zip(period, h_rinkside, a_rinkside))
location_info = pd.DataFrame(rinkside_rows, columns=rinkside_columns)

In [17]:
#preview the rink side recorded for the home and away team in each period
location_info.head()

Unnamed: 0,period,h_rinkside,a_rinkside
0,1,right,left
1,2,left,right
2,3,right,left
3,4,left,right


In [18]:
def team_rinkside(df, location_info=None):
    '''Add a 'rinkside' column giving the rink side of each row's team for that row's period.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'home_team' column (1 = home team, 0 = away team) and a
        'period' column. Modified in place: 'rinkside' is added or overwritten.
    location_info : pandas.DataFrame, optional
        One row per period with the columns 'period', 'h_rinkside' and 'a_rinkside'.
        When given, the sides are taken from this table. When omitted, the sides
        fall back to the fixed table for periods 1-4 that this function has
        always used (home: right, left, right, left; away: the opposite).

    Returns
    -------
    None
        Nothing is returned, so calling this as the last line of a cell shows no output.

    Notes
    -----
    Rows whose (home_team, period) combination has no known side get 'ERROR'.
    '''
    if location_info is None:
        #fixed fallback table, keyed by (home_team flag, period)
        side_by_team_period = {
            (1, 1): 'right',
            (1, 2): 'left',
            (1, 3): 'right',
            (1, 4): 'left',
            (0, 1): 'left',
            (0, 2): 'right',
            (0, 3): 'left',
            (0, 4): 'right',
        }
    else:
        side_by_team_period = {}
        per_period = zip(location_info['period'],
                         location_info['h_rinkside'],
                         location_info['a_rinkside'])
        for period_num, home_side, away_side in per_period:
            #skip missing sides so those rows fall through to 'ERROR' below
            if pd.notna(home_side):
                side_by_team_period[(1, period_num)] = home_side
            if pd.notna(away_side):
                side_by_team_period[(0, period_num)] = away_side

    #creates (or overwrites) the column in the dataframe; unknown combinations become 'ERROR'
    df['rinkside'] = [
        side_by_team_period.get((home_flag, period_num), 'ERROR')
        for home_flag, period_num in zip(df['home_team'], df['period'])
    ]

In [19]:
#adds the 'rinkside' column to game_data in place
team_rinkside(game_data)

In [20]:
#preview the first two rows to check the rinkside column
game_data.head(2)

Unnamed: 0,team_id,period,event_index,event_id,event_type,x_coor,y_coor,home_team,rinkside
0,8,1,4,54,SHOT,56.0,9.0,0,left
1,8,1,6,55,SHOT,95.0,21.0,0,left


In [22]:
#save the dataframe to csv
#game_data.to_csv('game_data.csv')