In [6]:
import pandas as pd
import json
import os
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests

In [7]:
def cal_game_seconds(period_time, period):
    """Convert an in-period clock reading to seconds elapsed since the game started.

    Args:
        period_time: Clock string whose first two ':'-separated fields are
            minutes and seconds (e.g. "03:32").
        period: 1-based period number; every earlier period is counted as
            1200 seconds.

    Returns:
        int: minutes * 60 + seconds + 1200 * (period - 1).
    """
    fields = period_time.split(':')
    minutes, seconds = int(fields[0]), int(fields[1])
    elapsed_in_earlier_periods = (period - 1) * 1200
    return minutes * 60 + seconds + elapsed_in_earlier_periods

def find_oppenent_net(zoneCode, xCoord):
    """Infer the x-position (+89 or -89) of the net being shot at.

    Args:
        zoneCode: "O" or "D"; any other value yields None.
        xCoord: x coordinate of the shot.

    Returns:
        For "O": 89 when xCoord is positive, otherwise -89 (net on the same
        side as the shot). For "D": 89 when xCoord is negative, otherwise -89
        (net on the opposite side). None for every other zone code.
    """
    # NOTE(review): 89 is presumably the goal-line x in rink coordinates — confirm.
    if zoneCode == "O":
        net_on_positive_side = xCoord > 0
    elif zoneCode == "D":
        net_on_positive_side = xCoord < 0
    else:
        return None
    return 89 if net_on_positive_side else -89

def is_empty_net(situation_code, is_home_team_shot):
    """Return 1 when the digit for the defending side is 0, else 1 minus that digit.

    Args:
        situation_code: Indexable string of digits; position 0 is read for a
            home-team shot and position 3 for an away-team shot.
        is_home_team_shot: Truthy when the shooting team is the home team.

    Returns:
        int: 1 - int(digit) for the selected position.
    """
    # NOTE(review): presumably position 0 is the away goalie flag and position 3
    # the home goalie flag (1 = goalie in net) — confirm against the API docs.
    defending_goalie_pos = 0 if is_home_team_shot else 3
    return 1 - int(situation_code[defending_goalie_pos])

In [8]:
# Empty module-level DataFrame. NOTE(review): nothing in the cells visible here
# reads `df` — confirm a later cell uses it, otherwise it can be dropped.
df = pd.DataFrame()

def process_raw(game_id):
    """Download one game's play-by-play feed and extract its shots and goals.

    Args:
        game_id: Game identifier; interpolated into the gamecenter
            play-by-play URL.

    Returns:
        list[dict]: One record per play whose ``typeDescKey`` is
        'shot-on-goal' or 'goal', with keys ``game_id``, ``eventId``,
        ``event``, ``period``, ``period_time``, ``game_seconds``,
        ``period_remaining``, ``team``, ``x_coordinate``, ``y_coordinate``,
        ``shot_type``, ``net_x`` and ``is_empty_net``. A play that raises
        while being parsed is printed and skipped.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection problems, including the
            request exceeding the timeout.
    """
    url = f"https://api-web.nhle.com/v1/gamecenter/{game_id}/play-by-play"
    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the notebook (or a worker thread) forever.
    response = requests.get(url, timeout=30)
    # Fail here with the real HTTP status instead of a confusing JSON/KeyError
    # further down when the API returns an error page.
    response.raise_for_status()
    data = json.loads(response.text)

    result = []
    home_team = data['homeTeam']['abbrev']
    team_mapping = {
        data['homeTeam']['id']: home_team,
        data['awayTeam']['id']: data['awayTeam']['abbrev']
    }
    shot_event_types = ('shot-on-goal', 'goal')

    for event in data['plays']:
        try:
            if event['typeDescKey'] not in shot_event_types:
                continue

            shot = dict()

            shot['game_id'] = data['id']
            shot['eventId'] = event['eventId']
            shot['event'] = event['typeDescKey']
            shot['period'] = event['period']
            shot['period_time'] = event['timeInPeriod']
            shot['game_seconds'] = cal_game_seconds(shot['period_time'], shot['period'])
            shot['period_remaining'] = event['timeRemaining']

            details = event['details']
            shot['team'] = team_mapping[details.get('eventOwnerTeamId')]
            shot['x_coordinate'] = details.get('xCoord')
            shot['y_coordinate'] = details.get('yCoord')

            # shot['shooter_id'] = details.get('shootingPlayerId')
            # shot['goalie_id'] = details.get('goalieInNetId')
            shot['shot_type'] = details.get('shotType')
            shot['net_x'] = find_oppenent_net(details['zoneCode'], shot['x_coordinate'])
            shot['is_empty_net'] = is_empty_net(event['situationCode'], shot['team'] == home_team)

            result.append(shot)
        except Exception as e:
            # Best-effort parsing: report the malformed play and keep going so
            # one bad record does not discard the whole game.
            print(e)
            print("game_id", data['id'])
            print(event)

    return result

In [9]:
def engineer_feature(input_df):
    """Derive modelling features from the raw shot table.

    Works on a copy; ``input_df`` is left untouched.

    Args:
        input_df: DataFrame with at least the columns ``team``, ``period``,
            ``net_x``, ``x_coordinate``, ``y_coordinate`` and ``event``.

    Returns:
        DataFrame: the copy with ``net_x`` gaps filled by the most frequent
        ``net_x`` of the same (team, period) group, plus three new columns:
        ``net_distance`` (Euclidean distance from the shot to (net_x, 0)),
        ``shot_angle`` (degrees, carrying the sign of ``y_coordinate``) and
        ``is_goal`` (1 when ``event`` equals 'goal', else 0).
    """
    def _most_common(values):
        # Series.mode() ignores NaN, so an all-missing group has no mode.
        modes = values.mode()
        return None if modes.empty else modes[0]

    features = input_df.copy()

    group_net_x = input_df.groupby(['team', 'period'])['net_x'].transform(_most_common)
    features['net_x'] = features['net_x'].fillna(group_net_x)

    dx = features['x_coordinate'] - features['net_x']
    dy = features['y_coordinate']

    features['net_distance'] = np.sqrt(dx ** 2 + dy ** 2)
    # abs(dx) keeps the horizontal leg non-negative, so the sign of the angle
    # comes from y alone.
    features['shot_angle'] = np.degrees(np.arctan2(dy, np.abs(dx)))
    features['is_goal'] = features['event'].apply(lambda kind: int(kind == 'goal'))

    return features

In [10]:
# Game processed in this cell; process_raw interpolates this id into the
# gamecenter play-by-play URL.
game_id = '2022030411'

def extract_data(game_id):
    """Fetch the shots of one game and return them with engineered features.

    Args:
        game_id: Game identifier forwarded to ``process_raw``.

    Returns:
        DataFrame: the records from ``process_raw`` loaded into a frame and
        passed through ``engineer_feature``.
    """
    shot_records = process_raw(game_id)
    shots_raw = pd.DataFrame(shot_records)
    return engineer_feature(shots_raw)

# Last expression of the cell, so the resulting shot table is rendered as the
# cell output.
extract_data(game_id)

Unnamed: 0,game_id,eventId,event,period,period_time,game_seconds,period_remaining,team,x_coordinate,y_coordinate,shot_type,net_x,is_empty_net,net_distance,shot_angle,is_goal
0,2022030411,55,shot-on-goal,1,01:29,89,18:31,FLA,73,27,wrist,89.0,0,31.384710,59.349332,0
1,2022030411,62,shot-on-goal,1,03:32,212,16:28,FLA,36,23,slap,89.0,0,57.775427,23.459024,0
2,2022030411,64,shot-on-goal,1,03:49,229,16:11,VGK,-73,-2,wrist,-89.0,0,16.124515,-7.125016,0
3,2022030411,66,shot-on-goal,1,03:52,232,16:08,VGK,-53,-18,wrist,-89.0,0,40.249224,-26.565051,0
4,2022030411,67,shot-on-goal,1,03:57,237,16:03,VGK,-45,-39,wrist,-89.0,0,58.796258,-41.552613,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,2022030411,850,shot-on-goal,3,17:53,3473,02:07,FLA,86,-7,wrist,89.0,0,7.615773,-66.801409,0
65,2022030411,1001,goal,3,18:15,3495,01:45,VGK,-1,-17,wrist,-89.0,1,89.627005,-10.933817,1
66,2022030411,1003,shot-on-goal,3,18:29,3509,01:31,VGK,-30,-35,wrist,-89.0,0,68.600292,-30.677280,0
67,2022030411,1005,shot-on-goal,3,19:34,3574,00:26,FLA,71,-11,tip-in,89.0,0,21.095023,-31.429566,0
