In [5]:
import requests
import pandas as pd
import numpy as np
import math
from datetime import datetime, timedelta
from time import strftime
import os

In [6]:
API_URL = 'https://api-web.nhle.com/v1'
all_plays = []

In [7]:
def get_matchup_games(start_date, end_date):
    r = requests.get(url=API_URL + '/schedule/' + str(start_date))
    data = r.json()

    end_date_dt = datetime.strptime(end_date, '%Y-%m-%d')
    matchup_games = {'next_start_date': '', 'game_ids': {'id': [], 'date': []}}

    matchup_games['next_start_date'] = data['nextStartDate']

    for day in data['gameWeek']:
        for game in day['games']:
            # game_date_timestamp = game['startTimeUTC']  # Read the game's start time
            # game_date = datetime.strptime(game_date_timestamp, '%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d')
            game_date = day['date']
            # Strip the time and retain only the date this causes problems for the sweden games

            if datetime.strptime(game_date, '%Y-%m-%d').date() <= end_date_dt.date():
                matchup_games['game_ids']['id'].append(game['id'])
                matchup_games['game_ids']['date'].append(game_date)

    return matchup_games

In [8]:
def retrieve_schedule(start_date_str, end_date_str):
    all_game_ids = {'game_ids': [], 'game_dates': []}
    temp_week = get_matchup_games(start_date_str, end_date_str)

    all_game_ids['game_ids'].extend(temp_week['game_ids']['id'])
    all_game_ids['game_dates'].extend(temp_week['game_ids']['date'])

    end_date_dt = datetime.strptime(end_date_str, '%Y-%m-%d')

    while True:
        temp_next_start = datetime.strptime(temp_week['next_start_date'], '%Y-%m-%d')

        if temp_next_start <= end_date_dt:
            temp_start = temp_week['next_start_date']
            temp_week = get_matchup_games(temp_start, end_date_str)

            game_ids = temp_week['game_ids']['id']
            game_dates = temp_week['game_ids']['date']

            for game_id, game_date in zip(game_ids, game_dates):
                game_date_dt = datetime.strptime(game_date, '%Y-%m-%d').date()
                if game_date_dt <= end_date_dt.date():
                    all_game_ids['game_ids'].append(game_id)
                    all_game_ids['game_dates'].append(game_date)
                else:
                    # Instead of breaking here, move to the next week
                    break
        else:
            break

    return all_game_ids

In [9]:
today_datetime= datetime.now()
yesterday_datetime = today_datetime - timedelta(days=1, hours=6) # UTC offset
yesterday = yesterday_datetime.strftime('%Y-%m-%d')
yesterday

'2024-10-10'

In [10]:
# Creating a dictionary to store the NHL team information
def get_team_info():
    nhl_teams = {}
    # https://api.nhle.com/stats/rest/en/team
    response = requests.get("https://api.nhle.com/stats/rest/en/team", params={"Content-Type": "application/json"})
    data = response.json()

    for team in data["data"]:
        team_id = team['id']
        team_info = {
            "fullName": team["fullName"],
            "triCode": team["triCode"]
        }
        nhl_teams[team_id] = team_info
    return nhl_teams

In [11]:
team_info = get_team_info()

In [12]:
team_info

{11: {'fullName': 'Atlanta Thrashers', 'triCode': 'ATL'},
 34: {'fullName': 'Hartford Whalers', 'triCode': 'HFD'},
 32: {'fullName': 'Quebec Nordiques', 'triCode': 'QUE'},
 33: {'fullName': 'Winnipeg Jets (1979)', 'triCode': 'WIN'},
 35: {'fullName': 'Colorado Rockies', 'triCode': 'CLR'},
 36: {'fullName': 'Ottawa Senators (1917)', 'triCode': 'SEN'},
 37: {'fullName': 'Hamilton Tigers', 'triCode': 'HAM'},
 38: {'fullName': 'Pittsburgh Pirates', 'triCode': 'PIR'},
 39: {'fullName': 'Philadelphia Quakers', 'triCode': 'QUA'},
 40: {'fullName': 'Detroit Cougars', 'triCode': 'DCG'},
 41: {'fullName': 'Montreal Wanderers', 'triCode': 'MWN'},
 42: {'fullName': 'Quebec Bulldogs', 'triCode': 'QBD'},
 43: {'fullName': 'Montreal Maroons', 'triCode': 'MMR'},
 44: {'fullName': 'New York Americans', 'triCode': 'NYA'},
 45: {'fullName': 'St. Louis Eagles', 'triCode': 'SLE'},
 46: {'fullName': 'Oakland Seals', 'triCode': 'OAK'},
 47: {'fullName': 'Atlanta Flames', 'triCode': 'AFM'},
 48: {'fullName': 

In [13]:
# doesn't include international games
start_2024 = '2024-10-08'
so_far = (retrieve_schedule(start_2024, yesterday))

In [14]:
so_far

{'game_ids': [2024020003,
  2024020004,
  2024020005,
  2024020006,
  2024020007,
  2024020008,
  2024020009,
  2024020010,
  2024020011,
  2024020012,
  2024020013,
  2024020015,
  2024020016,
  2024020014,
  2024020017,
  2024020018,
  2024020019],
 'game_dates': ['2024-10-08',
  '2024-10-08',
  '2024-10-08',
  '2024-10-09',
  '2024-10-09',
  '2024-10-09',
  '2024-10-09',
  '2024-10-09',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10']}

In [15]:
def load_latest_psg_df(directory_path):
    # List all files in the specified directory
    files = os.listdir(directory_path)

    # Filter for files that match the 'psg' pattern with a date suffix
    psg_files = [f for f in files if f.startswith('psg_') and f.endswith('.csv')]

    # Extract the date from the filenames and convert to datetime objects
    dates = []
    for file in psg_files:
        # Extracting date part from the filename (assuming format 'psg_mmddyy.csv')
        date_str = file.split('_')[1].split('.')[0]
        # Parsing the date string to a datetime object
        date = datetime.strptime(date_str, '%m%d%y')
        dates.append((file, date))

    # Find the file with the latest date
    latest_file = max(dates, key=lambda x: x[1])[0]

    # Read the CSV file into a DataFrame
    psg_df = pd.read_csv(os.path.join(directory_path, latest_file))
    return psg_df

In [31]:
# https://api-web.nhle.com/v1/gamecenter/2022030415/play-by-play
def get_livedata_from_game(game_list):
    psg = {
        'id': [],
        'team': [],
        'shot': [],
        'shot_x': [],
        'shot_y': [],
        'goal': [],
        'event_type': [],
        'event_description': [],
        'event_time': [],
        'gid': []
    }
    all_plays = []
    for game in game_list['game_ids']:
        response = requests.get(
            f"{API_URL}/gamecenter/{game}/play-by-play",
            headers={"Content-Type": "application/json"}
        )
        data = response.json()
        temp_game_plays = data.get('plays', [])
        all_plays.extend(temp_game_plays)

        for play in temp_game_plays:
            details = play.get('details', {})
            psg['gid'].append(str(game))
            psg['event_type'].append(play.get('typeDescKey', 'unknown'))
            psg['event_description'].append(play.get('description', 'No description'))
            psg['event_time'].append(play.get('clock', {}).get('timeRemaining', '00:00'))

            if play.get('typeDescKey') in ['shot-on-goal', 'goal']:
                psg['id'].append(details.get('shootingPlayerId') or details.get('scoringPlayerId'))
                psg['team'].append(details.get('eventOwnerTeamId'))
                psg['shot'].append(1)
                psg['goal'].append(1 if play.get('typeDescKey') == 'goal' else 0)
                psg['shot_x'].append(details.get('xCoord'))
                psg['shot_y'].append(details.get('yCoord'))
            else:
                psg['id'].append(None)
                psg['team'].append(None)
                psg['shot'].append(0)
                psg['goal'].append(0)
                psg['shot_x'].append(None)
                psg['shot_y'].append(None)
    return psg

In [32]:
so_far


{'game_ids': [2024020003,
  2024020004,
  2024020005,
  2024020006,
  2024020007,
  2024020008,
  2024020009,
  2024020010,
  2024020011,
  2024020012,
  2024020013,
  2024020015,
  2024020016,
  2024020014,
  2024020017,
  2024020018,
  2024020019],
 'game_dates': ['2024-10-08',
  '2024-10-08',
  '2024-10-08',
  '2024-10-09',
  '2024-10-09',
  '2024-10-09',
  '2024-10-09',
  '2024-10-09',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10',
  '2024-10-10']}

In [35]:
psg_df = get_livedata_from_game(
    {'game_ids':[2024020019],
     'game_dates':['2024-10-10']}
)
psg_df


{'id': [None,
  None,
  8476892,
  None,
  None,
  8475753,
  None,
  None,
  None,
  None,
  None,
  None,
  8475181,
  8482077,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  8475798,
  8475170,
  8476892,
  None,
  None,
  None,
  8480023,
  None,
  None,
  8481598,
  None,
  None,
  None,
  8477953,
  None,
  None,
  8484801,
  None,
  None,
  8475726,
  None,
  8477402,
  None,
  None,
  None,
  8480848,
  None,
  None,
  None,
  8477953,
  8475170,
  8478472,
  8475170,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  8482667,
  None,
  None,
  None,
  8478472,
  None,
  8482077,
  None,
  None,
  None,
  None,
  None,
  8480023,
  8482089,
  8482089,
  8480023,
  None,
  None,
  None,
  8477505,
  None,
  None,
  8480188,
  8475181,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  None,
  8475726,
  None,
  None,
  None,
  None,
  None,
  None,
  8470600,
  None,
  None,
  None,
  None,
  None,


In [34]:
pd.DataFrame(psg_df)

Unnamed: 0,id,team,shot,shot_x,shot_y,goal,event_type,event_description,event_time,gid
0,,,0,,,0,period-start,No description,00:00,2024020019
1,,,0,,,0,faceoff,No description,00:00,2024020019
2,8476892.0,19.0,1,-36.0,30.0,0,shot-on-goal,No description,00:00,2024020019
3,,,0,,,0,missed-shot,No description,00:00,2024020019
4,,,0,,,0,giveaway,No description,00:00,2024020019
...,...,...,...,...,...,...,...,...,...,...
320,,,0,,,0,period-start,No description,00:00,2024020019
321,,,0,,,0,faceoff,No description,00:00,2024020019
322,8475170.0,19.0,1,64.0,20.0,1,goal,No description,00:00,2024020019
323,,,0,,,0,period-end,No description,00:00,2024020019
