In [1]:
import pandas as pd
import requests
import json
import numpy as np
import time

In [2]:
# Years 2000 through 2022, excluding 2004: the NHL was in a lockout and that
# season was not played.
years = [season for season in range(2000, 2023) if season != 2004]

In [3]:
# Pulling regular-season data for every year in `years`.
# Each game is requested by ID: {year}02{game}, where {game} is a zero-padded
# four-digit game number.
df = []
for year in years:
    for game_number in range(1, 1313):
        game = f"{game_number:04d}"

        url = f'https://statsapi.web.nhl.com/api/v1/game/{year}02{game}/feed/live'

        # Sometimes the connection will time out, so each request is attempted up to
        # 10 times. `response` is reset first so that a game whose download never
        # succeeds cannot silently reuse the previous game's response.
        response = None
        for attempt in range(1, 11):
            try:
                # Without a timeout, requests can block forever on a stalled connection.
                response = requests.get(url, timeout=30)
                break
            except requests.exceptions.RequestException:
                print('Wating for game ' + str(game) + ' of season ' + str(year) + '.')
                time.sleep(10)

        if response is None:
            # Every attempt failed: report it and move on rather than recording bad data.
            print('Skipping game ' + game + ' of season ' + str(year) + ' after 10 failed attempts.')
            continue

        # There are different numbers of games played over the seasons. The first
        # non-200 response is taken to mean the season has no more games, so the
        # loop moves on to the next year.
        if response.status_code != 200:
            break

        # Parse the body once and reuse it for every field.
        feed = response.json()
        linescore = feed['liveData']['linescore']
        home_team = linescore['teams']['home']
        away_team = linescore['teams']['away']

        # Appending information about the game and stats about the home and away teams.
        df.append({
            'gameID' : f'{year}02{game}',
            'season' : year,
            'date' : feed['gameData']['datetime']['dateTime'][0:10],  # first 10 characters of the timestamp
            'current_period' : linescore['currentPeriod'],

            'home' : home_team['team']['name'],
            'away' : away_team['team']['name'],

            'home_goals' : home_team['goals'],
            'away_goals' : away_team['goals'],

            'home_shots' : home_team['shotsOnGoal'],
            'away_shots' : away_team['shotsOnGoal']
        })

# Converting the list of dictionaries to a DataFrame (built once) and removing games
# that were not played (current_period == 0).
regular_season = pd.DataFrame(df).loc[lambda games: games['current_period'] != 0]

Wating for game 0659 of season 2005.
Wating for game 0843 of season 2018.
Wating for game 0843 of season 2018.


In [4]:
# Pulling playoff data for every year in `years`.
# Each game is requested by ID: {year}03{game}, where {game} is a zero-padded
# four-digit number. Its second digit is stored as the round and its last digit
# as the game number.
df = []
for year in years:
    for game_number in range(1, 425):
        game = f"{game_number:04d}"

        url = f'https://statsapi.web.nhl.com/api/v1/game/{year}03{game}/feed/live'

        # Sometimes the connection will time out, so each request is attempted up to
        # 10 times. `response` is reset first so that a game whose download never
        # succeeds cannot silently reuse the previous game's response.
        response = None
        for attempt in range(1, 11):
            try:
                # Without a timeout, requests can block forever on a stalled connection.
                response = requests.get(url, timeout=30)
                break
            except requests.exceptions.RequestException:
                print('Wating for game ' + str(game) + ' of season ' + str(year) + '.')
                time.sleep(10)

        if response is None:
            # Every attempt failed: report it and move on rather than recording bad data.
            print('Skipping game ' + game + ' of season ' + str(year) + ' after 10 failed attempts.')
            continue

        # Playoff game IDs are sparse, so a non-200 response only means this ID does
        # not exist; skip it and keep scanning instead of ending the season.
        if response.status_code != 200:
            continue

        # Parse the body once and reuse it for every field.
        feed = response.json()
        linescore = feed['liveData']['linescore']
        home_team = linescore['teams']['home']
        away_team = linescore['teams']['away']

        # Appending information about the game and stats about the home and away teams.
        df.append({
            'gameID' : f'{year}03{game}',
            'season' : year,
            'date' : feed['gameData']['datetime']['dateTime'][0:10],  # first 10 characters of the timestamp
            'round' : game[1],
            'game' : game[-1],
            'current_period' : linescore['currentPeriod'],

            'home' : home_team['team']['name'],
            'away' : away_team['team']['name'],

            'home_goals' : home_team['goals'],
            # Fixed: this previously read home_team['goals'], so every playoff game
            # was recorded with identical home and away scores.
            'away_goals' : away_team['goals'],

            'home_shots' : home_team['shotsOnGoal'],
            'away_shots' : away_team['shotsOnGoal']
        })

# Converting the list of dictionaries to a DataFrame (built once) and removing games
# that were not played (current_period == 0).
playoffs = pd.DataFrame(df).loc[lambda games: games['current_period'] != 0]

In [5]:
# regular_season.to_csv('../data/regular_season.csv', index = False)
# playoffs.to_csv('../data/playoffs.csv', index = False)