In [1]:
import pandas as pd
import requests
import numpy as np

The NHL API has no official documentation, but it is somewhat documented at [this repo](https://github.com/Zmalski/NHL-API-Reference)

In [2]:
base_url = 'https://api.nhle.com/stats/rest/en'

## Getting playoff team stats per season

We restrict our timeline to be after the year 2000. We will only include the teams that made the playoffs each season.

In [3]:
def GetTeamStats(start_season=2000, playoffs=False):

    seasons = [f'{start_season + i}{start_season + i + 1}' for i in range(2025-start_season)]
    
    cayenneExp = '('
    for season in seasons[:-1]:
        cayenneExp += f'seasonId={season} or '
    cayenneExp += f'seasonId={seasons[-1]})'

    # 2 ~ regular season, 3 ~ playoffs
    cayenneExp += f' and gameTypeId={3 if playoffs else 2}'

    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)


df = GetTeamStats(playoffs=True)

df = df.sort_values('seasonId')
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,regulationAndOtWins,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout
142,0.493333,5,22,4.4,13,2.6,4,,0.55,0.55,...,1,20232024,31.0,29.0,Los Angeles Kings,26,,1,0,0
67,0.465561,13,31,2.38461,31,2.38461,7,,0.857142,0.857143,...,6,20232024,32.15384,24.84615,Boston Bruins,6,,6,5,0
292,0.498412,5,19,3.8,12,2.4,4,,0.666666,0.666667,...,1,20232024,33.8,27.2,New York Islanders,2,,1,0,0
56,0.561881,7,18,2.57142,12,1.71428,4,,0.647058,0.647059,...,3,20232024,27.57142,30.71428,Toronto Maple Leafs,10,,3,2,0
189,0.444954,4,15,3.75,7,1.75,4,,0.625,0.625,...,0,20232024,26.0,25.25,Washington Capitals,15,,0,0,0


We also want to add each playoff team's regular season statistics. We will do this with a merge with the team name and season id as keys.

In [4]:
rs_df = GetTeamStats(playoffs=False)
rs_df.tail()

df = pd.merge(df, rs_df, on=['teamFullName', 'seasonId'], how='inner', suffixes=['', 'RS'])
df = df.drop(['teamIdRS'], axis=1)

There are some factors we want to include that we can derive from this data.

One example is a way to quantify how well a team performed in the playoffs - we can assign a `playoffScore` to each team where
- a team that does not make the playoffs or does not win any games in the playoffs has a score of 0
- a team that does make the playoffs has a score of their wins divided by the total possible wins
- a team that makes the playoffs and wins the cup has a score of 1

In [5]:
df['playoffScore'] = df['wins'] / 16
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,powerPlayNetPctRS,powerPlayPctRS,regulationAndOtWinsRS,shotsAgainstPerGameRS,shotsForPerGameRS,tiesRS,winsRS,winsInRegulationRS,winsInShootoutRS,playoffScore
371,0.493333,5,22,4.4,13,2.6,4,,0.55,0.55,...,0.19758,0.225806,42,28.0,31.90243,,44,37,2,0.0625
372,0.465561,13,31,2.38461,31,2.38461,7,,0.857142,0.857143,...,0.19753,0.222222,43,30.53658,29.25609,,47,36,4,0.375
373,0.498412,5,19,3.8,12,2.4,4,,0.666666,0.666667,...,0.190476,0.203463,38,32.75609,30.12195,,39,29,1,0.0625
374,0.561881,7,18,2.57142,12,1.71428,4,,0.647058,0.647059,...,0.20168,0.239495,41,29.82926,32.57317,,46,33,5,0.1875
375,0.444954,4,15,3.75,7,1.75,4,,0.625,0.625,...,0.18421,0.20614,36,30.52439,26.4878,,40,32,4,0.0


We also want to be able to quantify how well a team performed in the playoffs in their recent seasons. We will create a column `last3YearsPlayoffScore` that is a team's total playoff score over the last 3 years.

In [6]:
df['last3PlayoffScore'] = 0.0

for index, team in df.iterrows():

    season = int(str(team['seasonId'])[:4])
    last3Seasons = [int(f'{season - 3 + s}{season - 3 + s + 1}') for s in range(3)]

    data = df[df['seasonId'].isin(last3Seasons)]
    data = data[data['teamId'] == team['teamId']]
    last3PlayoffScore = data['playoffScore'].sum()

    df.loc[index, 'last3PlayoffScore'] = last3PlayoffScore
    
df.head(100)

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,powerPlayPctRS,regulationAndOtWinsRS,shotsAgainstPerGameRS,shotsForPerGameRS,tiesRS,winsRS,winsInRegulationRS,winsInShootoutRS,playoffScore,last3PlayoffScore
0,0.468677,6,16,2.66666,11,1.83333,4,,0.851851,0.814815,...,0.140394,40,26.45121,27.64634,12.0,40,33,0,0.1250,0.000
1,0.569819,6,14,2.33333,10,1.66666,4,,0.761904,0.761905,...,0.212464,41,27.71951,28.63414,10.0,41,39,0,0.1250,0.000
2,0.475138,25,52,2.08000,69,2.76000,10,,0.865384,0.836539,...,0.229032,48,24.68292,31.58536,12.0,48,43,0,0.9375,0.000
3,0.464799,18,44,2.44444,38,2.11111,9,,0.794520,0.780822,...,0.202666,42,29.23170,29.19512,9.0,42,39,0,0.5625,0.000
4,0.498855,6,15,2.50000,17,2.83333,4,,0.850000,0.850000,...,0.221354,49,27.08536,32.31707,9.0,49,39,0,0.1250,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.496960,11,29,2.63636,30,2.72727,6,,0.826086,0.826087,...,0.177111,39,28.43902,28.70731,,49,36,10,0.3125,0.375
96,0.475675,6,17,2.83333,12,2.00000,4,,0.884615,0.884616,...,0.148044,38,29.75609,29.57317,,41,33,3,0.1250,0.125
97,0.523026,5,19,3.80000,12,2.40000,4,,0.764705,0.764706,...,0.156250,38,27.52439,28.80487,,46,31,8,0.0625,0.625
98,0.509971,17,54,3.17647,52,3.05882,8,,0.786666,0.760000,...,0.218181,39,31.81707,28.76829,,42,36,3,0.5625,0.125


We will do the same for the past 5 seasons as well.

In [7]:
df['last5PlayoffScore'] = 0.0

for index, team in df.iterrows():

    season = int(str(team['seasonId'])[:4])
    last5Seasons = [int(f'{season - 5 + s}{season - 5 + s + 1}') for s in range(5)]

    data = df[df['seasonId'].isin(last5Seasons)]
    data = data[data['teamId'] == team['teamId']]
    last5PlayoffScore = data['playoffScore'].sum()

    df.loc[index, 'last5PlayoffScore'] = last5PlayoffScore
    
df.head(100)

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,regulationAndOtWinsRS,shotsAgainstPerGameRS,shotsForPerGameRS,tiesRS,winsRS,winsInRegulationRS,winsInShootoutRS,playoffScore,last3PlayoffScore,last5PlayoffScore
0,0.468677,6,16,2.66666,11,1.83333,4,,0.851851,0.814815,...,40,26.45121,27.64634,12.0,40,33,0,0.1250,0.000,0.0000
1,0.569819,6,14,2.33333,10,1.66666,4,,0.761904,0.761905,...,41,27.71951,28.63414,10.0,41,39,0,0.1250,0.000,0.0000
2,0.475138,25,52,2.08000,69,2.76000,10,,0.865384,0.836539,...,48,24.68292,31.58536,12.0,48,43,0,0.9375,0.000,0.0000
3,0.464799,18,44,2.44444,38,2.11111,9,,0.794520,0.780822,...,42,29.23170,29.19512,9.0,42,39,0,0.5625,0.000,0.0000
4,0.498855,6,15,2.50000,17,2.83333,4,,0.850000,0.850000,...,49,27.08536,32.31707,9.0,49,39,0,0.1250,0.000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.496960,11,29,2.63636,30,2.72727,6,,0.826086,0.826087,...,39,28.43902,28.70731,,49,36,10,0.3125,0.375,1.5000
96,0.475675,6,17,2.83333,12,2.00000,4,,0.884615,0.884616,...,38,29.75609,29.57317,,41,33,3,0.1250,0.125,0.2500
97,0.523026,5,19,3.80000,12,2.40000,4,,0.764705,0.764706,...,38,27.52439,28.80487,,46,31,8,0.0625,0.625,1.6875
98,0.509971,17,54,3.17647,52,3.05882,8,,0.786666,0.760000,...,39,31.81707,28.76829,,42,36,3,0.5625,0.125,1.1875


We now want to quantify how a team performed in the regular season against 'good' teams - teams above a .500 win percentage.

In [8]:
def GetTeamGamesBySeason(teamId: int, season: int):

    cayenneExp = f'(homeTeamId={teamId} or visitingTeamId={teamId}) and season={season} and gameType=2'
    
    url = base_url + f'/game?cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)

# e.g. Florida
GetTeamGamesBySeason(13, 20232024).head()

Unnamed: 0,id,easternStartTime,gameDate,gameNumber,gameScheduleStateId,gameStateId,gameType,homeScore,homeTeamId,period,season,visitingScore,visitingTeamId
0,2023020014,2023-10-12T20:00:00,2023-10-12,14,1,7,2,2,30,3,20232024,0,13
1,2023020020,2023-10-14T16:00:00,2023-10-14,20,1,7,2,6,52,3,20232024,4,13
2,2023020036,2023-10-16T19:00:00,2023-10-16,36,1,7,2,3,1,3,20232024,4,13
3,2023020052,2023-10-19T19:00:00,2023-10-19,52,1,7,2,3,13,3,20232024,1,10
4,2023020068,2023-10-21T19:00:00,2023-10-21,68,1,7,2,3,13,3,20232024,5,23


In [9]:
def GetTeamsAbove500BySeason(season: int):
    
    cayenneExp = f'seasonId={season}'
    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    df = pd.json_normalize(data)

    df = df[df['wins'] / df['gamesPlayed'] >= 0.5]
    return df

In [10]:
df['winRateAgainst500Teams'] = 0.0

for index, team in df.iterrows():
    teamGames = GetTeamGamesBySeason(teamId=team['teamId'], season=team['seasonId'])
    above500Teams = GetTeamsAbove500BySeason(team['seasonId'])

    homeCondition = (teamGames['homeTeamId'] == team['teamId']) & (teamGames['visitingTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] > teamGames['visitingScore']))
    awayCondition = (teamGames['visitingTeamId'] == team['teamId']) & (teamGames['homeTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] < teamGames['visitingScore']))
    
    winsAgainst500Teams = len(teamGames[homeCondition | awayCondition])
    gamesPlayed = len(teamGames)
    winRate = winsAgainst500Teams / gamesPlayed

    df.loc[index, 'winRateAgainst500Teams'] = winRate
        
df.head()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,shotsAgainstPerGameRS,shotsForPerGameRS,tiesRS,winsRS,winsInRegulationRS,winsInShootoutRS,playoffScore,last3PlayoffScore,last5PlayoffScore,winRateAgainst500Teams
0,0.468677,6,16,2.66666,11,1.83333,4,,0.851851,0.814815,...,26.45121,27.64634,12.0,40,33,0,0.125,0.0,0.0,0.085366
1,0.569819,6,14,2.33333,10,1.66666,4,,0.761904,0.761905,...,27.71951,28.63414,10.0,41,39,0,0.125,0.0,0.0,0.134146
2,0.475138,25,52,2.08,69,2.76,10,,0.865384,0.836539,...,24.68292,31.58536,12.0,48,43,0,0.9375,0.0,0.0,0.109756
3,0.464799,18,44,2.44444,38,2.11111,9,,0.79452,0.780822,...,29.2317,29.19512,9.0,42,39,0,0.5625,0.0,0.0,0.158537
4,0.498855,6,15,2.5,17,2.83333,4,,0.85,0.85,...,27.08536,32.31707,9.0,49,39,0,0.125,0.0,0.0,0.146341


In [11]:
# save to csv
df.to_csv('NHLTeamPlayoffStats.csv')