In [1]:
import pandas as pd
import requests
import numpy as np

The NHL API has no official documentation, but it is somewhat documented at [this repo](https://github.com/Zmalski/NHL-API-Reference)

In [7]:
base_url = 'https://api.nhle.com/stats/rest/en'

## Getting playoff team stats per season

We restrict our timeline to be after the year 2000. We will only include the teams that made the playoffs each season.

In [8]:
def GetTeamStats(start_season=2000, playoffs=False):

    seasons = [f'{start_season + i}{start_season + i + 1}' for i in range(2025-start_season)]
    
    cayenneExp = '('
    for season in seasons[:-1]:
        cayenneExp += f'seasonId={season} or '
    cayenneExp += f'seasonId={seasons[-1]})'

    if playoffs: cayenneExp += ' and gameTypeId=3'

    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)


df = GetTeamStats(playoffs=True)

df = df.sort_values('seasonId')
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,regulationAndOtWins,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout
134,0.493333,5,22,4.4,13,2.6,4,,0.55,0.55,...,1,20232024,31.0,29.0,Los Angeles Kings,26,,1,0,0
71,0.465561,13,31,2.38461,31,2.38461,7,,0.857142,0.857143,...,6,20232024,32.15384,24.84615,Boston Bruins,6,,6,5,0
81,0.555555,5,28,5.6,15,3.0,4,,0.625,0.625,...,1,20232024,37.0,28.0,Winnipeg Jets,52,,1,1,0
292,0.498412,5,19,3.8,12,2.4,4,,0.666666,0.666667,...,1,20232024,33.8,27.2,New York Islanders,2,,1,0,0
262,0.459574,11,37,3.36363,43,3.90909,5,,0.766666,0.733334,...,6,20232024,28.63636,32.0909,Colorado Avalanche,21,,6,5,0


There are some factors we want to include that we can derive from this data.

One example is a way to quantify how well a team performed in the playoffs - we can assign a `playoffScore` to each team where
- a team that does not make the playoffs or does not win any games in the playoffs has a score of 0
- a team that does make the playoffs has a score of their wins divided by the total possible wins
- a team that makes the playoffs and wins the cup has a score of 1

In [9]:
df['playoffScore'] = df['wins'] / 16
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore
134,0.493333,5,22,4.4,13,2.6,4,,0.55,0.55,...,20232024,31.0,29.0,Los Angeles Kings,26,,1,0,0,0.0625
71,0.465561,13,31,2.38461,31,2.38461,7,,0.857142,0.857143,...,20232024,32.15384,24.84615,Boston Bruins,6,,6,5,0,0.375
81,0.555555,5,28,5.6,15,3.0,4,,0.625,0.625,...,20232024,37.0,28.0,Winnipeg Jets,52,,1,1,0,0.0625
292,0.498412,5,19,3.8,12,2.4,4,,0.666666,0.666667,...,20232024,33.8,27.2,New York Islanders,2,,1,0,0,0.0625
262,0.459574,11,37,3.36363,43,3.90909,5,,0.766666,0.733334,...,20232024,28.63636,32.0909,Colorado Avalanche,21,,6,5,0,0.375


We also want to be able to quantify how well a team performed in the playoffs in their recent seasons. We will create a column `last3YearsPlayoffScore` that is a team's total playoff score over the last 3 years.

In [10]:
df['last3PlayoffScore'] = 0.0

for index, team in df.iterrows():

    season = int(str(team['seasonId'])[:4])
    last3Seasons = [int(f'{season - 3 + s}{season - 3 + s + 1}') for s in range(3)]

    data = df[df['seasonId'].isin(last3Seasons)]
    data = data[data['teamId'] == team['teamId']]
    last3PlayoffScore = data['playoffScore'].sum()

    df.loc[index, 'last3PlayoffScore'] = last3PlayoffScore
    
df.head(100)

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore,last3PlayoffScore
91,0.491155,13,34,2.61538,25,1.92307,6,,0.777777,0.777778,...,27.84615,25.61538,Los Angeles Kings,26,,7,3,0,0.4375,0.0000
96,0.464799,18,44,2.44444,38,2.11111,9,,0.794520,0.780822,...,26.83333,23.55555,Pittsburgh Penguins,5,,9,6,0,0.5625,0.0000
15,0.475138,25,52,2.08000,69,2.76000,10,,0.865384,0.836539,...,20.28000,28.96000,New Jersey Devils,1,,15,13,0,0.9375,0.0000
305,0.530732,23,41,1.78260,69,3.00000,7,,0.858695,0.858696,...,27.04347,26.26086,Colorado Avalanche,21,,16,13,0,1.0000,0.0000
148,0.569819,6,14,2.33333,10,1.66666,4,,0.761904,0.761905,...,25.50000,26.83333,Washington Capitals,15,,2,1,0,0.1250,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,0.537473,7,13,1.85714,12,1.71428,4,,0.921052,0.921053,...,33.00000,34.28571,Dallas Stars,25,,3,2,0,0.1875,0.1250
111,0.490109,7,23,3.28571,20,2.85714,4,,0.777777,0.777778,...,34.71428,31.14285,Washington Capitals,15,,3,3,0,0.1875,0.0000
160,0.523026,5,19,3.80000,12,2.40000,4,,0.764705,0.764706,...,30.00000,28.80000,New Jersey Devils,1,,1,0,0,0.0625,0.6250
266,0.533596,4,16,4.00000,5,1.25000,4,,0.739130,0.739131,...,40.25000,28.00000,Ottawa Senators,9,,0,0,0,0.0000,1.1250


We will do the same for the past 5 seasons as well.

In [11]:
df['last5PlayoffScore'] = 0.0

for index, team in df.iterrows():

    season = int(str(team['seasonId'])[:4])
    last5Seasons = [int(f'{season - 5 + s}{season - 5 + s + 1}') for s in range(5)]

    data = df[df['seasonId'].isin(last5Seasons)]
    data = data[data['teamId'] == team['teamId']]
    last5PlayoffScore = data['playoffScore'].sum()

    df.loc[index, 'last5PlayoffScore'] = last5PlayoffScore
    
df.head(100)

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore,last3PlayoffScore,last5PlayoffScore
91,0.491155,13,34,2.61538,25,1.92307,6,,0.777777,0.777778,...,25.61538,Los Angeles Kings,26,,7,3,0,0.4375,0.0000,0.0000
96,0.464799,18,44,2.44444,38,2.11111,9,,0.794520,0.780822,...,23.55555,Pittsburgh Penguins,5,,9,6,0,0.5625,0.0000,0.0000
15,0.475138,25,52,2.08000,69,2.76000,10,,0.865384,0.836539,...,28.96000,New Jersey Devils,1,,15,13,0,0.9375,0.0000,0.0000
305,0.530732,23,41,1.78260,69,3.00000,7,,0.858695,0.858696,...,26.26086,Colorado Avalanche,21,,16,13,0,1.0000,0.0000,0.0000
148,0.569819,6,14,2.33333,10,1.66666,4,,0.761904,0.761905,...,26.83333,Washington Capitals,15,,2,1,0,0.1250,0.0000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,0.537473,7,13,1.85714,12,1.71428,4,,0.921052,0.921053,...,34.28571,Dallas Stars,25,,3,2,0,0.1875,0.1250,0.5000
111,0.490109,7,23,3.28571,20,2.85714,4,,0.777777,0.777778,...,31.14285,Washington Capitals,15,,3,3,0,0.1875,0.0000,0.1250
160,0.523026,5,19,3.80000,12,2.40000,4,,0.764705,0.764706,...,28.80000,New Jersey Devils,1,,1,0,0,0.0625,0.6250,1.6875
266,0.533596,4,16,4.00000,5,1.25000,4,,0.739130,0.739131,...,28.00000,Ottawa Senators,9,,0,0,0,0.0000,1.1250,2.0000


We now want to quantify how a team performed in the regular season against 'good' teams - teams above a .500 win percentage.

In [12]:
def GetTeamGamesBySeason(teamId: int, season: int):

    cayenneExp = f'(homeTeamId={teamId} or visitingTeamId={teamId}) and season={season} and gameType=2'
    
    url = base_url + f'/game?cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)

# e.g. Florida
GetTeamGamesBySeason(13, 20232024).head()

Unnamed: 0,id,easternStartTime,gameDate,gameNumber,gameScheduleStateId,gameStateId,gameType,homeScore,homeTeamId,period,season,visitingScore,visitingTeamId
0,2023020014,2023-10-12T20:00:00,2023-10-12,14,1,7,2,2,30,3,20232024,0,13
1,2023020020,2023-10-14T16:00:00,2023-10-14,20,1,7,2,6,52,3,20232024,4,13
2,2023020036,2023-10-16T19:00:00,2023-10-16,36,1,7,2,3,1,3,20232024,4,13
3,2023020052,2023-10-19T19:00:00,2023-10-19,52,1,7,2,3,13,3,20232024,1,10
4,2023020068,2023-10-21T19:00:00,2023-10-21,68,1,7,2,3,13,3,20232024,5,23


In [13]:
def GetTeamsAbove500BySeason(season: int):
    
    cayenneExp = f'seasonId={season}'
    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    df = pd.json_normalize(data)

    df = df[df['wins'] / df['gamesPlayed'] >= 0.5]
    return df

In [14]:
df['winRateAgainst500Teams'] = 0.0

for index, team in df.iterrows():
    teamGames = GetTeamGamesBySeason(teamId=team['teamId'], season=team['seasonId'])
    above500Teams = GetTeamsAbove500BySeason(team['seasonId'])

    homeCondition = (teamGames['homeTeamId'] == team['teamId']) & (teamGames['visitingTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] > teamGames['visitingScore']))
    awayCondition = (teamGames['visitingTeamId'] == team['teamId']) & (teamGames['homeTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] < teamGames['visitingScore']))
    
    winsAgainst500Teams = len(teamGames[homeCondition | awayCondition])
    gamesPlayed = len(teamGames)
    winRate = winsAgainst500Teams / gamesPlayed

    df.loc[index, 'winRateAgainst500Teams'] = winRate
        
df.head()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore,last3PlayoffScore,last5PlayoffScore,winRateAgainst500Teams
91,0.491155,13,34,2.61538,25,1.92307,6,,0.777777,0.777778,...,Los Angeles Kings,26,,7,3,0,0.4375,0.0,0.0,0.121951
96,0.464799,18,44,2.44444,38,2.11111,9,,0.79452,0.780822,...,Pittsburgh Penguins,5,,9,6,0,0.5625,0.0,0.0,0.158537
15,0.475138,25,52,2.08,69,2.76,10,,0.865384,0.836539,...,New Jersey Devils,1,,15,13,0,0.9375,0.0,0.0,0.109756
305,0.530732,23,41,1.7826,69,3.0,7,,0.858695,0.858696,...,Colorado Avalanche,21,,16,13,0,1.0,0.0,0.0,0.085366
148,0.569819,6,14,2.33333,10,1.66666,4,,0.761904,0.761905,...,Washington Capitals,15,,2,1,0,0.125,0.0,0.0,0.134146


In [16]:
# save to csv
df.to_csv('NHLTeamPlayoffStats.csv')