In [1]:
import pandas as pd
import requests
import numpy as np

In [2]:
base_url = 'https://api.nhle.com/stats/rest/en'

## Getting playoff team stats per season

We restrict our timeline to be after the year 2000. We will only include the teams that made the playoffs each season.

In [3]:
def GetTeamStats(start_season=2000, playoffs=False):

    seasons = [f'{start_season + i}{start_season + i + 1}' for i in range(2025-start_season)]
    
    cayenneExp = '('
    for season in seasons[:-1]:
        cayenneExp += f'seasonId={season} or '
    cayenneExp += f'seasonId={seasons[-1]})'

    if playoffs: cayenneExp += ' and gameTypeId=3'

    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)


df = GetTeamStats(playoffs=True)

df = df.sort_values('seasonId')
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,regulationAndOtWins,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout
90,0.555555,5,28,5.6,15,3.0,4,,0.625,0.625,...,1,20232024,37.0,28.0,Winnipeg Jets,52,,1,1,0
205,0.444954,4,15,3.75,7,1.75,4,,0.625,0.625,...,0,20232024,26.0,25.25,Washington Capitals,15,,0,0,0
339,0.507723,16,42,2.625,47,2.9375,6,,0.947368,0.842106,...,10,20232024,32.9375,25.75,New York Rangers,3,,10,6,0
114,0.489864,5,20,4.0,14,2.8,4,,0.846153,0.769231,...,1,20232024,32.0,27.0,Tampa Bay Lightning,14,,1,1,0
84,0.465561,13,31,2.38461,31,2.38461,7,,0.857142,0.857143,...,6,20232024,32.15384,24.84615,Boston Bruins,6,,6,5,0


There are some factors we want to include that we can derive from this data.

One example is a way to quantify how well a team performed in the playoffs - we can assign a `playoffScore` to each team where
- a team that does not make the playoffs or does not win any games in the playoffs has a score of 0
- a team that does make the playoffs has a score of their wins divided by the total possible wins
- a team that makes the playoffs and wins the cup has a score of 1

In [4]:
df['playoffScore'] = df['wins'] / 16
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore
90,0.555555,5,28,5.6,15,3.0,4,,0.625,0.625,...,20232024,37.0,28.0,Winnipeg Jets,52,,1,1,0,0.0625
205,0.444954,4,15,3.75,7,1.75,4,,0.625,0.625,...,20232024,26.0,25.25,Washington Capitals,15,,0,0,0,0.0
339,0.507723,16,42,2.625,47,2.9375,6,,0.947368,0.842106,...,20232024,32.9375,25.75,New York Rangers,3,,10,6,0,0.625
114,0.489864,5,20,4.0,14,2.8,4,,0.846153,0.769231,...,20232024,32.0,27.0,Tampa Bay Lightning,14,,1,1,0,0.0625
84,0.465561,13,31,2.38461,31,2.38461,7,,0.857142,0.857143,...,20232024,32.15384,24.84615,Boston Bruins,6,,6,5,0,0.375


We also want to be able to quantify how well a team performed in the playoffs in their recent seasons. We will create a column `last5YearsPlayoffScore` that is a team's total playoff score over the last 5 years.

In [5]:
df['last5PlayoffScore'] = 0.0

for index, team in df.iterrows():

    season = int(str(team['seasonId'])[:4])
    last5Seasons = [int(f'{season - 5 + s}{season - 5 + s + 1}') for s in range(5)]

    data = df[df['seasonId'].isin(last5Seasons)]
    data = data[data['teamId'] == team['teamId']]
    last5PlayoffScore = data['playoffScore'].sum()

    df.loc[index, 'last5PlayoffScore'] = last5PlayoffScore
    
df.head(100)

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore,last5PlayoffScore
355,0.555248,6,20,3.33333,8,1.33333,4,,0.916666,0.916667,...,33.50000,19.00000,Carolina Hurricanes,12,,2,1,0,0.1250,0.0000
269,0.484949,4,16,4.00000,9,2.25000,4,,0.736842,0.736843,...,29.00000,22.00000,Vancouver Canucks,23,,0,0,0,0.0000,0.0000
268,0.450549,6,21,3.50000,13,2.16666,4,,0.722222,0.666667,...,30.33333,29.33333,Philadelphia Flyers,4,,2,2,0,0.1250,0.0000
354,0.504213,10,26,2.60000,22,2.20000,6,,0.872340,0.872341,...,27.80000,30.00000,Dallas Stars,25,,4,1,0,0.2500,0.0000
26,0.475138,25,52,2.08000,69,2.76000,10,,0.865384,0.836539,...,20.28000,28.96000,New Jersey Devils,1,,15,13,0,0.9375,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311,0.470297,12,26,2.16666,21,1.75000,7,,0.894736,0.894737,...,36.50000,32.16666,Vancouver Canucks,23,,5,2,0,0.3125,0.7500
146,0.509971,17,54,3.17647,52,3.05882,8,,0.786666,0.760000,...,31.88235,29.94117,Philadelphia Flyers,4,,9,7,0,0.5625,1.1875
109,0.490109,7,23,3.28571,20,2.85714,4,,0.777777,0.777778,...,34.71428,31.14285,Washington Capitals,15,,3,3,0,0.1875,0.1250
160,0.523026,5,19,3.80000,12,2.40000,4,,0.764705,0.764706,...,30.00000,28.80000,New Jersey Devils,1,,1,0,0,0.0625,1.6875


We now want to quantify how a team performed in the regular season against 'good' teams - teams above a .500 win percentage.

In [6]:
def GetTeamGamesBySeason(teamId: int, season: int):

    cayenneExp = f'(homeTeamId={teamId} or visitingTeamId={teamId}) and season={season} and gameType=2'
    
    url = base_url + f'/game?cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)

# e.g. Florida
GetTeamGamesBySeason(13, 20232024).head()

Unnamed: 0,id,easternStartTime,gameDate,gameNumber,gameScheduleStateId,gameStateId,gameType,homeScore,homeTeamId,period,season,visitingScore,visitingTeamId
0,2023020014,2023-10-12T20:00:00,2023-10-12,14,1,7,2,2,30,3,20232024,0,13
1,2023020020,2023-10-14T16:00:00,2023-10-14,20,1,7,2,6,52,3,20232024,4,13
2,2023020036,2023-10-16T19:00:00,2023-10-16,36,1,7,2,3,1,3,20232024,4,13
3,2023020052,2023-10-19T19:00:00,2023-10-19,52,1,7,2,3,13,3,20232024,1,10
4,2023020068,2023-10-21T19:00:00,2023-10-21,68,1,7,2,3,13,3,20232024,5,23


In [7]:
def GetTeamsAbove500BySeason(season: int):
    
    cayenneExp = f'seasonId={season}'
    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    df = pd.json_normalize(data)

    df = df[df['wins'] / df['gamesPlayed'] >= 0.5]
    return df

In [8]:
df['winRateAgainst500Teams'] = 0.0

for index, team in df.iterrows():
    teamGames = GetTeamGamesBySeason(teamId=team['teamId'], season=team['seasonId'])
    above500Teams = GetTeamsAbove500BySeason(team['seasonId'])

    homeCondition = (teamGames['homeTeamId'] == team['teamId']) & (teamGames['visitingTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] > teamGames['visitingScore']))
    awayCondition = (teamGames['visitingTeamId'] == team['teamId']) & (teamGames['homeTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] < teamGames['visitingScore']))
    
    winsAgainst500Teams = len(teamGames[homeCondition | awayCondition])
    gamesPlayed = len(teamGames)
    winRate = winsAgainst500Teams / gamesPlayed

    df.loc[index, 'winRateAgainst500Teams'] = winRate
        
df.head()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore,last5PlayoffScore,winRateAgainst500Teams
326,0.498855,6,15,2.5,17,2.83333,4,,0.85,0.85,...,27.66666,Detroit Red Wings,17,,2,2,0,0.125,0.0,0.146341
71,0.491155,13,34,2.61538,25,1.92307,6,,0.777777,0.777778,...,25.61538,Los Angeles Kings,26,,7,3,0,0.4375,0.0,0.121951
230,0.517482,6,16,2.66666,13,2.16666,4,,0.846153,0.846154,...,25.33333,Edmonton Oilers,22,,2,1,0,0.125,0.0,0.134146
222,0.490304,11,24,2.18181,28,2.54545,4,,0.895833,0.875,...,22.45454,Toronto Maple Leafs,10,,7,5,0,0.4375,0.0,0.085366
298,0.530732,23,41,1.7826,69,3.0,7,,0.858695,0.858696,...,26.26086,Colorado Avalanche,21,,16,13,0,1.0,0.0,0.085366


In [9]:
# save to csv
df.to_csv('NHLTeamSeasonStats.csv')