In [1]:
import pandas as pd
import requests
import numpy as np

In [2]:
base_url = 'https://api.nhle.com/stats/rest/en'

## Getting playoff team stats per season

We restrict our timeline to be after the year 2000. We will only include the teams that made the playoffs each season.

In [3]:
def GetTeamStats(start_season=2000, playoffs=False):

    seasons = [f'{start_season + i}{start_season + i + 1}' for i in range(2025-start_season)]
    
    cayenneExp = '('
    for season in seasons[:-1]:
        cayenneExp += f'seasonId={season} or '
    cayenneExp += f'seasonId={seasons[-1]})'

    if playoffs: cayenneExp += ' and gameTypeId=3'

    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)


df = GetTeamStats(playoffs=True)

df = df.sort_values('seasonId')
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,regulationAndOtWins,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout
292,0.498412,5,19,3.8,12,2.4,4,,0.666666,0.666667,...,1,20232024,33.8,27.2,New York Islanders,2,,1,0,0
20,0.505882,11,32,2.90909,38,3.45454,5,,0.733333,0.733334,...,6,20232024,26.63636,35.72727,Carolina Hurricanes,12,,6,6,0
128,0.521591,19,48,2.52631,52,2.73684,9,,0.729729,0.675676,...,10,20232024,27.31578,27.94736,Dallas Stars,25,,10,8,0
122,0.489864,5,20,4.0,14,2.8,4,,0.846153,0.769231,...,1,20232024,32.0,27.0,Tampa Bay Lightning,14,,1,1,0
280,0.531413,13,36,2.76923,33,2.53846,6,,0.809523,0.809524,...,7,20232024,27.15384,20.76923,Vancouver Canucks,23,,7,6,0


There are some factors we want to include that we can derive from this data.

One example is a way to quantify how well a team performed in the playoffs - we can assign a `playoffScore` to each team where
- a team that does not make the playoffs or does not win any games in the playoffs has a score of 0
- a team that does make the playoffs has a score of their wins divided by the total possible wins
- a team that makes the playoffs and wins the cup has a score of 1

In [4]:
df['playoffScore'] = df['wins'] / 16
df.tail()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore
292,0.498412,5,19,3.8,12,2.4,4,,0.666666,0.666667,...,20232024,33.8,27.2,New York Islanders,2,,1,0,0,0.0625
20,0.505882,11,32,2.90909,38,3.45454,5,,0.733333,0.733334,...,20232024,26.63636,35.72727,Carolina Hurricanes,12,,6,6,0,0.375
128,0.521591,19,48,2.52631,52,2.73684,9,,0.729729,0.675676,...,20232024,27.31578,27.94736,Dallas Stars,25,,10,8,0,0.625
122,0.489864,5,20,4.0,14,2.8,4,,0.846153,0.769231,...,20232024,32.0,27.0,Tampa Bay Lightning,14,,1,1,0,0.0625
280,0.531413,13,36,2.76923,33,2.53846,6,,0.809523,0.809524,...,20232024,27.15384,20.76923,Vancouver Canucks,23,,7,6,0,0.4375


We also want to be able to quantify how well a team performed in the playoffs in their recent seasons. We will create a column `last5YearsPlayoffScore` that is a team's total playoff score over the last 5 years.

In [5]:
df['last5PlayoffScore'] = 0.0

for index, team in df.iterrows():

    season = int(str(team['seasonId'])[:4])
    last5Seasons = [int(f'{season - 5 + s}{season - 5 + s + 1}') for s in range(5)]

    data = df[df['seasonId'].isin(last5Seasons)]
    data = data[data['teamId'] == team['teamId']]
    last5PlayoffScore = data['playoffScore'].sum()

    df.loc[index, 'last5PlayoffScore'] = last5PlayoffScore
    
df.head(100)

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore,last5PlayoffScore
326,0.498855,6,15,2.50000,17,2.83333,4,,0.850000,0.850000,...,26.33333,27.66666,Detroit Red Wings,17,,2,2,0,0.1250,0.0000
71,0.491155,13,34,2.61538,25,1.92307,6,,0.777777,0.777778,...,27.84615,25.61538,Los Angeles Kings,26,,7,3,0,0.4375,0.0000
230,0.517482,6,16,2.66666,13,2.16666,4,,0.846153,0.846154,...,31.33333,25.33333,Edmonton Oilers,22,,2,1,0,0.1250,0.0000
222,0.490304,11,24,2.18181,28,2.54545,4,,0.895833,0.875000,...,29.90909,22.45454,Toronto Maple Leafs,10,,7,5,0,0.4375,0.0000
298,0.530732,23,41,1.78260,69,3.00000,7,,0.858695,0.858696,...,27.04347,26.26086,Colorado Avalanche,21,,16,13,0,1.0000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,0.471698,5,16,3.20000,14,2.80000,4,,0.966666,0.933334,...,32.60000,26.80000,Nashville Predators,18,,1,1,0,0.0625,0.1875
190,0.464285,7,19,2.71428,17,2.42857,4,,0.806451,0.806452,...,32.71428,23.14285,Calgary Flames,20,,3,3,0,0.1875,1.2500
248,0.529255,13,32,2.46153,30,2.30769,7,,0.795454,0.750000,...,25.69230,31.92307,San Jose Sharks,28,,6,5,0,0.3750,1.3750
109,0.490109,7,23,3.28571,20,2.85714,4,,0.777777,0.777778,...,34.71428,31.14285,Washington Capitals,15,,3,3,0,0.1875,0.1250


We now want to quantify how a team performed in the regular season against 'good' teams - teams above a .500 win percentage.

In [6]:
def GetTeamGamesBySeason(teamId: int, season: int):

    cayenneExp = f'(homeTeamId={teamId} or visitingTeamId={teamId}) and season={season} and gameType=2'
    
    url = base_url + f'/game?cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    return pd.json_normalize(data)

GetTeamGamesBySeason(13, 20232024).head()

Unnamed: 0,id,easternStartTime,gameDate,gameNumber,gameScheduleStateId,gameStateId,gameType,homeScore,homeTeamId,period,season,visitingScore,visitingTeamId
0,2023020014,2023-10-12T20:00:00,2023-10-12,14,1,7,2,2,30,3,20232024,0,13
1,2023020020,2023-10-14T16:00:00,2023-10-14,20,1,7,2,6,52,3,20232024,4,13
2,2023020036,2023-10-16T19:00:00,2023-10-16,36,1,7,2,3,1,3,20232024,4,13
3,2023020052,2023-10-19T19:00:00,2023-10-19,52,1,7,2,3,13,3,20232024,1,10
4,2023020068,2023-10-21T19:00:00,2023-10-21,68,1,7,2,3,13,3,20232024,5,23


In [7]:
def GetTeamsAbove500BySeason(season: int):
    
    cayenneExp = f'seasonId={season}'
    
    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    req = requests.get(url)
    
    data = req.json()['data']
    df = pd.json_normalize(data)

    df = df[df['wins'] / df['gamesPlayed'] >= 0.5]
    return df

In [8]:
df['winRateAgainst500Teams'] = 0.0

for index, team in df.iterrows():
    teamGames = GetTeamGamesBySeason(teamId=team['teamId'], season=team['seasonId'])
    above500Teams = GetTeamsAbove500BySeason(team['seasonId'])

    homeCondition = (teamGames['homeTeamId'] == team['teamId']) & (teamGames['visitingTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] > teamGames['visitingScore']))
    awayCondition = (teamGames['visitingTeamId'] == team['teamId']) & (teamGames['homeTeamId'].isin(above500Teams['teamId']) & (teamGames['homeScore'] < teamGames['visitingScore']))
    
    winsAgainst500Teams = len(teamGames[homeCondition | awayCondition])
    gamesPlayed = len(teamGames)
    winRate = winsAgainst500Teams / gamesPlayed

    df.loc[index, 'winRateAgainst500Teams'] = winRate
        
df.head()

Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,shotsForPerGame,teamFullName,teamId,ties,wins,winsInRegulation,winsInShootout,playoffScore,last5PlayoffScore,winRateAgainst500Teams
326,0.498855,6,15,2.5,17,2.83333,4,,0.85,0.85,...,27.66666,Detroit Red Wings,17,,2,2,0,0.125,0.0,0.146341
71,0.491155,13,34,2.61538,25,1.92307,6,,0.777777,0.777778,...,25.61538,Los Angeles Kings,26,,7,3,0,0.4375,0.0,0.121951
230,0.517482,6,16,2.66666,13,2.16666,4,,0.846153,0.846154,...,25.33333,Edmonton Oilers,22,,2,1,0,0.125,0.0,0.134146
222,0.490304,11,24,2.18181,28,2.54545,4,,0.895833,0.875,...,22.45454,Toronto Maple Leafs,10,,7,5,0,0.4375,0.0,0.085366
298,0.530732,23,41,1.7826,69,3.0,7,,0.858695,0.858696,...,26.26086,Colorado Avalanche,21,,16,13,0,1.0,0.0,0.085366


In [9]:
# save to csv
df.to_csv('NHLTeamSeasonStats.csv')