In [None]:
import pandas as pd
import requests
import numpy as np

The NHL API has no official documentation, but it is unofficially documented in [this repo](https://github.com/Zmalski/NHL-API-Reference).

In [None]:
# Base URL that the request helpers in this notebook prefix to each endpoint path.
base_url = 'https://api.nhle.com/stats/rest/en'

## Getting playoff team stats per season

We restrict our timeline to seasons from 2000-01 onward, and we only include the teams that made the playoffs in each season.

In [None]:
def GetTeamStats(start_season=2000, end_season=2025, playoffs=False):
    """Fetch per-team summary stats for a range of seasons.

    Season ids are built as '<startYear><startYear + 1>' for every start year
    in ``range(start_season, end_season)``, so ``end_season`` is the closing
    year of the last season requested (e.g. 2000, 2025 -> 20002001 .. 20242025).

    Parameters
    ----------
    start_season : int
        Start year of the first season to include.
    end_season : int
        Exclusive upper bound on the start year; must be greater than
        ``start_season``.
    playoffs : bool
        If True request playoff rows (gameTypeId=3), otherwise regular-season
        rows (gameTypeId=2).

    Returns
    -------
    pandas.DataFrame
        The response's ``data`` list flattened with ``pd.json_normalize``.

    Raises
    ------
    ValueError
        If the season range is empty.
    requests.HTTPError
        If the API answers with an error status.
    """
    if end_season <= start_season:
        raise ValueError(
            f'end_season ({end_season}) must be greater than start_season ({start_season})'
        )

    seasons = [f'{year}{year + 1}' for year in range(start_season, end_season)]
    seasonFilter = ' or '.join(f'seasonId={season}' for season in seasons)

    # 2 ~ regular season, 3 ~ playoffs
    gameTypeId = 3 if playoffs else 2
    cayenneExp = f'({seasonFilter}) and gameTypeId={gameTypeId}'

    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    # A timeout keeps the notebook from hanging on a stalled connection, and
    # raise_for_status surfaces HTTP errors instead of failing later on the JSON.
    req = requests.get(url, timeout=30)
    req.raise_for_status()

    data = req.json()['data']
    return pd.json_normalize(data)


# Playoff rows for every season in the default range, ordered by season id.
df = GetTeamStats(playoffs=True).sort_values(by='seasonId')
df.tail()

We also want to add each playoff team's regular-season statistics. We will do this by merging the two tables, using the team name and season id as keys.

In [None]:
# Regular-season rows for the same default range of seasons.
rs_df = GetTeamStats(playoffs=False)
rs_df.tail()

# Keep only team-seasons present in both frames; regular-season columns get an
# 'RS' suffix while the playoff columns keep their original names. The
# duplicated team id from the regular-season side is then discarded.
df = (
    df.merge(rs_df, on=['teamFullName', 'seasonId'], how='inner', suffixes=('', 'RS'))
      .drop(columns=['teamIdRS'])
)

There are some factors we want to include that we can derive from this data.

One example is a way to quantify how well a team performed in the playoffs - we can assign a `playoffScore` to each team where
- a team that does not make the playoffs or does not win any games in the playoffs has a score of 0
- a team that makes the playoffs has a score equal to its playoff wins divided by 16, the number of wins needed to win the Cup
- a team that makes the playoffs and wins the cup has a score of 1

In [None]:
# Playoff wins as a fraction of the 16 wins treated as a full playoff run.
# NOTE(review): presumably 4 rounds x 4 wins; confirm that no season in range
# reports more than 16 playoff wins, otherwise the score can exceed 1.
df['playoffScore'] = df['wins'].div(16)
df.tail()

We also want to quantify how well a team performed in the playoffs in its recent seasons. We will create a column `last3PlayoffScore` that holds a team's total playoff score over the previous 3 seasons (the current season is not included).

In [None]:
# Start year of each row's season, e.g. 20232024 -> 2023.
startYears = df['seasonId'].astype(str).str[:4].astype(int)

trailing3Scores = []
for startYear, teamId in zip(startYears, df['teamId']):
    # Season ids of the three seasons immediately before this row's season.
    priorSeasons = [int(f'{year}{year + 1}') for year in range(startYear - 3, startYear)]

    # Rows of the same team in those seasons; seasons with no row add nothing.
    isPriorRow = df['seasonId'].isin(priorSeasons) & (df['teamId'] == teamId)
    trailing3Scores.append(df.loc[isPriorRow, 'playoffScore'].sum())

df['last3PlayoffScore'] = np.asarray(trailing3Scores, dtype=float)

df.head(100)

We will do the same for the past 5 seasons as well.

In [None]:
# Start year of each row's season, e.g. 20232024 -> 2023.
startYears = df['seasonId'].astype(str).str[:4].astype(int)

trailing5Scores = []
for startYear, teamId in zip(startYears, df['teamId']):
    # Season ids of the five seasons immediately before this row's season.
    priorSeasons = [int(f'{year}{year + 1}') for year in range(startYear - 5, startYear)]

    # Rows of the same team in those seasons; seasons with no row add nothing.
    isPriorRow = df['seasonId'].isin(priorSeasons) & (df['teamId'] == teamId)
    trailing5Scores.append(df.loc[isPriorRow, 'playoffScore'].sum())

df['last5PlayoffScore'] = np.asarray(trailing5Scores, dtype=float)

df.head(100)

We now want to quantify how a team performed in the regular season against 'good' teams - teams with a win percentage of .500 or better.

In [None]:
def GetTeamGamesBySeason(teamId: int, season: int):
    """Fetch the games one team played, home or away, in one season.

    The query is restricted with ``gameType=2``; elsewhere in this notebook
    game type 2 is described as the regular season.

    Parameters
    ----------
    teamId : int
        Team id matched against both ``homeTeamId`` and ``visitingTeamId``.
    season : int
        Season id such as 20232024.

    Returns
    -------
    pandas.DataFrame
        The response's ``data`` list flattened with ``pd.json_normalize``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    cayenneExp = f'(homeTeamId={teamId} or visitingTeamId={teamId}) and season={season} and gameType=2'

    url = base_url + f'/game?cayenneExp={cayenneExp}'
    # A timeout keeps the notebook from hanging on a stalled connection, and
    # raise_for_status surfaces HTTP errors instead of failing later on the JSON.
    req = requests.get(url, timeout=30)
    req.raise_for_status()

    data = req.json()['data']
    return pd.json_normalize(data)

# Example: games for team id 13 (Florida) in the 2023-24 season.
GetTeamGamesBySeason(teamId=13, season=20232024).head()

In [None]:
def GetTeamsAbove500BySeason(season: int):
    """Return the team summary rows whose win share is at least .500.

    The query is limited to regular-season rows (``gameTypeId=2``) so that
    ``wins / gamesPlayed`` is not mixed with playoff results.

    Parameters
    ----------
    season : int
        Season id such as 20232024.

    Returns
    -------
    pandas.DataFrame
        Rows of the team summary where ``wins / gamesPlayed >= 0.5``.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    # 2 ~ regular season; without this filter the rows are not restricted by game type.
    cayenneExp = f'seasonId={season} and gameTypeId=2'

    url = base_url + f'/team/summary?limit=-1&cayenneExp={cayenneExp}'
    # A timeout keeps the notebook from hanging on a stalled connection, and
    # raise_for_status surfaces HTTP errors instead of failing later on the JSON.
    req = requests.get(url, timeout=30)
    req.raise_for_status()

    data = req.json()['data']
    teams = pd.json_normalize(data)

    # ">=" keeps teams sitting exactly at .500 as well as those above it.
    return teams[teams['wins'] / teams['gamesPlayed'] >= 0.5]

In [None]:
# Share of games against .500-or-better opponents that each team won.
# The denominator is the number of games played against those opponents,
# not the team's total games, so the value is a true win rate.
# NOTE: keep this definition identical anywhere else this column is computed.

# Above-.500 team ids are the same for every row of a season, so fetch them
# once per season instead of once per row.
above500IdsBySeason = {}
winRates = []

for teamId, seasonId in zip(df['teamId'], df['seasonId']):
    if seasonId not in above500IdsBySeason:
        above500IdsBySeason[seasonId] = set(GetTeamsAbove500BySeason(seasonId)['teamId'])
    above500Ids = above500IdsBySeason[seasonId]

    teamGames = GetTeamGamesBySeason(teamId=teamId, season=seasonId)
    if teamGames.empty:
        # No games returned: there are no columns to filter on, so record 0.
        winRates.append(0.0)
        continue

    # Games against a .500-or-better opponent, split by venue.
    homeVs500 = (teamGames['homeTeamId'] == teamId) & teamGames['visitingTeamId'].isin(above500Ids)
    awayVs500 = (teamGames['visitingTeamId'] == teamId) & teamGames['homeTeamId'].isin(above500Ids)

    # Of those, the games this team won.
    homeWin = homeVs500 & (teamGames['homeScore'] > teamGames['visitingScore'])
    awayWin = awayVs500 & (teamGames['homeScore'] < teamGames['visitingScore'])

    gamesAgainst500Teams = int((homeVs500 | awayVs500).sum())
    winsAgainst500Teams = int((homeWin | awayWin).sum())

    # Guard against a team with no such games to avoid dividing by zero.
    winRates.append(winsAgainst500Teams / gamesAgainst500Teams if gamesAgainst500Teams else 0.0)

df['winRateAgainst500Teams'] = np.asarray(winRates, dtype=float)

df.head()

In [None]:
# Persist the combined playoff / regular-season table, row index included.
df.to_csv('NHLTeamPlayoffStats.csv', index=True)

We will also get this year's (2024-25) team statistics and save them in a separate file.

In [None]:
# Quick look at the first rows of the historical table.
df.head(n=5)

In [None]:
# GetTeamStats builds one season per start year in range(start_season, end_season),
# so (2024, 2025) requests exactly season 20242025. Using 2026 here would also
# request 20252026 and mix a second season into this frame.
df_25 = GetTeamStats(start_season=2024, end_season=2025, playoffs=False)
df_25.head()

In [None]:
# Give every non-key column an 'RS' suffix in a single rename; the three key
# columns keep their names.
keyColumns = ['seasonId', 'teamFullName', 'teamId']
df_25 = df_25.rename(
    columns={column: f'{column}RS' for column in df_25.columns if column not in keyColumns}
)

In [None]:
# Show the column labels after the rename.
df_25.columns

In [None]:
# Start year of each row's season, e.g. 20242025 -> 2024.
startYears = df_25['seasonId'].astype(str).str[:4].astype(int)

trailing3Scores = []
for startYear, teamId in zip(startYears, df_25['teamId']):
    # Season ids of the three seasons immediately before this row's season.
    priorSeasons = [int(f'{year}{year + 1}') for year in range(startYear - 3, startYear)]

    # Playoff scores are looked up in the historical frame `df`, not in df_25.
    isPriorRow = df['seasonId'].isin(priorSeasons) & (df['teamId'] == teamId)
    trailing3Scores.append(df.loc[isPriorRow, 'playoffScore'].sum())

df_25['last3PlayoffScore'] = np.asarray(trailing3Scores, dtype=float)

df_25.head(16)

In [None]:
# Start year of each row's season, e.g. 20242025 -> 2024.
startYears = df_25['seasonId'].astype(str).str[:4].astype(int)

trailing5Scores = []
for startYear, teamId in zip(startYears, df_25['teamId']):
    # Season ids of the five seasons immediately before this row's season.
    priorSeasons = [int(f'{year}{year + 1}') for year in range(startYear - 5, startYear)]

    # Playoff scores are looked up in the historical frame `df`, not in df_25.
    isPriorRow = df['seasonId'].isin(priorSeasons) & (df['teamId'] == teamId)
    trailing5Scores.append(df.loc[isPriorRow, 'playoffScore'].sum())

df_25['last5PlayoffScore'] = np.asarray(trailing5Scores, dtype=float)

df_25.head(16)

In [None]:
# Share of games against .500-or-better opponents that each team won.
# The denominator is the number of games played against those opponents,
# not the team's total games, so the value is a true win rate.
# NOTE: keep this definition identical anywhere else this column is computed.
# NOTE(review): if the games endpoint also returns not-yet-played games for an
# in-progress season, they would count in the denominator - confirm.

# Above-.500 team ids are the same for every row of a season, so fetch them
# once per season instead of once per row.
above500IdsBySeason = {}
winRates = []

for teamId, seasonId in zip(df_25['teamId'], df_25['seasonId']):
    if seasonId not in above500IdsBySeason:
        above500IdsBySeason[seasonId] = set(GetTeamsAbove500BySeason(seasonId)['teamId'])
    above500Ids = above500IdsBySeason[seasonId]

    teamGames = GetTeamGamesBySeason(teamId=teamId, season=seasonId)
    if teamGames.empty:
        # No games returned: there are no columns to filter on, so record 0.
        winRates.append(0.0)
        continue

    # Games against a .500-or-better opponent, split by venue.
    homeVs500 = (teamGames['homeTeamId'] == teamId) & teamGames['visitingTeamId'].isin(above500Ids)
    awayVs500 = (teamGames['visitingTeamId'] == teamId) & teamGames['homeTeamId'].isin(above500Ids)

    # Of those, the games this team won.
    homeWin = homeVs500 & (teamGames['homeScore'] > teamGames['visitingScore'])
    awayWin = awayVs500 & (teamGames['homeScore'] < teamGames['visitingScore'])

    gamesAgainst500Teams = int((homeVs500 | awayVs500).sum())
    winsAgainst500Teams = int((homeWin | awayWin).sum())

    # Guard against a team with no such games to avoid dividing by zero.
    winRates.append(winsAgainst500Teams / gamesAgainst500Teams if gamesAgainst500Teams else 0.0)

df_25['winRateAgainst500Teams'] = np.asarray(winRates, dtype=float)

df_25.head()

In [None]:
# Persist the current-season table, row index included.
df_25.to_csv('2025TeamStats.csv', index=True)