In [25]:
import pandas as pd
import numpy as np
import random
from datetime import date
import requests
import json
from bs4 import BeautifulSoup as BS

In [26]:
# Load every recorded game and parse the dates BEFORE slicing, so the 2022
# subset inherits the datetime dtype instead of keeping the raw strings.
games = pd.read_csv('../data/games.csv')
games['date'] = pd.to_datetime(games['date'])

# .copy() makes games_2022 an independent frame: a later cell adds columns to
# it through .loc, which on a plain slice raises SettingWithCopyWarning and may
# not write where intended.
games_2022 = games.loc[games['season'] == 2022].copy()

# Team ratings at the start and at the end of the 2022 season.
season_rating = pd.read_csv('../data/season_rating.csv')
season_rating_start = season_rating[['team', 'season_2022_start']]
season_rating_end = season_rating[['team', 'season_2022_end']]

In [27]:
# NHL stats endpoint whose 'teams' payload carries each club's name, division
# and conference.
# NOTE(review): this is a legacy host - confirm it is still being served.
url = 'https://statsapi.web.nhl.com/api/v1/teams'

# The timeout stops the cell from hanging forever on a dead connection, and
# raise_for_status() fails right here on an HTTP error instead of surfacing
# later as a confusing KeyError / JSON error when the payload is parsed.
response = requests.get(url, timeout = 30)
response.raise_for_status()

In [28]:
# Flatten the API payload into one record per club: name, division, conference.
div = [
    {
        'team' : club['name'],
        'div' : club['division']['name'],
        'con' : club['conference']['name']
    }
    for club in response.json()['teams']
]

In [29]:
# Turn the per-club records into a frame with columns team / div / con.
div = pd.DataFrame(data = div)

In [30]:
# Season table: one row per club that hosted a 2022 game, every counter zeroed.
counter_columns = ['pts', 'w', 'l', 'otl', 'rw', 'otw']
blank_table = pd.DataFrame({
    'team' : games_2022['home'].unique(),
    **{column : 0 for column in counter_columns}
})

# Attach division / conference, then the start-of-season rating, which is
# exposed under the generic column name 'rating'.
with_divisions = blank_table.merge(right = div, how = 'left', on = 'team')
with_ratings = with_divisions.merge(right = season_rating_start, how = 'left', on = 'team')
standings = with_ratings.rename(columns = {'season_2022_start' : 'rating'})

In [31]:
# Replay every round == 999 game of the 2022 season in file order (presumably
# 999 marks the regular season - confirm against the data dictionary),
# building each club's record and moving its rating after every result.
K_FACTOR = 6             # multiplier on (result - expectation) in the rating shift
HOME_ICE_ADVANTAGE = 50  # rating points credited to the home side
MOV_SLOPE = 0.6686       # slope of the margin-of-victory multiplier
# Intercept of the margin-of-victory multiplier. FiveThirtyEight's published
# NHL Elo formula is 0.6686 * ln(goal margin) + 0.8048; the former 0.0848 had
# two digits transposed and shrank every one-goal game's rating shift ~10x.
MOV_INTERCEPT = 0.8048

for index, game in games_2022.iterrows():

    if game['round'] != 999:
        continue

    home_index = game['home'] == standings['team']
    away_index = game['away'] == standings['team']

    # Pre-game ratings as plain scalars (previously length-1 arrays).
    home_rating = standings.loc[home_index, 'rating'].values[0] + HOME_ICE_ADVANTAGE
    away_rating = standings.loc[away_index, 'rating'].values[0]

    games_2022.loc[index, 'home_rating'] = home_rating
    games_2022.loc[index, 'away_rating'] = away_rating

    # A game without a decisive score (tied, or goals missing) has no winner
    # and would feed log(0) / NaN into every later rating, so leave it out.
    goal_diff = game['home_goals'] - game['away_goals']
    if not abs(goal_diff) > 0:
        continue

    rating_diff_home = (away_rating - home_rating)
    rating_diff_away = (home_rating - away_rating)

    # Expected score of each side from the rating gap (logistic, scale 400).
    E_home = 1 / (1 + 10 ** ((rating_diff_home) / 400))
    E_away = 1 / (1 + 10 ** ((rating_diff_away) / 400))

    home_w = int(goal_diff > 0)
    away_w = 1 - home_w
    S_home, S_away = home_w, away_w

    # Damp the shift when the pre-game favourite wins, amplify it on an upset:
    # the denominator uses the winner's rating lead over the loser.
    winner_lead = rating_diff_away if home_w == 1 else rating_diff_home
    auto_correction = 2.05 / (winner_lead * 0.001 + 2.05)
    margin_of_victory = MOV_SLOPE * np.log(abs(goal_diff)) + MOV_INTERCEPT

    # Split the result by the period it ended in: 3 credits a regulation win,
    # 4 an overtime win plus an overtime loss, 5 only an overtime loss for the
    # loser (presumably a shootout - confirm).
    home_rw = away_rw = home_otw = away_otw = home_otl = away_otl = 0
    period = game['current_period']
    if period == 3:
        home_rw, away_rw = home_w, away_w
    elif period == 4:
        home_otw, away_otw = home_w, away_w
        home_otl, away_otl = away_w, home_w
    elif period == 5:
        home_otl, away_otl = away_w, home_w

    # NOTE(review): 'l' counts every loss, including those also counted in
    # 'otl' - confirm whether regulation-only losses were intended.
    standings.loc[home_index, 'w'] += home_w
    standings.loc[home_index, 'l'] += away_w
    standings.loc[home_index, 'otl'] += home_otl
    standings.loc[home_index, 'rw'] += home_rw
    standings.loc[home_index, 'otw'] += home_otw

    standings.loc[away_index, 'w'] += away_w
    standings.loc[away_index, 'l'] += home_w
    standings.loc[away_index, 'otl'] += away_otl
    standings.loc[away_index, 'rw'] += away_rw
    standings.loc[away_index, 'otw'] += away_otw

    standings.loc[home_index, 'rating'] += K_FACTOR * (S_home - E_home) * margin_of_victory * auto_correction
    standings.loc[away_index, 'rating'] += K_FACTOR * (S_away - E_away) * margin_of_victory * auto_correction

# Two points per win, one per overtime / shootout loss.
standings['pts'] = 2 * standings['w'] + standings['otl']

In [32]:
# Rank the league division by division (points, then the rw / otw / w
# tie-break columns) and keep the three best clubs of every division.
division_order = standings.sort_values(['div', 'pts', 'rw', 'otw', 'w'], ascending = False)
top_3 = division_order.groupby('div').head(3)

# One three-row frame per division, re-indexed 0..2 in seeding order.
metro, pacific, central, atlantic = (
    top_3.loc[top_3['div'] == division_name].reset_index(drop = True)
    for division_name in ('Metropolitan', 'Pacific', 'Central', 'Atlantic')
)

In [33]:
# Clubs outside the divisional top three, best record first; the first two of
# each conference become that conference's wild cards (row 0 is the better).
top_3_list = top_3['team'].to_list()
league_order = standings.sort_values(['pts', 'rw', 'otw', 'w'], ascending = False)
not_seeded = ~league_order['team'].isin(top_3_list)
wildcard = league_order.loc[not_seeded].groupby('con').head(2)

east_wildcard, west_wildcard = (
    wildcard.loc[wildcard['con'] == conference].reset_index(drop = True)
    for conference in ('Eastern', 'Western')
)

In [34]:
# Best club of each conference under the same ordering as the wild-card race.
league_ranked = standings.sort_values(['pts', 'rw', 'otw', 'w'], ascending = False)
top_con = league_ranked.groupby('con').head(1)
leader_conference = top_con['con']
top_east = top_con.loc[leader_conference == 'Eastern', 'team'].values[0]
top_west = top_con.loc[leader_conference == 'Western', 'team'].values[0]

In [35]:
# Fill the fourth slot of every division bracket with a wild card. The
# division whose winner leads the conference receives wild card row 1, the
# other division receives row 0.
# Re-running this cell appends a further row each time - run it once per
# fresh set of three-team division frames.
east_leader_in_metro = metro.loc[0, 'team'] == top_east
metro_pick, atlantic_pick = (1, 0) if east_leader_in_metro else (0, 1)
metro = pd.concat([metro, east_wildcard.iloc[[metro_pick]]]).reset_index(drop = True)
atlantic = pd.concat([atlantic, east_wildcard.iloc[[atlantic_pick]]]).reset_index(drop = True)

west_leader_in_central = central.loc[0, 'team'] == top_west
central_pick, pacific_pick = (1, 0) if west_leader_in_central else (0, 1)
central = pd.concat([central, west_wildcard.iloc[[central_pick]]]).reset_index(drop = True)
pacific = pd.concat([pacific, west_wildcard.iloc[[pacific_pick]]]).reset_index(drop = True)

In [36]:
# Per-team tally of how often each playoff stage is reached; all zero to start.
stage_columns = ['made_playoffs', 'round_2', 'round_3', 'finals', 'champion']
times_made = pd.DataFrame({
    'team' : games_2022['home'].unique(),
    **dict.fromkeys(stage_columns, 0)
})

In [37]:
def round_1(div):
    """Lay out both first-round best-of-seven schedules for one bracket.

    `div` needs a 'team' column and the index labels 0-3. Label 0 is paired
    with label 3 and label 1 with label 2. The first team of each pair hosts
    games 1, 2, 5 and 7; the second team hosts games 3, 4 and 6.

    Nothing is returned: the schedules are published as the module-level
    frames `upper` (0 vs 3) and `lower` (1 vs 2), each with seven rows and the
    columns 'home' and 'away'.
    """
    global upper
    global lower

    # True where the first-listed team of a pairing is the host.
    first_team_hosts = (True, True, False, False, True, False, True)

    def schedule(first, second):
        hosts = [first if at_home else second for at_home in first_team_hosts]
        visitors = [second if at_home else first for at_home in first_team_hosts]
        return pd.DataFrame({'home' : hosts, 'away' : visitors})

    upper = schedule(div.loc[0, 'team'], div.loc[3, 'team'])
    lower = schedule(div.loc[1, 'team'], div.loc[2, 'team'])

In [38]:
def round_2_fun(div):
    """Lay out one best-of-seven schedule between the two teams in `div`.

    `div` needs a 'team' column and the index labels 0 and 1. The team at
    label 0 hosts games 1, 2, 5 and 7; the team at label 1 hosts games 3, 4
    and 6.

    Nothing is returned: the seven-row schedule (columns 'home' and 'away') is
    published as the module-level frame `upper`.
    """
    global upper

    # Index label of the host for games 1 through 7.
    host_labels = (0, 0, 1, 1, 0, 1, 0)

    hosts = [div.loc[label, 'team'] for label in host_labels]
    visitors = [div.loc[1 - label, 'team'] for label in host_labels]
    upper = pd.DataFrame({'home' : hosts, 'away' : visitors})

In [39]:
def game_sim(home, away, rating_diff, DataFrames):    
    """Simulate one game: pick the winner from the rating gap, then draw a score.

    Args:
        home: name of the home team.
        away: name of the away team.
        rating_diff: rating gap fed to the logistic expectation; a more
            negative value means a stronger home side.
        DataFrames: frame with 'team' and 'games_won' columns; the winner's
            'games_won' is incremented in place.

    Reads the module-level goal statistics home_/away_ mean_for, std_for,
    mean_against and std_against (the names team_stats assigns).

    Returns None. The results are published as the globals E_home (home win
    probability), home_score and away_score.
    """
    global E_home
    global home_score
    global away_score

    def draw_goals(mean_for, std_for, mean_against, std_against):
        # Average of an attack draw and an opposing-defence draw, rounded to a
        # whole number. A Gaussian can dip below zero, which is not a possible
        # goal count, so the result is floored at 0.
        sampled = (random.gauss(mean_for, std_for) + random.gauss(mean_against, std_against)) / 2
        return max(0, round(sampled))

    randy = random.random()
    E_home = 1 / (1 + 10 ** ((rating_diff) / 400))
    home_score = 0
    away_score = 0

    # The winner is decided first; the score is then redrawn until it agrees.
    home_wins = randy <= E_home
    winner = home if home_wins else away
    DataFrames.loc[DataFrames['team'] == winner, 'games_won'] += 1

    while True:
        home_score = draw_goals(home_mean_for, home_std_for, away_mean_against, away_std_against)
        away_score = draw_goals(away_mean_for, away_std_for, home_mean_against, home_std_against)
        if home_wins and home_score > away_score:
            break
        if not home_wins and away_score > home_score:
            break

In [40]:
def rating_update(home, away, E_home, home_score, away_score):
    """Move both teams' ratings in the global `standings` after one game.

    Args:
        home: name of the home team.
        away: name of the away team.
        E_home: pre-game win expectation of the home team (0..1).
        home_score: goals scored by the home team.
        away_score: goals scored by the away team; must differ from
            home_score, because the margin enters a logarithm.

    Returns None; `standings['rating']` is updated in place for both teams.
    """
    E_away = 1 - E_home

    # Pre-game ratings are read here (home side with the same +50 home-ice
    # bonus rating_grab applies) so the function no longer depends on globals
    # that some earlier call happened to leave behind.
    home_rating = standings.loc[standings['team'] == home, 'rating'].values[0] + 50
    away_rating = standings.loc[standings['team'] == away, 'rating'].values[0]

    home_won = home_score > away_score
    S_home, S_away = (1, 0) if home_won else (0, 1)

    # Damp the shift when the favourite wins, amplify it on an upset: the
    # denominator uses the winner's rating lead over the loser.
    winner_lead = (home_rating - away_rating) if home_won else (away_rating - home_rating)
    auto_correction = 2.05 / (winner_lead * 0.001 + 2.05)

    # Intercept is 0.8048, as in FiveThirtyEight's published NHL Elo formula
    # 0.6686 * ln(goal margin) + 0.8048; the former 0.0848 had two digits
    # transposed and shrank every one-goal game's shift roughly tenfold.
    margin_of_victory = 0.6686 * np.log(abs(home_score - away_score)) + 0.8048

    standings.loc[standings['team'] == home, 'rating'] += 6 * (S_home - E_home) * margin_of_victory * auto_correction
    standings.loc[standings['team'] == away, 'rating'] += 6 * (S_away - E_away) * margin_of_victory * auto_correction

In [41]:
def rating_grab(home, away):  
    """Publish the current ratings of both teams and their gap as globals.

    Looks both teams up in the global `standings`; the home rating receives a
    +50 bonus. Sets home_rating, away_rating, rating_diff_home (away minus
    home) and rating_diff_home_playoffs (the same gap scaled by 1.25).
    Returns None.
    """
    global home_rating
    global away_rating
    global rating_diff_home_playoffs
    global rating_diff_home

    def current_rating(team):
        return standings.loc[standings['team'] == team, 'rating'].values[0]

    home_rating = current_rating(home) + 50
    away_rating = current_rating(away)
    rating_diff_home = (away_rating - home_rating)
    rating_diff_home_playoffs = rating_diff_home * 1.25
    

In [42]:
def team_stats(home, away):
    """Publish goal-scoring statistics of both teams as globals.

    For each team, every row of the global `games_2022` in which it appears
    (as host or as visitor) contributes its goals scored and goals conceded.
    The mean and the population standard deviation of both series are stored
    in home_mean_for / home_std_for / home_mean_against / home_std_against and
    in the matching away_* globals. Returns None.
    """
    global home_mean_for
    global home_std_for
    global home_mean_against
    global home_std_against
    global away_mean_for
    global away_std_for
    global away_mean_against
    global away_std_against

    def goal_profile(team):
        hosted = games_2022.loc[games_2022['home'] == team]
        visited = games_2022.loc[games_2022['away'] == team]
        scored = np.append(np.array(hosted['home_goals']), np.array(visited['away_goals']))
        conceded = np.append(np.array(hosted['away_goals']), np.array(visited['home_goals']))
        return scored.mean(), scored.std(), conceded.mean(), conceded.std()

    home_mean_for, home_std_for, home_mean_against, home_std_against = goal_profile(home)
    away_mean_for, away_std_for, away_mean_against, away_std_against = goal_profile(away)

In [43]:
def first_round(atlantic = None, metro = None, pacific = None, central = None):   
    """Play every first-round series of the four division brackets.

    Args:
        atlantic, metro, pacific, central: four-team bracket frames with a
            'team' column and index labels 0-3. Any argument left as None is
            taken from the module-level frame of the same name *at call time*.
            (The frames used to be bound as defaults when the function was
            defined, so re-running the seeding cell left this function
            mutating stale copies.)

    Each frame gets a 'games_won' column reset to 0 and updated in place. For
    every bracket the two schedules from round_1 are played game by game until
    one side of a pairing has four wins; every game also moves the ratings in
    `standings` through rating_update. Returns None.
    """
    current = globals()
    if atlantic is None:
        atlantic = current['atlantic']
    if metro is None:
        metro = current['metro']
    if pacific is None:
        pacific = current['pacific']
    if central is None:
        central = current['central']

    brackets = [atlantic, metro, central, pacific]
    for bracket in brackets:
        bracket['games_won'] = 0

    for div in brackets:
        round_1(div)  # publishes the globals `upper` and `lower`
        for df in [upper, lower]:
            for home, away in zip(df['home'], df['away']):
                home_wins = div.loc[div['team'] == home, 'games_won'].values[0]
                away_wins = div.loc[div['team'] == away, 'games_won'].values[0]

                # Series decided: the remaining scheduled games are not played.
                if home_wins >= 4 or away_wins >= 4:
                    break

                # Each helper publishes globals that the next call consumes.
                rating_grab(home, away)
                team_stats(home, away)
                game_sim(home, away, rating_diff_home_playoffs, div)
                rating_update(home, away, E_home, home_score, away_score)

In [44]:
def second_round(atlantic_round_2, metro_round_2, central_round_2, pacific_round_2):  
    """Play the second-round series of all four division brackets.

    Every argument is a two-team frame ('team' column, index labels 0 and 1).
    Each gets a 'games_won' column reset to 0 and updated in place while its
    best-of-seven from round_2_fun is played until one team has four wins.
    Every game also moves the ratings in `standings`. Returns None.
    """
    brackets = [atlantic_round_2, metro_round_2, central_round_2, pacific_round_2]
    for bracket in brackets:
        bracket['games_won'] = 0

    for div in brackets:

        def wins_of(team):
            return div.loc[div['team'] == team, 'games_won'].values[0]

        round_2_fun(div)  # publishes the global `upper`
        for home, away in zip(upper['home'], upper['away']):
            # Stop as soon as either side already holds four wins.
            if not (wins_of(home) < 4 and wins_of(away) < 4):
                break

            rating_grab(home, away)
            team_stats(home, away)
            game_sim(home, away, rating_diff_home_playoffs, div)
            rating_update(home, away, E_home, home_score, away_score)

In [45]:
def con_finals(east, west):
    """Play both conference finals.

    `east` and `west` are two-team frames ('team' column, index labels 0 and
    1). Each gets a 'games_won' column reset to 0 and updated in place while
    its best-of-seven from round_2_fun is played until one team has four wins.
    Every game also moves the ratings in `standings`. Returns None.
    """
    conferences = [east, west]
    for conference in conferences:
        conference['games_won'] = 0

    for con in conferences:

        def wins_of(team):
            return con.loc[con['team'] == team, 'games_won'].values[0]

        round_2_fun(con)  # publishes the global `upper`
        for home, away in zip(upper['home'], upper['away']):
            # Stop as soon as either side already holds four wins.
            if not (wins_of(home) < 4 and wins_of(away) < 4):
                break

            rating_grab(home, away)
            team_stats(home, away)
            game_sim(home, away, rating_diff_home_playoffs, con)
            rating_update(home, away, E_home, home_score, away_score)

In [46]:
def stanley_cup(finals):
    """Play the final series between the two teams in `finals`.

    `finals` is a two-team frame ('team' column, index labels 0 and 1). It
    gets a 'games_won' column reset to 0 and updated in place while the
    best-of-seven from round_2_fun is played until one team has four wins.
    Every game also moves the ratings in `standings`. Returns None.
    """
    finals['games_won'] = 0

    def wins_of(team):
        return finals.loc[finals['team'] == team, 'games_won'].values[0]

    round_2_fun(finals)  # publishes the global `upper`
    for home, away in zip(upper['home'], upper['away']):
        # Stop as soon as either side already holds four wins.
        if not (wins_of(home) < 4 and wins_of(away) < 4):
            break

        rating_grab(home, away)
        team_stats(home, away)
        game_sim(home, away, rating_diff_home_playoffs, finals)
        rating_update(home, away, E_home, home_score, away_score)

In [47]:
# Number of complete playoff brackets to simulate. No random seed is set in
# this cell, so the tallies differ from run to run.
N_SIMULATIONS = 1000

# Fresh tally so that re-running this cell does not add to earlier counts.
times_made = pd.DataFrame({
    'team' : games_2022['home'].unique(),
    'made_playoffs' : 0,
    'round_2' : 0,
    'round_3' : 0,
    'finals' : 0,
    'champion' : 0
})


def _series_winners(bracket):
    """Rows of `bracket` whose team reached four wins, re-indexed from 0."""
    return bracket.loc[bracket['games_won'] == 4].reset_index(drop = True)


def _by_record(frames):
    """Stack `frames` and order by season record, so row 0 hosts games 1, 2, 5, 7."""
    stacked = pd.concat(frames)
    return stacked.sort_values(['pts', 'rw', 'otw', 'w'], ascending = False).reset_index(drop = True)


def _tally(teams, stage):
    """Add one appearance in column `stage` of `times_made` for every team listed."""
    times_made.loc[times_made['team'].isin(teams), stage] += 1


for i in range(N_SIMULATIONS):
    # Every bracket starts from the end-of-season ratings; the games of the
    # previous iteration have moved standings['rating'].
    standings = standings.drop(columns = ['rating'])
    standings = standings.merge(
        right = season_rating_end,
        how = 'left',
        on = 'team'
    ).rename(columns = {'season_2022_end' : 'rating'})

    made_playoffs = atlantic['team'].to_list() + central['team'].to_list() + metro['team'].to_list() + pacific['team'].to_list()
    _tally(made_playoffs, 'made_playoffs')

    first_round()

    # Two series winners per division, still in bracket order.
    atlantic_round_2 = _series_winners(atlantic)
    metro_round_2 = _series_winners(metro)
    central_round_2 = _series_winners(central)
    pacific_round_2 = _series_winners(pacific)

    round_2 = atlantic_round_2['team'].to_list() + metro_round_2['team'].to_list() + central_round_2['team'].to_list() + pacific_round_2['team'].to_list()
    _tally(round_2, 'round_2')

    second_round(atlantic_round_2, metro_round_2, central_round_2, pacific_round_2)

    atlantic_round_3 = _series_winners(atlantic_round_2)
    metro_round_3 = _series_winners(metro_round_2)
    central_round_3 = _series_winners(central_round_2)
    pacific_round_3 = _series_winners(pacific_round_2)

    east = _by_record([atlantic_round_3, metro_round_3])
    west = _by_record([central_round_3, pacific_round_3])

    round_3 = east['team'].to_list() + west['team'].to_list()
    _tally(round_3, 'round_3')

    con_finals(east, west)

    east_winner = east.loc[east['games_won'] == 4]
    west_winner = west.loc[west['games_won'] == 4]

    finals = _by_record([east_winner, west_winner])

    final_round = finals['team'].to_list()
    _tally(final_round, 'finals')

    stanley_cup(finals)

    champ = finals.loc[finals['games_won'] == 4, 'team'].values[0]
    times_made.loc[times_made['team'] == champ, 'champion'] += 1

In [48]:
# Show, per team, how many simulated brackets reached each playoff stage.
times_made

Unnamed: 0,team,made_playoffs,round_2,round_3,finals,champion
0,Nashville Predators,0,0,0,0,0
1,San Jose Sharks,0,0,0,0,0
2,New York Rangers,1000,630,347,143,78
3,Montréal Canadiens,0,0,0,0,0
4,Washington Capitals,0,0,0,0,0
5,Los Angeles Kings,1000,338,159,64,23
6,Carolina Hurricanes,1000,636,386,166,92
7,Colorado Avalanche,1000,701,404,222,100
8,Anaheim Ducks,0,0,0,0,0
9,Edmonton Oilers,1000,662,377,218,94


In [51]:
# The round == 999 games of 2022, trimmed to the identifying columns;
# reset_index() moves the original row labels into a column.
in_round_999 = games_2022['round'] == 999
games_2022.loc[in_round_999, ['gameID', 'date', 'home', 'away']].reset_index()

Unnamed: 0,gameID,date,home,away
26860,2022020001,2022-10-07,Nashville Predators,San Jose Sharks
26861,2022020002,2022-10-08,San Jose Sharks,Nashville Predators
26862,2022020003,2022-10-11,New York Rangers,Tampa Bay Lightning
26863,2022020007,2022-10-12,Montréal Canadiens,Toronto Maple Leafs
26864,2022020005,2022-10-12,Washington Capitals,Boston Bruins
...,...,...,...,...
28167,2022021308,2023-04-14,Edmonton Oilers,San Jose Sharks
28168,2022021309,2023-04-14,Colorado Avalanche,Winnipeg Jets
28169,2022021310,2023-04-14,Anaheim Ducks,Los Angeles Kings
28170,2022021311,2023-04-14,Arizona Coyotes,Vancouver Canucks
