In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import copy
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the game-result tables used by the rest of the notebook.
# NOTE(review): both paths are absolute and machine-specific; the notebook will not
# run on another machine without editing them.
scores_df = pd.read_csv('C:/Users/Gabriel/Documents/MIE368/Data/season_data.csv')
# NOTE(review): playoff_scores_df is not referenced again in the visible cells.
playoff_scores_df = pd.read_csv('C:/Users/Gabriel/Documents/MIE368/Data/playoff_data.csv')

In [3]:
# Preview the first rows of the season table (teams, goals, OT flag, season label).
scores_df.head()

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season
0,2000-10-04,Colorado Avalanche,2.0,Dallas Stars,2.0,1,2001
1,2000-10-05,Ottawa Senators,4.0,Boston Bruins,4.0,1,2001
2,2000-10-05,Chicago Blackhawks,2.0,Buffalo Sabres,4.0,0,2001
3,2000-10-05,Detroit Red Wings,4.0,Calgary Flames,3.0,0,2001
4,2000-10-05,Vancouver Canucks,3.0,Philadelphia Flyers,6.0,0,2001


## Game Result Elo Adjustment

In [4]:
# Games whose season label is 2001 (presumably the first season in the data — confirm).
first_yr = scores_df[scores_df['Season'] == 2001]
# Distinct home-team names in that season.
# NOTE(review): Teams is not referenced again in the visible cells.
Teams = first_yr['Home'].unique().tolist()

# Historical franchise name -> name used as the rating key. The update and forecast
# functions pass every team name through this mapping before touching the ratings.
team_changes = {
    'Atlanta Thrashers': 'Winnipeg Jets',
    'Phoenix Coyotes': 'Arizona Coyotes',
    'Mighty Ducks of Anaheim': 'Anaheim Ducks'
    # Add more mappings as needed
}

# Base Elo step size; scaled by the margin-of-victory and autocorrelation factors
# wherever ratings are updated.
K = 6

In [5]:
def calculate_prob_winning(home_team, away_team, elo_ratings, home_advantage=26):
    """Return the pre-game win probabilities implied by the current Elo ratings.

    Parameters
    ----------
    home_team, away_team : hashable
        Keys into ``elo_ratings``.
    elo_ratings : mapping
        Current rating of each team.
    home_advantage : number, default 26
        Elo points added to the home side's rating difference for home ice.
        The default reproduces the previously hard-coded value.

    Returns
    -------
    tuple
        ``(prob_win_home, prob_win_away, Elo_diff_home)`` where
        ``Elo_diff_home`` already includes ``home_advantage`` and the two
        probabilities sum to 1.

    Raises
    ------
    KeyError
        If either team is missing from ``elo_ratings``.
    """
    Elo_diff_home = elo_ratings[home_team] - elo_ratings[away_team] + home_advantage

    # Standard logistic Elo curve on a 400-point scale.
    prob_win_home = 1 / (10 ** (-1 * Elo_diff_home / 400) + 1)
    prob_win_away = 1 - prob_win_home

    return prob_win_home, prob_win_away, Elo_diff_home

In [6]:
def margin_of_victory(home_goals, away_goals):
    """Return the Elo margin-of-victory multiplier for a final score.

    The multiplier is ``0.6686 * ln(gap) + 0.8048`` where ``gap`` is the
    absolute goal difference, so a one-goal game yields 0.8048 and larger
    margins grow logarithmically. A level score evaluates ``log(0)``; the
    callers in this notebook only pass decided games.
    """
    goal_gap = abs(home_goals - away_goals)
    return 0.8048 + 0.6686 * np.log(goal_gap)

In [7]:
def update_game_elo(home_team, away_team, home_goals, away_goals, season, df, idx, elo_ratings):
    """Record the pre-game Elo features of one played game, then apply its result.

    Uses the notebook-level ``team_changes`` mapping and ``K`` step size.

    Parameters
    ----------
    home_team, away_team : str
        Team names as they appear in the data; renamed franchises are mapped
        to their rating key through ``team_changes``.
    home_goals, away_goals : number
        Final score of the game.
    season : number
        Season label, used only to pick the seed rating of an unseen team.
    df : pandas.DataFrame
        Frame that receives the pre-game columns at row label ``idx``
        (written in place).
    idx : hashable
        Row label of the game in ``df``.
    elo_ratings : dict
        Current ratings, updated in place.

    Returns
    -------
    tuple
        ``(df, elo_ratings)`` — the same objects that were passed in.
    """
    # Renamed/relocated franchises share one rating under their mapped name.
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    # A team seen for the first time is seeded; the seed depends on the season.
    for club in (home_team, away_team):
        if club not in elo_ratings:
            elo_ratings[club] = 1490 if season >= 2005 else 1380

    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Pre-game features are stored for every game, including ones that end level.
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob

    home_won = home_goals > away_goals
    away_won = home_goals < away_goals
    if not (home_won or away_won):
        # No winner (level score, or scores that do not compare): ratings stay as they are.
        return df, elo_ratings

    if home_won:
        home_win = 1
        winner_elo_diff = Elo_diff_home
    else:
        home_win = 0
        winner_elo_diff = -1 * Elo_diff_home

    # How far the actual result was from the pre-game expectation, home side's view.
    pre_g_fav_h = home_win - home_prob

    # Autocorrelation correction: damp the shift when the pre-game favourite wins.
    auto_corr = 2.05 / (winner_elo_diff * 0.001 + 2.05)
    mov_multiplier = auto_corr * margin_of_victory(home_goals, away_goals)

    # Zero-sum update: whatever the home side gains, the visitor loses.
    elo_shift_h = K * mov_multiplier * pre_g_fav_h
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    return df, elo_ratings

## End of Season Elo Adjustments

In [8]:
def elo_adjuster(df):
    """Run the Elo model over every game in ``df``, in row order.

    Each row is passed to ``update_game_elo``, which writes the pre-game Elo
    columns into ``df`` in place and updates the ratings. Whenever the
    ``'Season'`` value of a row differs from the season being processed, the
    ratings reached so far are stored as that season's final ratings and every
    team starts the new season at
    ``0.7 * final rating + 0.3 * league average``, rounded to 2 decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``'Home'``, ``'Visitor'``, ``'G_Home'``, ``'G_Vis'``
        and ``'Season'``. Rows are assumed to be grouped by season.

    Returns
    -------
    tuple
        ``(df, elo_ratings, final_elo_ratings)``: the same frame, the ratings
        after the last row, and a dict mapping each season label to a copy of
        the ratings at the end of that season. For an empty frame both
        dictionaries are empty.
    """
    final_elo_ratings = {}
    elo_ratings = {}
    season_weight = 0.7
    average_weight = 0.3
    current_season = None

    for idx, row in df.iterrows():
        if current_season is None:
            current_season = row['Season']

        if row['Season'] != current_season:
            # Season boundary: freeze the finished season, then regress every
            # team toward the league mean to start the new one.
            finished = dict(elo_ratings)
            final_elo_ratings[current_season] = finished

            # The league average is the same for every team, so compute it once.
            league_avg_elo = sum(finished.values()) / len(finished) if finished else 0.0
            elo_ratings = {
                team: round((season_weight * prev_season_elo) + (average_weight * league_avg_elo), 2)
                for team, prev_season_elo in finished.items()
            }

            current_season = row['Season']

        # Update Elo ratings for the game (also writes the Elo columns into df).
        df, elo_ratings = update_game_elo(row['Home'], row['Visitor'], row['G_Home'], row['G_Vis'], row['Season'], df, idx, elo_ratings)

    # Store the last season too — but only if at least one game was processed,
    # otherwise a meaningless ``None`` season would be recorded.
    if current_season is not None:
        final_elo_ratings[current_season] = dict(elo_ratings)

    return df, elo_ratings, final_elo_ratings

In [9]:
# Run the Elo pass over every game in scores_df.
# The Elo columns are written into scores_df in place and that same frame is returned,
# so `seasons` and `scores_df` refer to one object after this cell.
seasons, elo, final_season_elos = elo_adjuster(scores_df)

In [10]:
# Ratings as they stand after the last game in scores_df.
elo

{'Dallas Stars': 1437.4512451647386,
 'Colorado Avalanche': 1466.4722390598508,
 'Boston Bruins': 1528.3025271975873,
 'Ottawa Senators': 1365.4314407685683,
 'Buffalo Sabres': 1373.1191301885838,
 'Chicago Blackhawks': 1280.5007293655938,
 'Calgary Flames': 1402.0965186864369,
 'Detroit Red Wings': 1329.7269434571708,
 'Philadelphia Flyers': 1308.0747463014084,
 'Vancouver Canucks': 1372.6175557499603,
 'Arizona Coyotes': 1292.1821829789615,
 'St. Louis Blues': 1370.3650324863324,
 'Edmonton Oilers': 1467.4906608251094,
 'Florida Panthers': 1424.5079739719674,
 'Anaheim Ducks': 1256.9841444257206,
 'Minnesota Wild': 1430.6449109129542,
 'New Jersey Devils': 1419.5270424506105,
 'Montreal Canadiens': 1296.7275973585338,
 'Pittsburgh Penguins': 1391.2525506490285,
 'Nashville Predators': 1392.2066766854175,
 'San Jose Sharks': 1273.6736705710825,
 'Tampa Bay Lightning': 1425.557873532403,
 'New York Islanders': 1399.7865979546973,
 'Washington Capitals': 1361.6440785673105,
 'Los Angele

In [11]:
# Ratings stored at the end of the season labelled 2021 (before any carry-over regression).
final_season_elos[2021]

{'Dallas Stars': 1377.865082081871,
 'Colorado Avalanche': 1458.5768215688167,
 'Boston Bruins': 1446.9891754457233,
 'Ottawa Senators': 1333.7246474104593,
 'Buffalo Sabres': 1302.9234867968662,
 'Chicago Blackhawks': 1351.4931033912126,
 'Calgary Flames': 1377.905943494975,
 'Detroit Red Wings': 1297.8635540554503,
 'Philadelphia Flyers': 1365.772278330636,
 'Vancouver Canucks': 1346.3137697467182,
 'Arizona Coyotes': 1356.7628583430082,
 'St. Louis Blues': 1403.0420677248223,
 'Edmonton Oilers': 1412.775969292768,
 'Florida Panthers': 1431.1824199731982,
 'Anaheim Ducks': 1306.6566238964485,
 'Minnesota Wild': 1416.4469260050241,
 'New Jersey Devils': 1314.1258478456364,
 'Montreal Canadiens': 1351.7541495789917,
 'Pittsburgh Penguins': 1444.4591438280618,
 'Nashville Predators': 1402.5617720419064,
 'San Jose Sharks': 1320.3737238225892,
 'Tampa Bay Lightning': 1441.8399164439033,
 'New York Islanders': 1411.230902147942,
 'Washington Capitals': 1437.2421161811437,
 'Los Angeles Ki

In [12]:
def forecast_game_elo(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):
    """Simulate one game from the current Elo ratings and apply the simulated result.

    Uses the notebook-level ``team_changes`` mapping and ``K`` step size.

    Steps: map team names, seed unseen teams, compute pre-game probabilities,
    draw the winner, sample Poisson scores until they agree with the drawn
    winner (and with the overtime flag), then update ratings, the win tally
    and the row ``idx`` of ``df``.

    Parameters
    ----------
    home_team, away_team : str
        Team names as they appear in the schedule.
    season : number
        Season label, used only to pick the seed rating of an unseen team.
    df : pandas.DataFrame
        Schedule frame; the row labelled ``idx`` is overwritten in place with
        the pre-game Elo columns, ``'OT Ind'``, ``'G_Home'`` and ``'G_Vis'``.
    idx : hashable
        Row label of the game in ``df``.
    elo_ratings : dict
        Current ratings, updated in place.
    simulation_wins : dict
        Win tally, updated in place. The winner is counted under its mapped
        name; a name not yet present starts from zero.

    Returns
    -------
    tuple
        ``(df, elo_ratings, simulation_wins)`` — the same objects passed in.
    """
    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1490 if season >= 2005 else 1380
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1490 if season >= 2005 else 1380

    # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob

    # The winner is drawn first; the score is then sampled to be consistent with it.
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Logistic estimate of the overtime probability from the absolute Elo gap.
    # NOTE(review): this probability is compared against 0.5 rather than sampled,
    # so the overtime flag is a deterministic function of the Elo gap — confirm intent.
    OT_prob = 1 / (1 + np.exp(-1 * (0.092596 - 0.001466 * abs(Elo_diff_home))))
    goes_to_ot = OT_prob >= 0.5

    # Expected goals for each side, linear in the (home-adjusted) Elo difference.
    score_home = 2.845905 + 0.003584 * Elo_diff_home
    score_away = 2.845905 + 0.003584 * (-1) * Elo_diff_home

    # A Poisson rate cannot be negative. Poisson draws are already >= 0, so no
    # further clamping of the sampled scores is needed.
    rate_home = max(0, score_home)
    rate_away = max(0, score_away)

    # Rejection sampling: redraw until the score matches the drawn winner.
    # Overtime games must be decided by exactly one goal.
    while True:
        poisson_score_home = np.random.poisson(rate_home)
        poisson_score_away = np.random.poisson(rate_away)

        home_goal_diff = poisson_score_home - poisson_score_away
        winner_margin = home_goal_diff if home_win == 1 else -home_goal_diff

        if goes_to_ot:
            accepted = winner_margin == 1
        else:
            accepted = winner_margin > 0
        if accepted:
            break

    if home_win == 1:
        winner = home_team
        winner_elo_diff = Elo_diff_home
    else:
        winner = away_team
        winner_elo_diff = -1 * Elo_diff_home

    # Count the win under the mapped name. ``.get`` keeps this from raising
    # KeyError when the tally was keyed by raw schedule names and the winner
    # is a renamed franchise.
    simulation_wins[winner] = simulation_wins.get(winner, 0) + 1

    pre_g_fav_h = home_win - home_prob
    auto_corr = 2.05 / (winner_elo_diff * 0.001 + 2.05)

    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result (zero-sum between the two teams)
    elo_shift_h = K * mov_multiplier * pre_g_fav_h

    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    # Store the simulated outcome on the schedule row.
    df.at[idx, 'OT Ind'] = 0 if OT_prob < 0.5 else 1
    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away

    return df, elo_ratings, simulation_wins

In [13]:
def season_forecaster(df, starting_elo):
    """Simulate every game in ``df`` in row order with ``forecast_game_elo``.

    The win tally starts at zero for each distinct name in ``df['Home']``.
    ``df`` and ``starting_elo`` are modified in place by the per-game forecast.

    Returns
    -------
    tuple
        ``(df, starting_elo, simulation_wins)`` after the last game.
    """
    simulation_wins = dict.fromkeys(df['Home'].unique(), 0)
    ratings = starting_elo

    for idx, game in df.iterrows():
        df, ratings, simulation_wins = forecast_game_elo(
            game['Home'], game['Visitor'], game['Season'], df, idx, ratings, simulation_wins
        )

    return df, ratings, simulation_wins

In [12]:
def forecast_game_elo2(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):
    """Simulate one game from the current Elo ratings, without an overtime model.

    Uses the notebook-level ``team_changes`` mapping and ``K`` step size.

    Steps: map team names, seed unseen teams, compute pre-game probabilities,
    draw the winner, sample Poisson scores until they agree with the drawn
    winner, then update ratings, the win tally and the row ``idx`` of ``df``.
    Unlike ``forecast_game_elo`` this variant never writes ``'OT Ind'``.

    Parameters
    ----------
    home_team, away_team : str
        Team names as they appear in the schedule.
    season : number
        Season label, used only to pick the seed rating of an unseen team.
    df : pandas.DataFrame
        Schedule frame; the row labelled ``idx`` is overwritten in place with
        the pre-game Elo columns, ``'G_Home'`` and ``'G_Vis'``.
    idx : hashable
        Row label of the game in ``df``.
    elo_ratings : dict
        Current ratings, updated in place.
    simulation_wins : dict
        Win tally, updated in place. The winner is counted under its mapped
        name; a name not yet present starts from zero.

    Returns
    -------
    tuple
        ``(df, elo_ratings, simulation_wins)`` — the same objects passed in.
    """
    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1490 if season >= 2005 else 1380
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1490 if season >= 2005 else 1380

    # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob

    # The winner is drawn first; the score is then sampled to be consistent with it.
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Logistic estimate of the overtime probability from the absolute Elo gap.
    # Kept for parity with forecast_game_elo; it is not used further in this variant.
    OT_prob = 1 / (1 + np.exp(-1 * (0.092596 - 0.001466 * abs(Elo_diff_home))))

    # Expected goals for each side, linear in the (home-adjusted) Elo difference.
    score_home = 2.845905 + 0.003584 * Elo_diff_home
    score_away = 2.845905 + 0.003584 * (-1) * Elo_diff_home

    # A Poisson rate cannot be negative. Poisson draws are already >= 0, so no
    # further clamping of the sampled scores is needed.
    rate_home = max(0, score_home)
    # Fix: the visitor's goals are drawn from the visitor's own expected score.
    # Previously both sides were sampled from score_home, leaving score_away unused.
    rate_away = max(0, score_away)

    # Rejection sampling: redraw until the score matches the drawn winner.
    while True:
        poisson_score_home = np.random.poisson(rate_home)
        poisson_score_away = np.random.poisson(rate_away)

        home_goal_diff = poisson_score_home - poisson_score_away
        winner_margin = home_goal_diff if home_win == 1 else -home_goal_diff
        if winner_margin > 0:
            break

    if home_win == 1:
        winner = home_team
        winner_elo_diff = Elo_diff_home
    else:
        winner = away_team
        winner_elo_diff = -1 * Elo_diff_home

    # Count the win under the mapped name. ``.get`` keeps this from raising
    # KeyError when the tally was keyed by raw schedule names and the winner
    # is a renamed franchise.
    simulation_wins[winner] = simulation_wins.get(winner, 0) + 1

    pre_g_fav_h = home_win - home_prob
    auto_corr = 2.05 / (winner_elo_diff * 0.001 + 2.05)

    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result (zero-sum between the two teams)
    elo_shift_h = K * mov_multiplier * pre_g_fav_h

    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    # Store the simulated score on the schedule row.
    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away

    return df, elo_ratings, simulation_wins

In [13]:
def season_forecaster2(df, starting_elo):
    """Simulate every game in ``df`` in row order with ``forecast_game_elo2``.

    The win tally starts at zero for each distinct name in ``df['Home']``.
    ``df`` and ``starting_elo`` are modified in place by the per-game forecast.

    Returns
    -------
    tuple
        ``(df, starting_elo, simulation_wins)`` after the last game.
    """
    simulation_wins = dict.fromkeys(df['Home'].unique(), 0)
    ratings = starting_elo

    for idx, game in df.iterrows():
        df, ratings, simulation_wins = forecast_game_elo2(
            game['Home'], game['Visitor'], game['Season'], df, idx, ratings, simulation_wins
        )

    return df, ratings, simulation_wins

In [14]:
# Monte Carlo forecast of the season labelled 2022: the schedule is replayed
# num_simulations times, each run starting from final_season_elos[2021].
df2 = copy.deepcopy(scores_df[scores_df['Season'] == 2022])
season_2022 = df2.copy()
num_simulations = 100
# One slot per simulation run, keyed by home-team name as it appears in the schedule.
team_wins = {team: np.zeros(num_simulations) for team in df2['Home'].unique()}

# NOTE(review): no random seed is set in the visible cells, so the printed numbers
# differ from run to run.
for simulation in range(num_simulations):
    # NOTE(review): this rebinds the notebook-level `elo` returned by elo_adjuster.
    # NOTE(review): final_season_elos[2021] holds end-of-season ratings; the 0.7/0.3
    # carry-over that elo_adjuster applies at a season change is not applied here — confirm intent.
    elo = copy.deepcopy(final_season_elos[2021])  # Creating a deep copy of final_season_elos[2021]
    elo_end = elo.copy()  # Initial copy for modifications

    # season_2022 is reused across runs; each run overwrites its score and Elo columns,
    # so after the loop it holds only the last simulated season.
    season_2022, elo_end, simulation_wins = season_forecaster2(season_2022, elo_end)

    # Record this run's win totals in the per-team arrays.
    for team, wins in simulation_wins.items():
        team_wins[team][simulation] = wins


# Mean and spread of simulated wins per team.
for team, wins in team_wins.items():
    # NOTE(review): the name mean_wins is reused for a DataFrame in a later cell.
    mean_wins = np.mean(wins)
    std_deviation = np.std(wins)
    print(f"{team}: Mean Wins = {mean_wins}, Standard Deviation = {std_deviation}")

Tampa Bay Lightning: Mean Wins = 47.33, Standard Deviation = 5.66578326447456
Vegas Golden Knights: Mean Wins = 48.82, Standard Deviation = 6.740000000000001
Anaheim Ducks: Mean Wins = 31.69, Standard Deviation = 5.627957000546468
Colorado Avalanche: Mean Wins = 48.28, Standard Deviation = 6.842631072913401
Edmonton Oilers: Mean Wins = 43.73, Standard Deviation = 6.418496708731726
Toronto Maple Leafs: Mean Wins = 46.18, Standard Deviation = 6.717707942445847
Washington Capitals: Mean Wins = 46.89, Standard Deviation = 6.760022189312694
Buffalo Sabres: Mean Wins = 31.69, Standard Deviation = 6.082261092718727
Carolina Hurricanes: Mean Wins = 47.83, Standard Deviation = 5.956601379981709
Columbus Blue Jackets: Mean Wins = 34.21, Standard Deviation = 6.793077358605598
Detroit Red Wings: Mean Wins = 30.8, Standard Deviation = 6.4404968752418466
Florida Panthers: Mean Wins = 46.92, Standard Deviation = 6.421339424138861
Los Angeles Kings: Mean Wins = 33.65, Standard Deviation = 6.6051116568

In [17]:
# Share of rows in season_2022 whose 'OT Ind' flag equals 1.
# NOTE(review): forecast_game_elo2 (used by the simulation cell) never writes 'OT Ind',
# so this looks like the overtime rate of the original data rather than a simulated one — confirm.
len(season_2022[season_2022['OT Ind'] == 1])/len(season_2022)

0.21951219512195122

In [15]:
# Mean simulated win total per team: one row per team, index named 'Team', column 'Wins'.
# Built in a single step from a dict rather than by enlarging an empty frame one row at a
# time, so the column gets an explicit float dtype and no repeated reallocation occurs.
mean_wins = (
    pd.Series({team: np.mean(wins) for team, wins in team_wins.items()}, name='Wins', dtype=float)
    .rename_axis('Team')
    .to_frame()
)



In [16]:
# Rank teams by mean simulated wins, best first.
# NOTE(review): a team missing from the starting ratings is seeded at 1490 by the forecast
# function, which is above every 2021 rating listed below; that would explain an expansion
# team ranking first — confirm this is intended.
mean_wins.sort_values(by='Wins', ascending=False)

Unnamed: 0_level_0,Wins
Team,Unnamed: 1_level_1
Seattle Kraken,51.94
Boston Bruins,49.14
Vegas Golden Knights,48.82
Pittsburgh Penguins,48.71
Colorado Avalanche,48.28
Carolina Hurricanes,47.83
Tampa Bay Lightning,47.33
Florida Panthers,46.92
Washington Capitals,46.89
Toronto Maple Leafs,46.18


In [20]:
# Ratings at the end of the last simulation run (elo_end), one row per team, highest first.
# Built directly from the dict instead of row-by-row enlargement; this also avoids a loop
# variable named `elo`, which would overwrite the notebook-level ratings dict of that name.
elo_df = (
    pd.Series(elo_end, name='Elo', dtype=float)
    .rename_axis('Team')
    .to_frame()
)

elo_df.sort_values(by='Elo', ascending=False)

Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
San Jose Sharks,1494.677409
Winnipeg Jets,1474.414054
Dallas Stars,1470.819817
Detroit Red Wings,1452.988481
Calgary Flames,1443.563063
Anaheim Ducks,1427.18891
Minnesota Wild,1421.617582
Vegas Golden Knights,1418.466741
Montreal Canadiens,1415.531566
New Jersey Devils,1412.387961


In [17]:
# Ratings stored at the end of the season labelled 2021, one row per team, highest first.
# Built directly from the dict instead of row-by-row enlargement; this also avoids a loop
# variable named `elo`, which would overwrite the notebook-level ratings dict of that name.
# NOTE(review): this rebinds elo_df, which another cell also assigns from elo_end.
elo_df = (
    pd.Series(final_season_elos[2021], name='Elo', dtype=float)
    .rename_axis('Team')
    .to_frame()
)

elo_df.sort_values(by='Elo', ascending=False)

Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Vegas Golden Knights,1465.070317
Colorado Avalanche,1458.576822
Boston Bruins,1446.989175
Pittsburgh Penguins,1444.459144
Tampa Bay Lightning,1441.839916
Carolina Hurricanes,1441.017845
Washington Capitals,1437.242116
Florida Panthers,1431.18242
Toronto Maple Leafs,1426.931101
Minnesota Wild,1416.446926


In [22]:
# Rows of scores_df for the season labelled 2022, including the Elo columns that
# elo_adjuster wrote, for comparison with the simulated season.
# NOTE(review): this rebinds df2, which the simulation cell created as a deep copy.
df2 = scores_df[scores_df['Season'] == 2022]
df2

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season,EloDiffHome,EloDiffAway,WinProbHome,WinProbVis
23662,2021-10-12,Pittsburgh Penguins,6.0,Tampa Bay Lightning,2.0,0,2022,24.160000,-24.160000,0.534713,0.465287
23663,2021-10-12,Seattle Kraken,3.0,Vegas Golden Knights,4.0,0,2022,-23.390000,23.390000,0.466390,0.533610
23664,2021-10-13,Winnipeg Jets,1.0,Anaheim Ducks,4.0,0,2022,-36.320000,36.320000,0.447921,0.552079
23665,2021-10-13,Chicago Blackhawks,2.0,Colorado Avalanche,4.0,0,2022,100.960000,-100.960000,0.641337,0.358663
23666,2021-10-13,Vancouver Canucks,2.0,Edmonton Oilers,3.0,1,2022,72.530000,-72.530000,0.602889,0.397111
...,...,...,...,...,...,...,...,...,...,...,...
24969,2022-04-29,San Jose Sharks,0.0,Seattle Kraken,3.0,0,2022,66.672113,-66.672113,0.594788,0.405212
24970,2022-04-29,Vegas Golden Knights,7.0,St. Louis Blues,4.0,0,2022,66.397732,-66.397732,0.594407,0.405593
24971,2022-04-29,Boston Bruins,2.0,Toronto Maple Leafs,5.0,0,2022,30.923598,-30.923598,0.544385,0.455615
24972,2022-04-29,Calgary Flames,1.0,Winnipeg Jets,3.0,0,2022,-47.081958,47.081958,0.432655,0.567345


In [23]:
# Schedule frame as left by the last simulation run (scores and Elo columns overwritten).
season_2022

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season,EloDiffHome,EloDiffAway,WinProbHome,WinProbVis
23662,2021-10-12,Pittsburgh Penguins,2.0,Tampa Bay Lightning,4.0,0,2022,52.256744,-52.256744,0.574641,0.425359
23663,2021-10-12,Seattle Kraken,2.0,Vegas Golden Knights,4.0,0,2022,73.772344,-73.772344,0.604600,0.395400
23664,2021-10-13,Winnipeg Jets,1.0,Anaheim Ducks,3.0,0,2022,-27.242039,27.242039,0.460876,0.539124
23665,2021-10-13,Chicago Blackhawks,3.0,Colorado Avalanche,4.0,0,2022,-5.926525,5.926525,0.491472,0.508528
23666,2021-10-13,Vancouver Canucks,2.0,Edmonton Oilers,6.0,1,2022,-101.206743,101.206743,0.358336,0.641664
...,...,...,...,...,...,...,...,...,...,...,...
24969,2022-04-29,San Jose Sharks,0.0,Seattle Kraken,2.0,0,2022,-132.352059,132.352059,0.318238,0.681762
24970,2022-04-29,Vegas Golden Knights,6.0,St. Louis Blues,4.0,0,2022,-16.742328,16.742328,0.475924,0.524076
24971,2022-04-29,Boston Bruins,4.0,Toronto Maple Leafs,1.0,0,2022,37.485761,-37.485761,0.553738,0.446262
24972,2022-04-29,Calgary Flames,0.0,Winnipeg Jets,1.0,0,2022,50.267374,-50.267374,0.571840,0.428160
