In [58]:
import numpy as np
import pandas as pd
import seaborn as sns
import copy
import warnings
warnings.filterwarnings('ignore')

In [59]:
scores_df = pd.read_csv('C:/Users/Gabriel/Documents/MIE368/Data/season_data.csv')
playoff_scores_df = pd.read_csv('C:/Users/Gabriel/Documents/MIE368/Data/playoff_data.csv')

In [60]:
scores_df.head()

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season
0,2000-10-04,Colorado Avalanche,2.0,Dallas Stars,2.0,1,2001
1,2000-10-05,Ottawa Senators,4.0,Boston Bruins,4.0,1,2001
2,2000-10-05,Chicago Blackhawks,2.0,Buffalo Sabres,4.0,0,2001
3,2000-10-05,Detroit Red Wings,4.0,Calgary Flames,3.0,0,2001
4,2000-10-05,Vancouver Canucks,3.0,Philadelphia Flyers,6.0,0,2001


## Game Result Elo Adjustment

In [61]:
first_yr = scores_df[scores_df['Season'] == 2001]
Teams = first_yr['Home'].unique().tolist()

team_changes = {
    'Atlanta Thrashers': 'Winnipeg Jets',
    'Phoenix Coyotes': 'Arizona Coyotes',
    'Mighty Ducks of Anaheim': 'Anaheim Ducks'
    # Add more mappings as needed
}

K = 6

In [62]:
def calculate_prob_winning(home_team, away_team, elo_ratings):
    
    Elo_diff_home = elo_ratings[home_team]-elo_ratings[away_team] + 26  #add extra for home-ice advantage   
    #Elo_diff_away = elo_ratings[away_team] - elo_ratings[home_team]
    prob_win_home = 1/(10**(-1*Elo_diff_home/400)+1)
    #prob_win_away = 1/(10**(-1*Elo_diff_away/400)+1)
    prob_win_away = 1 - prob_win_home
    
    return prob_win_home, prob_win_away, Elo_diff_home

In [63]:
def margin_of_victory(home_goals, away_goals):
    
    mov = abs(home_goals - away_goals)
    mov_mult = 0.6686*np.log(mov)+0.8048
    
    return mov_mult

In [64]:
# Function to update ELO ratings after a game -- Could add input for elo_ratings dictionary 
def update_game_elo(home_team, away_team, home_goals, away_goals, season, df, idx, elo_ratings):
    
    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)
    
    #print('Home team is ' + str(home_team))
    #print('Away team is ' + str(away_team))

    if home_team not in elo_ratings:
        # New teams added past a certain year start with a different ELO rating
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)
    
    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
     # Update Pregame Favorite Multiplier
    if home_goals > away_goals:
        home_win = 1
        #away_win = 0
        winner_elo_diff = Elo_diff_home
    elif home_goals < away_goals:
        home_win = 0
        #away_win = 1
        winner_elo_diff = -1*Elo_diff_home
    else:
        return df, elo_ratings
    
    pre_g_fav_h = home_win - home_prob  
    #pre_g_fav_a = away_win - away_prob  
    
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
     # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(home_goals, away_goals)
    #print('home goals:' + str(home_goals))
    #print('away goals:' + str(away_goals))
    #print('mult is: ' + str(mov_multiplier))
    
     # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
    #elo_shift_a = K * mov_multiplier * pre_g_fav_a
    #print('elo shift is: ' + str(elo_shift_h))
    
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    return df, elo_ratings

## End of Season Elo Adjustments

In [65]:
def elo_adjuster(df):
    
    # Initialize variables to store the final Elo ratings for the season
    final_elo_ratings = {}
    elo_ratings = {}
    season_weight = 0.7
    average_weight = 0.3
    current_season = None

    # Iterate over seasons and games
    for idx, row in df.iterrows():
        if current_season is None:
            current_season = row['Season']
    
        # Check if the season in the current row is different from the current season
        if row['Season'] != current_season:
            # Save the previous season's Elo ratings and calculate the starting Elo for the new season
            final_elo_ratings[current_season] = dict(elo_ratings)
            elo_ratings = {}  # Reset Elo ratings for the new season

            for team in final_elo_ratings[current_season]:
                prev_season_elo = final_elo_ratings[current_season][team]
                league_avg_elo = sum(final_elo_ratings[current_season].values()) / len(final_elo_ratings[current_season])
                starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
                elo_ratings[team] = round(starting_elo, 2)

            current_season = row['Season']
            
        # Update Elo ratings for the game
        df, elo_ratings = update_game_elo(row['Home'], row['Visitor'], row['G_Home'], row['G_Vis'], row['Season'], df, idx, elo_ratings)

    # Calculate the final Elo ratings for the last season
    final_elo_ratings[current_season] = dict(elo_ratings)

    return df, elo_ratings, final_elo_ratings

In [66]:
seasons, elo, final_season_elos = elo_adjuster(scores_df)

In [67]:
elo

{'Dallas Stars': 1550.7819539073464,
 'Colorado Avalanche': 1579.9182021884367,
 'Boston Bruins': 1642.0502212110703,
 'Ottawa Senators': 1479.0506615440481,
 'Buffalo Sabres': 1486.7120784302224,
 'Chicago Blackhawks': 1393.8606927123315,
 'Calgary Flames': 1515.3348272021358,
 'Detroit Red Wings': 1443.4317712658462,
 'Philadelphia Flyers': 1421.6856205830084,
 'Vancouver Canucks': 1485.5682678752944,
 'Arizona Coyotes': 1405.4197600448936,
 'St. Louis Blues': 1483.661524120255,
 'Edmonton Oilers': 1580.5090602196963,
 'Florida Panthers': 1538.0042767487648,
 'Anaheim Ducks': 1370.185824502506,
 'Minnesota Wild': 1543.83706875618,
 'New Jersey Devils': 1533.2411289939719,
 'Montreal Canadiens': 1410.3105469348452,
 'Pittsburgh Penguins': 1504.8509804888336,
 'Nashville Predators': 1505.5125954824891,
 'San Jose Sharks': 1386.7944419061305,
 'Tampa Bay Lightning': 1539.0902552500847,
 'New York Islanders': 1513.2474188071392,
 'Washington Capitals': 1475.2423478319913,
 'Los Angeles K

In [84]:
def forecast_game_elo(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):

    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Calculate base scores for the home and away team
    score_home = 2.845905 + 0.003584*Elo_diff_home
    score_away = 2.845905 + 0.003584*(-1)*Elo_diff_home

    conditions_satisfied = 0 

    while conditions_satisfied == 0:
        
        # poisson distribution to shift scores
        poisson_score_home = np.random.poisson(max(0, score_home))
        poisson_score_away = np.random.poisson(max(0, score_away))

        # Ensure non-negative scores by replacing any negative values with zero
        # poisson_score_home = np.maximum(0, poisson_score_home)
        # poisson_score_away = np.maximum(0, poisson_score_away)

        home_goal_diff = poisson_score_home - poisson_score_away
            
        # does it satsify conditions?
        if home_win == 1:
            if home_goal_diff > 0:
                conditions_satisfied = 1
        
        if home_win == 0:
            if home_goal_diff < 0: 
                conditions_satisfied = 1

    #------------------------------------------------------------------------#
    OT_choice = 0
    
    if abs(home_goal_diff) == 1:
        # Decide if the game goes to OT
        OT_choice = np.random.choice(a= [1, 0], p= [0.23, 0.77])
        df.at[idx, 'OT Ind'] = OT_choice
            
    if home_win == 1:
        winner_elo_diff = Elo_diff_home
        simulation_wins[home_team] += 2

        if OT_choice == 1:
            simulation_wins[away_team] += 1
        
    if home_win == 0:
        winner_elo_diff = -1*Elo_diff_home
        simulation_wins[away_team] += 2

        if OT_choice == 1:
            simulation_wins[home_team] += 1

    pre_g_fav_h = home_win - home_prob  
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
  
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away
    #--------------------------------------------------------------------------#

    return df, elo_ratings, simulation_wins

In [85]:
# Iterate over seasons and games
def season_forecaster(df, starting_elo):
    
    simulation_pts = {team: 0 for team in df['Home'].unique()}

    for idx, row in df.iterrows():
        df, starting_elo, simulation_pts = forecast_game_elo(row['Home'], row['Visitor'], row['Season'], df, idx, starting_elo, simulation_pts)  

    return df, starting_elo, simulation_pts

In [71]:
def old_forecast_game_elo2(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):

    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Calculate the probability of going into overtime
    OT_prob = 1/(1+np.exp(-1*(0.092596 - 0.001466*abs(Elo_diff_home))))

    # Calculate base scores for the home and away team
    score_home = 2.845905 + 0.003584*Elo_diff_home
    score_away = 2.845905 + 0.003584*(-1)*Elo_diff_home

    conditions_satisfied = 0 

    while conditions_satisfied == 0:
        
        # poisson distribution to shift scores
        poisson_score_home = np.random.poisson(max(0, score_home))
        poisson_score_away = np.random.poisson(max(0, score_home))

        # Ensure non-negative scores by replacing any negative values with zero
        poisson_score_home = np.maximum(0, poisson_score_home)
        poisson_score_away = np.maximum(0, poisson_score_away)

        home_goal_diff = poisson_score_home - poisson_score_away
        

        if home_win == 1:
            if home_goal_diff > 0:
                conditions_satisfied = 1
        if home_win == 0:
            if home_goal_diff < 0: 
                conditions_satisfied = 1

    #------------------------------------------------------------------------#
    if home_win == 1:
        winner_elo_diff = Elo_diff_home
        simulation_wins[home_team] += 1
        
    if home_win == 0:
        winner_elo_diff = -1*Elo_diff_home
        simulation_wins[away_team] += 1

    pre_g_fav_h = home_win - home_prob  
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
  
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away
    #--------------------------------------------------------------------------#

    return df, elo_ratings, simulation_wins

In [72]:
# Iterate over seasons and games
def season_forecaster2(df, starting_elo):
    
    simulation_wins = {team: 0 for team in df['Home'].unique()}

    for idx, row in df.iterrows():
        df, starting_elo, simulation_wins = old_forecast_game_elo2(row['Home'], row['Visitor'], row['Season'], df, idx, starting_elo, simulation_wins)  

    return df, starting_elo, simulation_wins

In [86]:
df2 = copy.deepcopy(scores_df[scores_df['Season'] == 2022])
season_2022 = df2.copy()
num_simulations = 100
season_weight = 0.7
average_weight = 0.3
standings = {team: np.zeros(num_simulations) for team in df2['Home'].unique()}

for simulation in range(num_simulations):
    elo = copy.deepcopy(final_season_elos[2021])  # Creating a deep copy of final_season_elos[2021]
    elo_end = elo.copy()  # Initial copy for modifications

    elo_ratings = {}  # Reset Elo ratings for the new season

    for team in elo_end:
        prev_season_elo = elo_end[team]
        league_avg_elo = sum(elo_end.values()) / len(elo_end)
        starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
        elo_ratings[team] = round(starting_elo, 2)

    season_2022, elo_ratings, simulation_pts = season_forecaster(season_2022, elo_ratings)

    for team, pts in simulation_pts.items():
        standings[team][simulation] = pts


for team, pts in standings.items():
    mean_pts = np.mean(pts)
    std_deviation = np.std(pts)
    print(f"{team}: Mean point total = {mean_pts}, Standard Deviation = {std_deviation}")

Tampa Bay Lightning: Mean point total = 95.49, Standard Deviation = 13.608449581050735
Vegas Golden Knights: Mean point total = 98.78, Standard Deviation = 11.426793075924671
Anaheim Ducks: Mean point total = 71.91, Standard Deviation = 12.153267050468363
Colorado Avalanche: Mean point total = 98.02, Standard Deviation = 12.020798642353178
Edmonton Oilers: Mean point total = 89.98, Standard Deviation = 12.60791814694242
Toronto Maple Leafs: Mean point total = 91.83, Standard Deviation = 11.73631543543373
Washington Capitals: Mean point total = 93.52, Standard Deviation = 11.5442453196387
Buffalo Sabres: Mean point total = 74.09, Standard Deviation = 9.807237123675556
Carolina Hurricanes: Mean point total = 95.66, Standard Deviation = 12.66824376146907
Columbus Blue Jackets: Mean point total = 75.89, Standard Deviation = 13.569005121968226
Detroit Red Wings: Mean point total = 73.84, Standard Deviation = 13.214930949497997
Florida Panthers: Mean point total = 95.48, Standard Deviation =

In [87]:
len(season_2022[season_2022['OT Ind'] == 1])/len(season_2022)

0.2339939024390244

In [90]:
mean_pts = pd.DataFrame(columns = ['PTS']).rename_axis('Team')

for team, pts in standings.items():
    mean_pts.loc[team] = [np.mean(pts)]



In [91]:
mean_pts.sort_values(by='PTS', ascending=False)

Unnamed: 0_level_0,PTS
Team,Unnamed: 1_level_1
Vegas Golden Knights,98.78
Colorado Avalanche,98.02
Pittsburgh Penguins,96.43
Carolina Hurricanes,95.66
Tampa Bay Lightning,95.49
Florida Panthers,95.48
Boston Bruins,95.27
Washington Capitals,93.52
Toronto Maple Leafs,91.83
Nashville Predators,90.32


In [78]:
print("Elo ratings at the end of the simulation")

elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in elo_ratings.items():
    elo_df.loc[team] = [elo]

elo_df.sort_values(by='Elo', ascending=False)

Elo ratings at the end of the simulation


Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Vegas Golden Knights,1592.419193
Pittsburgh Penguins,1580.037156
Boston Bruins,1576.316141
Washington Capitals,1566.485759
Minnesota Wild,1539.620756
New York Islanders,1536.725924
St. Louis Blues,1535.776992
Edmonton Oilers,1533.446562
Philadelphia Flyers,1528.023857
Winnipeg Jets,1522.675666


In [79]:
print("Elo ratings at the end of the real prior season")

elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in final_season_elos[2021].items():
    elo_df.loc[team] = [elo]

elo_df.sort_values(by='Elo', ascending=False)

Elo ratings at the end of the real prior season


Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Vegas Golden Knights,1580.366387
Colorado Avalanche,1574.882544
Boston Bruins,1563.605372
Pittsburgh Penguins,1561.056207
Tampa Bay Lightning,1558.41656
Carolina Hurricanes,1557.582815
Washington Capitals,1553.844148
Florida Panthers,1547.747386
Toronto Maple Leafs,1543.472271
Minnesota Wild,1532.772853


In [56]:
df2 = scores_df[scores_df['Season'] == 2022]
df2

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season,EloDiffHome,EloDiffAway,WinProbHome,WinProbVis
23662,2021-10-12,Pittsburgh Penguins,6.0,Tampa Bay Lightning,2.0,0,2022,24.150000,-24.150000,0.534699,0.465301
23663,2021-10-12,Seattle Kraken,3.0,Vegas Golden Knights,4.0,0,2022,82.260000,-82.260000,0.616218,0.383782
23664,2021-10-13,Winnipeg Jets,1.0,Anaheim Ducks,4.0,0,2022,-36.470000,36.470000,0.447707,0.552293
23665,2021-10-13,Chicago Blackhawks,2.0,Colorado Avalanche,4.0,0,2022,100.820000,-100.820000,0.641152,0.358848
23666,2021-10-13,Vancouver Canucks,2.0,Edmonton Oilers,3.0,1,2022,72.510000,-72.510000,0.602861,0.397139
...,...,...,...,...,...,...,...,...,...,...,...
24969,2022-04-29,San Jose Sharks,0.0,Seattle Kraken,3.0,0,2022,28.181444,-28.181444,0.540468,0.459532
24970,2022-04-29,Vegas Golden Knights,7.0,St. Louis Blues,4.0,0,2022,67.212498,-67.212498,0.595538,0.404462
24971,2022-04-29,Boston Bruins,2.0,Toronto Maple Leafs,5.0,0,2022,30.475178,-30.475178,0.543745,0.456255
24972,2022-04-29,Calgary Flames,1.0,Winnipeg Jets,3.0,0,2022,-46.357503,46.357503,0.433679,0.566321


In [57]:
season_2022

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season,EloDiffHome,EloDiffAway,WinProbHome,WinProbVis
23662,2021-10-12,Pittsburgh Penguins,2.0,Tampa Bay Lightning,1.0,0,2022,24.150000,-24.150000,0.534699,0.465301
23663,2021-10-12,Seattle Kraken,4.0,Vegas Golden Knights,3.0,0,2022,82.260000,-82.260000,0.616218,0.383782
23664,2021-10-13,Winnipeg Jets,6.0,Anaheim Ducks,1.0,0,2022,-36.470000,36.470000,0.447707,0.552293
23665,2021-10-13,Chicago Blackhawks,4.0,Colorado Avalanche,5.0,0,2022,100.820000,-100.820000,0.641152,0.358848
23666,2021-10-13,Vancouver Canucks,2.0,Edmonton Oilers,6.0,1,2022,72.510000,-72.510000,0.602861,0.397139
...,...,...,...,...,...,...,...,...,...,...,...
24969,2022-04-29,San Jose Sharks,0.0,Seattle Kraken,4.0,0,2022,183.623802,-183.623802,0.742121,0.257879
24970,2022-04-29,Vegas Golden Knights,3.0,St. Louis Blues,1.0,0,2022,27.033554,-27.033554,0.538826,0.461174
24971,2022-04-29,Boston Bruins,0.0,Toronto Maple Leafs,3.0,0,2022,37.820841,-37.820841,0.554215,0.445785
24972,2022-04-29,Calgary Flames,2.0,Winnipeg Jets,1.0,0,2022,38.501191,-38.501191,0.555182,0.444818
