In [19]:
import numpy as np
import pandas as pd
import seaborn as sns
import copy
import warnings
warnings.filterwarnings('ignore')

In [20]:
scores_df = pd.read_csv('C:/Users/Gabriel/Documents/MIE368/Data/season_data.csv')
playoff_scores_df = pd.read_csv('C:/Users/Gabriel/Documents/MIE368/Data/playoff_data.csv')

In [21]:
scores_df.head()

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season
0,2000-10-04,Colorado Avalanche,2.0,Dallas Stars,2.0,1,2001
1,2000-10-05,Ottawa Senators,4.0,Boston Bruins,4.0,1,2001
2,2000-10-05,Chicago Blackhawks,2.0,Buffalo Sabres,4.0,0,2001
3,2000-10-05,Detroit Red Wings,4.0,Calgary Flames,3.0,0,2001
4,2000-10-05,Vancouver Canucks,3.0,Philadelphia Flyers,6.0,0,2001


## Game Result Elo Adjustment

In [22]:
first_yr = scores_df[scores_df['Season'] == 2001]
Teams = first_yr['Home'].unique().tolist()

team_changes = {
    'Atlanta Thrashers': 'Winnipeg Jets',
    'Phoenix Coyotes': 'Arizona Coyotes',
    'Mighty Ducks of Anaheim': 'Anaheim Ducks'
    # Add more mappings as needed
}

K = 6

In [23]:
def calculate_prob_winning(home_team, away_team, elo_ratings):
    
    Elo_diff_home = elo_ratings[home_team]-elo_ratings[away_team] + 26  #add extra for home-ice advantage   
    #Elo_diff_away = elo_ratings[away_team] - elo_ratings[home_team]
    prob_win_home = 1/(10**(-1*Elo_diff_home/400)+1)
    #prob_win_away = 1/(10**(-1*Elo_diff_away/400)+1)
    prob_win_away = 1 - prob_win_home
    
    return prob_win_home, prob_win_away, Elo_diff_home

In [24]:
def margin_of_victory(home_goals, away_goals):
    
    mov = abs(home_goals - away_goals)
    mov_mult = 0.6686*np.log(mov)+0.8048
    
    return mov_mult

In [25]:
# Function to update ELO ratings after a game -- Could add input for elo_ratings dictionary 
def update_game_elo(home_team, away_team, home_goals, away_goals, season, df, idx, elo_ratings):
    
    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)
    
    #print('Home team is ' + str(home_team))
    #print('Away team is ' + str(away_team))

    if home_team not in elo_ratings:
        # New teams added past a certain year start with a different ELO rating
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)
    
    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
     # Update Pregame Favorite Multiplier
    if home_goals > away_goals:
        home_win = 1
        #away_win = 0
        winner_elo_diff = Elo_diff_home
    elif home_goals < away_goals:
        home_win = 0
        #away_win = 1
        winner_elo_diff = -1*Elo_diff_home
    else:
        return df, elo_ratings
    
    pre_g_fav_h = home_win - home_prob  
    #pre_g_fav_a = away_win - away_prob  
    
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
     # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(home_goals, away_goals)
    #print('home goals:' + str(home_goals))
    #print('away goals:' + str(away_goals))
    #print('mult is: ' + str(mov_multiplier))
    
     # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
    #elo_shift_a = K * mov_multiplier * pre_g_fav_a
    #print('elo shift is: ' + str(elo_shift_h))
    
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    return df, elo_ratings

## End of Season Elo Adjustments

In [26]:
def elo_adjuster(df):
    
    # Initialize variables to store the final Elo ratings for the season
    final_elo_ratings = {}
    elo_ratings = {}
    season_weight = 0.7
    average_weight = 0.3
    current_season = None

    # Iterate over seasons and games
    for idx, row in df.iterrows():
        if current_season is None:
            current_season = row['Season']
    
        # Check if the season in the current row is different from the current season
        if row['Season'] != current_season:
            # Save the previous season's Elo ratings and calculate the starting Elo for the new season
            final_elo_ratings[current_season] = dict(elo_ratings)
            elo_ratings = {}  # Reset Elo ratings for the new season

            for team in final_elo_ratings[current_season]:
                prev_season_elo = final_elo_ratings[current_season][team]
                league_avg_elo = sum(final_elo_ratings[current_season].values()) / len(final_elo_ratings[current_season])
                starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
                elo_ratings[team] = round(starting_elo, 2)

            current_season = row['Season']
            
        # Update Elo ratings for the game
        df, elo_ratings = update_game_elo(row['Home'], row['Visitor'], row['G_Home'], row['G_Vis'], row['Season'], df, idx, elo_ratings)

    # Calculate the final Elo ratings for the last season
    final_elo_ratings[current_season] = dict(elo_ratings)

    return df, elo_ratings, final_elo_ratings

In [27]:
seasons, elo, final_season_elos = elo_adjuster(scores_df)

In [28]:
elo

{'Dallas Stars': 1550.7819539073464,
 'Colorado Avalanche': 1579.9182021884367,
 'Boston Bruins': 1642.0502212110703,
 'Ottawa Senators': 1479.0506615440481,
 'Buffalo Sabres': 1486.7120784302224,
 'Chicago Blackhawks': 1393.8606927123315,
 'Calgary Flames': 1515.3348272021358,
 'Detroit Red Wings': 1443.4317712658462,
 'Philadelphia Flyers': 1421.6856205830084,
 'Vancouver Canucks': 1485.5682678752944,
 'Arizona Coyotes': 1405.4197600448936,
 'St. Louis Blues': 1483.661524120255,
 'Edmonton Oilers': 1580.5090602196963,
 'Florida Panthers': 1538.0042767487648,
 'Anaheim Ducks': 1370.185824502506,
 'Minnesota Wild': 1543.83706875618,
 'New Jersey Devils': 1533.2411289939719,
 'Montreal Canadiens': 1410.3105469348452,
 'Pittsburgh Penguins': 1504.8509804888336,
 'Nashville Predators': 1505.5125954824891,
 'San Jose Sharks': 1386.7944419061305,
 'Tampa Bay Lightning': 1539.0902552500847,
 'New York Islanders': 1513.2474188071392,
 'Washington Capitals': 1475.2423478319913,
 'Los Angeles K

In [29]:
final_season_elos[2021]

{'Dallas Stars': 1494.3916267969453,
 'Colorado Avalanche': 1574.882543882543,
 'Boston Bruins': 1563.6053717814896,
 'Ottawa Senators': 1450.2829581081469,
 'Buffalo Sabres': 1419.5302348944945,
 'Chicago Blackhawks': 1468.0057002567307,
 'Calgary Flames': 1494.325041731969,
 'Detroit Red Wings': 1414.451306062558,
 'Philadelphia Flyers': 1482.3693576844323,
 'Vancouver Canucks': 1462.7898118949336,
 'Arizona Coyotes': 1473.0236465565463,
 'St. Louis Blues': 1519.3421757483302,
 'Edmonton Oilers': 1529.2295411379816,
 'Florida Panthers': 1547.747386131692,
 'Anaheim Ducks': 1422.9188837846677,
 'Minnesota Wild': 1532.7728525740092,
 'New Jersey Devils': 1430.727341093728,
 'Montreal Canadiens': 1468.3226682635918,
 'Pittsburgh Penguins': 1561.0562073703318,
 'Nashville Predators': 1519.0684935477377,
 'San Jose Sharks': 1436.6454570065216,
 'Tampa Bay Lightning': 1558.416559749915,
 'New York Islanders': 1527.842400827338,
 'Washington Capitals': 1553.8441475510676,
 'Los Angeles King

In [30]:
def forecast_game_elo(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):

    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Calculate base scores for the home and away team
    score_home = 2.845905 + 0.003584*Elo_diff_home
    score_away = 2.845905 + 0.003584*(-1)*Elo_diff_home

    conditions_satisfied = 0 

    while conditions_satisfied == 0:
        
        # poisson distribution to shift scores
        poisson_score_home = np.random.poisson(max(0, score_home))
        poisson_score_away = np.random.poisson(max(0, score_away))

        # Ensure non-negative scores by replacing any negative values with zero
        # poisson_score_home = np.maximum(0, poisson_score_home)
        # poisson_score_away = np.maximum(0, poisson_score_away)

        home_goal_diff = poisson_score_home - poisson_score_away
            
        # does it satsify conditions?
        if home_win == 1:
            if home_goal_diff > 0:
                conditions_satisfied = 1
        
        if home_win == 0:
            if home_goal_diff < 0: 
                conditions_satisfied = 1

    #------------------------------------------------------------------------#
    if abs(home_goal_diff) == 1:
        # Decide if the game goes to OT
        OT_choice = np.random.choice(a= [1, 0], p= [0.23, 0.77])
        df.at[idx, 'OT Ind'] = OT_choice
            
    if home_win == 1:
        winner_elo_diff = Elo_diff_home
        simulation_wins[home_team] += 1
        
    if home_win == 0:
        winner_elo_diff = -1*Elo_diff_home
        simulation_wins[away_team] += 1

    pre_g_fav_h = home_win - home_prob  
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
  
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away
    #--------------------------------------------------------------------------#

    return df, elo_ratings, simulation_wins

In [31]:
# Iterate over seasons and games
def season_forecaster(df, starting_elo):
    
    simulation_wins = {team: 0 for team in df['Home'].unique()}

    for idx, row in df.iterrows():
        df, starting_elo, simulation_wins = forecast_game_elo(row['Home'], row['Visitor'], row['Season'], df, idx, starting_elo, simulation_wins)  

    return df, starting_elo, simulation_wins

In [32]:
def old_forecast_game_elo2(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):

    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Calculate the probability of going into overtime
    OT_prob = 1/(1+np.exp(-1*(0.092596 - 0.001466*abs(Elo_diff_home))))

    # Calculate base scores for the home and away team
    score_home = 2.845905 + 0.003584*Elo_diff_home
    score_away = 2.845905 + 0.003584*(-1)*Elo_diff_home

    conditions_satisfied = 0 

    while conditions_satisfied == 0:
        
        # poisson distribution to shift scores
        poisson_score_home = np.random.poisson(max(0, score_home))
        poisson_score_away = np.random.poisson(max(0, score_home))

        # Ensure non-negative scores by replacing any negative values with zero
        poisson_score_home = np.maximum(0, poisson_score_home)
        poisson_score_away = np.maximum(0, poisson_score_away)

        home_goal_diff = poisson_score_home - poisson_score_away
        

        if home_win == 1:
            if home_goal_diff > 0:
                conditions_satisfied = 1
        if home_win == 0:
            if home_goal_diff < 0: 
                conditions_satisfied = 1

    #------------------------------------------------------------------------#
    if home_win == 1:
        winner_elo_diff = Elo_diff_home
        simulation_wins[home_team] += 1
        
    if home_win == 0:
        winner_elo_diff = -1*Elo_diff_home
        simulation_wins[away_team] += 1

    pre_g_fav_h = home_win - home_prob  
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
  
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away
    #--------------------------------------------------------------------------#

    return df, elo_ratings, simulation_wins

In [33]:
# Iterate over seasons and games
def season_forecaster2(df, starting_elo):
    
    simulation_wins = {team: 0 for team in df['Home'].unique()}

    for idx, row in df.iterrows():
        df, starting_elo, simulation_wins = old_forecast_game_elo2(row['Home'], row['Visitor'], row['Season'], df, idx, starting_elo, simulation_wins)  

    return df, starting_elo, simulation_wins

In [43]:
df2 = copy.deepcopy(scores_df[scores_df['Season'] == 2022])
season_2022 = df2.copy()
num_simulations = 100
season_weight = 0.7
average_weight = 0.3
team_wins = {team: np.zeros(num_simulations) for team in df2['Home'].unique()}

for simulation in range(num_simulations):
    elo = copy.deepcopy(final_season_elos[2021])  # Creating a deep copy of final_season_elos[2021]
    elo_end = elo.copy()  # Initial copy for modifications

    elo_ratings = {}  # Reset Elo ratings for the new season

    for team in elo_end:
        prev_season_elo = elo_end[team]
        league_avg_elo = sum(elo_end.values()) / len(elo_end)
        starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
        elo_ratings[team] = round(starting_elo, 2)

    season_2022, elo_ratings, simulation_wins = season_forecaster2(season_2022, elo_ratings)

    for team, wins in simulation_wins.items():
        team_wins[team][simulation] = wins


for team, wins in team_wins.items():
    mean_wins = np.mean(wins)
    std_deviation = np.std(wins)
    print(f"{team}: Mean Wins = {mean_wins}, Standard Deviation = {std_deviation}")

Tampa Bay Lightning: Mean Wins = 46.48, Standard Deviation = 6.937549999819822
Vegas Golden Knights: Mean Wins = 47.96, Standard Deviation = 6.406122071893416
Anaheim Ducks: Mean Wins = 35.61, Standard Deviation = 6.670674628551449
Colorado Avalanche: Mean Wins = 46.35, Standard Deviation = 6.2118837722545965
Edmonton Oilers: Mean Wins = 42.53, Standard Deviation = 6.66551573398488
Toronto Maple Leafs: Mean Wins = 44.47, Standard Deviation = 6.777101150196889
Washington Capitals: Mean Wins = 44.3, Standard Deviation = 6.380438856379708
Buffalo Sabres: Mean Wins = 35.22, Standard Deviation = 6.13446004143804
Carolina Hurricanes: Mean Wins = 44.57, Standard Deviation = 5.906360977793348
Columbus Blue Jackets: Mean Wins = 36.15, Standard Deviation = 5.795472370739076
Detroit Red Wings: Mean Wins = 34.48, Standard Deviation = 5.554241622399947
Florida Panthers: Mean Wins = 45.53, Standard Deviation = 6.2840353277173735
Los Angeles Kings: Mean Wins = 37.51, Standard Deviation = 6.9720800339

In [44]:
len(season_2022[season_2022['OT Ind'] == 1])/len(season_2022)

0.21951219512195122

In [45]:
mean_wins = pd.DataFrame(columns = ['Wins']).rename_axis('Team')

for team, wins in team_wins.items():
    mean_wins.loc[team] = [np.mean(wins)]



In [46]:
mean_wins.sort_values(by='Wins', ascending=False)

Unnamed: 0_level_0,Wins
Team,Unnamed: 1_level_1
Vegas Golden Knights,47.96
Tampa Bay Lightning,46.48
Colorado Avalanche,46.35
Pittsburgh Penguins,46.19
Boston Bruins,45.92
Florida Panthers,45.53
Carolina Hurricanes,44.57
Toronto Maple Leafs,44.47
Washington Capitals,44.3
Nashville Predators,43.86


In [48]:
elo_ratings

{'Dallas Stars': 1465.1441844360847,
 'Colorado Avalanche': 1509.850370737865,
 'Boston Bruins': 1517.4844941351328,
 'Ottawa Senators': 1465.1590561994979,
 'Buffalo Sabres': 1415.0497506441236,
 'Chicago Blackhawks': 1465.2877487775534,
 'Calgary Flames': 1481.2698473863206,
 'Detroit Red Wings': 1396.892852473804,
 'Philadelphia Flyers': 1473.0013175854442,
 'Vancouver Canucks': 1486.5771015637304,
 'Arizona Coyotes': 1549.8525255297877,
 'St. Louis Blues': 1528.3641726109088,
 'Edmonton Oilers': 1533.2872515047923,
 'Florida Panthers': 1548.8307888750098,
 'Anaheim Ducks': 1543.6964894730563,
 'Minnesota Wild': 1518.7713077331182,
 'New Jersey Devils': 1459.4737895769772,
 'Montreal Canadiens': 1539.4809385760236,
 'Pittsburgh Penguins': 1516.9996207181218,
 'Nashville Predators': 1485.7891063921222,
 'San Jose Sharks': 1417.1274456831468,
 'Tampa Bay Lightning': 1523.5155058930306,
 'New York Islanders': 1518.973367493013,
 'Washington Capitals': 1508.3553977879155,
 'Los Angeles 

In [47]:
elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in elo_ratings.items():
    elo_df.loc[team] = [elo_ratings]

elo_df.sort_values(by='Elo', ascending=False)

TypeError: '<' not supported between instances of 'dict' and 'dict'

In [49]:
elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in final_season_elos[2021].items():
    elo_df.loc[team] = [elo]

elo_df.sort_values(by='Elo', ascending=False)

Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Vegas Golden Knights,1580.366387
Colorado Avalanche,1574.882544
Boston Bruins,1563.605372
Pittsburgh Penguins,1561.056207
Tampa Bay Lightning,1558.41656
Carolina Hurricanes,1557.582815
Washington Capitals,1553.844148
Florida Panthers,1547.747386
Toronto Maple Leafs,1543.472271
Minnesota Wild,1532.772853


In [18]:
np.mean(elo_df['Elo'])

1383.5470967741933

In [22]:
df2 = scores_df[scores_df['Season'] == 2022]
df2

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season,EloDiffHome,EloDiffAway,WinProbHome,WinProbVis
23662,2021-10-12,Pittsburgh Penguins,6.0,Tampa Bay Lightning,2.0,0,2022,24.160000,-24.160000,0.534713,0.465287
23663,2021-10-12,Seattle Kraken,3.0,Vegas Golden Knights,4.0,0,2022,-23.390000,23.390000,0.466390,0.533610
23664,2021-10-13,Winnipeg Jets,1.0,Anaheim Ducks,4.0,0,2022,-36.320000,36.320000,0.447921,0.552079
23665,2021-10-13,Chicago Blackhawks,2.0,Colorado Avalanche,4.0,0,2022,100.960000,-100.960000,0.641337,0.358663
23666,2021-10-13,Vancouver Canucks,2.0,Edmonton Oilers,3.0,1,2022,72.530000,-72.530000,0.602889,0.397111
...,...,...,...,...,...,...,...,...,...,...,...
24969,2022-04-29,San Jose Sharks,0.0,Seattle Kraken,3.0,0,2022,66.672113,-66.672113,0.594788,0.405212
24970,2022-04-29,Vegas Golden Knights,7.0,St. Louis Blues,4.0,0,2022,66.397732,-66.397732,0.594407,0.405593
24971,2022-04-29,Boston Bruins,2.0,Toronto Maple Leafs,5.0,0,2022,30.923598,-30.923598,0.544385,0.455615
24972,2022-04-29,Calgary Flames,1.0,Winnipeg Jets,3.0,0,2022,-47.081958,47.081958,0.432655,0.567345


In [23]:
season_2022

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season,EloDiffHome,EloDiffAway,WinProbHome,WinProbVis
23662,2021-10-12,Pittsburgh Penguins,2.0,Tampa Bay Lightning,4.0,0,2022,52.256744,-52.256744,0.574641,0.425359
23663,2021-10-12,Seattle Kraken,2.0,Vegas Golden Knights,4.0,0,2022,73.772344,-73.772344,0.604600,0.395400
23664,2021-10-13,Winnipeg Jets,1.0,Anaheim Ducks,3.0,0,2022,-27.242039,27.242039,0.460876,0.539124
23665,2021-10-13,Chicago Blackhawks,3.0,Colorado Avalanche,4.0,0,2022,-5.926525,5.926525,0.491472,0.508528
23666,2021-10-13,Vancouver Canucks,2.0,Edmonton Oilers,6.0,1,2022,-101.206743,101.206743,0.358336,0.641664
...,...,...,...,...,...,...,...,...,...,...,...
24969,2022-04-29,San Jose Sharks,0.0,Seattle Kraken,2.0,0,2022,-132.352059,132.352059,0.318238,0.681762
24970,2022-04-29,Vegas Golden Knights,6.0,St. Louis Blues,4.0,0,2022,-16.742328,16.742328,0.475924,0.524076
24971,2022-04-29,Boston Bruins,4.0,Toronto Maple Leafs,1.0,0,2022,37.485761,-37.485761,0.553738,0.446262
24972,2022-04-29,Calgary Flames,0.0,Winnipeg Jets,1.0,0,2022,50.267374,-50.267374,0.571840,0.428160
