In [45]:
import numpy as np
import pandas as pd
import seaborn as sns
import copy
import warnings
warnings.filterwarnings('ignore')

In [46]:
scores_df = pd.read_csv('C:/Users/Gabriel/Documents/GitHub/MIE368_PROJECT/team_proj/data/season_data.csv')
playoff_scores_df = pd.read_csv('C:/Users/Gabriel/Documents/GitHub/MIE368_PROJECT/team_proj/data/playoff_data.csv')
real_elo = pd.read_csv('C:/Users/Gabriel/Documents/GitHub/MIE368_PROJECT/team_proj/data/nhl_elo.csv')

In [47]:
scores_df.head()

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season
0,2000-10-04,Colorado Avalanche,2.0,Dallas Stars,2.0,1,2001
1,2000-10-05,Ottawa Senators,4.0,Boston Bruins,4.0,1,2001
2,2000-10-05,Chicago Blackhawks,2.0,Buffalo Sabres,4.0,0,2001
3,2000-10-05,Detroit Red Wings,4.0,Calgary Flames,3.0,0,2001
4,2000-10-05,Vancouver Canucks,3.0,Philadelphia Flyers,6.0,0,2001


## Game Result Elo Adjustment

In [27]:
first_yr = scores_df[scores_df['Season'] == 2001]
Teams = first_yr['Home'].unique().tolist()

team_changes = {
    'Atlanta Thrashers': 'Winnipeg Jets',
    'Phoenix Coyotes': 'Arizona Coyotes',
    'Mighty Ducks of Anaheim': 'Anaheim Ducks'
    # Add more mappings as needed
}

K = 6

In [28]:
def calculate_prob_winning(home_team, away_team, elo_ratings):
    
    Elo_diff_home = elo_ratings[home_team]-elo_ratings[away_team] + 26  #add extra for home-ice advantage   
    #Elo_diff_away = elo_ratings[away_team] - elo_ratings[home_team]
    prob_win_home = 1/(10**(-1*Elo_diff_home/400)+1)
    #prob_win_away = 1/(10**(-1*Elo_diff_away/400)+1)
    prob_win_away = 1 - prob_win_home
    
    return prob_win_home, prob_win_away, Elo_diff_home

In [29]:
def margin_of_victory(home_goals, away_goals):
    
    mov = abs(home_goals - away_goals)
    mov_mult = 0.6686*np.log(mov)+0.8048
    
    return mov_mult

In [30]:
# Function to update ELO ratings after a game -- Could add input for elo_ratings dictionary 
def update_game_elo(home_team, away_team, home_goals, away_goals, season, df, idx, elo_ratings):
    
    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)
    
    #print('Home team is ' + str(home_team))
    #print('Away team is ' + str(away_team))

    if home_team not in elo_ratings:
        # New teams added past a certain year start with a different ELO rating
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)
    
    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
     # Update Pregame Favorite Multiplier
    if home_goals > away_goals:
        home_win = 1
        #away_win = 0
        winner_elo_diff = Elo_diff_home
    elif home_goals < away_goals:
        home_win = 0
        #away_win = 1
        winner_elo_diff = -1*Elo_diff_home
    else:
        return df, elo_ratings
    
    pre_g_fav_h = home_win - home_prob  
    #pre_g_fav_a = away_win - away_prob  
    
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
     # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(home_goals, away_goals)
    #print('home goals:' + str(home_goals))
    #print('away goals:' + str(away_goals))
    #print('mult is: ' + str(mov_multiplier))
    
     # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
    #elo_shift_a = K * mov_multiplier * pre_g_fav_a
    #print('elo shift is: ' + str(elo_shift_h))
    
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    return df, elo_ratings

## End of Season Elo Adjustments

In [31]:
def elo_adjuster(df):
    
    # Initialize variables to store the final Elo ratings for the season
    final_elo_ratings = {}
    elo_ratings = {}
    season_weight = 0.7
    average_weight = 0.3
    current_season = None

    # Iterate over seasons and games
    for idx, row in df.iterrows():
        if current_season is None:
            current_season = row['Season']
    
        # Check if the season in the current row is different from the current season
        if row['Season'] != current_season:
            # Save the previous season's Elo ratings and calculate the starting Elo for the new season
            final_elo_ratings[current_season] = dict(elo_ratings)
            elo_ratings = {}  # Reset Elo ratings for the new season

            for team in final_elo_ratings[current_season]:
                prev_season_elo = final_elo_ratings[current_season][team]
                league_avg_elo = sum(final_elo_ratings[current_season].values()) / len(final_elo_ratings[current_season])
                starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
                elo_ratings[team] = round(starting_elo, 2)

            current_season = row['Season']
            
        # Update Elo ratings for the game
        df, elo_ratings = update_game_elo(row['Home'], row['Visitor'], row['G_Home'], row['G_Vis'], row['Season'], df, idx, elo_ratings)

    # Calculate the final Elo ratings for the last season
    final_elo_ratings[current_season] = dict(elo_ratings)

    return df, elo_ratings, final_elo_ratings

In [32]:
seasons, elo, final_season_elos = elo_adjuster(scores_df)

In [33]:
elo

{'Dallas Stars': 1550.7819539073464,
 'Colorado Avalanche': 1579.9182021884367,
 'Boston Bruins': 1642.0502212110703,
 'Ottawa Senators': 1479.0506615440481,
 'Buffalo Sabres': 1486.7120784302224,
 'Chicago Blackhawks': 1393.8606927123315,
 'Calgary Flames': 1515.3348272021358,
 'Detroit Red Wings': 1443.4317712658462,
 'Philadelphia Flyers': 1421.6856205830084,
 'Vancouver Canucks': 1485.5682678752944,
 'Arizona Coyotes': 1405.4197600448936,
 'St. Louis Blues': 1483.661524120255,
 'Edmonton Oilers': 1580.5090602196963,
 'Florida Panthers': 1538.0042767487648,
 'Anaheim Ducks': 1370.185824502506,
 'Minnesota Wild': 1543.83706875618,
 'New Jersey Devils': 1533.2411289939719,
 'Montreal Canadiens': 1410.3105469348452,
 'Pittsburgh Penguins': 1504.8509804888336,
 'Nashville Predators': 1505.5125954824891,
 'San Jose Sharks': 1386.7944419061305,
 'Tampa Bay Lightning': 1539.0902552500847,
 'New York Islanders': 1513.2474188071392,
 'Washington Capitals': 1475.2423478319913,
 'Los Angeles K

In [34]:
def forecast_game_elo(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):

    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob
    
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Calculate base scores for the home and away team
    score_home = 2.845905 + 0.003584*Elo_diff_home
    score_away = 2.845905 + 0.003584*(-1)*Elo_diff_home

    conditions_satisfied = 0 

    while conditions_satisfied == 0:
        
        # poisson distribution to shift scores
        poisson_score_home = np.random.poisson(max(0, score_home))
        poisson_score_away = np.random.poisson(max(0, score_away))

        # Ensure non-negative scores by replacing any negative values with zero
        # poisson_score_home = np.maximum(0, poisson_score_home)
        # poisson_score_away = np.maximum(0, poisson_score_away)

        home_goal_diff = poisson_score_home - poisson_score_away
            
        # does it satsify conditions?
        if home_win == 1:
            if home_goal_diff > 0:
                conditions_satisfied = 1
        
        if home_win == 0:
            if home_goal_diff < 0: 
                conditions_satisfied = 1

    #------------------------------------------------------------------------#
    OT_choice = 0
    
    if abs(home_goal_diff) == 1:
        # Decide if the game goes to OT
        OT_choice = np.random.choice(a= [1, 0], p= [0.23, 0.77])
        df.at[idx, 'OT Ind'] = OT_choice
            
    if home_win == 1:
        winner_elo_diff = Elo_diff_home
        simulation_wins[home_team] += 2

        if OT_choice == 1:
            simulation_wins[away_team] += 1
        
    if home_win == 0:
        winner_elo_diff = -1*Elo_diff_home
        simulation_wins[away_team] += 2

        if OT_choice == 1:
            simulation_wins[home_team] += 1

    pre_g_fav_h = home_win - home_prob  
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    
    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h 
  
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away
    #--------------------------------------------------------------------------#

    return df, elo_ratings, simulation_wins

In [126]:
# Iterate over seasons and games
def season_forecaster(df, starting_elo):
        
    # Create a dictionary to track point totals
    simulation_pts = {team: 0 for team in df['Home'].unique()}

    for idx, row in df.iterrows():
        df, starting_elo, simulation_pts = forecast_game_elo(row['Home'], row['Visitor'], row['Season'], df, idx, starting_elo, simulation_pts)  
        
    return df, starting_elo, simulation_pts

In [1]:
def playoff_tracker(simulation_pts, playoffs_made):

    conference_mapping = {
        'East': ['Florida Panthers', 'Toronto Maple Leafs', 'Tampa Bay Lightning', 'Boston Bruins', 'Buffalo Sabres', 'Detroit Red Wings',
                 'Ottawa Senators', 'Montreal Canadiens', 'Carolina Hurricanes', 'New York Rangers', 'Pittsburgh Penguins', 
                 'Washington Capitals', 'New York Islanders', 'Columbus Blue Jackets', 'New Jersey Devils', 'Philadelphia Flyers'],
        'West': ['Colorado Avalanche', 'Minnesota Wild', 'St. Louis Blues', 'Dallas Stars', 'Nashville Predators', 'Winnipeg Jets',
                 'Chicago Blackhawks', 'Arizona Coyotes', 'Calgary Flames', 'Edmonton Oilers', 'Los Angeles Kings', 'Vegas Golden Knights',
                 'Vancouver Canucks', 'San Jose Sharks', 'Anaheim Ducks', 'Seattle Kraken']
    }
    
    # Separate teams into East and West lists
    for conference, teams in conference_mapping.items():
        valid_teams = [i for i in teams if i in simulation_pts]
        sorted_teams = sorted(valid_teams, key=lambda x: simulation_pts[x], reverse=True)

        for team in sorted_teams[:8]:
            playoffs_made[team] += 1

    return playoffs_made

In [128]:
df2 = copy.deepcopy(scores_df[scores_df['Season'] == 2022])
season_2022 = df2.copy()
num_simulations = 100
season_weight = 0.7
average_weight = 0.3

# Create a dictionary to track overall standings
standings = {team: np.zeros(num_simulations) for team in df2['Home'].unique()}

# Create a dictionary to track playoff appearances
playoffs_counter = {team: 0 for team in df2['Home'].unique()}

for simulation in range(num_simulations):
    elo = copy.deepcopy(final_season_elos[2021])  # Creating a deep copy of final_season_elos[2021]
    elo_end = elo.copy()  # Initial copy for modifications

    elo_ratings = {}  # Reset Elo ratings for the new season

    for team in elo_end:
        prev_season_elo = elo_end[team]
        league_avg_elo = sum(elo_end.values()) / len(elo_end)
        starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
        elo_ratings[team] = round(starting_elo, 2)

    season_2022, elo_ratings, simulation_pts = season_forecaster(season_2022, elo_ratings)
    playoffs_counter = playoff_tracker(simulation_pts, playoffs_counter)

    for team, pts in simulation_pts.items():
        standings[team][simulation] = pts


for team, pts in standings.items():
    mean_pts = np.mean(pts)
    std_deviation = np.std(pts)
    print(f"{team}: Mean point total = {mean_pts}, Standard Deviation = {std_deviation}")

Tampa Bay Lightning: Mean point total = 94.29, Standard Deviation = 13.758848062247074
Vegas Golden Knights: Mean point total = 96.86, Standard Deviation = 11.827104463899861
Anaheim Ducks: Mean point total = 72.49, Standard Deviation = 12.05113687583043
Colorado Avalanche: Mean point total = 98.33, Standard Deviation = 14.198630215622913
Edmonton Oilers: Mean point total = 90.46, Standard Deviation = 12.808918767796133
Toronto Maple Leafs: Mean point total = 93.91, Standard Deviation = 12.312672333819332
Washington Capitals: Mean point total = 92.83, Standard Deviation = 13.717182655341439
Buffalo Sabres: Mean point total = 73.48, Standard Deviation = 12.658183123971623
Carolina Hurricanes: Mean point total = 95.05, Standard Deviation = 13.051724024051381
Columbus Blue Jackets: Mean point total = 75.59, Standard Deviation = 13.313222750333598
Detroit Red Wings: Mean point total = 69.71, Standard Deviation = 11.681862009114813
Florida Panthers: Mean point total = 94.15, Standard Deviat

In [129]:
len(season_2022[season_2022['OT Ind'] == 1])/len(season_2022)

0.22332317073170732

In [130]:
mean_pts = pd.DataFrame(columns = ['PTS']).rename_axis('Team')

for team, pts in standings.items():
    mean_pts.loc[team] = [np.mean(pts)]



In [131]:
mean_pts.sort_values(by='PTS', ascending=False)

Unnamed: 0_level_0,PTS
Team,Unnamed: 1_level_1
Colorado Avalanche,98.33
Vegas Golden Knights,96.86
Boston Bruins,95.88
Carolina Hurricanes,95.05
Tampa Bay Lightning,94.29
Pittsburgh Penguins,94.23
Florida Panthers,94.15
Toronto Maple Leafs,93.91
Washington Capitals,92.83
New York Islanders,91.79


In [132]:
print("Elo ratings at the end of the simulation")

elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in elo_ratings.items():
    elo_df.loc[team] = [elo]

elo_df.sort_values(by='Elo', ascending=False)

Elo ratings at the end of the simulation


Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Washington Capitals,1599.556568
Tampa Bay Lightning,1574.93908
Pittsburgh Penguins,1567.182912
Carolina Hurricanes,1559.041005
New York Islanders,1547.307066
Arizona Coyotes,1532.789055
Vegas Golden Knights,1532.414202
Minnesota Wild,1532.308247
Montreal Canadiens,1529.608428
Edmonton Oilers,1528.232277


In [133]:
print("Elo ratings at the end of the real prior season")

elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in final_season_elos[2021].items():
    elo_df.loc[team] = [elo]

elo_df.sort_values(by='Elo', ascending=False)

Elo ratings at the end of the real prior season


Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Vegas Golden Knights,1580.366387
Colorado Avalanche,1574.882544
Boston Bruins,1563.605372
Pittsburgh Penguins,1561.056207
Tampa Bay Lightning,1558.41656
Carolina Hurricanes,1557.582815
Washington Capitals,1553.844148
Florida Panthers,1547.747386
Toronto Maple Leafs,1543.472271
Minnesota Wild,1532.772853


In [140]:
print("Playoff Probabilities")

playoff_df = pd.DataFrame(columns = ['Playoff Prob']).rename_axis('Team')

playoffs_counter_normalized = {team: count / num_simulations for team, count in playoffs_counter.items()}

for team, playoff_prob in playoffs_counter_normalized.items():
    playoff_df.loc[team] = [playoff_prob]

playoff_df.sort_values(by='Playoff Prob', ascending=False)

Playoff Probabilities


Unnamed: 0_level_0,Playoff Prob
Team,Unnamed: 1_level_1
Vegas Golden Knights,0.86
Boston Bruins,0.81
Colorado Avalanche,0.78
Tampa Bay Lightning,0.75
Florida Panthers,0.75
Carolina Hurricanes,0.74
Pittsburgh Penguins,0.73
Toronto Maple Leafs,0.71
New York Islanders,0.7
St. Louis Blues,0.69
