In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import copy
import warnings

# Silence every warning for the rest of the session. Note that this is a blanket
# filter: numpy/pandas runtime and deprecation warnings are hidden as well.
warnings.filterwarnings('ignore')

In [2]:
def calculate_prob_winning(home_team, away_team, elo_ratings, home_advantage=26):
    """Return the pre-game win probabilities implied by two teams' Elo ratings.

    Parameters
    ----------
    home_team, away_team : hashable
        Keys into ``elo_ratings``.
    elo_ratings : mapping
        Current rating of each team; both teams must already be present.
    home_advantage : float, optional
        Rating points credited to the home side before the probability is
        computed. Defaults to 26, the value that used to be hard-coded here.

    Returns
    -------
    tuple
        ``(prob_win_home, prob_win_away, Elo_diff_home)`` where
        ``Elo_diff_home`` is the home-minus-away rating gap including the
        home advantage, and the two probabilities sum to 1.

    Raises
    ------
    KeyError
        If either team is missing from ``elo_ratings``.
    """
    Elo_diff_home = elo_ratings[home_team] - elo_ratings[away_team] + home_advantage

    # Logistic Elo curve on a 400-point scale.
    prob_win_home = 1 / (10 ** (-1 * Elo_diff_home / 400) + 1)
    prob_win_away = 1 - prob_win_home

    return prob_win_home, prob_win_away, Elo_diff_home

def margin_of_victory(home_goals, away_goals):
    """Return the Elo-shift multiplier for a game's final goal difference.

    The multiplier is ``0.6686 * ln(|home_goals - away_goals|) + 0.8048``, so
    a one-goal game yields 0.8048 and wider margins grow logarithmically.
    A goal difference of 0 is passed straight to ``np.log``; no special
    handling is applied here.
    """
    goal_gap = abs(home_goals - away_goals)
    return 0.6686 * np.log(goal_gap) + 0.8048

def update_game_elo(home_team, away_team, home_goals, away_goals, season, df, idx, elo_ratings, K):
    """Record pre-game Elo figures for one played game and apply the rating shift.

    The pre-game rating gap and win probabilities are written into row ``idx``
    of ``df`` (columns ``EloDiffHome``, ``EloDiffAway``, ``WinProbHome``,
    ``WinProbVis``). If the game has a winner, both teams' entries in
    ``elo_ratings`` are then moved by equal and opposite amounts. When neither
    side outscored the other, the ratings are left untouched.

    ``df`` and ``elo_ratings`` are modified in place and also returned.
    ``season`` is accepted but not used by this function.

    Returns
    -------
    tuple
        ``(df, elo_ratings)``.
    """
    # Relocated / renamed franchises are tracked under their current name.
    renamed_franchises = {
        'Atlanta Thrashers': 'Winnipeg Jets',
        'Phoenix Coyotes': 'Arizona Coyotes',
        'Mighty Ducks of Anaheim': 'Anaheim Ducks'
    }
    home_team = renamed_franchises.get(home_team, home_team)
    away_team = renamed_franchises.get(away_team, away_team)

    # A team seen for the first time enters at the 1500 baseline.
    for club in (home_team, away_team):
        elo_ratings.setdefault(club, 1500)

    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Store the pre-game view of the matchup on the game's row.
    pregame_columns = {
        'EloDiffHome': Elo_diff_home,
        'EloDiffAway': -1 * Elo_diff_home,
        'WinProbHome': home_prob,
        'WinProbVis': away_prob,
    }
    for column, value in pregame_columns.items():
        df.at[idx, column] = value

    # Work out who won; a game with no winner leaves the ratings as they are.
    if home_goals > away_goals:
        home_win, winner_elo_diff = 1, Elo_diff_home
    elif home_goals < away_goals:
        home_win, winner_elo_diff = 0, -1 * Elo_diff_home
    else:
        return df, elo_ratings

    # Surprise of the result from the home team's point of view.
    pre_g_fav_h = home_win - home_prob

    # Damp the shift when the pre-game favourite wins, amplify it on an upset.
    auto_corr = 2.05 / (winner_elo_diff * 0.001 + 2.05)

    # Scale by margin of victory.
    mov_multiplier = auto_corr * margin_of_victory(home_goals, away_goals)
    elo_shift_h = K * mov_multiplier * pre_g_fav_h

    # Zero-sum update: whatever the home side gains, the visitor loses.
    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    return df, elo_ratings

def elo_adjuster(df, season_weight, average_weight, K):
    """Replay every game in ``df`` in row order and track Elo ratings by season.

    Whenever the ``Season`` value changes between consecutive rows, the ratings
    reached so far are stored as that season's final ratings, and each team
    starts the next season at
    ``season_weight * previous_rating + average_weight * league_average``
    (rounded to 2 decimals). Each row is then passed to ``update_game_elo``,
    which also writes the pre-game Elo columns into ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Game log with columns ``Season``, ``Home``, ``Visitor``, ``G_Home`` and
        ``G_Vis``. Rows are processed in their existing order; ``df`` is
        modified in place.
    season_weight, average_weight : float
        Weights on a team's previous rating and on the league average.
    K : float
        Elo K-factor forwarded to ``update_game_elo``.

    Returns
    -------
    tuple
        ``(df, elo_ratings, final_elo_ratings)`` where ``elo_ratings`` is the
        rating dict after the last row and ``final_elo_ratings`` maps each
        season to a copy of the ratings at the end of that season.
    """
    final_elo_ratings = {}
    elo_ratings = {}
    current_season = None

    for idx, row in df.iterrows():
        if current_season is None:
            current_season = row['Season']

        # A change in season closes out the previous one.
        if row['Season'] != current_season:
            season_end = dict(elo_ratings)
            final_elo_ratings[current_season] = season_end

            # The league average is identical for every team, so compute it
            # once per season change instead of once per team.
            league_avg_elo = (
                sum(season_end.values()) / len(season_end) if season_end else 0.0
            )
            elo_ratings = {
                team: round(
                    (season_weight * prev_season_elo) + (average_weight * league_avg_elo), 2
                )
                for team, prev_season_elo in season_end.items()
            }

            current_season = row['Season']

        # Update Elo ratings for the game
        df, elo_ratings = update_game_elo(
            row['Home'], row['Visitor'], row['G_Home'], row['G_Vis'],
            row['Season'], df, idx, elo_ratings, K
        )

    # The last season never triggers a season change, so store it explicitly.
    final_elo_ratings[current_season] = dict(elo_ratings)

    return df, elo_ratings, final_elo_ratings

def forecast_game_elo(home_team, away_team, season, df, idx, elo_ratings, K, simulation_wins):
    """Simulate one game, award standings points and update both Elo ratings.

    Steps, in order (the order also fixes how the global numpy RNG is consumed):

    1. Write the pre-game rating gap and win probabilities to row ``idx``.
    2. Draw the winner from those probabilities.
    3. Draw Poisson scores for both sides until the score agrees with the
       drawn winner.
    4. For a one-goal game, draw whether it went to overtime (p = 0.505).
    5. Credit 2 points to the winner and, after overtime, 1 to the loser.
    6. Shift both Elo ratings and write the score and overtime flag to ``df``
       (columns ``G_Home``, ``G_Vis``, ``OT Ind``).

    ``df``, ``elo_ratings`` and ``simulation_wins`` are modified in place and
    returned. ``season`` is accepted but not used by this function.

    Returns
    -------
    tuple
        ``(df, elo_ratings, simulation_wins)``.
    """
    # Relocated / renamed franchises are tracked under their current name.
    renamed_franchises = {
        'Atlanta Thrashers': 'Winnipeg Jets',
        'Phoenix Coyotes': 'Arizona Coyotes',
        'Mighty Ducks of Anaheim': 'Anaheim Ducks'
    }
    home_team = renamed_franchises.get(home_team, home_team)
    away_team = renamed_franchises.get(away_team, away_team)

    # A team seen for the first time enters at the 1500 baseline.
    for club in (home_team, away_team):
        elo_ratings.setdefault(club, 1500)

    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Store the pre-game view of the matchup on the game's row.
    pregame_columns = {
        'EloDiffHome': Elo_diff_home,
        'EloDiffAway': -1 * Elo_diff_home,
        'WinProbHome': home_prob,
        'WinProbVis': away_prob,
    }
    for column, value in pregame_columns.items():
        df.at[idx, column] = value

    # The winner is drawn first; the score is sampled afterwards to match it.
    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Expected goals: a common base rate tilted linearly by the rating gap,
    # floored at zero so the Poisson rate is never negative.
    score_home = 2.845905 + 0.003584*Elo_diff_home
    score_away = 2.845905 + 0.003584*(-1)*Elo_diff_home
    home_rate = max(0, score_home)
    away_rate = max(0, score_away)

    # Rejection sampling: redraw until the score has the drawn winner ahead.
    # NOTE(review): if the drawn winner's rate was floored to 0 (rating gap
    # beyond roughly 794 points) that side can never outscore the other and
    # this loop would not terminate.
    while True:
        poisson_score_home = np.random.poisson(home_rate)
        poisson_score_away = np.random.poisson(away_rate)
        home_goal_diff = poisson_score_home - poisson_score_away

        home_ahead_as_drawn = home_win == 1 and home_goal_diff > 0
        away_ahead_as_drawn = home_win == 0 and home_goal_diff < 0
        if home_ahead_as_drawn or away_ahead_as_drawn:
            break

    # Only one-goal games can have gone to overtime.
    OT_choice = 0
    if abs(home_goal_diff) == 1:
        OT_choice = np.random.choice(a= [1, 0], p= [0.505, 0.495])

    # home_win is always 0 or 1 because it is drawn from [0, 1].
    if home_win == 1:
        winner, loser, winner_elo_diff = home_team, away_team, Elo_diff_home
    else:
        winner, loser, winner_elo_diff = away_team, home_team, -1*Elo_diff_home

    # Standings points: 2 for the win, 1 for losing after overtime.
    simulation_wins[winner] += 2
    if OT_choice == 1:
        simulation_wins[loser] += 1

    # Same rating update as for real games, driven by the simulated score.
    pre_g_fav_h = home_win - home_prob
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)
    elo_shift_h = K * mov_multiplier * pre_g_fav_h

    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    # Persist the simulated result on the game's row.
    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away
    df.at[idx, 'OT Ind'] = OT_choice

    return df, elo_ratings, simulation_wins

def season_forecaster(df, starting_elo, K):
    """Simulate every game in ``df`` once and return the resulting points table.

    The points table starts at 0 for each distinct value in ``df['Home']``.
    Games are simulated in row order with ``forecast_game_elo``, which updates
    ``df`` and ``starting_elo`` in place as it goes.

    Returns
    -------
    tuple
        ``(df, starting_elo, simulation_pts)`` where ``simulation_pts`` maps
        team name to simulated standings points.
    """
    # Every team that hosts at least one game gets a zeroed points entry.
    simulation_pts = dict.fromkeys(df['Home'].unique(), 0)

    for game_idx, game in df.iterrows():
        df, starting_elo, simulation_pts = forecast_game_elo(
            game['Home'], game['Visitor'], game['Season'],
            df, game_idx, starting_elo, K, simulation_pts
        )

    return df, starting_elo, simulation_pts

def playoff_tracker(simulation_pts, playoffs_made):
    """Credit a playoff appearance to the top eight teams of each conference.

    Teams are grouped by the hard-coded East/West lists below. Within each
    conference, the teams present in ``simulation_pts`` are ranked by points
    (highest first; equal points keep the order of the conference list) and
    the first eight have their count in ``playoffs_made`` increased by one.
    Teams in ``simulation_pts`` that appear in neither list are ignored.

    ``playoffs_made`` is modified in place and returned.
    """
    conference_mapping = {
        'East': ['Florida Panthers', 'Toronto Maple Leafs', 'Tampa Bay Lightning', 'Boston Bruins', 'Buffalo Sabres', 'Detroit Red Wings',
                 'Ottawa Senators', 'Montreal Canadiens', 'Carolina Hurricanes', 'New York Rangers', 'Pittsburgh Penguins',
                 'Washington Capitals', 'New York Islanders', 'Columbus Blue Jackets', 'New Jersey Devils', 'Philadelphia Flyers'],
        'West': ['Colorado Avalanche', 'Minnesota Wild', 'St. Louis Blues', 'Dallas Stars', 'Nashville Predators', 'Winnipeg Jets',
                 'Chicago Blackhawks', 'Arizona Coyotes', 'Calgary Flames', 'Edmonton Oilers', 'Los Angeles Kings', 'Vegas Golden Knights',
                 'Vancouver Canucks', 'San Jose Sharks', 'Anaheim Ducks', 'Seattle Kraken']
    }

    for roster in conference_mapping.values():
        # Only rank teams that actually took part in this simulation.
        contenders = [club for club in roster if club in simulation_pts]
        contenders.sort(key=simulation_pts.__getitem__, reverse=True)

        for club in contenders[:8]:
            playoffs_made[club] += 1

    return playoffs_made

def simulator(df, season_weight, average_weight, nsims, season, elo_rating_end, K):
    """Run ``nsims`` Monte-Carlo simulations of one season's schedule.

    The numpy global RNG is re-seeded with 0 on every call, so repeated calls
    with the same arguments consume the same random stream.

    Parameters
    ----------
    df : pandas.DataFrame
        Game log; only rows whose ``Season`` equals ``season`` are simulated.
        The caller's frame is not modified (a copy of the subset is used).
    season_weight, average_weight : float
        Weights used to blend each team's previous rating with the league
        average when building the starting ratings.
    nsims : int
        Number of simulated seasons.
    season : scalar
        Value of ``Season`` to simulate.
    elo_rating_end : dict
        Team -> rating at the end of the preceding season. Not modified.
    K : float
        Elo K-factor forwarded to the game simulation.

    Returns
    -------
    dict
        Team -> ``numpy`` array of length ``nsims`` with the points obtained
        in each simulation. Keys are the distinct ``Home`` values of the season.
    """
    np.random.seed(0)

    # A single copy is enough to keep the caller's frame untouched.
    season_df = df[df['Season'] == season].copy()
    teams = season_df['Home'].unique()

    # Create a dictionary to track overall standings
    standings = {team: np.zeros(nsims) for team in teams}

    # Playoff appearances are tallied per team but are not part of the return value.
    playoffs_counter = {team: 0 for team in teams}

    # The starting ratings depend only on the inputs, so build them once
    # rather than recomputing them (and the league average) in every simulation.
    league_avg_elo = (
        sum(elo_rating_end.values()) / len(elo_rating_end) if elo_rating_end else 0.0
    )
    starting_elos = {
        team: round((season_weight * prev_season_elo) + (average_weight * league_avg_elo), 2)
        for team, prev_season_elo in elo_rating_end.items()
    }

    for simulation in range(nsims):
        # Each simulation mutates its ratings, so hand it a fresh copy.
        elo_ratings = dict(starting_elos)

        season_df, elo_ratings, simulation_pts = season_forecaster(season_df, elo_ratings, K)
        playoffs_counter = playoff_tracker(simulation_pts, playoffs_counter)

        for team, pts in simulation_pts.items():
            standings[team][simulation] = pts

    return standings

In [3]:
def forecast_tester(K, season_weight, average_weight, nsims, season_yr, real_scores,
                    data_path='C:/Users/Gabriel/Documents/GitHub/MIE368_PROJECT/team_proj/data/season_data.csv'):
    """Score one hyper-parameter setting against a season's real points totals.

    The game log is read from ``data_path``, Elo ratings are replayed over it
    with ``elo_adjuster``, and season ``season_yr`` is then simulated ``nsims``
    times starting from the ratings at the end of ``season_yr - 1``. Each
    team's mean simulated points are compared with ``real_scores[season_yr]``.

    Parameters
    ----------
    K : float
        Elo K-factor.
    season_weight, average_weight : float
        Weights used when carrying ratings over between seasons.
    nsims : int
        Number of simulated seasons.
    season_yr : int
        Season to forecast; ``season_yr - 1`` must be a key of the per-season
        ratings produced by ``elo_adjuster``.
    real_scores : mapping
        ``season -> array-like`` of ``[team, points]`` rows.
    data_path : str, optional
        Location of the game-log CSV. Defaults to the path that was previously
        hard-coded, so existing calls behave as before.

    Returns
    -------
    tuple
        ``(mean_pts, MeanAE, RSS)``: a frame of mean simulated points indexed
        by team, the mean absolute error, and the sum of squared errors.
        Errors are taken over the teams present in both tables (inner join).
    """
    scores_df = pd.read_csv(data_path)

    seasons, elo, final_season_elos = elo_adjuster(scores_df, season_weight, average_weight, K)

    standings = simulator(scores_df, season_weight, average_weight, nsims, season_yr, final_season_elos[season_yr-1], K)

    # Average each team's points over all simulations.
    mean_pts = pd.DataFrame(columns = ['PTS']).rename_axis('Team')

    for team, pts in standings.items():
        mean_pts.loc[team] = [np.mean(pts)]

    # The real table may hold its points as strings, hence the numeric conversion.
    real_scores_df = pd.DataFrame(real_scores[season_yr])
    real_scores_df = real_scores_df.rename(columns={0:'Team', 1:'PTS'})
    real_scores_df['PTS'] = pd.to_numeric(real_scores_df['PTS'])

    # After the merge PTS_x is the simulated mean and PTS_y the real total.
    points_df = pd.merge(mean_pts, real_scores_df, on='Team')
    points_df['Diff'] = (np.abs(points_df['PTS_y']-points_df['PTS_x']))

    MeanAE = np.sum(points_df['Diff'])/points_df['Diff'].shape[0]
    RSS = np.sum((points_df['Diff'])**2)

    return mean_pts, MeanAE, RSS

In [25]:
# Results table: one row per evaluated hyper-parameter combination
columns = ['K', 'Season Weight', 'Avg Weight', 'NSims', 'MAE', 'RSS']
all_models = pd.DataFrame(columns=columns)


def add_testing_row(all_models, K, season_weight, avg_weight, nsims, MAE, RSS):
    """Append one evaluation result to ``all_models`` and return the table.

    The row is stored under the label ``len(all_models)``, i.e. the next
    integer position for a table that has only been grown through this
    function. ``all_models`` is modified in place; the same object is returned.
    """
    new_row = [K, season_weight, avg_weight, nsims, MAE, RSS]
    all_models.loc[len(all_models)] = new_row

    return all_models

In [5]:
# Actual regular-season points per team, keyed by season. Each value is a
# 2-column array of [team, points]; because names and numbers share one numpy
# array, the points are stored as strings and must be converted before use.
# The 2019 table lists all 31 teams of that season, including Arizona Coyotes
# (86 points), which was previously missing and therefore dropped from the
# error metrics by the inner merge in the evaluation.
real_scores = {2023: np.array([['Boston Bruins',135],['Toronto Maple Leafs',111],['Tampa Bay Lightning',98],
              ['Florida Panthers',92],['Buffalo Sabres',91],['Ottawa Senators',86],['Detroit Red Wings',80],
              ['Montreal Canadiens',68],['Carolina Hurricanes',113],['New Jersey Devils',112],['New York Rangers',107],
              ['New York Islanders',93],['Pittsburgh Penguins',91],['Washington Capitals',80],['Philadelphia Flyers',75],
              ['Columbus Blue Jackets',59],['Colorado Avalanche',109],['Dallas Stars',108],['Minnesota Wild',103],
              ['Winnipeg Jets',95],['Nashville Predators',92],['St. Louis Blues',81],['Arizona Coyotes',70],
              ['Chicago Blackhawks',59],['Vegas Golden Knights',111],['Edmonton Oilers',109],['Los Angeles Kings',104],
              ['Seattle Kraken',100],['Calgary Flames',93],['Vancouver Canucks',83],['San Jose Sharks',60],['Anaheim Ducks',58]]),
               2022: np.array([['Florida Panthers',122],['Toronto Maple Leafs',115],['Tampa Bay Lightning',110],
              ['Boston Bruins',107],['Buffalo Sabres',75],['Detroit Red Wings',74],['Ottawa Senators',73],
              ['Montreal Canadiens',55],['Carolina Hurricanes',116],['New York Rangers',110],['Pittsburgh Penguins',103],
              ['Washington Capitals',100],['New York Islanders',84],['Columbus Blue Jackets',81],['New Jersey Devils',63],
              ['Philadelphia Flyers',61],['Colorado Avalanche',119],['Minnesota Wild',113],['St. Louis Blues',109],
              ['Dallas Stars',98],['Nashville Predators',97],['Winnipeg Jets',89],['Chicago Blackhawks',68],
              ['Arizona Coyotes',57],['Calgary Flames',111],['Edmonton Oilers',104],['Los Angeles Kings',99],
              ['Vegas Golden Knights',94],['Vancouver Canucks',92],['San Jose Sharks',77],['Anaheim Ducks',76],['Seattle Kraken',60]]),
               2019: np.array([['Tampa Bay Lightning',128],['Boston Bruins',107],['Toronto Maple Leafs',100],
              ['Montreal Canadiens',96],['Florida Panthers',86],['Buffalo Sabres',76],['Detroit Red Wings',74],
              ['Ottawa Senators',64],['Washington Capitals',104],['New York Islanders',103],['Pittsburgh Penguins',100],
              ['Carolina Hurricanes',99],['Columbus Blue Jackets',98],['Philadelphia Flyers',82],['New York Rangers',78],
              ['New Jersey Devils',72],['Nashville Predators',100],['Winnipeg Jets',99],['St. Louis Blues',99],
              ['Dallas Stars',93],['Colorado Avalanche',90],['Chicago Blackhawks',84],['Minnesota Wild',83],
              ['Calgary Flames',107],['San Jose Sharks',101],['Vegas Golden Knights',93],['Arizona Coyotes',86],
              ['Vancouver Canucks',81],['Anaheim Ducks',80],['Edmonton Oilers',79],['Los Angeles Kings',71]])}

In [26]:
# Hyper-parameter sweep for the 2022 season: every (K, season weight) pair is
# scored with forecast_tester and its errors are appended to all_models.
np.random.seed(0)

season_weight_ls = [0.65, 0.75]
# The average weight is not configured here; it is derived in the loop below.
#average_weight = 0.3
K_list = [6,8,10]
num_simulations = 1000
year = 2022

for K in K_list:
    for s_w in season_weight_ls:
        avg_w = 1 - s_w  # the two carry-over weights always sum to 1
        mean_pts, MeanAE, RSS = forecast_tester(K, s_w, avg_w, num_simulations, year, real_scores)
        all_models = add_testing_row(all_models, K, s_w, avg_w, num_simulations, MeanAE, RSS)
    print(f'{K} is done')

# NOTE(review): absolute, machine-specific output path; the file receives every
# row currently in all_models, not only the rows added by this cell.
all_models.to_csv('C:/Users/Gabriel/Documents/MIE368/Data/new_elo_sim_test_results_2022.csv', index=False)

6 is done
8 is done
10 is done


In [24]:
# Single-configuration run for the 2019 season (K = 8, season weight 0.8),
# appended to the same all_models table used by the other sweeps.
season_weight_ls = [0.8]
# The average weight is not configured here; it is derived in the loop below.
#average_weight = 0.3
K_list = [8]
num_simulations = 1000
year = 2019

for K in K_list:
    for s_w in season_weight_ls:
        avg_w = 1 - s_w  # the two carry-over weights always sum to 1
        mean_pts, MeanAE, RSS = forecast_tester(K, s_w, avg_w, num_simulations, year, real_scores)
        all_models = add_testing_row(all_models, K, s_w, avg_w, num_simulations, MeanAE, RSS)
    
    print(f'{K} is done')

# NOTE(review): all_models is not reset in this cell and has no season column,
# so any rows added by earlier cells are written to this 2019 file as well and
# cannot be told apart by year. The output path is absolute and machine-specific.
all_models.to_csv('C:/Users/Gabriel/Documents/MIE368/Data/elo_sim_test_results_2019.csv', index=False)

8 is done


In [19]:
# NOTE(review): the table captured below lists K = 6/10/12/15 with season
# weights 0.7-0.9, which matches none of the sweeps defined in the cells above;
# it appears to come from an earlier run. Re-run the notebook before relying on it.
print(all_models)

       K  Season Weight  Avg Weight   NSims        MAE          RSS
0    6.0            0.7         0.3  1000.0   8.867400  4030.551840
1    6.0            0.8         0.2  1000.0   9.074700  4127.649169
2    6.0            0.9         0.1  1000.0   9.277367  4269.552435
3   10.0            0.7         0.3  1000.0   9.511400  4580.774778
4   10.0            0.8         0.2  1000.0   9.585400  4821.267356
5   10.0            0.9         0.1  1000.0  10.258967  5281.897507
6   12.0            0.7         0.3  1000.0   9.889000  5031.585206
7   12.0            0.8         0.2  1000.0  10.060000  5195.831618
8   12.0            0.9         0.1  1000.0  10.784833  5785.813667
9   15.0            0.7         0.3  1000.0  10.294033  5411.811813
10  15.0            0.8         0.2  1000.0  10.768300  5834.520757
11  15.0            0.9         0.1  1000.0  11.341167  6312.874641
