In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as scs
import statistics
import copy
import warnings
warnings.filterwarnings('ignore')

In [None]:
scores_df = pd.read_csv('C:/Users/Gabriel/Documents/GitHub/MIE368_PROJECT/team_proj/data/season_data.csv')
playoff_scores_df = pd.read_csv('C:/Users/Gabriel/Documents/GitHub/MIE368_PROJECT/team_proj/data/playoff_data.csv')
real_elo = pd.read_csv('C:/Users/Gabriel/Documents/GitHub/MIE368_PROJECT/team_proj/data/nhl_elo.csv')

In [2]:
scores_df = pd.read_csv(r'/content/season_data.csv')
playoff_df = pd.read_csv(r'/content/playoff_data.csv')
real_elo = pd.read_csv(r'/content/nhl_elo.csv')

In [3]:
scores_df.head()

Unnamed: 0,Date,Visitor,G_Vis,Home,G_Home,OT Ind,Season
0,2000-10-04,Colorado Avalanche,2.0,Dallas Stars,2.0,1,2001
1,2000-10-05,Ottawa Senators,4.0,Boston Bruins,4.0,1,2001
2,2000-10-05,Chicago Blackhawks,2.0,Buffalo Sabres,4.0,0,2001
3,2000-10-05,Detroit Red Wings,4.0,Calgary Flames,3.0,0,2001
4,2000-10-05,Vancouver Canucks,3.0,Philadelphia Flyers,6.0,0,2001


## Game Result Elo Adjustment

In [4]:
first_yr = scores_df[scores_df['Season'] == 2001]
Teams = first_yr['Home'].unique().tolist()

team_changes = {
    'Atlanta Thrashers': 'Winnipeg Jets',
    'Phoenix Coyotes': 'Arizona Coyotes',
    'Mighty Ducks of Anaheim': 'Anaheim Ducks'
    # Add more mappings as needed
}

K = 6

In [5]:
def calculate_prob_winning(home_team, away_team, elo_ratings):

    Elo_diff_home = elo_ratings[home_team]-elo_ratings[away_team] + 26  #add extra for home-ice advantage
    #Elo_diff_away = elo_ratings[away_team] - elo_ratings[home_team]
    prob_win_home = 1/(10**(-1*Elo_diff_home/400)+1)
    #prob_win_away = 1/(10**(-1*Elo_diff_away/400)+1)
    prob_win_away = 1 - prob_win_home

    return prob_win_home, prob_win_away, Elo_diff_home

In [9]:
def margin_of_victory(home_goals, away_goals):

    mov = abs(home_goals - away_goals)
    mov_mult = 0.6686*np.log(mov)+0.8048

    return mov_mult

In [6]:
# Function to update ELO ratings after a game -- Could add input for elo_ratings dictionary
def update_game_elo(home_team, away_team, home_goals, away_goals, season, df, idx, elo_ratings):

    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    #print('Home team is ' + str(home_team))
    #print('Away team is ' + str(away_team))

    if home_team not in elo_ratings:
        # New teams added past a certain year start with a different ELO rating
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob

     # Update Pregame Favorite Multiplier
    if home_goals > away_goals:
        home_win = 1
        #away_win = 0
        winner_elo_diff = Elo_diff_home
    elif home_goals < away_goals:
        home_win = 0
        #away_win = 1
        winner_elo_diff = -1*Elo_diff_home
    else:
        return df, elo_ratings

    pre_g_fav_h = home_win - home_prob
    #pre_g_fav_a = away_win - away_prob

    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)

     # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(home_goals, away_goals)
    #print('home goals:' + str(home_goals))
    #print('away goals:' + str(away_goals))
    #print('mult is: ' + str(mov_multiplier))

     # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h
    #elo_shift_a = K * mov_multiplier * pre_g_fav_a
    #print('elo shift is: ' + str(elo_shift_h))

    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    return df, elo_ratings

## End of Season Elo Adjustments

In [7]:
def elo_adjuster(df):

    # Initialize variables to store the final Elo ratings for the season
    final_elo_ratings = {}
    elo_ratings = {}
    season_weight = 0.7
    average_weight = 0.3
    current_season = None

    # Iterate over seasons and games
    for idx, row in df.iterrows():
        if current_season is None:
            current_season = row['Season']

        # Check if the season in the current row is different from the current season
        if row['Season'] != current_season:
            # Save the previous season's Elo ratings and calculate the starting Elo for the new season
            final_elo_ratings[current_season] = dict(elo_ratings)
            elo_ratings = {}  # Reset Elo ratings for the new season

            for team in final_elo_ratings[current_season]:
                prev_season_elo = final_elo_ratings[current_season][team]
                league_avg_elo = sum(final_elo_ratings[current_season].values()) / len(final_elo_ratings[current_season])
                starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
                elo_ratings[team] = round(starting_elo, 2)

            current_season = row['Season']

        # Update Elo ratings for the game
        df, elo_ratings = update_game_elo(row['Home'], row['Visitor'], row['G_Home'], row['G_Vis'], row['Season'], df, idx, elo_ratings)

    # Calculate the final Elo ratings for the last season
    final_elo_ratings[current_season] = dict(elo_ratings)

    return df, elo_ratings, final_elo_ratings

In [10]:
seasons, elo, final_season_elos = elo_adjuster(scores_df)

In [11]:
elo

{'Dallas Stars': 1550.7819539073464,
 'Colorado Avalanche': 1579.9182021884367,
 'Boston Bruins': 1642.0502212110703,
 'Ottawa Senators': 1479.0506615440481,
 'Buffalo Sabres': 1486.7120784302224,
 'Chicago Blackhawks': 1393.8606927123315,
 'Calgary Flames': 1515.3348272021358,
 'Detroit Red Wings': 1443.4317712658462,
 'Philadelphia Flyers': 1421.6856205830084,
 'Vancouver Canucks': 1485.5682678752944,
 'Arizona Coyotes': 1405.4197600448936,
 'St. Louis Blues': 1483.661524120255,
 'Edmonton Oilers': 1580.5090602196963,
 'Florida Panthers': 1538.0042767487648,
 'Anaheim Ducks': 1370.185824502506,
 'Minnesota Wild': 1543.83706875618,
 'New Jersey Devils': 1533.2411289939719,
 'Montreal Canadiens': 1410.3105469348452,
 'Pittsburgh Penguins': 1504.8509804888336,
 'Nashville Predators': 1505.5125954824891,
 'San Jose Sharks': 1386.7944419061305,
 'Tampa Bay Lightning': 1539.0902552500847,
 'New York Islanders': 1513.2474188071392,
 'Washington Capitals': 1475.2423478319913,
 'Los Angeles K

In [12]:
def forecast_game_elo(home_team, away_team, season, df, idx, elo_ratings, simulation_wins):

    # Map old team names to new ones
    home_team = team_changes.get(home_team, home_team)
    away_team = team_changes.get(away_team, away_team)

    if home_team not in elo_ratings:
        elo_ratings[home_team] = 1500
    if away_team not in elo_ratings:
        elo_ratings[away_team] = 1500

     # Calculate the probability of winning the game for each team
    home_prob, away_prob, Elo_diff_home = calculate_prob_winning(home_team, away_team, elo_ratings)

    # Update DataFrame values
    df.at[idx, 'EloDiffHome'] = Elo_diff_home
    df.at[idx, 'EloDiffAway'] = -1 * Elo_diff_home
    df.at[idx, 'WinProbHome'] = home_prob
    df.at[idx, 'WinProbVis'] = away_prob

    home_win = np.random.choice([0, 1], p=[away_prob, home_prob])

    # Calculate base scores for the home and away team
    score_home = 2.845905 + 0.003584*Elo_diff_home
    score_away = 2.845905 + 0.003584*(-1)*Elo_diff_home

    conditions_satisfied = 0

    while conditions_satisfied == 0:

        # poisson distribution to shift scores
        poisson_score_home = np.random.poisson(max(0, score_home))
        poisson_score_away = np.random.poisson(max(0, score_away))

        # Ensure non-negative scores by replacing any negative values with zero
        # poisson_score_home = np.maximum(0, poisson_score_home)
        # poisson_score_away = np.maximum(0, poisson_score_away)

        home_goal_diff = poisson_score_home - poisson_score_away

        # does it satsify conditions?
        if home_win == 1:
            if home_goal_diff > 0:
                conditions_satisfied = 1

        if home_win == 0:
            if home_goal_diff < 0:
                conditions_satisfied = 1

    #------------------------------------------------------------------------#
    OT_choice = 0

    if abs(home_goal_diff) == 1:
        # Decide if the game goes to OT
        OT_choice = np.random.choice(a= [1, 0], p= [0.23, 0.77])
        df.at[idx, 'OT Ind'] = OT_choice

    if home_win == 1:
        winner_elo_diff = Elo_diff_home
        simulation_wins[home_team] += 2

        if OT_choice == 1:
            simulation_wins[away_team] += 1

    if home_win == 0:
        winner_elo_diff = -1*Elo_diff_home
        simulation_wins[away_team] += 2

        if OT_choice == 1:
            simulation_wins[home_team] += 1

    pre_g_fav_h = home_win - home_prob
    auto_corr = 2.05/(winner_elo_diff*0.001 + 2.05)

    # Adjust ELO shift for margin of victory
    mov_multiplier = auto_corr * margin_of_victory(poisson_score_home, poisson_score_away)

    # Calculate ELO shift based on game result
    elo_shift_h = K * mov_multiplier * pre_g_fav_h

    elo_ratings[home_team] += elo_shift_h
    elo_ratings[away_team] -= elo_shift_h

    df.at[idx, 'G_Home'] = poisson_score_home
    df.at[idx, 'G_Vis'] = poisson_score_away
    #--------------------------------------------------------------------------#

    return df, elo_ratings, simulation_wins

In [13]:
# Iterate over seasons and games
def season_forecaster(df, starting_elo):

    # Create a dictionary to track point totals
    simulation_pts = {team: 0 for team in df['Home'].unique()}

    for idx, row in df.iterrows():
        df, starting_elo, simulation_pts = forecast_game_elo(row['Home'], row['Visitor'], row['Season'], df, idx, starting_elo, simulation_pts)

    return df, starting_elo, simulation_pts

In [14]:
def playoff_tracker(simulation_pts, playoffs_made):

    conference_mapping = {
        'East': ['Florida Panthers', 'Toronto Maple Leafs', 'Tampa Bay Lightning', 'Boston Bruins', 'Buffalo Sabres', 'Detroit Red Wings',
                 'Ottawa Senators', 'Montreal Canadiens', 'Carolina Hurricanes', 'New York Rangers', 'Pittsburgh Penguins',
                 'Washington Capitals', 'New York Islanders', 'Columbus Blue Jackets', 'New Jersey Devils', 'Philadelphia Flyers'],
        'West': ['Colorado Avalanche', 'Minnesota Wild', 'St. Louis Blues', 'Dallas Stars', 'Nashville Predators', 'Winnipeg Jets',
                 'Chicago Blackhawks', 'Arizona Coyotes', 'Calgary Flames', 'Edmonton Oilers', 'Los Angeles Kings', 'Vegas Golden Knights',
                 'Vancouver Canucks', 'San Jose Sharks', 'Anaheim Ducks', 'Seattle Kraken']
    }

    # Separate teams into East and West lists
    for conference, teams in conference_mapping.items():
        valid_teams = [i for i in teams if i in simulation_pts]
        sorted_teams = sorted(valid_teams, key=lambda x: simulation_pts[x], reverse=True)

        for team in sorted_teams[:8]:
            playoffs_made[team] += 1

    return playoffs_made

In [16]:
df2 = copy.deepcopy(scores_df[scores_df['Season'] == 2022])
season_2022 = df2.copy()
num_simulations = 100
season_weight = 0.7
average_weight = 0.3

# Create a dictionary to track overall standings
standings = {team: np.zeros(num_simulations) for team in df2['Home'].unique()}

# Create a dictionary to track playoff appearances
playoffs_counter = {team: 0 for team in df2['Home'].unique()}

for simulation in range(num_simulations):
    elo = copy.deepcopy(final_season_elos[2021])  # Creating a deep copy of final_season_elos[2021]
    elo_end = elo.copy()  # Initial copy for modifications

    elo_ratings = {}  # Reset Elo ratings for the new season

    for team in elo_end:
        prev_season_elo = elo_end[team]
        league_avg_elo = sum(elo_end.values()) / len(elo_end)
        starting_elo = (season_weight * prev_season_elo) + (average_weight * league_avg_elo)
        elo_ratings[team] = round(starting_elo, 2)

    season_2022, elo_ratings, simulation_pts = season_forecaster(season_2022, elo_ratings)
    playoffs_counter = playoff_tracker(simulation_pts, playoffs_counter)

    for team, pts in simulation_pts.items():
        standings[team][simulation] = pts


for team, pts in standings.items():
    #print(pts)
    mean_pts = np.mean(pts)
    std_deviation = np.std(pts)
    print(f"{team}: Mean point total = {mean_pts}, Standard Deviation = {std_deviation}")

Tampa Bay Lightning: Mean point total = 94.7, Standard Deviation = 14.325152704247172
Vegas Golden Knights: Mean point total = 97.51, Standard Deviation = 11.596978054648547
Anaheim Ducks: Mean point total = 72.52, Standard Deviation = 12.350287446047561
Colorado Avalanche: Mean point total = 97.21, Standard Deviation = 13.190371488324354
Edmonton Oilers: Mean point total = 89.19, Standard Deviation = 13.63355786286177
Toronto Maple Leafs: Mean point total = 91.8, Standard Deviation = 12.782800945019835
Washington Capitals: Mean point total = 95.39, Standard Deviation = 14.548467273221602
Buffalo Sabres: Mean point total = 74.7, Standard Deviation = 13.876959321119307
Carolina Hurricanes: Mean point total = 96.69, Standard Deviation = 13.189158426525932
Columbus Blue Jackets: Mean point total = 73.22, Standard Deviation = 12.627414620578513
Detroit Red Wings: Mean point total = 72.26, Standard Deviation = 13.246599563661613
Florida Panthers: Mean point total = 92.59, Standard Deviation

In [17]:
len(season_2022[season_2022['OT Ind'] == 1])/len(season_2022)

0.21722560975609756

In [18]:
mean_pts = pd.DataFrame(columns = ['PTS']).rename_axis('Team')

for team, pts in standings.items():
    mean_pts.loc[team] = [np.mean(pts)]



In [19]:
mean_pts.sort_values(by='PTS', ascending=False)

Unnamed: 0_level_0,PTS
Team,Unnamed: 1_level_1
Vegas Golden Knights,97.51
Colorado Avalanche,97.21
Carolina Hurricanes,96.69
Pittsburgh Penguins,96.11
Washington Capitals,95.39
Tampa Bay Lightning,94.7
Boston Bruins,94.15
Florida Panthers,92.59
Toronto Maple Leafs,91.8
New York Islanders,91.42


In [20]:
print("Elo ratings at the end of the simulation")

elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in elo_ratings.items():
    elo_df.loc[team] = [elo]

elo_df.sort_values(by='Elo', ascending=False)

Elo ratings at the end of the simulation


Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Tampa Bay Lightning,1622.15967
Winnipeg Jets,1592.342874
Carolina Hurricanes,1570.218563
Boston Bruins,1559.304013
Washington Capitals,1556.198446
St. Louis Blues,1548.567322
Calgary Flames,1537.281046
Vegas Golden Knights,1535.603218
Colorado Avalanche,1523.892485
Edmonton Oilers,1516.017653


In [21]:
print("Elo ratings at the end of the real prior season")

elo_df = pd.DataFrame(columns = ['Elo']).rename_axis('Team')

for team, elo in final_season_elos[2021].items():
    elo_df.loc[team] = [elo]

elo_df.sort_values(by='Elo', ascending=False)

Elo ratings at the end of the real prior season


Unnamed: 0_level_0,Elo
Team,Unnamed: 1_level_1
Vegas Golden Knights,1580.366387
Colorado Avalanche,1574.882544
Boston Bruins,1563.605372
Pittsburgh Penguins,1561.056207
Tampa Bay Lightning,1558.41656
Carolina Hurricanes,1557.582815
Washington Capitals,1553.844148
Florida Panthers,1547.747386
Toronto Maple Leafs,1543.472271
Minnesota Wild,1532.772853


In [22]:
print("Playoff Probabilities")

playoff_df = pd.DataFrame(columns = ['Playoff Prob']).rename_axis('Team')

playoffs_counter_normalized = {team: count / num_simulations for team, count in playoffs_counter.items()}

for team, playoff_prob in playoffs_counter_normalized.items():
    playoff_df.loc[team] = [playoff_prob]

playoff_df.sort_values(by='Playoff Prob', ascending=False)

Playoff Probabilities


Unnamed: 0_level_0,Playoff Prob
Team,Unnamed: 1_level_1
Vegas Golden Knights,0.85
Carolina Hurricanes,0.8
Colorado Avalanche,0.79
Pittsburgh Penguins,0.77
Boston Bruins,0.76
Tampa Bay Lightning,0.74
Minnesota Wild,0.72
Washington Capitals,0.72
Florida Panthers,0.71
St. Louis Blues,0.71


In [41]:
# 2022-23 Season
real_scores = np.array([['Boston Bruins',135],['Toronto Maple Leafs',111],['Tampa Bay Lightning',98],
              ['Florida Panthers',92],['Buffalo Sabres',91],['Ottawa Senators',86],['Detroit Red Wings',80],
              ['Montreal Canadiens',68],['Carolina Hurricanes',113],['New Jersey Devils',112],['New York Rangers',107],
              ['New York Islanders',93],['Pittsburgh Penguins',91],['Washington Capitals',80],['Philadelphia Flyers',75],
              ['Columbus Blue Jackets',59],['Colorado Avalanche',109],['Dallas Stars',108],['Minnesota Wild',103],
              ['Winnipeg Jets',95],['Nashville Predators',92],['St. Louis Blues',81],['Arizona Coyotes',70],
              ['Chicago Blackhawks',59],['Vegas Golden Knights',111],['Edmonton Oilers',109],['Los Angeles Kings',104],
              ['Seattle Kraken',100],['Calgary Flames',93],['Vancouver Canucks',83],['San Jose Sharks',60],['Anaheim Ducks',58]])

real_scores = pd.DataFrame(real_scores)
real_scores = real_scores.rename(columns={0:'Team', 1:'PTS'})
real_scores['PTS'] = pd.to_numeric(real_scores['PTS'])

In [39]:
# 2021-22 Season
real_scores = np.array([['Florida Panthers',122],['Toronto Maple Leafs',115],['Tampa Bay Lightning',110],
              ['Boston Bruins',107],['Buffalo Sabres',75],['Detroit Red Wings',74],['Ottawa Senators',73],
              ['Montreal Canadiens',55],['Carolina Hurricanes',116],['New York Rangers',110],['Pittsburgh Penguins',103],
              ['Washington Capitals',100],['New York Islanders',84],['Columbus Blue Jackets',81],['New Jersey Devils',63],
              ['Philadelphia Flyers',61],['Colorado Avalanche',119],['Minnesota Wild',113],['St. Louis Blues',109],
              ['Dallas Stars',98],['Nashville Predators',97],['Winnipeg Jets',89],['Chicago Blackhawks',68],
              ['Arizona Coyotes',57],['Calgary Flames',111],['Edmonton Oilers',104],['Los Angeles Kings',99],
              ['Vegas Golden Knights',94],['Vancouver Canucks',92],['San Jose Sharks',77],['Anaheim Ducks',76],['Seattle Kraken',60]])

real_scores = pd.DataFrame(real_scores)
real_scores = real_scores.rename(columns={0:'Team', 1:'PTS'})
real_scores['PTS'] = pd.to_numeric(real_scores['PTS'])

In [40]:
#2018-19 Season
real_scores = np.array([['Tampa Bay Lightning',128],['Boston Bruins',107],['Toronto Maple Leafs',100],
              ['Montreal Canadiens',96],['Florida Panthers',86],['Buffalo Sabres',76],['Detroit Red Wings',74],
              ['Ottawa Senators',64],['Washington Capitals',104],['New York Islanders',103],['Pittsburgh Penguins',100],
              ['Carolina Hurricanes',99],['Columbus Blue Jackets',98],['Philadelphia Flyers',82],['New York Rangers',78],
              ['New Jersey Devils',72],['Nashville Predators',100],['Winnipeg Jets',99],['St. Louis Blues',99],
              ['Dallas Stars',93],['Colorado Avalanche',90],['Chicago Blackhawks',84],['Minnesota Wild',83],
              ['Calgary Flames',107],['San Jose Sharks',101],['Vegas Golden Knights',93],
              ['Vancouver Canucks',81],['Anaheim Ducks',80],['Edmonton Oilers',79],['Los Angeles Kings',71]])

real_scores = pd.DataFrame(real_scores)
real_scores = real_scores.rename(columns={0:'Team', 1:'PTS'})
real_scores['PTS'] = pd.to_numeric(real_scores['PTS'])

In [42]:
real_scores.sort_values(by='PTS', ascending=False)

Unnamed: 0,Team,PTS
0,Boston Bruins,135
8,Carolina Hurricanes,113
9,New Jersey Devils,112
24,Vegas Golden Knights,111
1,Toronto Maple Leafs,111
25,Edmonton Oilers,109
16,Colorado Avalanche,109
17,Dallas Stars,108
10,New York Rangers,107
26,Los Angeles Kings,104


In [44]:
points_df = pd.merge(mean_pts, real_scores, on='Team')
points_df['Diff'] = (np.abs(points_df['PTS_y']-points_df['PTS_x']))
display(points_df)

Unnamed: 0,Team,PTS_x,PTS_y,Diff
0,Tampa Bay Lightning,94.7,98,3.3
1,Vegas Golden Knights,97.51,111,13.49
2,Anaheim Ducks,72.52,58,14.52
3,Colorado Avalanche,97.21,109,11.79
4,Edmonton Oilers,89.19,109,19.81
5,Toronto Maple Leafs,91.8,111,19.2
6,Washington Capitals,95.39,80,15.39
7,Buffalo Sabres,74.7,91,16.3
8,Carolina Hurricanes,96.69,113,16.31
9,Columbus Blue Jackets,73.22,59,14.22


In [45]:
mean_x = np.mean(points_df['PTS_x'])
std_dev_x = np.std(points_df['PTS_x'])

mean_y = np.mean(points_df['PTS_y'])
std_dev_y = np.std(points_df['PTS_y'])

mean_diff = np.mean(points_df['Diff'])
std_dev_diff = np.std(points_df['Diff'])

test = pd.DataFrame({'Mean':[mean_x, mean_y, mean_diff],'Std Dev':[std_dev_x, std_dev_y, std_dev_diff]})
test.rename({0:'Model', 1:'Actual', 2:'Diff'},axis=0)

Unnamed: 0,Mean,Std Dev
Model,85.467187,7.983707
Actual,91.4375,18.60097
Diff,13.770937,8.881216


In [46]:
def model_accuracy_stats(model_points_results, real_points_results):
  points_df = pd.merge(model_points_results, real_points_results, on='Team')
  points_df['Diff'] = (np.abs(points_df['PTS_y']-points_df['PTS_x']))

  data = ['ttest_ind', 'f_oneway', 'mannwhitneyu',
                     'ranksums', 'brunnermunzel', 'mood', 'ansari',
                     'ks_2samp', 'kruskal', 'fligner',
                     'levene', 'bartlett', 'median_test']
  stats = []
  p_value = []

  stats.append(scs.ttest_ind(points_df['PTS_x'], points_df['PTS_y'], equal_var=False)[0])
  stats.append(scs.f_oneway(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.mannwhitneyu(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.ranksums(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.brunnermunzel(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.mood(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.ansari(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.ks_2samp(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.kruskal(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.fligner(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.levene(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.bartlett(points_df['PTS_x'], points_df['PTS_y'])[0])
  stats.append(scs.median_test(points_df['PTS_x'], points_df['PTS_y'])[0])

  p_value.append(scs.ttest_ind(points_df['PTS_x'], points_df['PTS_y'], equal_var=False)[1])
  p_value.append(scs.f_oneway(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.mannwhitneyu(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.ranksums(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.brunnermunzel(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.mood(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.ansari(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.ks_2samp(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.kruskal(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.fligner(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.levene(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.bartlett(points_df['PTS_x'], points_df['PTS_y'])[1])
  p_value.append(scs.median_test(points_df['PTS_x'], points_df['PTS_y'])[1])

  data = pd.DataFrame([data, stats, p_value])
  data = data.transpose()
  data = data.rename(columns={0:'Test', 1:'Statistic', 2:'P-value'})

  MeanAE = np.sum(points_df['Diff'])/points_df['Diff'].shape[0]
  RSS = np.sum((points_df['Diff'])**2)
  print(f'The Mean Absolute Error is: {MeanAE}')
  print(f'The Residual Sum of Squares is: {RSS}')
  display(data)

In [48]:
model_accuracy_stats(mean_pts, real_scores)

The Mean Absolute Error is: 13.770937499999999
The Residual Sum of Squares is: 8592.4709


Unnamed: 0,Test,Statistic,P-value
0,ttest_ind,-1.6422,0.108008
1,f_oneway,2.696821,0.105612
2,mannwhitneyu,374.0,0.064837
3,ranksums,-1.852947,0.06389
4,brunnermunzel,1.808973,0.078146
5,mood,-3.507039,0.000453
6,ansari,648.0,0.001261
7,ks_2samp,0.40625,0.009516
8,kruskal,3.433964,0.063869
9,fligner,12.763282,0.000353
