In [1]:
# make predictions on upcoming games


In [2]:
import pandas as pd

In [3]:
# read in upcoming games data
# write upcoming games to excel
upcoming_games = pd.read_excel(f'upcoming_nhl_games_2025.xlsx', header=0)
upcoming_games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Game Link        114 non-null    object        
 1   Home_Team        114 non-null    object        
 2   Away_Team        114 non-null    object        
 3   Time             114 non-null    object        
 4   Date             114 non-null    datetime64[ns]
 5   Season           114 non-null    int64         
 6   Day_of_Week      114 non-null    object        
 7   Month            114 non-null    object        
 8   Game_Start_Hour  114 non-null    int64         
dtypes: datetime64[ns](1), int64(2), object(6)
memory usage: 8.1+ KB


In [4]:
# trim to games that are upcoming today only
upcoming_games_trim = upcoming_games[upcoming_games['Date'] == pd.Timestamp.now().normalize()]
upcoming_games_trim

Unnamed: 0,Game Link,Home_Team,Away_Team,Time,Date,Season,Day_of_Week,Month,Game_Start_Hour
0,https://www.flashscore.com/match/hockey/caroli...,Carolina Hurricanes,Vancouver Canucks,19:00,2025-11-14,2025,Friday,November,19
1,https://www.flashscore.com/match/hockey/philad...,St. Louis Blues,Philadelphia Flyers,20:00,2025-11-14,2025,Friday,November,20
2,https://www.flashscore.com/match/hockey/new-yo...,Utah Mammoth,New York Islanders,21:00,2025-11-14,2025,Friday,November,21


In [5]:
# read in modeling data
modeling_data = pd.read_excel(r'data/modeling_data.xlsx', header=0)

# filter to target season
target_season = upcoming_games_trim['Season'].iloc[0]
modeling_data = modeling_data[modeling_data['Season'] == target_season]

# drop cols that contain P1-P3 in the col name
modeling_data = modeling_data[modeling_data.columns.drop(list(modeling_data.filter(regex='P[1-3]')))]

# drop other cols
# list of drop cols that won't be used in modeling
drop_cols = ['Game_ID', 'Reg_Home_Win', 'Reg_Away_Win',
         'Conf_Pair', 'Team_Pair', 'Div_Pair', 'Conf_Matchup', 'Div_Matchup',
        'Odds_1', 'Odds_2', 'Odds_X', 'Start_Hour_Group', 'Season', 'Month', 'Day_of_Week',
        'Reg_Tie'
    ]

modeling_data = modeling_data.drop(columns=drop_cols)

# inspect
modeling_data.info()
modeling_data.head()


<class 'pandas.core.frame.DataFrame'>
Index: 374 entries, 89 to 3404
Data columns (total 17 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Date                              374 non-null    datetime64[ns]
 1   Home_Team                         374 non-null    object        
 2   Away_Team                         374 non-null    object        
 3   prop_Reg_Home_Win_Home            374 non-null    float64       
 4   prop_Reg_Away_Win_Home            374 non-null    float64       
 5   prop_Reg_Tie_Home                 374 non-null    float64       
 6   prop_reg_home_goal_diff_Home      374 non-null    float64       
 7   prop_reg_away_goal_diff_Home      374 non-null    float64       
 8   avg_reg_home_goals_per_game_Home  374 non-null    float64       
 9   avg_reg_away_goals_per_game_Home  374 non-null    float64       
 10  prop_Reg_Home_Win_Away            374 non-null    flo

Unnamed: 0,Date,Home_Team,Away_Team,prop_Reg_Home_Win_Home,prop_Reg_Away_Win_Home,prop_Reg_Tie_Home,prop_reg_home_goal_diff_Home,prop_reg_away_goal_diff_Home,avg_reg_home_goals_per_game_Home,avg_reg_away_goals_per_game_Home,prop_Reg_Home_Win_Away,prop_Reg_Away_Win_Away,prop_Reg_Tie_Away,prop_reg_home_goal_diff_Away,prop_reg_away_goal_diff_Away,avg_reg_home_goals_per_game_Away,avg_reg_away_goals_per_game_Away
89,2025-09-22,Anaheim Ducks,Utah Mammoth,1.0,0.0,0.0,0.857143,0.142857,6.0,1.0,1.0,0.0,0.0,0.75,0.25,4.5,1.5
90,2025-09-24,Anaheim Ducks,Los Angeles Kings,0.5,0.5,0.0,0.6,0.4,3.0,2.0,0.0,1.0,0.0,0.142857,0.857143,0.5,3.0
91,2025-09-29,Anaheim Ducks,San Jose Sharks,0.666667,0.333333,0.0,0.6,0.4,3.0,2.0,1.0,0.0,0.0,0.6,0.4,3.0,2.0
92,2025-10-14,Anaheim Ducks,Pittsburgh Penguins,0.75,0.25,0.0,0.590909,0.409091,3.25,2.25,0.333333,0.5,0.166667,0.464286,0.535714,2.166667,2.5
93,2025-10-16,Anaheim Ducks,Carolina Hurricanes,0.6,0.4,0.0,0.518519,0.481481,2.8,2.6,0.2,0.4,0.4,0.40625,0.59375,2.6,3.8


In [6]:
# store list of home teams and away teams in upcoming games
home_teams = upcoming_games_trim['Home_Team'].unique().tolist()
away_teams = upcoming_games_trim['Away_Team'].unique().tolist()

print(f'Home Teams: {home_teams}')
print(f'Away Teams: {away_teams}')

Home Teams: ['Carolina Hurricanes', 'St. Louis Blues', 'Utah Mammoth']
Away Teams: ['Vancouver Canucks', 'Philadelphia Flyers', 'New York Islanders']


In [7]:

# filter to teams in upcoming games
home_data = modeling_data[modeling_data['Home_Team'].isin(home_teams)]
away_data = modeling_data[modeling_data['Away_Team'].isin(away_teams)]

# get last row for each Home_Team
last_rows_home = home_data.groupby('Home_Team').tail(1).reset_index(drop=True)

# drop cols that end with _Away
last_rows_home = last_rows_home[last_rows_home.columns.drop(list(last_rows_home.filter(regex='_Away$')))]

# drop Away_Team
last_rows_home = last_rows_home.drop(columns=['Away_Team', 'Date'])
last_rows_home

Unnamed: 0,Home_Team,prop_Reg_Home_Win_Home,prop_Reg_Away_Win_Home,prop_Reg_Tie_Home,prop_reg_home_goal_diff_Home,prop_reg_away_goal_diff_Home,avg_reg_home_goals_per_game_Home,avg_reg_away_goals_per_game_Home
0,Carolina Hurricanes,0.5,0.4,0.1,0.529412,0.470588,3.6,3.2
1,St. Louis Blues,0.307692,0.461538,0.230769,0.440476,0.559524,2.846154,3.615385
2,Utah Mammoth,0.555556,0.333333,0.111111,0.518519,0.481481,3.111111,2.888889


In [8]:

# get last row for each Away_Team
last_rows_away = away_data.groupby('Away_Team').tail(1).reset_index(drop=True)

# drop cols that end with _Home
last_rows_away = last_rows_away[last_rows_away.columns.drop(list(last_rows_away.filter(regex='_Home$')))]

# drop Home_Team
last_rows_away = last_rows_away.drop(columns=['Home_Team', 'Date'])

last_rows_away.info()
last_rows_away.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 8 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Away_Team                         3 non-null      object 
 1   prop_Reg_Home_Win_Away            3 non-null      float64
 2   prop_Reg_Away_Win_Away            3 non-null      float64
 3   prop_Reg_Tie_Away                 3 non-null      float64
 4   prop_reg_home_goal_diff_Away      3 non-null      float64
 5   prop_reg_away_goal_diff_Away      3 non-null      float64
 6   avg_reg_home_goals_per_game_Away  3 non-null      float64
 7   avg_reg_away_goals_per_game_Away  3 non-null      float64
dtypes: float64(7), object(1)
memory usage: 320.0+ bytes


Unnamed: 0,Away_Team,prop_Reg_Home_Win_Away,prop_Reg_Away_Win_Away,prop_Reg_Tie_Away,prop_reg_home_goal_diff_Away,prop_reg_away_goal_diff_Away,avg_reg_home_goals_per_game_Away,avg_reg_away_goals_per_game_Away
0,Philadelphia Flyers,0.666667,0.0,0.333333,0.6875,0.3125,3.666667,1.666667
1,Vancouver Canucks,0.428571,0.428571,0.142857,0.446809,0.553191,3.0,3.714286
2,New York Islanders,0.444444,0.444444,0.111111,0.553846,0.446154,4.0,3.222222


In [9]:
# left join home and away data on team names
upcoming_games_final = pd.merge(upcoming_games_trim, last_rows_home, on='Home_Team', how='left')
upcoming_games_final = pd.merge(upcoming_games_final, last_rows_away, on='Away_Team', how='left')

upcoming_games_final.info()
upcoming_games_final.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 23 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Game Link                         3 non-null      object        
 1   Home_Team                         3 non-null      object        
 2   Away_Team                         3 non-null      object        
 3   Time                              3 non-null      object        
 4   Date                              3 non-null      datetime64[ns]
 5   Season                            3 non-null      int64         
 6   Day_of_Week                       3 non-null      object        
 7   Month                             3 non-null      object        
 8   Game_Start_Hour                   3 non-null      int64         
 9   prop_Reg_Home_Win_Home            3 non-null      float64       
 10  prop_Reg_Away_Win_Home            3 non-null      floa

Unnamed: 0,Game Link,Home_Team,Away_Team,Time,Date,Season,Day_of_Week,Month,Game_Start_Hour,prop_Reg_Home_Win_Home,...,prop_reg_away_goal_diff_Home,avg_reg_home_goals_per_game_Home,avg_reg_away_goals_per_game_Home,prop_Reg_Home_Win_Away,prop_Reg_Away_Win_Away,prop_Reg_Tie_Away,prop_reg_home_goal_diff_Away,prop_reg_away_goal_diff_Away,avg_reg_home_goals_per_game_Away,avg_reg_away_goals_per_game_Away
0,https://www.flashscore.com/match/hockey/caroli...,Carolina Hurricanes,Vancouver Canucks,19:00,2025-11-14,2025,Friday,November,19,0.5,...,0.470588,3.6,3.2,0.428571,0.428571,0.142857,0.446809,0.553191,3.0,3.714286
1,https://www.flashscore.com/match/hockey/philad...,St. Louis Blues,Philadelphia Flyers,20:00,2025-11-14,2025,Friday,November,20,0.307692,...,0.559524,2.846154,3.615385,0.666667,0.0,0.333333,0.6875,0.3125,3.666667,1.666667
2,https://www.flashscore.com/match/hockey/new-yo...,Utah Mammoth,New York Islanders,21:00,2025-11-14,2025,Friday,November,21,0.555556,...,0.481481,3.111111,2.888889,0.444444,0.444444,0.111111,0.553846,0.446154,4.0,3.222222


In [10]:
# read in trained model
import joblib

basic_model = joblib.load('model/catboost_model_reg_tie.pkl')


In [11]:
# isolate features model needs for upcoming games
feature_cols = basic_model.feature_names_

# trim cols for data to predict
predict_df = upcoming_games_final[feature_cols]
predict_df.info()
predict_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 17 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Season                            3 non-null      int64  
 1   Month                             3 non-null      object 
 2   Day_of_Week                       3 non-null      object 
 3   prop_Reg_Home_Win_Home            3 non-null      float64
 4   prop_Reg_Away_Win_Home            3 non-null      float64
 5   prop_Reg_Tie_Home                 3 non-null      float64
 6   prop_reg_home_goal_diff_Home      3 non-null      float64
 7   prop_reg_away_goal_diff_Home      3 non-null      float64
 8   avg_reg_home_goals_per_game_Home  3 non-null      float64
 9   avg_reg_away_goals_per_game_Home  3 non-null      float64
 10  prop_Reg_Home_Win_Away            3 non-null      float64
 11  prop_Reg_Away_Win_Away            3 non-null      float64
 12  prop_Reg_Tie

Unnamed: 0,Season,Month,Day_of_Week,prop_Reg_Home_Win_Home,prop_Reg_Away_Win_Home,prop_Reg_Tie_Home,prop_reg_home_goal_diff_Home,prop_reg_away_goal_diff_Home,avg_reg_home_goals_per_game_Home,avg_reg_away_goals_per_game_Home,prop_Reg_Home_Win_Away,prop_Reg_Away_Win_Away,prop_Reg_Tie_Away,prop_reg_home_goal_diff_Away,prop_reg_away_goal_diff_Away,avg_reg_home_goals_per_game_Away,avg_reg_away_goals_per_game_Away
0,2025,November,Friday,0.5,0.4,0.1,0.529412,0.470588,3.6,3.2,0.428571,0.428571,0.142857,0.446809,0.553191,3.0,3.714286
1,2025,November,Friday,0.307692,0.461538,0.230769,0.440476,0.559524,2.846154,3.615385,0.666667,0.0,0.333333,0.6875,0.3125,3.666667,1.666667
2,2025,November,Friday,0.555556,0.333333,0.111111,0.518519,0.481481,3.111111,2.888889,0.444444,0.444444,0.111111,0.553846,0.446154,4.0,3.222222


In [12]:
from catboost import CatBoostClassifier

# make predictions
# 5. Predict and evaluate proba
y_pred_proba = basic_model.predict_proba(predict_df)[:, 1]

# add preds toupcoming_games_final
upcoming_games_final['Pred_Tie_Proba'] = y_pred_proba

# inspect
inspect_cols = ['Date', 'Time', 'Home_Team', 'Away_Team', 'Pred_Tie_Proba'] + feature_cols
upcoming_games_final[inspect_cols].sort_values(by='Pred_Tie_Proba', ascending=False)


Unnamed: 0,Date,Time,Home_Team,Away_Team,Pred_Tie_Proba,Season,Month,Day_of_Week,prop_Reg_Home_Win_Home,prop_Reg_Away_Win_Home,...,prop_reg_away_goal_diff_Home,avg_reg_home_goals_per_game_Home,avg_reg_away_goals_per_game_Home,prop_Reg_Home_Win_Away,prop_Reg_Away_Win_Away,prop_Reg_Tie_Away,prop_reg_home_goal_diff_Away,prop_reg_away_goal_diff_Away,avg_reg_home_goals_per_game_Away,avg_reg_away_goals_per_game_Away
1,2025-11-14,20:00,St. Louis Blues,Philadelphia Flyers,0.691516,2025,November,Friday,0.307692,0.461538,...,0.559524,2.846154,3.615385,0.666667,0.0,0.333333,0.6875,0.3125,3.666667,1.666667
0,2025-11-14,19:00,Carolina Hurricanes,Vancouver Canucks,0.256656,2025,November,Friday,0.5,0.4,...,0.470588,3.6,3.2,0.428571,0.428571,0.142857,0.446809,0.553191,3.0,3.714286
2,2025-11-14,21:00,Utah Mammoth,New York Islanders,0.15201,2025,November,Friday,0.555556,0.333333,...,0.481481,3.111111,2.888889,0.444444,0.444444,0.111111,0.553846,0.446154,4.0,3.222222
