<a href="https://colab.research.google.com/github/alb495/bettingModelAttempt/blob/main/HockeyTalk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Hockey Modeling

Putting together hockey data from NHL Edge API

1. Create a data frame of the current standings
2. Print the top of the overall standings (Not sorted by division or conference)

In [1]:
import pandas as pd
import requests

def get_nhl_standings():
    """Download the current NHL standings and return them as a DataFrame.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the response's ``standings`` list, with columns
        ``team``, ``abbrev``, ``wins``, ``losses``, ``ot_losses``, ``points``,
        ``goals_for``, ``goals_against``, ``l10_wins`` and ``streak``.
        ``streak`` is signed: the streak length when the streak code is ``'W'``,
        the negated length for any other code. The columns are present even
        when the response contains no teams.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with an error status.
    requests.Timeout
        If the endpoint does not answer within the timeout.
    """
    # The new official NHL web API endpoint (NHL Edge)
    url = "https://api-web.nhle.com/v1/standings/now"
    timeout_seconds = 10  # without a timeout a stalled connection hangs the notebook

    response = requests.get(url, timeout=timeout_seconds)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    data = response.json()

    columns = [
        'team', 'abbrev', 'wins', 'losses', 'ot_losses', 'points',
        'goals_for', 'goals_against', 'l10_wins', 'streak',
    ]

    standings = []
    for team in data['standings']:
        # NOTE(review): the streak fields are read defensively in case the API
        # omits them for a team with no active streak - confirm against the
        # live payload. A missing streak is recorded as 0.
        streak_count = team.get('streakCount', 0)
        signed_streak = streak_count if team.get('streakCode') == 'W' else -streak_count

        standings.append({
            'team': team['teamName']['default'],
            'abbrev': team['teamAbbrev']['default'],
            'wins': team['wins'],
            'losses': team['losses'],
            'ot_losses': team['otLosses'],
            'points': team['points'],
            'goals_for': team['goalFor'],
            'goals_against': team['goalAgainst'],
            'l10_wins': team['l10Wins'],
            'streak': signed_streak,
        })
    #1.
    # Passing the column list keeps the schema stable for an empty response.
    return pd.DataFrame(standings, columns=columns)
#2.
# Pull the live standings once; every later cell builds on df_nhl.
df_nhl = get_nhl_standings()
# Preview the first five rows (API order, not grouped by division or conference).
print(df_nhl.head(n=5))

                   team abbrev  wins  losses  ot_losses  points  goals_for  \
0    Colorado Avalanche    COL    24       2          7      55        133   
1          Dallas Stars    DAL    22       7          5      49        115   
2   Carolina Hurricanes    CAR    22       9          2      46        112   
3        Minnesota Wild    MIN    20       9          5      45        104   
4  Vegas Golden Knights    VGK    16       6         10      42         97   

   goals_against  l10_wins  streak  
0             77         7       3  
1             90         7       1  
2             91         8       5  
3             87         7       5  
4             92         6      -1  


Power Ratings

1. Feature Engineering
2. The NHL Power Model.
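The rating is `10 × goal_diff_per_game + 10 × l10_win_pct + 0.5 × streak`: goal differential and last-10 form share the same multiplier, and the current streak adds a small bonus (or a penalty when the streak is negative).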
3. Sort by our new Power Rating (a hand-weighted formula, not a trained model)

In [2]:
# 1.
# Feature engineering: derive per-game and recent-form features from the raw standings.
df_nhl['games_played'] = df_nhl['wins'] + df_nhl['losses'] + df_nhl['ot_losses']
# A team with zero games played would produce NaN/inf here and poison the
# rating and the sort below, so its goal differential per game is set to 0.0.
df_nhl['goal_diff_per_game'] = (
    (df_nhl['goals_for'] - df_nhl['goals_against']) / df_nhl['games_played']
).mask(df_nhl['games_played'] == 0, 0.0)
# NOTE(review): dividing by a fixed 10 assumes every team has already played
# at least ten games - confirm for early-season runs.
df_nhl['l10_win_pct'] = df_nhl['l10_wins'] / 10

# 2.
# Hand-weighted power rating: goal differential per game and last-10 win
# percentage are each scaled by 10, and the signed streak adds 0.5 per game.
df_nhl['power_rating'] = (
    (df_nhl['goal_diff_per_game'] * 10)
    + (df_nhl['l10_win_pct'] * 10)
    + (df_nhl['streak'] * 0.5)
)

# 3.
# Strongest team first; the original row index is kept so rows can be traced back to df_nhl.
nhl_rankings = df_nhl.sort_values('power_rating', ascending=False)
print("\n--- 2026 NHL POWER RANKINGS ---")
print(nhl_rankings[['team', 'power_rating', 'points']].head(20))


--- 2026 NHL POWER RANKINGS ---
                    team  power_rating  points
0     Colorado Avalanche     25.469697      55
2    Carolina Hurricanes     16.863636      46
1           Dallas Stars     14.852941      49
3         Minnesota Wild     14.500000      45
9    Washington Capitals     10.454545      40
10         Boston Bruins      9.558824      40
11   Tampa Bay Lightning      8.954545      39
13      Florida Panthers      8.409091      38
8    Philadelphia Flyers      7.687500      40
6     New York Islanders      7.264706      41
5          Anaheim Ducks      7.147059      42
4   Vegas Golden Knights      7.062500      42
19          Utah Mammoth      6.888889      37
21   Toronto Maple Leafs      6.812500      35
15       Edmonton Oilers      5.911765      38
18       San Jose Sharks      4.264706      37
22       Ottawa Senators      3.562500      34
7      Detroit Red Wings      3.500000      41
25        Buffalo Sabres      3.062500      32
16   Pittsburgh Penguins   

# Save Power Rating Results to CSV


In [3]:
# Write the ranked table to disk; index=False leaves the DataFrame index out of the file.
nhl_rankings.to_csv(path_or_buf='nhl_2026_power_rankings.csv', index=False)

# Monte Carlo Simulation of the NHL Season

1. Get data proper. Use the 'power_rating' we built earlier for each team
  - For this INCOMPLETE simulation, we'll use a simplified probability based on Rating Diff
  - The RF model would take more features in a full version (H2H, rest days, etc.)
2. Simulate the rest of the season
  - Simulate 100 extra games for every team (a single pass, not 100 separate seasons)
  - Add points based on simulated win probability
  - In a full sim, we'd iterate through actual upcoming matchups
3. Show Predicted Final Standings
  - This shows what playing 100 more games would do
  - It is not indicative of the remainder of this season

In [5]:
import pandas as pd
import numpy as np
import requests
from sklearn.ensemble import RandomForestClassifier

# 1.
# Lookup table: team abbreviation -> power rating, built from the ranked frame.
team_stats = dict(zip(nhl_rankings['abbrev'], nhl_rankings['power_rating']))

def predict_game(home_abr, away_abr, rf_model=None):
    """Randomly decide one game from the two teams' power ratings.

    Parameters
    ----------
    home_abr, away_abr : str
        Team abbreviations looked up in the module-level ``team_stats`` dict.
        A team missing from the dict is treated as having a rating of 0.
    rf_model : object, optional
        Not read by this function. It is kept so existing three-argument
        calls keep working, and now defaults to ``None`` so callers no longer
        need to pass a placeholder.

    Returns
    -------
    int
        1 if the home team wins the simulated game, 0 otherwise.
    """
    # Feature: Difference in Power Ratings
    rating_diff = team_stats.get(home_abr, 0) - team_stats.get(away_abr, 0)
    # Sigmoid curve: a rating difference of 0 gives the home team a 50% chance.
    prob_home_win = 1 / (1 + np.exp(-0.5 * rating_diff))
    return 1 if np.random.random() < prob_home_win else 0

# 2.
# Note: These are not the actual games remaining
# This just works because there are enough games left to make it random
remaining_games = 1200 - len(df_nhl) # Simplified estimate (not referenced by the loop below)
simulated_standings = df_nhl.set_index('team')['points'].to_dict()

# Each team's win chance never changes during the simulation, so it is computed
# once here rather than re-filtering nhl_rankings on every inner iteration.
# drop_duplicates keeps the first row per team, matching a first-match lookup.
rating_by_team = nhl_rankings.drop_duplicates('team').set_index('team')['power_rating']
win_chance_by_team = {
    team: 0.5 + (rating_by_team[team] / 100)
    for team in simulated_standings
}

# 100 passes; in each pass every team plays one simulated game.
for _ in range(100):
    for team in simulated_standings:
        if np.random.random() < win_chance_by_team[team]:
            simulated_standings[team] += 2 # Simplified 2 pts for a win

# 3.
final_df = pd.DataFrame(list(simulated_standings.items()), columns=['Team', 'Predicted_Points'])
final_df = final_df.sort_values('Predicted_Points', ascending=False)
print(final_df.head(32))

                     Team  Predicted_Points
0      Colorado Avalanche               209
2     Carolina Hurricanes               192
1            Dallas Stars               181
3          Minnesota Wild               177
13       Florida Panthers               168
4    Vegas Golden Knights               166
8     Philadelphia Flyers               166
25         Buffalo Sabres               158
9     Washington Capitals               156
5           Anaheim Ducks               156
19           Utah Mammoth               155
12      New Jersey Devils               155
10          Boston Bruins               154
18        San Jose Sharks               153
11    Tampa Bay Lightning               151
14      Montréal Canadiens               150
6      New York Islanders               149
7       Detroit Red Wings               149
15        Edmonton Oilers               148
27     Chicago Blackhawks               146
17      Los Angeles Kings               139
24        St. Louis Blues      

# Get Remaining Games
1. Get abbreviations
2. Fetch and filter remaining games
  - Make sure not to count Home vs Away twice as Away vs Home
3. Iterate through all teams getting their remaining schedule
4. Only grab games that haven't been played yet (`gameState` is 'FUT' or 'PRE')



In [7]:
# 1.
standings_url = "https://api-web.nhle.com/v1/standings/now"
request_timeout = 10  # seconds; a stalled request would otherwise hang the cell

# One Session reuses the connection across the standings call and the
# per-team schedule calls instead of reconnecting for every request.
with requests.Session() as session:
    standings_response = session.get(standings_url, timeout=request_timeout)
    standings_response.raise_for_status()  # surface HTTP errors before parsing JSON
    standings_data = standings_response.json()
    teams = [t['teamAbbrev']['default'] for t in standings_data['standings']]

    # 2.
    all_remaining_games = []
    seen_games = set() # To avoid counting Home vs Away twice

    print("Fetching schedules...")
    for team in teams:
        # 3.
        sched_url = f"https://api-web.nhle.com/v1/club-schedule-season/{team}/20252026"
        sched_response = session.get(sched_url, timeout=request_timeout)
        sched_response.raise_for_status()
        sched_data = sched_response.json()

        for game in sched_data['games']:
            # 4.
            # Keep only unplayed games, and skip any game id already recorded
            # from the other team's schedule.
            if game['gameState'] not in ('FUT', 'PRE'):
                continue
            game_id = game['id']
            if game_id in seen_games:
                continue
            all_remaining_games.append({
                'id': game_id,
                'home': game['homeTeam']['abbrev'],
                'away': game['awayTeam']['abbrev']
            })
            seen_games.add(game_id)

print(f"Total remaining games found: {len(all_remaining_games)}")

Fetching schedules...
Total remaining games found: 779


# Predicting With Real Schedule and Live Rating
1.  Create a simple lookup for our Power Ratings. (Ensure nhl_rankings is defined from our previous step)
2. Simulate each game
  - ML logic: Higher rating is a higher probability to win
  - "Home Ice" advantage included as +0.5 rating boost
  - Calculate using logistic function
3. Convert to Final Table
  - Top 16 are "Playoff Teams"
  - This ignores the conferences and divisions for the "Super 16"

In [8]:
# 1.
# Lookup: team abbreviation -> power rating.
power_dict = nhl_rankings.set_index('abbrev')['power_rating'].to_dict()

# 2.
# Start every team at its current points total.
final_sim_pts = df_nhl.set_index('abbrev')['points'].to_dict()

for game in all_remaining_games:
    # Teams missing from the ratings table are treated as rating 0.
    h_rating = power_dict.get(game['home'], 0)
    a_rating = power_dict.get(game['away'], 0)

    # ML Logic: Higher rating = Higher win probability
    # We add a +0.5 'Home Ice' bonus
    diff = (h_rating + 0.5) - a_rating

    # Calculate win probability using a logistic function
    win_prob = 1 / (1 + np.exp(-diff/5))

    # One random draw decides the game; the winner gets 2 points.
    winner = game['home'] if np.random.random() < win_prob else game['away']
    # .get mirrors the rating fallback above: a team that is not in the
    # standings starts from 0 points instead of raising KeyError.
    final_sim_pts[winner] = final_sim_pts.get(winner, 0) + 2

# 3.
final_standings = pd.DataFrame(list(final_sim_pts.items()), columns=['Team', 'Projected_Points'])
final_standings = final_standings.sort_values('Projected_Points', ascending=False)

print("\n--- PREDICTED FINAL 2026 STANDINGS ---")
print(final_standings.head(16))


--- PREDICTED FINAL 2026 STANDINGS ---
   Team  Projected_Points
0   COL               153
2   CAR               132
3   MIN               127
1   DAL               123
6   NYI               117
9   WSH               110
13  FLA               110
11  TBL               107
4   VGK               104
5   ANA               104
10  BOS               100
8   PHI                98
19  UTA                93
18  SJS                87
21  TOR                87
15  EDM                80


# Live Simulation Attempt
This cell uses the real-time schedule to simulate the future.

1. Setup the model
  - a.  Use .values here to strip feature names and avoid warnings during prediction

2. Define the Simulation Function
  - a. Train the model
  - b. Create a lookup for stats
  - c. Start with current points
  - d. Simulate each game in a loop
    - Get stats for both teams
    - Create feature array (Home stats - Away stats)
    - Get Home Win probability
    - Award teams points
    - Format Results
3. Run and Display Output

In [16]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# 1.
def train_nhl_model(df):
    """Fit a random forest that separates above-median teams from the rest.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'goal_diff_per_game', 'l10_win_pct', 'streak' and
        'power_rating'.

    Returns
    -------
    RandomForestClassifier
        A fitted 100-tree forest (random_state=42). The label is 1 when a
        team's power_rating is strictly above the median of the column,
        otherwise 0.
    """
    # a.
    # Pass a bare array so the fitted model carries no feature names.
    feature_columns = ['goal_diff_per_game', 'l10_win_pct', 'streak']
    features = df[feature_columns].to_numpy()

    ratings = df['power_rating']
    labels = ratings.gt(ratings.median()).astype(int)

    # fit() returns the estimator itself, so build and train in one expression.
    return RandomForestClassifier(n_estimators=100, random_state=42).fit(features, labels)

# 2.
def run_live_simulation(rankings_df, remaining_games_list):
    """Play out the remaining schedule once and return projected final points.

    Parameters
    ----------
    rankings_df : pandas.DataFrame
        One row per team with at least the columns 'abbrev', 'team', 'points',
        'goal_diff_per_game', 'l10_win_pct' and 'streak'. It is also passed to
        ``train_nhl_model``.
    remaining_games_list : iterable of dict
        Each dict has 'home' and 'away' team abbreviations.

    Returns
    -------
    pandas.DataFrame
        Columns 'abbrev', 'Final_Points' and 'team', sorted by 'Final_Points'
        descending. A team that appears in the schedule but not in
        ``rankings_df`` starts from 0 points and is labelled with its
        abbreviation.

    Notes
    -----
    Each game consumes exactly one ``np.random.random()`` draw, in schedule
    order; the winner receives 2 points and the loser none.
    """
    feature_cols = ('goal_diff_per_game', 'l10_win_pct', 'streak')
    # Neutral stand-in stats for a team that is missing from rankings_df.
    neutral_stats = {'goal_diff_per_game': 0, 'l10_win_pct': 0.5, 'streak': 0}

    # a.
    rf_model = train_nhl_model(rankings_df)

    # b.
    stats_lookup = rankings_df.set_index('abbrev').to_dict('index')

    # c.
    projected_points = rankings_df.set_index('abbrev')['points'].to_dict()

    def _feature_row(abbrev):
        """Return the feature values for one team, in feature_cols order."""
        stats = stats_lookup.get(abbrev, neutral_stats)
        return [stats[col] for col in feature_cols]

    # d.
    games = list(remaining_games_list)
    if games:  # predict_proba cannot be called on an empty feature matrix
        # Feature Array (Home stats - Away stats), one row per game, so the
        # forest is queried once instead of once per game.
        # NOTE(review): the model is given home-minus-away differences here;
        # confirm that this matches the features train_nhl_model fits on.
        home_features = np.array([_feature_row(g['home']) for g in games], dtype=float)
        away_features = np.array([_feature_row(g['away']) for g in games], dtype=float)
        diff_features = home_features - away_features

        # Home Win Prob
        probs = np.asarray(rf_model.predict_proba(diff_features))
        # predict_proba has one column per learned class; a model that only
        # saw one class has a single column, so class 1 is located by label.
        classes = list(getattr(rf_model, 'classes_', [0, 1]))
        if 1 in classes:
            home_win_probs = probs[:, classes.index(1)]
        else:
            home_win_probs = np.zeros(len(games))

        # Award Points
        for game, home_win_prob in zip(games, home_win_probs):
            winner = game['home'] if np.random.random() < home_win_prob else game['away']
            # .get mirrors the neutral-stats fallback: an unknown team starts at 0.
            projected_points[winner] = projected_points.get(winner, 0) + 2

    # Format the results
    results_df = pd.DataFrame(list(projected_points.items()), columns=['abbrev', 'Final_Points'])
    # Left merge keeps teams that have no row in rankings_df; they are labelled
    # with their abbreviation.
    results_df = results_df.merge(rankings_df[['abbrev', 'team']], on='abbrev', how='left')
    results_df['team'] = results_df['team'].fillna(results_df['abbrev'])
    return results_df.sort_values('Final_Points', ascending=False)

# 3.
# Run one simulated pass over the real remaining schedule.
final_output = run_live_simulation(
    rankings_df=nhl_rankings,
    remaining_games_list=all_remaining_games,
)

# Show the sixteen highest projected totals.
print("\n--- FINAL PROJECTED 2026 STANDINGS (RF MODEL) ---")
print(final_output.loc[:, ['team', 'Final_Points']].head(n=16))


--- FINAL PROJECTED 2026 STANDINGS (RF MODEL) ---
                    team  Final_Points
0     Colorado Avalanche           139
2           Dallas Stars           127
1    Carolina Hurricanes           124
3         Minnesota Wild           123
6    Tampa Bay Lightning           113
5          Boston Bruins           112
8    Philadelphia Flyers           112
4    Washington Capitals           108
9     New York Islanders           105
7       Florida Panthers           104
10         Anaheim Ducks           102
11  Vegas Golden Knights            98
12          Utah Mammoth            95
14       Edmonton Oilers            88
13   Toronto Maple Leafs            87
23     Los Angeles Kings            83
