In [1]:
import os
import sys
from pathlib import Path

# Put the project root on sys.path so project-local modules can be imported.
# When the working directory is the notebooks/ folder, the root is its parent;
# otherwise the working directory itself is used.
cwd = Path.cwd()
root_dir = str(cwd.parent if cwd.name == 'notebooks' else cwd)

print(f"Root dir: {root_dir}")
print("Local environment")

# Only append once, so re-running this cell does not add duplicate entries.
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: /Users/jacobbjareklint/Code/GitHub/hockey-agent
Local environment
Added the following directory to the PYTHONPATH: /Users/jacobbjareklint/Code/GitHub/hockey-agent


In [None]:
import hopsworks
from config import settings
import requests
import pandas as pd
import util

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Log in to Hopsworks; host, project name and API key all come from the
# project's settings object rather than being hard-coded here.
project = hopsworks.login(
    host=settings.HOPSWORKS_HOST,
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
)


2025-12-28 13:36:48,836 INFO: Initializing external client
2025-12-28 13:36:48,837 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-28 13:36:49,685 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [None]:
# Start year passed to util.generate_season_ids; with 2000 the generated list
# begins at season '20002001'.
FIRST_SEASON_START_YEAR = 2000

season_ids = util.generate_season_ids(FIRST_SEASON_START_YEAR)

# Peek at both ends of the list as a sanity check.
(season_ids[:5], season_ids[-3:])

(['20002001', '20012002', '20022003', '20032004', '20042005'],
 ['20232024', '20242025', '20252026'])

In [None]:
# Build a lookup from team id to full team name; it is used further down to
# label the home and visiting team of every game.
# The teams frame gets its own name (teams_df) instead of borrowing games_df,
# which is reserved for the games table built in the next step.
teams_df = util.fetch_teams()

team_id_to_name = dict(zip(teams_df["id"], teams_df["fullName"]))


In [None]:
# Download the games of every season and stack them into a single frame.
# A season whose download fails is reported and skipped (best effort), but the
# skipped seasons are collected so the gap is visible before anything is
# uploaded.
season_game_frames = []
failed_game_seasons = []

for season_id in season_ids:
    try:
        print(f"Hämtar säsong {season_id}")
        season_game_frames.append(util.fetch_games_from_nhl(season_id))
    except Exception as e:
        print(f"Misslyckades för {season_id}: {e}")
        failed_game_seasons.append(season_id)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail with a message that names the actual problem instead.
if not season_game_frames:
    raise RuntimeError(
        f"No games could be fetched for any of the {len(season_ids)} requested seasons"
    )

if failed_game_seasons:
    print(f"Warning: games are missing for seasons {failed_game_seasons}")

games_df = pd.concat(season_game_frames, ignore_index=True)
games_df

Hämtar säsong 20002001
Hämtar säsong 20012002
Hämtar säsong 20022003
Hämtar säsong 20032004
Hämtar säsong 20042005
Hämtar säsong 20052006
Hämtar säsong 20062007
Hämtar säsong 20072008
Hämtar säsong 20082009
Hämtar säsong 20092010
Hämtar säsong 20102011
Hämtar säsong 20112012
Hämtar säsong 20122013
Hämtar säsong 20132014
Hämtar säsong 20142015
Hämtar säsong 20152016
Hämtar säsong 20162017
Hämtar säsong 20172018
Hämtar säsong 20182019
Hämtar säsong 20192020
Hämtar säsong 20202021
Hämtar säsong 20212022
Hämtar säsong 20222023
Hämtar säsong 20232024
Hämtar säsong 20242025
Hämtar säsong 20252026


Unnamed: 0,id,easternStartTime,gameDate,gameNumber,gameScheduleStateId,gameStateId,gameType,homeScore,homeTeamId,period,season,visitingScore,visitingTeamId
0,2000020001,2000-10-04T19:00:00,2000-10-04,1,1,7,2,2,25,4,20002001,2,21
1,2000020002,2000-10-05T19:00:00,2000-10-05,2,1,7,2,4,6,4,20002001,4,9
2,2000020003,2000-10-05T19:00:00,2000-10-05,3,1,7,2,4,7,3,20002001,2,16
3,2000020004,2000-10-05T19:00:00,2000-10-05,4,1,7,2,6,4,3,20002001,3,23
4,2000020005,2000-10-05T19:00:00,2000-10-05,5,1,7,2,3,20,3,20002001,4,17
...,...,...,...,...,...,...,...,...,...,...,...,...,...
30217,2025021308,2026-04-16T20:00:00,2026-04-16,1308,1,1,2,0,52,1,20252026,0,28
30218,2025021309,2026-04-16T20:00:00,2026-04-16,1309,1,1,2,0,68,1,20252026,0,19
30219,2025021310,2026-04-16T21:00:00,2026-04-16,1310,1,1,2,0,20,1,20252026,0,26
30220,2025021311,2026-04-16T21:00:00,2026-04-16,1311,1,1,2,0,22,1,20252026,0,23


In [None]:
# Convert every column name with util.to_snake, then attach readable team
# names by looking up the home and visiting team ids in team_id_to_name.
games_df = (
    games_df
    .rename(columns=util.to_snake)
    .assign(
        home_team_name=lambda games: games["home_team_id"].map(team_id_to_name),
        away_team_name=lambda games: games["visiting_team_id"].map(team_id_to_name),
    )
)

games_df

Unnamed: 0,id,eastern_start_time,game_date,game_number,game_schedule_state_id,game_state_id,game_type,home_score,home_team_id,period,season,visiting_score,visiting_team_id,home_team_name,away_team_name
0,2000020001,2000-10-04T19:00:00,2000-10-04,1,1,7,2,2,25,4,20002001,2,21,Dallas Stars,Colorado Avalanche
1,2000020002,2000-10-05T19:00:00,2000-10-05,2,1,7,2,4,6,4,20002001,4,9,Boston Bruins,Ottawa Senators
2,2000020003,2000-10-05T19:00:00,2000-10-05,3,1,7,2,4,7,3,20002001,2,16,Buffalo Sabres,Chicago Blackhawks
3,2000020004,2000-10-05T19:00:00,2000-10-05,4,1,7,2,6,4,3,20002001,3,23,Philadelphia Flyers,Vancouver Canucks
4,2000020005,2000-10-05T19:00:00,2000-10-05,5,1,7,2,3,20,3,20002001,4,17,Calgary Flames,Detroit Red Wings
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30217,2025021308,2026-04-16T20:00:00,2026-04-16,1308,1,1,2,0,52,1,20252026,0,28,Winnipeg Jets,San Jose Sharks
30218,2025021309,2026-04-16T20:00:00,2026-04-16,1309,1,1,2,0,68,1,20252026,0,19,Utah Mammoth,St. Louis Blues
30219,2025021310,2026-04-16T21:00:00,2026-04-16,1310,1,1,2,0,20,1,20252026,0,26,Calgary Flames,Los Angeles Kings
30220,2025021311,2026-04-16T21:00:00,2026-04-16,1311,1,1,2,0,22,1,20252026,0,23,Edmonton Oilers,Vancouver Canucks


In [8]:
# Publish the games to the project's feature store as version 1 of the
# "matches" feature group, keyed on the game id.
fs = project.get_feature_store()

matches_fg = fs.get_or_create_feature_group(
    name="matches",
    version=1,
    primary_key=["id"],
    description="NHL matches per season since 2000",
)

# Kept as the last expression so the value returned by insert() is displayed.
matches_fg.insert(games_df)

Uploading Dataframe: 100.00% |██████████| Rows 30222/30222 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: matches_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/matches_1_offline_fg_materialization/executions


(Job('matches_1_offline_fg_materialization', 'SPARK'), None)

Next, we fetch each player's recent form: per-game skater statistics for the two most recent seasons.

In [None]:
# Download per-game player form for the two most recent seasons only and stack
# the results. A season whose download fails is reported and skipped (best
# effort); skipped seasons are collected so the gap is visible before upload.
season_player_frames = []
failed_player_seasons = []

for season_id in season_ids[-2:]:  # Only take the two latest seasons.
    try:
        print(f"Hämtar säsong {season_id}")
        season_player_frames.append(util.fetch_player_form_for_season(season_id))
    except Exception as e:
        print(f"Misslyckades för {season_id}: {e}")
        failed_player_seasons.append(season_id)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail with a message that names the actual problem instead.
if not season_player_frames:
    raise RuntimeError("No player form data could be fetched for any requested season")

if failed_player_seasons:
    print(f"Warning: player form is missing for seasons {failed_player_seasons}")

players_df = pd.concat(season_player_frames, ignore_index=True)
print(players_df.shape)

Hämtar säsong 20242025
Downloading 2024-10-01 to 2024-10-31 for season 20242025
Shape: (5975, 29)
Downloading 2024-11-01 to 2024-11-30 for season 20242025
Shape: (7920, 29)
Downloading 2024-12-01 to 2024-12-31 for season 20242025
Shape: (7704, 29)
Downloading 2025-01-01 to 2025-01-31 for season 20242025
Shape: (8063, 29)
Downloading 2025-02-01 to 2025-02-28 for season 20242025
Shape: (4392, 29)
Downloading 2025-03-01 to 2025-03-31 for season 20242025
Shape: (8423, 29)
Downloading 2025-04-01 to 2025-04-30 for season 20242025
Shape: (4747, 29)
Hämtar säsong 20252026
Downloading 2025-10-01 to 2025-10-31 for season 20252026
Shape: (6480, 29)
Downloading 2025-11-01 to 2025-11-30 for season 20252026
Shape: (8100, 29)
Downloading 2025-12-01 to 2025-12-31 for season 20252026
Shape: (5615, 29)
Downloading 2026-01-01 to 2026-01-31 for season 20252026
Downloading 2026-02-01 to 2026-02-28 for season 20252026
Downloading 2026-03-01 to 2026-03-31 for season 20252026
Downloading 2026-04-01 to 2026-04

In [None]:
# Convert every column name with util.to_snake; DataFrame.rename applies the
# function to each column label.
players_df = players_df.rename(columns=util.to_snake)

players_df

Unnamed: 0,assists,ev_goals,ev_points,faceoff_win_pct,game_date,game_id,game_winning_goals,games_played,goals,home_road,...,pp_points,sh_goals,sh_points,shooting_pct,shoots_catches,shots,skater_full_name,team_abbrev,time_on_ice_per_game,season_id
0,1,0,1,,2024-10-14,2024020045,0,1,0,H,...,0,0,0,0.00000,R,1,Jacob Trouba,NYR,1284.0,20242025
1,0,0,0,,2024-10-28,2024020140,0,1,0,H,...,0,0,0,0.00000,R,1,Connor Clifton,BUF,974.0,20242025
2,1,0,1,0.0,2024-10-22,2024020096,0,1,0,H,...,0,0,0,0.00000,R,1,Josh Anderson,MTL,982.0,20242025
3,1,0,1,,2024-10-12,2024020026,0,1,0,H,...,0,0,0,0.00000,L,2,Morgan Rielly,TOR,1208.0,20242025
4,1,0,1,,2024-10-17,2024020061,0,1,0,H,...,0,0,0,0.00000,R,3,Cole Caufield,MTL,1327.0,20242025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67414,0,0,0,,2025-12-16,2025020526,0,1,0,H,...,0,0,0,,R,0,David Jiricek,MIN,851.0,20252026
67415,0,2,2,,2025-12-18,2025020539,0,1,2,H,...,0,0,0,0.66666,L,3,Jakob Chychrun,WSH,1451.0,20252026
67416,0,1,1,,2025-12-19,2025020547,0,1,1,R,...,0,0,0,1.00000,R,1,Connor Brown,NJD,1336.0,20252026
67417,1,0,1,0.6,2025-12-20,2025020549,0,1,0,H,...,0,0,0,,L,0,J.T. Miller,NYR,1124.0,20252026


In [11]:
# Spot check: inspect the rows of a single well-known skater.
matthews_df = players_df.loc[players_df["skater_full_name"] == "Auston Matthews"]

# Bare last expression instead of print(): the notebook renders the frame as a
# table rather than a wrapped plain-text dump.
matthews_df

       assists  ev_goals  ev_points  faceoff_win_pct   game_date     game_id  \
354          0         0          0          0.69565  2024-10-24  2024020110   
992          1         0          1          0.57142  2024-10-28  2024020143   
1743         1         1          2          0.64705  2024-10-31  2024020161   
1801         0         0          0          0.60000  2024-10-12  2024020026   
2307         0         0          0          0.41666  2024-10-22  2024020097   
...        ...       ...        ...              ...         ...         ...   
63968        1         0          1          0.90000  2025-12-02  2025020413   
63991        0         0          0          0.60000  2025-12-11  2025020480   
66281        1         0          1          0.63636  2025-12-16  2025020520   
66909        0         0          0          0.60000  2025-12-20  2025020558   
67128        0         0          0          0.72727  2025-12-18  2025020539   

       game_winning_goals  games_played

In [13]:
# Store season_id as text before upload.
# NOTE(review): presumably needed so the key column matches the feature
# group's string type — confirm against the feature group schema.
players_df = players_df.assign(season_id=players_df["season_id"].astype(str))

In [14]:
# Publish the player form rows as version 1 of the "players_form" feature
# group, keyed on player, season and game.
fs = project.get_feature_store()

players_form_fg = fs.get_or_create_feature_group(
    name="players_form",
    # The description now states what is actually loaded: this notebook only
    # fetches the two most recent seasons, not everything since 2000.
    description="NHL skater game-by-game stats for the two most recent seasons",
    version=1,
    primary_key=["player_id", "season_id", "game_id"],
)

# Kept as the last expression so the value returned by insert() is displayed.
players_form_fg.insert(players_df)

Uploading Dataframe: 100.00% |██████████| Rows 67419/67419 | Elapsed Time: 00:08 | Remaining Time: 00:00


Launching job: players_form_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/players_form_1_offline_fg_materialization/executions


(Job('players_form_1_offline_fg_materialization', 'SPARK'), None)

In [None]:
# Download per-game goalie form for the two most recent seasons only. A season
# whose download fails is reported and skipped (best effort); skipped seasons
# are collected so the gap is visible before upload.
season_goalie_frames = []
failed_goalie_seasons = []

for season_id in season_ids[-2:]:  # Only take the two latest seasons.
    try:
        print(f"Hämtar säsong {season_id}")
        season_goalie_frames.append(util.fetch_goalie_form_for_season(season_id))
    except Exception as e:
        print(f"Misslyckades för {season_id}: {e}")
        failed_goalie_seasons.append(season_id)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail with a message that names the actual problem instead.
if not season_goalie_frames:
    raise RuntimeError("No goalie form data could be fetched for any requested season")

if failed_goalie_seasons:
    print(f"Warning: goalie form is missing for seasons {failed_goalie_seasons}")

# Stack the seasons, convert column names with util.to_snake, and drop the
# 'ties' column when it is present (errors='ignore' makes the drop a no-op
# otherwise).
# NOTE(review): the reason for dropping 'ties' is not visible here — confirm.
goalies_form_df = (
    pd.concat(season_goalie_frames, ignore_index=True)
    .rename(columns=util.to_snake)
    .drop(columns=['ties'], errors='ignore')
)
goalies_form_df

Hämtar säsong 20242025
Hämtar säsong 20252026


Unnamed: 0,assists,game_date,game_id,games_played,games_started,goalie_full_name,goals,goals_against,goals_against_average,home_road,...,points,save_pct,saves,shoots_catches,shots_against,shutouts,team_abbrev,time_on_ice,wins,season_id
0,0,2025-01-26,2024020793,1,1,Spencer Knight,0,3,3.17460,R,...,0,0.88000,22,L,25,0,FLA,3402,0,20242025
1,0,2025-03-26,2024021140,1,1,Spencer Knight,0,4,4.20437,H,...,0,0.80000,16,L,20,0,CHI,3425,0,20242025
2,0,2025-01-16,2024020711,1,1,Elvis Merzlikins,0,1,1.00000,H,...,0,0.94736,18,L,19,0,CBJ,3600,1,20242025
3,0,2025-04-15,2024021297,1,1,Jordan Binnington,0,1,1.00000,H,...,0,0.95652,22,L,23,0,STL,3600,1,20242025
4,0,2024-11-11,2024020239,1,0,Samuel Montembeault,0,0,0.00000,R,...,0,1.00000,4,L,4,0,MTL,969,1,20242025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4022,0,2025-12-23,2025020581,1,1,Sergei Bobrovsky,0,2,2.01567,R,...,0,0.89473,17,L,19,0,FLA,3572,1,20252026
4023,0,2025-12-27,2025020598,1,1,Dustin Wolf,0,2,2.00000,H,...,0,0.93548,29,L,31,0,CGY,3600,1,20252026
4024,0,2025-12-27,2025020594,1,1,Jesper Wallstedt,0,3,3.07517,R,...,0,0.88461,23,L,26,0,MIN,3512,1,20252026
4025,0,2025-12-27,2025020590,1,0,Leevi Meriläinen,0,2,3.88140,R,...,0,0.88888,16,L,18,0,OTT,1855,0,20252026


In [6]:
# Create (or fetch) the feature group and upload the goalie rows.
# NOTE(review): the primary key contains both player_id and goalie_full_name;
# the name looks redundant next to the id — confirm before bumping the version.
fs = project.get_feature_store()

goalies_form_fg = fs.get_or_create_feature_group(
    name="goalies_form",
    version=1,
    primary_key=["player_id", "goalie_full_name", "season_id", "game_id"],
    description="NHL goalie game-by-game stats",
)

# Kept as the last expression so the value returned by insert() is displayed.
goalies_form_fg.insert(goalies_form_df)

Feature Group created successfully, explore it at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/fs/3140/fg/2201


Uploading Dataframe: 100.00% |██████████| Rows 4027/4027 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: goalies_form_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/goalies_form_1_offline_fg_materialization/executions


(Job('goalies_form_1_offline_fg_materialization', 'SPARK'), None)