In [1]:
import os
import sys
from pathlib import Path

# Add the project root (the folder above notebooks/) to sys.path so that
# project-local modules can be imported from this notebook.
root_dir = Path().absolute()

# When the kernel was started inside notebooks/, step up one level.
if root_dir.name == 'notebooks':
    root_dir = root_dir.parent

# sys.path holds plain strings, so convert before comparing/appending.
root_dir = str(root_dir)
print(f"Root dir: {root_dir}")
print("Local environment")

already_on_path = root_dir in sys.path
if not already_on_path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: /Users/jacobbjareklint/Code/GitHub/hockey-agent
Local environment
Added the following directory to the PYTHONPATH: /Users/jacobbjareklint/Code/GitHub/hockey-agent


In [2]:
import hopsworks
from config import settings
import requests
import pandas as pd
from util import *

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Open a Hopsworks session; host, project name and API key all come from
# `settings`, so no credentials are written in the notebook itself.
# The returned handle is kept in `project`.
project = hopsworks.login(
    host=settings.HOPSWORKS_HOST,
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
)

2025-12-20 18:27:54,846 INFO: Initializing external client
2025-12-20 18:27:54,846 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-20 18:27:56,239 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [4]:
# Team table returned by fetch_teams(); only the `id` and `fullName`
# columns are read here.
teams_df = fetch_teams()

# Lookup from team id to the team's full name.
team_id_to_name = {
    team_id: full_name
    for team_id, full_name in zip(teams_df["id"], teams_df["fullName"])
}

In [5]:
# Resolve the season id for the current calendar date and download that
# season's games.
import datetime
# Local calendar date of the machine running the notebook.
today = datetime.date.today()
# get_season() turns the date into a season id, which is also passed to the
# player/goalie fetch helpers further down.
season_id = get_season(today)

# Games for the resolved season, as returned by fetch_games_from_nhl().
games_df = fetch_games_from_nhl(season_id)

# Last expression: show the frame for inspection.
games_df


Unnamed: 0,id,easternStartTime,gameDate,gameNumber,gameScheduleStateId,gameStateId,gameType,homeScore,homeTeamId,period,season,visitingScore,visitingTeamId
0,2025020001,2025-10-07T17:00:00,2025-10-07,1,1,7,2,3,13,3,20252026,2,16
1,2025020002,2025-10-07T20:00:00,2025-10-07,2,1,7,2,0,3,3,20252026,3,5
2,2025020003,2025-10-07T22:30:00,2025-10-07,3,1,7,2,1,26,3,20252026,4,21
3,2025020004,2025-10-08T19:00:00,2025-10-08,4,1,7,2,5,10,3,20252026,2,8
4,2025020005,2025-10-08T19:30:00,2025-10-08,5,1,7,2,1,15,3,20252026,3,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1307,2025021308,2026-04-16T20:00:00,2026-04-16,1308,1,1,2,0,52,1,20252026,0,28
1308,2025021309,2026-04-16T20:00:00,2026-04-16,1309,1,1,2,0,68,1,20252026,0,19
1309,2025021310,2026-04-16T21:00:00,2026-04-16,1310,1,1,2,0,20,1,20252026,0,26
1310,2025021311,2026-04-16T21:00:00,2026-04-16,1311,1,1,2,0,22,1,20252026,0,23


In [6]:
# Run every column label through to_snake, then attach readable team names
# by looking up the home/visiting team ids in team_id_to_name.
games_df = (
    games_df
    .rename(columns=to_snake)
    .assign(
        home_team_name=lambda df: df["home_team_id"].map(team_id_to_name),
        away_team_name=lambda df: df["visiting_team_id"].map(team_id_to_name),
    )
)

games_df

Unnamed: 0,id,eastern_start_time,game_date,game_number,game_schedule_state_id,game_state_id,game_type,home_score,home_team_id,period,season,visiting_score,visiting_team_id,home_team_name,away_team_name
0,2025020001,2025-10-07T17:00:00,2025-10-07,1,1,7,2,3,13,3,20252026,2,16,Florida Panthers,Chicago Blackhawks
1,2025020002,2025-10-07T20:00:00,2025-10-07,2,1,7,2,0,3,3,20252026,3,5,New York Rangers,Pittsburgh Penguins
2,2025020003,2025-10-07T22:30:00,2025-10-07,3,1,7,2,1,26,3,20252026,4,21,Los Angeles Kings,Colorado Avalanche
3,2025020004,2025-10-08T19:00:00,2025-10-08,4,1,7,2,5,10,3,20252026,2,8,Toronto Maple Leafs,Montréal Canadiens
4,2025020005,2025-10-08T19:30:00,2025-10-08,5,1,7,2,1,15,3,20252026,3,6,Washington Capitals,Boston Bruins
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1307,2025021308,2026-04-16T20:00:00,2026-04-16,1308,1,1,2,0,52,1,20252026,0,28,Winnipeg Jets,San Jose Sharks
1308,2025021309,2026-04-16T20:00:00,2026-04-16,1309,1,1,2,0,68,1,20252026,0,19,Utah Mammoth,St. Louis Blues
1309,2025021310,2026-04-16T21:00:00,2026-04-16,1310,1,1,2,0,20,1,20252026,0,26,Calgary Flames,Los Angeles Kings
1310,2025021311,2026-04-16T21:00:00,2026-04-16,1311,1,1,2,0,22,1,20252026,0,23,Edmonton Oilers,Vancouver Canucks


In [7]:
# Parse game_date so it can be compared against Timestamps.
games_df["game_date"] = pd.to_datetime(games_df["game_date"])

# Two-day window, both ends included: midnight yesterday .. midnight today.
# NOTE(review): pd.Timestamp.today() follows the machine's local clock, while
# the schedule also carries an `eastern_start_time` column — confirm that
# game_date lines up with the local calendar day where this runs.
today = pd.Timestamp.today().normalize()
yesterday = today - pd.Timedelta(days=1)

mask = games_df["game_date"].between(yesterday, today)
recent_games = games_df[mask].copy()

# Convert the dates back to strings on the copy only.
recent_games["game_date"] = recent_games["game_date"].astype(str)

recent_games

Unnamed: 0,id,eastern_start_time,game_date,game_number,game_schedule_state_id,game_state_id,game_type,home_score,home_team_id,period,season,visiting_score,visiting_team_id,home_team_name,away_team_name
543,2025020544,2025-12-19T19:00:00,2025-12-19,544,1,7,2,4,13,5,20252026,3,12,Florida Panthers,Carolina Hurricanes
544,2025020545,2025-12-19T19:00:00,2025-12-19,545,1,7,2,1,2,3,20252026,4,23,New York Islanders,Vancouver Canucks
545,2025020546,2025-12-19T21:00:00,2025-12-19,546,1,7,2,3,21,3,20252026,2,52,Colorado Avalanche,Winnipeg Jets
546,2025020547,2025-12-19T21:00:00,2025-12-19,547,1,7,2,1,68,3,20252026,2,1,Utah Mammoth,New Jersey Devils
547,2025020548,2025-12-19T22:00:00,2025-12-19,548,1,7,2,3,24,3,20252026,8,25,Anaheim Ducks,Dallas Stars
548,2025020549,2025-12-20T12:30:00,2025-12-20,549,1,2,2,0,3,1,20252026,0,4,New York Rangers,Philadelphia Flyers
549,2025020550,2025-12-20T12:30:00,2025-12-20,550,1,2,2,0,15,1,20252026,0,17,Washington Capitals,Detroit Red Wings
550,2025020551,2025-12-20T15:00:00,2025-12-20,551,1,1,2,0,9,1,20252026,0,16,Ottawa Senators,Chicago Blackhawks
551,2025020552,2025-12-20T15:00:00,2025-12-20,552,1,1,2,0,30,1,20252026,0,22,Minnesota Wild,Edmonton Oilers
552,2025020553,2025-12-20T17:00:00,2025-12-20,553,1,1,2,0,7,1,20252026,0,2,Buffalo Sabres,New York Islanders


In [8]:
# Upload the recent-games window to version 1 of the 'matches' feature group.
fs = project.get_feature_store()

matches_fg = fs.get_feature_group(name="matches", version=1)

# `recent_games` only covers yesterday and today, so it has no rows on days
# without scheduled games. Skip the upload in that case instead of handing the
# feature store an empty frame.
# NOTE(review): presumably insert() would otherwise still start the
# materialization job for zero rows — confirm against the Hopsworks docs.
if recent_games.empty:
    print("No recent games to insert; skipping upload to 'matches'.")
else:
    matches_fg.insert(recent_games)

Uploading Dataframe: 100.00% |██████████| Rows 18/18 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: matches_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/matches_1_offline_fg_materialization/executions


(Job('matches_1_offline_fg_materialization', 'SPARK'), None)

In [9]:

# Skater form rows for the current season, with every column label run
# through to_snake.
# NOTE(review): the recorded upload of this frame was exactly 10000 rows, which
# may indicate a row cap inside fetch_player_form_for_season — confirm that no
# rows are being cut off.
players_df = fetch_player_form_for_season(season_id).rename(columns=to_snake)

players_df

Unnamed: 0,assists,ev_goals,ev_points,faceoff_win_pct,game_date,game_id,game_winning_goals,games_played,goals,home_road,...,pp_points,sh_goals,sh_points,shooting_pct,shoots_catches,shots,skater_full_name,team_abbrev,time_on_ice_per_game,season_id
0,0,0,0,0.66666,2025-10-30,2025020175,0,1,0,R,...,0,0,0,,R,0,Mika Zibanejad,NYR,1308.0,20252026
1,0,1,1,,2025-11-29,2025020392,1,1,1,H,...,0,0,0,0.33333,R,3,Brent Burns,COL,1211.0,20252026
2,0,0,0,0.25000,2025-12-18,2025020540,0,1,0,R,...,0,0,0,0.00000,L,2,Ben Jones,MIN,422.0,20252026
3,0,0,0,,2025-10-26,2025020147,0,1,0,R,...,0,0,0,0.00000,L,4,Darnell Nurse,EDM,1412.0,20252026
4,0,1,1,0.00000,2025-10-22,2025020108,0,1,1,R,...,0,0,0,0.14285,L,7,Matt Boldy,MIN,1289.0,20252026
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,0,0,0,,2025-10-28,2025020150,0,1,0,H,...,0,0,0,0.00000,L,6,Nick Seeler,PHI,1233.0,20252026
9996,0,2,2,,2025-10-25,2025020134,0,1,2,R,...,0,0,0,0.40000,L,5,Dmitri Voronkov,CBJ,1033.0,20252026
9997,1,0,1,,2025-11-19,2025020315,0,1,0,R,...,0,0,0,,L,0,Mattias Janmark,EDM,574.0,20252026
9998,0,0,0,0.42857,2025-12-13,2025020498,0,1,0,H,...,0,0,0,,L,0,Casey Cizikas,NYI,856.0,20252026


In [10]:
# Upload the skater form frame to version 1 of the 'players_form' feature
# group. The last expression displays whatever insert() returns.
fs = project.get_feature_store()
players_form_fg = fs.get_feature_group("players_form", version=1)

players_form_fg.insert(players_df)

Uploading Dataframe: 100.00% |██████████| Rows 10000/10000 | Elapsed Time: 00:01 | Remaining Time: 00:00


Launching job: players_form_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/players_form_1_offline_fg_materialization/executions


(Job('players_form_1_offline_fg_materialization', 'SPARK'), None)

In [11]:
# Goalie form rows for the current season: normalise the column labels with
# to_snake and discard the `ties` column when it is present
# (errors="ignore" makes the drop a no-op otherwise).
goalies_form_df = (
    fetch_goalie_form_for_season(season_id)
    .rename(columns=to_snake)
    .drop(columns=["ties"], errors="ignore")
)

goalies_form_df

Unnamed: 0,assists,game_date,game_id,games_played,games_started,goalie_full_name,goals,goals_against,goals_against_average,home_road,...,points,save_pct,saves,shoots_catches,shots_against,shutouts,team_abbrev,time_on_ice,wins,season_id
0,0,2025-11-17,2025020304,1,1,Charlie Lindgren,0,1,1.00000,H,...,0,0.96774,30,R,31,0,WSH,3600,1,20252026
1,0,2025-10-07,2025020003,1,1,Scott Wedgewood,0,1,1.00194,R,...,0,0.96000,24,L,25,0,COL,3593,1,20252026
2,0,2025-12-05,2025020438,1,1,Jake Oettinger,0,1,1.00000,H,...,0,0.94117,16,L,17,0,DAL,3600,1,20252026
3,0,2025-10-28,2025020153,1,1,Daniil Tarasov,0,2,1.85375,H,...,0,0.88235,15,L,17,0,FLA,3884,0,20252026
4,0,2025-12-09,2025020475,1,1,Scott Wedgewood,0,3,2.88770,R,...,0,0.90322,28,L,31,0,COL,3740,0,20252026
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1151,0,2025-12-18,2025020535,1,1,Samuel Ersson,0,4,4.11781,R,...,0,0.85185,23,L,27,0,PHI,3497,0,20252026
1152,0,2025-12-18,2025020537,1,1,Linus Ullmark,0,0,0.00000,H,...,0,1.00000,25,L,25,1,OTT,3600,1,20252026
1153,0,2025-12-18,2025020543,1,1,Jake Oettinger,0,3,3.00000,R,...,0,0.91891,34,L,37,0,DAL,3600,1,20252026
1154,0,2025-12-18,2025020535,1,1,Alex Lyon,0,3,3.00668,H,...,0,0.88888,24,L,27,0,BUF,3592,1,20252026


In [12]:
# Upload the goalie form frame to version 1 of the 'goalies_form' feature
# group. The last expression displays whatever insert() returns.
fs = project.get_feature_store()
goalies_form_fg = fs.get_feature_group("goalies_form", version=1)

goalies_form_fg.insert(goalies_form_df)

Uploading Dataframe: 100.00% |██████████| Rows 1156/1156 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: goalies_form_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/goalies_form_1_offline_fg_materialization/executions


(Job('goalies_form_1_offline_fg_materialization', 'SPARK'), None)