In [1]:
import os
import sys
from datetime import date, timedelta
from pathlib import Path

# Add the project root (the folder above notebooks/) to sys.path so that
# project-local modules can be imported from this notebook.
cwd = Path.cwd()
project_root = cwd.parent if cwd.name == "notebooks" else cwd
root_dir = str(project_root)

print(f"Root dir: {root_dir}")
print("Local environment")

# Only append once, so re-running the cell does not grow sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: /Users/jacobbjareklint/Code/GitHub/hockey-agent
Local environment
Added the following directory to the PYTHONPATH: /Users/jacobbjareklint/Code/GitHub/hockey-agent


In [2]:
import hopsworks
from config import settings
import requests
import pandas as pd
from util import *

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Log in to Hopsworks; project name, API key and host all come from `settings`
# rather than being written into the notebook.
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
    host=settings.HOPSWORKS_HOST,
)

2025-12-21 12:45:25,424 INFO: Initializing external client
2025-12-21 12:45:25,425 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-21 12:45:26,653 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [4]:
teams_df = fetch_teams()

# Lookup table from the "id" column to the "fullName" column of teams_df;
# used further down to attach readable team names to games.
team_ids = teams_df["id"]
team_full_names = teams_df["fullName"]
team_id_to_name = dict(zip(team_ids, team_full_names))

In [5]:
# Resolve the season id for today's date with get_season(), then fetch the
# games for that season. `season_id` is reused by later cells.
today = date.today()
season_id = get_season(today)

games_df = fetch_games_from_nhl(season_id)

games_df


Unnamed: 0,id,easternStartTime,gameDate,gameNumber,gameScheduleStateId,gameStateId,gameType,homeScore,homeTeamId,period,season,visitingScore,visitingTeamId
0,2025020001,2025-10-07T17:00:00,2025-10-07,1,1,7,2,3,13,3,20252026,2,16
1,2025020002,2025-10-07T20:00:00,2025-10-07,2,1,7,2,0,3,3,20252026,3,5
2,2025020003,2025-10-07T22:30:00,2025-10-07,3,1,7,2,1,26,3,20252026,4,21
3,2025020004,2025-10-08T19:00:00,2025-10-08,4,1,7,2,5,10,3,20252026,2,8
4,2025020005,2025-10-08T19:30:00,2025-10-08,5,1,7,2,1,15,3,20252026,3,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1307,2025021308,2026-04-16T20:00:00,2026-04-16,1308,1,1,2,0,52,1,20252026,0,28
1308,2025021309,2026-04-16T20:00:00,2026-04-16,1309,1,1,2,0,68,1,20252026,0,19
1309,2025021310,2026-04-16T21:00:00,2026-04-16,1310,1,1,2,0,20,1,20252026,0,26
1310,2025021311,2026-04-16T21:00:00,2026-04-16,1311,1,1,2,0,22,1,20252026,0,23


In [6]:
# Convert every column label with to_snake(), then add the team names by
# looking up the home/visiting team ids in team_id_to_name.
games_df = games_df.rename(columns=to_snake)
games_df = games_df.assign(
    home_team_name=games_df["home_team_id"].map(team_id_to_name),
    away_team_name=games_df["visiting_team_id"].map(team_id_to_name),
)

games_df

Unnamed: 0,id,eastern_start_time,game_date,game_number,game_schedule_state_id,game_state_id,game_type,home_score,home_team_id,period,season,visiting_score,visiting_team_id,home_team_name,away_team_name
0,2025020001,2025-10-07T17:00:00,2025-10-07,1,1,7,2,3,13,3,20252026,2,16,Florida Panthers,Chicago Blackhawks
1,2025020002,2025-10-07T20:00:00,2025-10-07,2,1,7,2,0,3,3,20252026,3,5,New York Rangers,Pittsburgh Penguins
2,2025020003,2025-10-07T22:30:00,2025-10-07,3,1,7,2,1,26,3,20252026,4,21,Los Angeles Kings,Colorado Avalanche
3,2025020004,2025-10-08T19:00:00,2025-10-08,4,1,7,2,5,10,3,20252026,2,8,Toronto Maple Leafs,Montréal Canadiens
4,2025020005,2025-10-08T19:30:00,2025-10-08,5,1,7,2,1,15,3,20252026,3,6,Washington Capitals,Boston Bruins
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1307,2025021308,2026-04-16T20:00:00,2026-04-16,1308,1,1,2,0,52,1,20252026,0,28,Winnipeg Jets,San Jose Sharks
1308,2025021309,2026-04-16T20:00:00,2026-04-16,1309,1,1,2,0,68,1,20252026,0,19,Utah Mammoth,St. Louis Blues
1309,2025021310,2026-04-16T21:00:00,2026-04-16,1310,1,1,2,0,20,1,20252026,0,26,Calgary Flames,Los Angeles Kings
1310,2025021311,2026-04-16T21:00:00,2026-04-16,1311,1,1,2,0,22,1,20252026,0,23,Edmonton Oilers,Vancouver Canucks


In [7]:
# Parse game_date so it can be compared against Timestamps.
games_df["game_date"] = pd.to_datetime(games_df["game_date"])

# Window: midnight yesterday through midnight today (local clock), inclusive.
today = pd.Timestamp.today().normalize()
yesterday = today - pd.Timedelta(days=1)

mask = games_df["game_date"].between(yesterday, today)
recent_games = games_df[mask].copy()

# Convert the date back to a string on the copy that is uploaded later.
recent_games["game_date"] = recent_games["game_date"].astype(str)

recent_games

Unnamed: 0,id,eastern_start_time,game_date,game_number,game_schedule_state_id,game_state_id,game_type,home_score,home_team_id,period,season,visiting_score,visiting_team_id,home_team_name,away_team_name
548,2025020549,2025-12-20T12:30:00,2025-12-20,549,1,7,2,5,3,5,20252026,4,4,New York Rangers,Philadelphia Flyers
549,2025020550,2025-12-20T12:30:00,2025-12-20,550,1,7,2,2,15,3,20252026,5,17,Washington Capitals,Detroit Red Wings
550,2025020551,2025-12-20T15:00:00,2025-12-20,551,1,7,2,6,9,3,20252026,4,16,Ottawa Senators,Chicago Blackhawks
551,2025020552,2025-12-20T15:00:00,2025-12-20,552,1,7,2,5,30,3,20252026,2,22,Minnesota Wild,Edmonton Oilers
552,2025020553,2025-12-20T17:00:00,2025-12-20,553,1,7,2,3,7,5,20252026,2,2,Buffalo Sabres,New York Islanders
553,2025020554,2025-12-20T18:00:00,2025-12-20,554,1,7,2,2,13,3,20252026,6,19,Florida Panthers,St. Louis Blues
554,2025020555,2025-12-20T19:00:00,2025-12-20,555,1,7,2,4,6,5,20252026,5,23,Boston Bruins,Vancouver Canucks
555,2025020556,2025-12-20T19:00:00,2025-12-20,556,1,7,2,4,8,3,20252026,0,5,Montréal Canadiens,Pittsburgh Penguins
556,2025020557,2025-12-20T19:00:00,2025-12-20,557,1,7,2,6,14,3,20252026,4,12,Tampa Bay Lightning,Carolina Hurricanes
557,2025020558,2025-12-20T19:00:00,2025-12-20,558,1,7,2,5,18,3,20252026,3,10,Nashville Predators,Toronto Maple Leafs


In [None]:
if not recent_games.empty:
    # Insert the filtered games into version 1 of the 'matches' feature group.
    fs = project.get_feature_store()
    matches_fg = fs.get_feature_group(name="matches", version=1)
    matches_fg.insert(recent_games)
else:
    print("Empty DataFrame, nothing uploaded!")

Uploading Dataframe: 100.00% |██████████| Rows 22/22 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: matches_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/matches_1_offline_fg_materialization/executions


(Job('matches_1_offline_fg_materialization', 'SPARK'), None)

In [9]:
PLAYER_GAME_LOG_URL = "https://api.nhle.com/stats/rest/en/skater/summary"

# Query window: yesterday through today, based on the local date.
today = date.today()
yesterday = today - timedelta(days=1)

print(today)
print(yesterday)

# Filter expression sent as the `cayenneExp` query parameter: game type 2,
# the current season id, and the two-day date window.
cayenne_exp = (
    f"gameTypeId=2 and seasonId={season_id} "
    f"and gameDate>='{yesterday}' and gameDate<='{today}'"
)
params = {
    "isGame": "true",
    "cayenneExp": cayenne_exp,
    "limit": -1,
}

resp = requests.get(PLAYER_GAME_LOG_URL, params=params, timeout=20)
resp.raise_for_status()  # surface HTTP errors instead of parsing an error body

# A missing or empty "data" payload yields an empty frame, which the upload
# cell checks for before inserting.
data = resp.json().get("data")
players_df = pd.DataFrame(data) if data else pd.DataFrame()


2025-12-21
2025-12-20


In [10]:
# Convert column labels with to_snake() first, and only then make sure the
# season column exists.
#
# The check is done against the *converted* name on purpose: checking for
# "seasonId" before renaming made the cell unsafe to re-run, because on a second
# execution "seasonId" is no longer present, gets added again, and the rename
# then produces a second column with the same converted name.
# NOTE(review): re-running still assumes to_snake() leaves already-converted
# names unchanged - confirm in util.
players_df = players_df.rename(columns=to_snake)

season_col = to_snake("seasonId")
if season_col not in players_df.columns:
    # Fall back to the season id resolved earlier in the notebook, as a string.
    players_df[season_col] = str(season_id)

players_df

Unnamed: 0,assists,ev_goals,ev_points,faceoff_win_pct,game_date,game_id,game_winning_goals,games_played,goals,home_road,...,pp_points,sh_goals,sh_points,shooting_pct,shoots_catches,shots,skater_full_name,team_abbrev,time_on_ice_per_game,season_id
0,0,0,0,1.0,2025-12-20,2025020559,0,1,0,R,...,0,0,0,0.0,R,1,Braeden Bowman,VGK,887.0,20252026
1,0,0,0,,2025-12-20,2025020554,0,1,0,H,...,0,0,0,,L,0,Niko Mikkola,FLA,1293.0,20252026
2,0,0,0,,2025-12-20,2025020557,0,1,0,R,...,0,0,0,0.0,L,2,Taylor Hall,CAR,952.0,20252026
3,0,0,0,,2025-12-20,2025020554,0,1,0,R,...,0,0,0,0.0,R,2,Colton Parayko,STL,1295.0,20252026
4,1,0,1,,2025-12-20,2025020560,0,1,0,R,...,0,0,0,0.0,L,1,Ivan Provorov,CBJ,1578.0,20252026
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,0,0,0,,2025-12-20,2025020560,0,1,0,R,...,0,0,0,,L,0,Yegor Chinakhov,CBJ,441.0,20252026
464,0,0,0,,2025-12-20,2025020555,0,1,0,H,...,0,0,0,0.0,L,2,Michael Eyssimont,BOS,568.0,20252026
465,1,0,1,0.6,2025-12-20,2025020549,0,1,0,H,...,0,0,0,,L,0,J.T. Miller,NYR,1124.0,20252026
466,0,0,0,,2025-12-20,2025020559,0,1,0,R,...,0,0,0,0.0,L,2,Ben Hutton,VGK,1160.0,20252026


In [None]:
if not players_df.empty:
    # Insert the skater rows into version 1 of the 'players_form' feature group.
    fs = project.get_feature_store()
    players_form_fg = fs.get_feature_group(name="players_form", version=1)
    players_form_fg.insert(players_df)
else:
    print("DataFrame empty, nothing uploaded!")

Uploading Dataframe: 100.00% |██████████| Rows 468/468 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: players_form_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/players_form_1_offline_fg_materialization/executions


(Job('players_form_1_offline_fg_materialization', 'SPARK'), None)

In [12]:
# Fetch the season's goalie rows, convert column labels with to_snake(), parse
# game_date, and drop the 'ties' column when it is present.
goalies_form_df = (
    fetch_goalie_form_for_season(season_id)
    .rename(columns=to_snake)
    .assign(game_date=lambda df: pd.to_datetime(df["game_date"]))
    .drop(columns=["ties"], errors="ignore")
)

# Window: midnight yesterday through midnight today (local clock), inclusive.
today = pd.Timestamp.today().normalize()
yesterday = today - pd.Timedelta(days=1)

mask = goalies_form_df["game_date"].between(yesterday, today)
recent_games_goalies = goalies_form_df[mask].copy()

# Convert the date back to a string on the copy that is uploaded later.
recent_games_goalies["game_date"] = recent_games_goalies["game_date"].astype(str)

recent_games_goalies

Unnamed: 0,assists,game_date,game_id,games_played,games_started,goalie_full_name,goals,goals_against,goals_against_average,home_road,...,points,save_pct,saves,shoots_catches,shots_against,shutouts,team_abbrev,time_on_ice,wins,season_id
1096,0,2025-12-20,2025020555,1,1,Jeremy Swayman,0,4,3.69799,H,...,0,0.81818,18,L,22,0,BOS,3894,0,20252026
1099,0,2025-12-20,2025020549,1,1,Samuel Ersson,0,4,3.6923,R,...,0,0.85185,23,L,27,0,PHI,3900,0,20252026
1102,0,2025-12-20,2025020561,1,1,Yaroslav Askarov,0,3,3.08043,H,...,0,0.90322,28,R,31,0,SJS,3506,0,20252026
1103,0,2025-12-20,2025020558,1,1,Juuse Saros,0,3,3.00083,H,...,0,0.86363,19,L,22,0,NSH,3599,1,20252026
1104,1,2025-12-20,2025020553,1,1,Alex Lyon,0,2,1.84852,H,...,1,0.94117,32,L,34,0,BUF,3895,1,20252026
1110,0,2025-12-20,2025020551,1,1,Arvid Soderblom,0,6,6.11378,R,...,0,0.85,34,L,40,0,CHI,3533,0,20252026
1113,0,2025-12-20,2025020556,1,1,Stuart Skinner,0,3,3.23547,R,...,0,0.85,17,L,20,0,PIT,3338,0,20252026
1114,0,2025-12-20,2025020552,1,1,Calvin Pickard,0,4,4.15944,R,...,0,0.88888,32,L,36,0,EDM,3462,0,20252026
1118,0,2025-12-20,2025020560,1,1,Lukas Dostal,0,3,3.0,H,...,0,0.88461,23,L,26,0,ANA,3600,1,20252026
1127,0,2025-12-20,2025020560,1,1,Elvis Merzlikins,0,4,4.03927,R,...,0,0.85714,24,L,28,0,CBJ,3565,0,20252026


In [None]:
if not recent_games_goalies.empty:
    # Insert the goalie rows into version 1 of the 'goalies_form' feature group.
    fs = project.get_feature_store()
    goalies_form_fg = fs.get_feature_group(name="goalies_form", version=1)
    goalies_form_fg.insert(recent_games_goalies)
else:
    print("Empty DataFrame, nothing uploaded!")

Uploading Dataframe: 100.00% |██████████| Rows 26/26 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: goalies_form_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/goalies_form_1_offline_fg_materialization/executions


(Job('goalies_form_1_offline_fg_materialization', 'SPARK'), None)