In [1]:
import os
import sys
from pathlib import Path

# lägg till projektroten (mappen ovanför notebooks/) på sys.path
root_dir = Path().absolute()

if root_dir.parts[-1:] == ('notebooks',):
    root_dir = Path(*root_dir.parts[:-1])

root_dir = str(root_dir) 
print(f"Root dir: {root_dir}")
print("Local environment")

if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: /Users/jacobbjareklint/Code/GitHub/hockey-agent
Local environment
Added the following directory to the PYTHONPATH: /Users/jacobbjareklint/Code/GitHub/hockey-agent


In [2]:
import hopsworks
import requests
import pandas as pd
from config import settings
import util

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
    host = settings.HOPSWORKS_HOST
)

2025-12-30 11:08:22,393 INFO: Initializing external client
2025-12-30 11:08:22,394 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-30 11:08:23,704 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [4]:
import datetime
today = datetime.date.today()

In [5]:
STATS_BASE = settings.NHL_STATS_BASE_URL
season_id = util.get_season(today)
endpoiint = "en/goalie/summary"

url = f"{STATS_BASE}/{endpoiint}"
cayenne = f"gameTypeId=2 and seasonId={season_id}"

base_params = {
    "isAggregate": "false",
    "isGame": "false",
    "start": 0,
    "limit": -1,
    "cayenneExp": cayenne,
}

resp = requests.get(url, params=base_params, timeout=20)
resp.raise_for_status()

summary = resp.json()["data"]
df_sum = pd.DataFrame(summary)
df_sum["seasonId"] = season_id
print(df_sum.columns.tolist())

df_sum

['assists', 'gamesPlayed', 'gamesStarted', 'goalieFullName', 'goals', 'goalsAgainst', 'goalsAgainstAverage', 'lastName', 'losses', 'otLosses', 'penaltyMinutes', 'playerId', 'points', 'savePct', 'saves', 'seasonId', 'shootsCatches', 'shotsAgainst', 'shutouts', 'teamAbbrevs', 'ties', 'timeOnIce', 'wins']


Unnamed: 0,assists,gamesPlayed,gamesStarted,goalieFullName,goals,goalsAgainst,goalsAgainstAverage,lastName,losses,otLosses,...,savePct,saves,seasonId,shootsCatches,shotsAgainst,shutouts,teamAbbrevs,ties,timeOnIce,wins
0,0,9,8,Pyotr Kochetkov,0,19,2.32581,Kochetkov,2,0,...,0.89947,170,20252026,L,189,1,CAR,,29409,6
1,0,9,7,Petr Mrazek,0,30,3.78243,Mrazek,4,0,...,0.86547,193,20252026,L,223,0,ANA,,28553,3
2,0,12,11,Daniil Tarasov,0,34,2.96820,Tarasov,6,1,...,0.89970,305,20252026,L,339,0,FLA,,41237,4
3,1,27,27,Sergei Bobrovsky,0,74,2.79526,Bobrovsky,9,1,...,0.88770,585,20252026,L,659,3,FLA,,95304,17
4,0,15,15,David Rittich,0,35,2.29973,Rittich,4,2,...,0.91860,395,20252026,L,430,2,NYI,,54789,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,0,2,1,Trent Miner,0,4,2.11982,Miner,0,2,...,0.90909,40,20252026,L,44,0,COL,,6793,0
81,1,16,16,Thatcher Demko,0,40,2.62797,Demko,7,0,...,0.90930,391,20252026,L,430,1,VAN,,54795,8
82,1,22,22,John Gibson,0,65,3.06523,Gibson,8,1,...,0.89075,530,20252026,L,595,2,DET,,76340,12
83,1,12,11,Vitek Vanecek,0,34,2.92103,Vanecek,9,1,...,0.87857,246,20252026,L,280,0,UTA,,41903,2


In [6]:
df_sum = df_sum.rename(columns={col: util.to_snake(col) for col in df_sum.columns})
print(df_sum.columns.tolist())
df_sum.info

['assists', 'games_played', 'games_started', 'goalie_full_name', 'goals', 'goals_against', 'goals_against_average', 'last_name', 'losses', 'ot_losses', 'penalty_minutes', 'player_id', 'points', 'save_pct', 'saves', 'season_id', 'shoots_catches', 'shots_against', 'shutouts', 'team_abbrevs', 'ties', 'time_on_ice', 'wins']


<bound method DataFrame.info of     assists  games_played  games_started  goalie_full_name  goals  \
0         0             9              8   Pyotr Kochetkov      0   
1         0             9              7       Petr Mrazek      0   
2         0            12             11    Daniil Tarasov      0   
3         1            27             27  Sergei Bobrovsky      0   
4         0            15             15     David Rittich      0   
..      ...           ...            ...               ...    ...   
80        0             2              1       Trent Miner      0   
81        1            16             16    Thatcher Demko      0   
82        1            22             22       John Gibson      0   
83        1            12             11     Vitek Vanecek      0   
84        0             5              4       Matt Murray      0   

    goals_against  goals_against_average  last_name  losses  ot_losses  ...  \
0              19                2.32581  Kochetkov       2 

In [7]:
float_cols = ['assists', 'games_played', 'games_started', 'goals', 'goals_against', 
              'goals_against_average', 'losses', 'ot_losses', 'penalty_minutes', 'player_id', 
              'points', 'save_pct', 'saves', 'shots_against', 'shutouts', 
                'ties', 'time_on_ice', 'wins']




df_sum[float_cols] = df_sum[float_cols].astype("float64")

In [8]:
fs = project.get_feature_store()

teams_fg = fs.get_feature_group(name = 'goalies', version = 1,)

teams_fg.insert(df_sum)

Uploading Dataframe: 100.00% |██████████| Rows 85/85 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: goalies_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/goalies_1_offline_fg_materialization/executions


(Job('goalies_1_offline_fg_materialization', 'SPARK'), None)