In [1]:
import os
import sys
from pathlib import Path

# Add the project root (the folder above notebooks/) to sys.path so that
# project-level modules can be imported from this notebook.
cwd = Path().absolute()

# When the working directory is notebooks/, step up one level to its parent.
root_dir = str(cwd.parent if cwd.name == "notebooks" else cwd)

print(f"Root dir: {root_dir}")
print("Local environment")

# Only append once so re-running the cell does not grow sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: c:\Users\Chris\hockey-agent
Local environment
Added the following directory to the PYTHONPATH: c:\Users\Chris\hockey-agent


In [2]:
import hopsworks
from config import settings
import requests
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Log in to Hopsworks; host, project name and API key are all read from the
# `settings` object imported from `config`.
project = hopsworks.login(
    host=settings.HOPSWORKS_HOST,
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
)


2025-12-20 08:55:58,357 INFO: Initializing external client
2025-12-20 08:55:58,357 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-20 08:56:00,019 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [4]:
from datetime import datetime

def generate_season_ids(start_year=2000, end_year=None):
    """Build season identifiers, one per starting year.

    Each identifier is a starting year immediately followed by the next
    year, e.g. ``"20002001"``.

    Args:
        start_year: First starting year to include (inclusive).
        end_year: Last starting year to include (inclusive). When omitted,
            the current calendar year is used, which is what this function
            always did before the parameter existed.

    Returns:
        A list of season id strings in ascending order. The list is empty
        when ``start_year`` is greater than ``end_year``.
    """
    if end_year is None:
        # NOTE(review): early in a calendar year this includes a season whose
        # starting year is the current year (e.g. "20262027" in January 2026),
        # which presumably has no data yet — confirm callers tolerate that.
        end_year = datetime.now().year

    return [f"{year}{year + 1}" for year in range(start_year, end_year + 1)]

# Build the season ids starting from the year 2000, then preview the first
# five and the last three as a quick sanity check.
season_ids = generate_season_ids(start_year=2000)
(season_ids[:5], season_ids[-3:])


(['20002001', '20012002', '20022003', '20032004', '20042005'],
 ['20232024', '20242025', '20252026'])

In [5]:
import requests
import pandas as pd

# Endpoint queried for the per-season goalie summary.
GOALIE_URL = "https://api.nhle.com/stats/rest/en/goalie/summary"


def fetch_goalies_for_season(season_id: str) -> pd.DataFrame:
    """Download the goalie summary for one season as a DataFrame.

    Args:
        season_id: Season identifier inserted into the query filter,
            e.g. ``"20002001"``.

    Returns:
        A DataFrame built from the ``"data"`` list of the JSON response, with
        the ``seasonId`` column set to ``season_id`` on every row.

    Raises:
        requests.HTTPError: If the response carries an error status
            (via ``raise_for_status``).
    """
    query = {
        # NOTE(review): gameTypeId=2 presumably selects regular-season games
        # and limit=-1 presumably disables paging — confirm against the API.
        "cayenneExp": f"gameTypeId=2 and seasonId={season_id}",
        "limit": -1,
    }

    response = requests.get(GOALIE_URL, params=query, timeout=20)
    response.raise_for_status()

    rows = response.json()["data"]

    # Set seasonId explicitly so the column is guaranteed to exist.
    return pd.DataFrame(rows).assign(seasonId=season_id)


In [6]:
# Fetch each season separately and collect the resulting frames. A season
# that fails is reported and skipped so the remaining seasons still load.
all_goalies = []

for season_id in season_ids:
    try:
        print(f"Hämtar säsong {season_id}")
        df_season = fetch_goalies_for_season(season_id)
    except Exception as exc:
        print(f"Misslyckades för {season_id}: {exc}")
    else:
        all_goalies.append(df_season)

# Stack the per-season frames into one table with a fresh 0..n-1 index.
goalies_df = pd.concat(all_goalies, ignore_index=True)
print(goalies_df.shape)

Hämtar säsong 20002001
Hämtar säsong 20012002
Hämtar säsong 20022003
Hämtar säsong 20032004
Hämtar säsong 20042005
Hämtar säsong 20052006
Hämtar säsong 20062007
Hämtar säsong 20072008
Hämtar säsong 20082009
Hämtar säsong 20092010
Hämtar säsong 20102011
Hämtar säsong 20112012
Hämtar säsong 20122013
Hämtar säsong 20132014
Hämtar säsong 20142015
Hämtar säsong 20152016
Hämtar säsong 20162017
Hämtar säsong 20172018
Hämtar säsong 20182019
Hämtar säsong 20192020
Hämtar säsong 20202021
Hämtar säsong 20212022
Hämtar säsong 20222023
Hämtar säsong 20232024
Hämtar säsong 20242025
Hämtar säsong 20252026
(2316, 23)




In [7]:
import re

def to_snake(name: str) -> str:
    """Convert a camelCase / PascalCase identifier to snake_case.

    Two substitution passes insert underscores, then the result is
    lower-cased:

    1. before every capitalised word (an uppercase letter followed by
       lowercase letters) that has a character in front of it;
    2. between a lowercase letter or digit and a following uppercase letter.

    Args:
        name: Identifier to convert, e.g. ``"goalsAgainstAverage"``.

    Returns:
        The snake_case form, e.g. ``"goals_against_average"``.
    """
    converted = name
    for pattern in (r"(.)([A-Z][a-z]+)", r"([a-z0-9])([A-Z])"):
        converted = re.sub(pattern, r"\1_\2", converted)
    return converted.lower()


In [8]:
# Display the combined frame as fetched (column names are still camelCase).
goalies_df

Unnamed: 0,assists,gamesPlayed,gamesStarted,goalieFullName,goals,goalsAgainst,goalsAgainstAverage,lastName,losses,otLosses,...,savePct,saves,seasonId,shootsCatches,shotsAgainst,shutouts,teamAbbrevs,ties,timeOnIce,wins
0,0.0,1.0,0.0,Evgeny Konstantinov,0.0,0.0,0.00000,Konstantinov,0.0,,...,,0.0,20002001,L,0.0,0.0,TBL,0.0,24.0,0.0
1,1.0,62.0,60.0,Sean Burke,0.0,138.0,2.27235,Burke,22.0,,...,0.92190,1629.0,20002001,L,1767.0,4.0,PHX,13.0,218628.0,25.0
2,1.0,42.0,41.0,Trevor Kidd,0.0,130.0,3.31369,Kidd,23.0,,...,0.89317,1087.0,20002001,L,1217.0,1.0,FLA,6.0,141232.0,10.0
3,0.0,5.0,4.0,Joaquin Gage,0.0,15.0,3.45511,Gage,2.0,,...,0.88000,110.0,20002001,L,125.0,0.0,EDM,0.0,15629.0,2.0
4,0.0,3.0,2.0,Michel Larocque,0.0,9.0,3.55419,Larocque,2.0,,...,0.84745,50.0,20002001,L,59.0,0.0,CHI,0.0,9116.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2311,0.0,2.0,1.0,Trent Miner,0.0,4.0,2.11982,Miner,0.0,2.0,...,0.90909,40.0,20252026,L,44.0,0.0,COL,,6793.0,0.0
2312,1.0,14.0,14.0,Thatcher Demko,0.0,31.0,2.34084,Demko,5.0,0.0,...,0.91267,324.0,20252026,L,355.0,1.0,VAN,,47675.0,8.0
2313,1.0,19.0,19.0,John Gibson,0.0,56.0,3.06980,Gibson,7.0,1.0,...,0.89019,454.0,20252026,L,510.0,2.0,DET,,65672.0,10.0
2314,1.0,10.0,9.0,Vitek Vanecek,0.0,29.0,2.98746,Vanecek,7.0,1.0,...,0.87280,199.0,20252026,L,228.0,0.0,UTA,,34946.0,2.0


In [9]:
# Normalise every column label to snake_case; `rename` applies the function
# to each column name.
goalies_df = goalies_df.rename(columns=to_snake)


In [10]:
# Key columns for the goalie table, using the snake_case column names.
primary_key = [
    "player_id",
    "season_id",
]

In [11]:
# Keep only the first row for each key combination. The `primary_key` list
# defined in the previous cell is reused instead of repeating the column
# names, so the de-duplication key cannot drift from the declared key.
goalies_df = goalies_df.drop_duplicates(subset=primary_key)

In [12]:
# Display the frame again after the rename and de-duplication steps.
goalies_df

Unnamed: 0,assists,games_played,games_started,goalie_full_name,goals,goals_against,goals_against_average,last_name,losses,ot_losses,...,save_pct,saves,season_id,shoots_catches,shots_against,shutouts,team_abbrevs,ties,time_on_ice,wins
0,0.0,1.0,0.0,Evgeny Konstantinov,0.0,0.0,0.00000,Konstantinov,0.0,,...,,0.0,20002001,L,0.0,0.0,TBL,0.0,24.0,0.0
1,1.0,62.0,60.0,Sean Burke,0.0,138.0,2.27235,Burke,22.0,,...,0.92190,1629.0,20002001,L,1767.0,4.0,PHX,13.0,218628.0,25.0
2,1.0,42.0,41.0,Trevor Kidd,0.0,130.0,3.31369,Kidd,23.0,,...,0.89317,1087.0,20002001,L,1217.0,1.0,FLA,6.0,141232.0,10.0
3,0.0,5.0,4.0,Joaquin Gage,0.0,15.0,3.45511,Gage,2.0,,...,0.88000,110.0,20002001,L,125.0,0.0,EDM,0.0,15629.0,2.0
4,0.0,3.0,2.0,Michel Larocque,0.0,9.0,3.55419,Larocque,2.0,,...,0.84745,50.0,20002001,L,59.0,0.0,CHI,0.0,9116.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2311,0.0,2.0,1.0,Trent Miner,0.0,4.0,2.11982,Miner,0.0,2.0,...,0.90909,40.0,20252026,L,44.0,0.0,COL,,6793.0,0.0
2312,1.0,14.0,14.0,Thatcher Demko,0.0,31.0,2.34084,Demko,5.0,0.0,...,0.91267,324.0,20252026,L,355.0,1.0,VAN,,47675.0,8.0
2313,1.0,19.0,19.0,John Gibson,0.0,56.0,3.06980,Gibson,7.0,1.0,...,0.89019,454.0,20252026,L,510.0,2.0,DET,,65672.0,10.0
2314,1.0,10.0,9.0,Vitek Vanecek,0.0,29.0,2.98746,Vanecek,7.0,1.0,...,0.87280,199.0,20252026,L,228.0,0.0,UTA,,34946.0,2.0


In [13]:
import re

def to_snake(name: str) -> str:
    """Return ``name`` converted from camelCase / PascalCase to snake_case.

    NOTE(review): ``to_snake`` is already defined in an earlier cell of this
    notebook; this cell re-defines it and one of the two copies can likely
    be removed.

    Args:
        name: Identifier to convert, e.g. ``"savePct"``.

    Returns:
        The lower-cased identifier with underscores at word boundaries,
        e.g. ``"save_pct"``.
    """
    # Inner call: underscore before each capitalised word ("Xxx").
    # Outer call: underscore between a lowercase letter/digit and a capital.
    return re.sub(
        r"([a-z0-9])([A-Z])",
        r"\1_\2",
        re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name),
    ).lower()

In [14]:
# Handle to the feature store of the logged-in project.
fs = project.get_feature_store()

# Look up version 1 of the "goalies" feature group, or create it if it does
# not exist yet.
goalies_fg = fs.get_or_create_feature_group(
    name="goalies",
    description="NHL goalie stats per season since 2000",
    version=1,
    # Reuse the `primary_key` list defined in an earlier cell so the feature
    # group key stays in sync with the declared key columns.
    primary_key=primary_key,
)

# Upload the frame; as the last expression of the cell, the return value of
# `insert` is shown as the cell output.
goalies_fg.insert(goalies_df)

Uploading Dataframe: 100.00% |██████████| Rows 2316/2316 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: goalies_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/goalies_1_offline_fg_materialization/executions


(Job('goalies_1_offline_fg_materialization', 'SPARK'), None)