In [1]:
import os
import sys
from pathlib import Path

# lägg till projektroten (mappen ovanför notebooks/) på sys.path
root_dir = Path().absolute()

if root_dir.parts[-1:] == ('notebooks',):
    root_dir = Path(*root_dir.parts[:-1])

root_dir = str(root_dir) 
print(f"Root dir: {root_dir}")
print("Local environment")

if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: c:\Users\Chris\hockey-agent
Local environment
Added the following directory to the PYTHONPATH: c:\Users\Chris\hockey-agent


In [2]:
import hopsworks
from config import settings
import requests
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
    host = settings.HOPSWORKS_HOST
)


2025-12-20 08:56:10,037 INFO: Initializing external client
2025-12-20 08:56:10,039 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-20 08:56:10,908 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [4]:
from datetime import datetime

def generate_season_ids(start_year=2000):
    current_year = datetime.now().year

    season_ids = []
    for year in range(start_year, current_year+1):
        season_ids.append(f"{year}{year+1}")

    return season_ids

season_ids = generate_season_ids(2000)
season_ids[:5], season_ids[-3:]

(['20002001', '20012002', '20022003', '20032004', '20042005'],
 ['20232024', '20242025', '20252026'])

In [5]:
import requests
import pandas as pd

URL = "https://api.nhle.com/stats/rest/en/team/summary"

def fetch_team_for_season(season_id: str) -> pd.DataFrame:
    params = {
        "cayenneExp": f"gameTypeId=2 and seasonId={season_id}"
        "limit": -1
    }

    resp = requests.get(URL, params=params, timeout=20)
    resp.raise_for_status()

    data = resp.json()["data"]
    df = pd.DataFrame(data)

    df["seasonId"] = season_id  # säkerställ att den finns
    return df


SyntaxError: invalid syntax (3736821761.py, line 9)

In [None]:
all_teams = []

for season_id in season_ids:
    try:
        print(f"Hämtar säsong {season_id}")
        df_season = fetch_team_for_season(season_id)
        all_teams.append(df_season)
    except Exception as e:
        print(f"Misslyckades för {season_id}: {e}")

teams_df = pd.concat(all_teams, ignore_index=True)
print(teams_df.shape)

Hämtar säsong 20002001
Hämtar säsong 20012002
Hämtar säsong 20022003
Hämtar säsong 20032004
Hämtar säsong 20042005
Hämtar säsong 20052006
Hämtar säsong 20062007
Hämtar säsong 20072008
Hämtar säsong 20082009
Hämtar säsong 20092010
Hämtar säsong 20102011
Hämtar säsong 20112012
Hämtar säsong 20122013
Hämtar säsong 20132014
Hämtar säsong 20142015
Hämtar säsong 20152016
Hämtar säsong 20162017
Hämtar säsong 20172018
Hämtar säsong 20182019
Hämtar säsong 20192020
Hämtar säsong 20202021
Hämtar säsong 20212022
Hämtar säsong 20222023
Hämtar säsong 20232024
Hämtar säsong 20242025
Hämtar säsong 20252026
(764, 25)




In [None]:
import re

def to_snake(name: str) -> str:
    # splitta CamelCase till snake_case
    s = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
    s = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s)
    return s.lower()

In [None]:
teams_df = teams_df.rename(columns={
    col: to_snake(col) for col in teams_df.columns
})
teams_df

Unnamed: 0,faceoff_win_pct,games_played,goals_against,goals_against_per_game,goals_for,goals_for_per_game,losses,ot_losses,penalty_kill_net_pct,penalty_kill_pct,...,season_id,shots_against_per_game,shots_for_per_game,team_full_name,team_id,team_shutouts,ties,wins,wins_in_regulation,wins_in_shootout
0,0.553117,88.0,225.0,2.55681,243.0,2.76136,31.0,4.0,0.851941,0.837379,...,20002001,27.56818,28.51136,Washington Capitals,15.0,6.0,10.0,43.0,40.0,0.0
1,0.489401,107.0,247.0,2.30841,364.0,3.40186,29.0,3.0,0.872641,0.844340,...,20002001,23.65420,30.97196,New Jersey Devils,1.0,14.0,12.0,63.0,56.0,0.0
2,0.469422,86.0,254.0,2.95348,248.0,2.88372,32.0,7.0,0.834975,0.805419,...,20002001,26.02325,29.32558,Vancouver Canucks,23.0,2.0,11.0,36.0,31.0,0.0
3,0.483333,82.0,245.0,2.98780,188.0,2.29268,41.0,5.0,0.833333,0.817949,...,20002001,29.47560,25.89024,Anaheim Ducks,24.0,6.0,11.0,25.0,21.0,0.0
4,0.515064,97.0,229.0,2.36082,289.0,2.97938,28.0,5.0,0.896247,0.856513,...,20002001,24.24742,30.76288,St. Louis Blues,19.0,10.0,12.0,52.0,43.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
759,0.496885,33.0,90.0,2.72727,108.0,3.27272,9.0,2.0,0.847826,0.804348,...,20252026,24.24242,33.15151,Carolina Hurricanes,12.0,2.0,,22.0,14.0,4.0
760,0.494731,33.0,92.0,2.78787,94.0,2.84848,10.0,6.0,0.817307,0.817308,...,20252026,25.81818,24.90909,Philadelphia Flyers,4.0,0.0,,17.0,10.0,5.0
761,0.529443,35.0,117.0,3.34285,119.0,3.40000,12.0,6.0,0.830000,0.790000,...,20252026,26.68571,28.48571,Edmonton Oilers,22.0,2.0,,17.0,12.0,0.0
762,0.469791,33.0,102.0,3.09090,105.0,3.18181,13.0,2.0,0.863636,0.827273,...,20252026,26.66666,29.15151,Florida Panthers,13.0,3.0,,18.0,16.0,1.0


In [None]:
fs = project.get_feature_store()

teams_fg = fs.get_or_create_feature_group(
    name="teams",
    description="NHL team information from standings endpoint",
    version=1,
    primary_key=["team_full_name", "season_id"]
)

In [None]:
teams_fg.insert(teams_df)

Uploading Dataframe: 100.00% |██████████| Rows 764/764 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: teams_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/teams_1_offline_fg_materialization/executions


(Job('teams_1_offline_fg_materialization', 'SPARK'), None)