In [1]:
import os
import sys
from pathlib import Path

# lägg till projektroten (mappen ovanför notebooks/) på sys.path
root_dir = Path().absolute()

if root_dir.parts[-1:] == ('notebooks',):
    root_dir = Path(*root_dir.parts[:-1])

root_dir = str(root_dir) 
print(f"Root dir: {root_dir}")
print("Local environment")

if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: /Users/jacobbjareklint/Code/GitHub/hockey-agent
Local environment
Added the following directory to the PYTHONPATH: /Users/jacobbjareklint/Code/GitHub/hockey-agent


In [2]:
import hopsworks
from config import settings
import requests
import pandas as pd
import util

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
    host = settings.HOPSWORKS_HOST
)


2025-12-30 11:16:44,606 INFO: Initializing external client
2025-12-30 11:16:44,607 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-30 11:16:46,059 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [4]:
all_seasons = util.generate_season_ids(2000)
STATS_BASE = settings.NHL_STATS_BASE_URL
endpoiint = "en/skater/summary"

dfs = []
for s in all_seasons:
    url = f"{STATS_BASE}/{endpoiint}"
    cayenne = f"gameTypeId=2 and seasonId={s}"

    base_params = {
        "isAggregate": "false",
        "isGame": "false",
        "start": 0,
        "limit": -1,
        "cayenneExp": cayenne,
    }

    resp = requests.get(url, params=base_params, timeout=20)
    resp.raise_for_status()

    summary = resp.json()["data"]
    df_sum = pd.DataFrame(summary)
    dfs.append(df_sum)

df_all = pd.concat(dfs, ignore_index=True)
print(len(df_all))
print(df_all["seasonId"].value_counts().sort_index())

22304
seasonId
20002001     884
20012002     874
20022003     888
20032004     916
20052006     870
20062007     858
20072008     852
20082009     885
20092010     879
20102011     891
20112012     894
20122013     839
20132014     886
20142015     882
20152016     898
20162017     888
20172018     890
20182019     906
20192020     883
20202021     913
20212022    1004
20222023     951
20232024     924
20242025     920
20252026     829
Name: count, dtype: int64


In [5]:
df_all.drop(columns=["lastName"], inplace=True)

df_all = df_all.rename(columns={col: util.to_snake(col) for col in df_all.columns})
df_all.info

<bound method DataFrame.info of        assists  ev_goals  ev_points  faceoff_win_pct  game_winning_goals  \
0           16         9         19          0.55102                   1   
1            7         2          8          0.40895                   0   
2            1         0          1              NaN                   0   
3            8         0          4              NaN                   0   
4            0         0          0          1.00000                   0   
...        ...       ...        ...              ...                 ...   
22299       12         5         13          0.37500                   2   
22300        0         0          0              NaN                   0   
22301        7         5         10          0.45132                   0   
22302        1         0          1              NaN                   0   
22303        1         0          0              NaN                   0   

       games_played  goals  ot_goals  penalty_minutes  

In [6]:
fs = project.get_feature_store() 

In [7]:
player_season_stats_fg = fs.get_or_create_feature_group(
    name='player_season_stats',
    description='Stats of players for each season',
    version=1,
    primary_key=['player_id','skater_full_name', 'season_id'],
    #event_time="date",
)

In [8]:
player_season_stats_fg.insert(df_all)

Uploading Dataframe: 100.00% |██████████| Rows 22304/22304 | Elapsed Time: 00:02 | Remaining Time: 00:00


Launching job: player_season_stats_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/player_season_stats_1_offline_fg_materialization/executions


(Job('player_season_stats_1_offline_fg_materialization', 'SPARK'), None)