In [1]:
import os
import sys
from pathlib import Path

# lägg till projektroten (mappen ovanför notebooks/) på sys.path
root_dir = Path().absolute()

if root_dir.parts[-1:] == ('notebooks',):
    root_dir = Path(*root_dir.parts[:-1])

root_dir = str(root_dir) 
print(f"Root dir: {root_dir}")
print("Local environment")

if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: /Users/jacobbjareklint/Code/GitHub/hockey-agent
Local environment
Added the following directory to the PYTHONPATH: /Users/jacobbjareklint/Code/GitHub/hockey-agent


In [None]:
import hopsworks
import requests
import pandas as pd
from config import settings
import util

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
    host = settings.HOPSWORKS_HOST
)

2025-12-18 11:08:45,020 INFO: Initializing external client
2025-12-18 11:08:45,021 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-18 11:08:45,819 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [None]:
import datetime
today = datetime.date.today()

In [None]:
STATS_BASE = settings.NHL_STATS_BASE_URL
season_id = util.get_season(today)
endpoiint = "en/skater/summary"

url = f"{STATS_BASE}/{endpoiint}"
cayenne = f"gameTypeId=2 and seasonId={season_id}"

base_params = {
    "isAggregate": "false",
    "isGame": "false",
    "start": 0,
    "limit": -1,
    "cayenneExp": cayenne,
}

resp = requests.get(url, params=base_params, timeout=20)
resp.raise_for_status()

summary = resp.json()["data"]
df_sum = pd.DataFrame(summary)
print(df_sum.columns.tolist())
df_sum

['assists', 'evGoals', 'evPoints', 'faceoffWinPct', 'gameWinningGoals', 'gamesPlayed', 'goals', 'lastName', 'otGoals', 'penaltyMinutes', 'playerId', 'plusMinus', 'points', 'pointsPerGame', 'positionCode', 'ppGoals', 'ppPoints', 'seasonId', 'shGoals', 'shPoints', 'shootingPct', 'shootsCatches', 'shots', 'skaterFullName', 'teamAbbrevs', 'timeOnIcePerGame']


Unnamed: 0,assists,evGoals,evPoints,faceoffWinPct,gameWinningGoals,gamesPlayed,goals,lastName,otGoals,penaltyMinutes,...,ppPoints,seasonId,shGoals,shPoints,shootingPct,shootsCatches,shots,skaterFullName,teamAbbrevs,timeOnIcePerGame
0,2,0,2,,0,12,0,Chisholm,0,2,...,0,20252026,0,0,0.00000,L,8,Declan Chisholm,WSH,887.0000
1,5,0,5,,0,33,0,Mikkola,0,35,...,0,20252026,0,0,0.00000,L,37,Niko Mikkola,FLA,1197.5757
2,16,7,15,0.00000,2,29,10,Marchenko,0,14,...,10,20252026,1,1,0.11363,R,88,Kirill Marchenko,CBJ,1144.2068
3,15,6,16,0.43478,3,33,8,Ehlers,1,6,...,7,20252026,0,0,0.09302,L,86,Nikolaj Ehlers,CAR,987.0000
4,0,3,3,0.00000,1,14,3,Smith,0,8,...,0,20252026,0,0,0.15789,L,19,Cole Smith,NSH,824.7142
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
817,6,4,8,0.46511,0,34,4,Bourque,0,4,...,2,20252026,0,0,0.07692,R,52,Mavrik Bourque,DAL,825.2352
818,16,13,26,0.53899,4,35,17,Larkin,1,24,...,7,20252026,0,0,0.15887,L,107,Dylan Larkin,DET,1232.1428
819,5,5,9,0.51724,1,34,5,Cotter,0,13,...,1,20252026,0,0,0.13513,L,37,Paul Cotter,NJD,748.3823
820,3,0,3,,0,18,0,Vaakanainen,0,10,...,0,20252026,0,0,0.00000,L,11,Urho Vaakanainen,NYR,852.7222


In [None]:
df_sum.drop(columns=["lastName"], inplace=True)

df_sum = df_sum.rename(columns={col: util.to_snake(col) for col in df_sum.columns})
df_sum.info

<bound method DataFrame.info of      assists  ev_goals  ev_points  faceoff_win_pct  game_winning_goals  \
0          2         0          2              NaN                   0   
1          5         0          5              NaN                   0   
2         16         7         15          0.00000                   2   
3         15         6         16          0.43478                   3   
4          0         3          3          0.00000                   1   
..       ...       ...        ...              ...                 ...   
817        6         4          8          0.46511                   0   
818       16        13         26          0.53899                   4   
819        5         5          9          0.51724                   1   
820        3         0          3              NaN                   0   
821        1         0          0              NaN                   0   

     games_played  goals  ot_goals  penalty_minutes  player_id  ...  \
0       

In [7]:
fs = project.get_feature_store()

player_stats_fg = fs.get_feature_group(name = 'player_season_stats', version = 1,)

player_stats_fg.insert(df_sum)



Uploading Dataframe: 100.00% |██████████| Rows 822/822 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: player_season_stats_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/player_season_stats_1_offline_fg_materialization/executions


(Job('player_season_stats_1_offline_fg_materialization', 'SPARK'), None)