In [1]:
import os
import sys
from pathlib import Path

# lägg till projektroten (mappen ovanför notebooks/) på sys.path
root_dir = Path().absolute()

if root_dir.parts[-1:] == ('notebooks',):
    root_dir = Path(*root_dir.parts[:-1])

root_dir = str(root_dir) 
print(f"Root dir: {root_dir}")
print("Local environment")

if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: c:\Users\Chris\hockey-agent
Local environment
Added the following directory to the PYTHONPATH: c:\Users\Chris\hockey-agent


In [2]:
import hopsworks
from config import settings
import requests
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
    host = settings.HOPSWORKS_HOST
)


2025-12-15 12:34:46,187 INFO: Initializing external client
2025-12-15 12:34:46,188 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-15 12:34:47,917 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [4]:
STATS_BASE = settings.NHL_STATS_BASE_URL
season_id = "20232024"
endpoiint = "en/skater/summary"

url = f"{STATS_BASE}/{endpoiint}"
cayenne = f"gameTypeId=2 and seasonId={season_id}"

base_params = {
    "isAggregate": "true",
    "isGame": "false",
    "start": 0,
    "limit": -1,
    "cayenneExp": cayenne,
}

resp = requests.get(url, params=base_params, timeout=20)
resp.raise_for_status()

summary = resp.json()["data"]
df_sum = pd.DataFrame(summary)
df_sum["season_id"] = int(season_id) 
print(df_sum.columns.tolist())
df_sum

['assists', 'evGoals', 'evPoints', 'faceoffWinPct', 'gameWinningGoals', 'gamesPlayed', 'goals', 'lastName', 'otGoals', 'penaltyMinutes', 'playerId', 'plusMinus', 'points', 'pointsPerGame', 'positionCode', 'ppGoals', 'ppPoints', 'shGoals', 'shPoints', 'shootingPct', 'shootsCatches', 'shots', 'skaterFullName', 'timeOnIcePerGame', 'season_id']


Unnamed: 0,assists,evGoals,evPoints,faceoffWinPct,gameWinningGoals,gamesPlayed,goals,lastName,otGoals,penaltyMinutes,...,ppGoals,ppPoints,shGoals,shPoints,shootingPct,shootsCatches,shots,skaterFullName,timeOnIcePerGame,season_id
0,2,0,2,0.44000,0,45,0,Groulx,0,22,...,0,0,0,0,0.00000,L,32,Bo Groulx,731.3111,20232024
1,1,2,3,0.50625,0,31,2,Hamblin,0,0,...,0,0,0,0,0.11111,L,18,James Hamblin,475.4838,20232024
2,2,0,2,,0,4,0,Lucic,0,2,...,0,0,0,0,0.00000,L,1,Milan Lucic,718.2500,20232024
3,47,18,43,0.37223,4,62,27,Hughes,2,12,...,9,31,0,0,0.09854,L,274,Jack Hughes,1258.4354,20232024
4,27,34,52,0.25000,5,82,42,Marchessault,2,40,...,8,17,0,0,0.15789,R,266,Jonathan Marchessault,1073.6951,20232024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
919,49,14,38,0.47967,3,80,18,Nugent-Hopkins,0,36,...,4,26,0,3,0.09836,L,183,Ryan Nugent-Hopkins,1177.4500,20232024
920,43,12,41,0.53906,1,82,26,O'Reilly,0,18,...,14,28,0,0,0.14444,L,180,Ryan O'Reilly,1185.9512,20232024
921,3,4,6,0.46031,1,25,4,Sgarbossa,0,2,...,0,1,0,0,0.14814,L,27,Michael Sgarbossa,658.2400,20232024
922,4,1,5,0.40000,0,18,1,Lycksell,0,6,...,0,0,0,0,0.03448,L,29,Olle Lycksell,596.7777,20232024


In [5]:
df_sum.drop(columns=["lastName"], inplace=True)
import re

def to_snake(name: str) -> str:
    # splitta CamelCase till snake_case
    s = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
    s = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s)
    return s.lower()

df_sum = df_sum.rename(columns={col: to_snake(col) for col in df_sum.columns})
df_sum.info

<bound method DataFrame.info of      assists  ev_goals  ev_points  faceoff_win_pct  game_winning_goals  \
0          2         0          2          0.44000                   0   
1          1         2          3          0.50625                   0   
2          2         0          2              NaN                   0   
3         47        18         43          0.37223                   4   
4         27        34         52          0.25000                   5   
..       ...       ...        ...              ...                 ...   
919       49        14         38          0.47967                   3   
920       43        12         41          0.53906                   1   
921        3         4          6          0.46031                   1   
922        4         1          5          0.40000                   0   
923        2         0          2              NaN                   0   

     games_played  goals  ot_goals  penalty_minutes  player_id  ...  pp_goals  

In [6]:
fs = project.get_feature_store() 

In [7]:
player_season_stats_fg = fs.get_or_create_feature_group(
    name='player_season_stats',
    description='Stats of players for each season',
    version=1,
    primary_key=['player_id','skater_full_name', 'season_id'],
    #event_time="date",
)

In [8]:
player_season_stats_fg.insert(df_sum)

Uploading Dataframe: 100.00% |██████████| Rows 924/924 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: player_season_stats_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/player_season_stats_1_offline_fg_materialization/executions


(Job('player_season_stats_1_offline_fg_materialization', 'SPARK'), None)