In [1]:
import os
import sys
from pathlib import Path

# lägg till projektroten (mappen ovanför notebooks/) på sys.path
root_dir = Path().absolute()

if root_dir.parts[-1:] == ('notebooks',):
    root_dir = Path(*root_dir.parts[:-1])

root_dir = str(root_dir) 
print(f"Root dir: {root_dir}")
print("Local environment")

if root_dir not in sys.path:
    sys.path.append(root_dir)
    print(f"Added the following directory to the PYTHONPATH: {root_dir}")

Root dir: /Users/jacobbjareklint/Code/GitHub/hockey-agent
Local environment
Added the following directory to the PYTHONPATH: /Users/jacobbjareklint/Code/GitHub/hockey-agent


In [None]:
import hopsworks
import requests
import pandas as pd
from config import settings
import util

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
project = hopsworks.login(
    project=settings.HOPSWORKS_PROJECT,
    api_key_value=settings.HOPSWORKS_API_KEY,
    host = settings.HOPSWORKS_HOST
)

2025-12-19 10:49:54,905 INFO: Initializing external client
2025-12-19 10:49:54,906 INFO: Base URL: https://eu-west.cloud.hopsworks.ai:443
2025-12-19 10:49:56,214 INFO: Python Engine initialized.

Logged in to project, explore it here https://eu-west.cloud.hopsworks.ai:443/p/3193


In [None]:
import datetime
today = datetime.date.today()

In [None]:
STATS_BASE = settings.NHL_STATS_BASE_URL
season_id = util.get_season(today)
endpoiint = "en/team/summary"

url = f"{STATS_BASE}/{endpoiint}"
cayenne = f"gameTypeId=2 and seasonId={season_id}"

base_params = {
    "isAggregate": "false",
    "isGame": "false",
    "start": 0,
    "limit": -1,
    "cayenneExp": cayenne,
}

resp = requests.get(url, params=base_params, timeout=20)
resp.raise_for_status()

summary = resp.json()["data"]
df_sum = pd.DataFrame(summary)
df_sum["seasonId"] = season_id
print(df_sum.columns.tolist())

df_sum

['faceoffWinPct', 'gamesPlayed', 'goalsAgainst', 'goalsAgainstPerGame', 'goalsFor', 'goalsForPerGame', 'losses', 'otLosses', 'penaltyKillNetPct', 'penaltyKillPct', 'pointPct', 'points', 'powerPlayNetPct', 'powerPlayPct', 'regulationAndOtWins', 'seasonId', 'shotsAgainstPerGame', 'shotsForPerGame', 'teamFullName', 'teamId', 'teamShutouts', 'ties', 'wins', 'winsInRegulation', 'winsInShootout']


Unnamed: 0,faceoffWinPct,gamesPlayed,goalsAgainst,goalsAgainstPerGame,goalsFor,goalsForPerGame,losses,otLosses,penaltyKillNetPct,penaltyKillPct,...,seasonId,shotsAgainstPerGame,shotsForPerGame,teamFullName,teamId,teamShutouts,ties,wins,winsInRegulation,winsInShootout
0,0.501578,34,104,3.05882,96,2.82352,14,1,0.8,0.757895,...,20252026,28.58823,29.44117,New Jersey Devils,1,1,,19,12,3
1,0.516693,34,93,2.73529,99,2.91176,12,3,0.875,0.822917,...,20252026,28.14705,28.55882,New York Islanders,2,4,,19,13,3
2,0.541518,36,96,2.66666,90,2.5,15,4,0.838383,0.818182,...,20252026,27.97222,26.38888,New York Rangers,3,2,,17,11,1
3,0.494731,33,92,2.78787,94,2.84848,10,6,0.817307,0.817308,...,20252026,25.81818,24.90909,Philadelphia Flyers,4,0,,17,10,5
4,0.498863,33,101,3.0606,103,3.12121,10,9,0.821782,0.811882,...,20252026,28.60606,28.30303,Pittsburgh Penguins,5,3,,14,13,0
5,0.516739,35,109,3.11428,112,3.2,15,0,0.813953,0.806202,...,20252026,30.25714,26.91428,Boston Bruins,6,0,,20,14,2
6,0.438845,33,112,3.39393,101,3.0606,14,4,0.885416,0.854167,...,20252026,29.72727,28.33333,Buffalo Sabres,7,1,,15,11,2
7,0.496999,34,116,3.41176,108,3.17647,12,4,0.790909,0.772728,...,20252026,27.08823,25.44117,Montréal Canadiens,8,0,,18,11,2
8,0.564889,33,104,3.15151,102,3.0909,13,4,0.722222,0.711112,...,20252026,25.54545,27.54545,Ottawa Senators,9,1,,16,11,3
9,0.568025,33,107,3.24242,105,3.18181,13,5,0.855555,0.833334,...,20252026,31.24242,26.96969,Toronto Maple Leafs,10,1,,15,11,0


In [None]:
df_sum = df_sum.rename(columns={col: util.to_snake(col) for col in df_sum.columns})
print(df_sum.columns.tolist())
df_sum.info

['faceoff_win_pct', 'games_played', 'goals_against', 'goals_against_per_game', 'goals_for', 'goals_for_per_game', 'losses', 'ot_losses', 'penalty_kill_net_pct', 'penalty_kill_pct', 'point_pct', 'points', 'power_play_net_pct', 'power_play_pct', 'regulation_and_ot_wins', 'season_id', 'shots_against_per_game', 'shots_for_per_game', 'team_full_name', 'team_id', 'team_shutouts', 'ties', 'wins', 'wins_in_regulation', 'wins_in_shootout']


<bound method DataFrame.info of     faceoff_win_pct  games_played  goals_against  goals_against_per_game  \
0          0.501578            34            104                 3.05882   
1          0.516693            34             93                 2.73529   
2          0.541518            36             96                 2.66666   
3          0.494731            33             92                 2.78787   
4          0.498863            33            101                 3.06060   
5          0.516739            35            109                 3.11428   
6          0.438845            33            112                 3.39393   
7          0.496999            34            116                 3.41176   
8          0.564889            33            104                 3.15151   
9          0.568025            33            107                 3.24242   
10         0.496885            33             90                 2.72727   
11         0.469791            33            102        

In [7]:
float_cols = ['faceoff_win_pct', 'games_played', 'goals_against', 'goals_against_per_game', 'goals_for', 
              'goals_for_per_game', 'losses', 'ot_losses', 'penalty_kill_net_pct', 'penalty_kill_pct', 'point_pct',
                'points', 'power_play_net_pct', 'power_play_pct', 'regulation_and_ot_wins', 'shots_against_per_game', 
                'shots_for_per_game', 'team_id', 'team_shutouts', 'ties', 'wins', 
                'wins_in_regulation', 'wins_in_shootout']



df_sum[float_cols] = df_sum[float_cols].astype("float64")

In [8]:
fs = project.get_feature_store()

teams_fg = fs.get_feature_group(name = 'teams', version = 1,)

teams_fg.insert(df_sum)

Uploading Dataframe: 100.00% |██████████| Rows 32/32 | Elapsed Time: 00:00 | Remaining Time: 00:00


Launching job: teams_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://eu-west.cloud.hopsworks.ai:443/p/3193/jobs/named/teams_1_offline_fg_materialization/executions


(Job('teams_1_offline_fg_materialization', 'SPARK'), None)