### Testing access to the NHL API

In [1]:
import pandas as pd
from nhlpy.api.query.builder import QueryBuilder, QueryContext
from nhlpy.nhl_client import NHLClient
from nhlpy.api.query.filters.franchise import FranchiseQuery
from nhlpy.api.query.filters.shoot_catch import ShootCatchesQuery
from nhlpy.api.query.filters.draft import DraftQuery
from nhlpy.api.query.filters.season import SeasonQuery
from nhlpy.api.query.filters.game_type import GameTypeQuery
from nhlpy.api.query.filters.position import PositionQuery, PositionTypes
from nhlpy.api.query.filters.status import StatusQuery
from nhlpy.api.query.filters.opponent import OpponentQuery
from nhlpy.api.query.filters.home_road import HomeRoadQuery
from nhlpy.api.query.filters.experience import ExperienceQuery
from nhlpy.api.query.filters.decision import DecisionQuery

In [2]:
client = NHLClient(verbose=True)

In [3]:
print(client)

<nhlpy.nhl_client.NHLClient object at 0x0000018FC5443BF0>


## Team info prep

In [4]:
team_info = client.teams.teams_info(date="2024-10-31")

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/standings/2024-10-31 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/standings/2024-10-31
INFO:httpx:HTTP Request: GET https://api.nhle.com/stats/rest/en/franchise "HTTP/1.1 200 OK"
INFO:root:API URL: https://api.nhle.com/stats/rest/en/franchise


In [5]:
df_team_info = pd.DataFrame(team_info)

In [6]:
df_team_info.head()

Unnamed: 0,conference,division,name,common_name,abbr,logo,franchise_id
0,"{'abbr': 'W', 'name': 'Western'}","{'abbr': 'C', 'name': 'Central'}",Winnipeg Jets,Jets,WPG,https://assets.nhle.com/logos/nhl/svg/WPG_ligh...,35
1,"{'abbr': 'E', 'name': 'Eastern'}","{'abbr': 'M', 'name': 'Metropolitan'}",New Jersey Devils,Devils,NJD,https://assets.nhle.com/logos/nhl/svg/NJD_ligh...,23
2,"{'abbr': 'W', 'name': 'Western'}","{'abbr': 'P', 'name': 'Pacific'}",Vegas Golden Knights,Golden Knights,VGK,https://assets.nhle.com/logos/nhl/svg/VGK_ligh...,38
3,"{'abbr': 'E', 'name': 'Eastern'}","{'abbr': 'A', 'name': 'Atlantic'}",Florida Panthers,Panthers,FLA,https://assets.nhle.com/logos/nhl/svg/FLA_ligh...,33
4,"{'abbr': 'E', 'name': 'Eastern'}","{'abbr': 'M', 'name': 'Metropolitan'}",Washington Capitals,Capitals,WSH,https://assets.nhle.com/logos/nhl/svg/WSH_seco...,24


Data in some fields is stored as a dictionary with abbreviated and full names, namely for Conference and Division.

We can write a function that would access a certain value under a given key:

In [7]:
df_team_info["conference"].apply(lambda x: x.get("name") if isinstance(x, dict) else x).unique()

array(['Western', 'Eastern'], dtype=object)

Now wrapping it under a function that accepts a column name and a dictionary key as arguments.

In [8]:
def get_default_value(column, get_value):
    return column.apply(lambda x: x.get(get_value) if isinstance(x, dict) else x)

Create a loop to go through columns we want to change.

In [9]:
columns_to_change = ["conference", "division"]

for column in columns_to_change:
    df_team_info[column] = get_default_value(df_team_info[column], "name")

In [10]:
df_team_info.head()

Unnamed: 0,conference,division,name,common_name,abbr,logo,franchise_id
0,Western,Central,Winnipeg Jets,Jets,WPG,https://assets.nhle.com/logos/nhl/svg/WPG_ligh...,35
1,Eastern,Metropolitan,New Jersey Devils,Devils,NJD,https://assets.nhle.com/logos/nhl/svg/NJD_ligh...,23
2,Western,Pacific,Vegas Golden Knights,Golden Knights,VGK,https://assets.nhle.com/logos/nhl/svg/VGK_ligh...,38
3,Eastern,Atlantic,Florida Panthers,Panthers,FLA,https://assets.nhle.com/logos/nhl/svg/FLA_ligh...,33
4,Eastern,Metropolitan,Washington Capitals,Capitals,WSH,https://assets.nhle.com/logos/nhl/svg/WSH_seco...,24


To make iteratation through teams easier, save it as a series. 

In [11]:
s_teams = df_team_info["abbr"]

In [12]:
s_teams.info()

<class 'pandas.core.series.Series'>
RangeIndex: 32 entries, 0 to 31
Series name: abbr
Non-Null Count  Dtype 
--------------  ----- 
32 non-null     object
dtypes: object(1)
memory usage: 388.0+ bytes


## Roster Data analysis

In roster table, we want to get players attributes for further filtering

In [13]:
# Select a season and a team to perform a query
season_id = 20242025
team_abbr = "COL"

In [14]:
roster_data = client.teams.roster(team_abbr=team_abbr, season=season_id)

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/COL/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/COL/20242025


Let's inspect the object we received from the query.

In [15]:
roster_data

{'forwards': [{'id': 8479525,
   'headshot': 'https://assets.nhle.com/mugs/nhl/20242025/COL/8479525.png',
   'firstName': {'default': 'Ross'},
   'lastName': {'default': 'Colton'},
   'sweaterNumber': 20,
   'positionCode': 'C',
   'shootsCatches': 'L',
   'heightInInches': 72,
   'weightInPounds': 194,
   'heightInCentimeters': 183,
   'weightInKilograms': 88,
   'birthDate': '1996-09-11',
   'birthCity': {'default': 'Robbinsville'},
   'birthCountry': 'USA',
   'birthStateProvince': {'default': 'NJ'}},
  {'id': 8477494,
   'headshot': 'https://assets.nhle.com/mugs/nhl/20242025/COL/8477494.png',
   'firstName': {'default': 'Jonathan'},
   'lastName': {'default': 'Drouin'},
   'sweaterNumber': 27,
   'positionCode': 'L',
   'shootsCatches': 'L',
   'heightInInches': 71,
   'weightInPounds': 198,
   'heightInCentimeters': 180,
   'weightInKilograms': 90,
   'birthDate': '1995-03-28',
   'birthCity': {'default': 'Ste-Agathe'},
   'birthCountry': 'CAN',
   'birthStateProvince': {'default'

The returned dictionary contains positions as keys and list of dictionaries with individual players' info as values. Let's inspect the goalies record. 

In [16]:
roster_data["goalies"]

[{'id': 8481020,
  'headshot': 'https://assets.nhle.com/mugs/nhl/20242025/COL/8481020.png',
  'firstName': {'default': 'Justus'},
  'lastName': {'default': 'Annunen'},
  'sweaterNumber': 60,
  'positionCode': 'G',
  'shootsCatches': 'L',
  'heightInInches': 76,
  'weightInPounds': 210,
  'heightInCentimeters': 193,
  'weightInKilograms': 95,
  'birthDate': '2000-03-11',
  'birthCity': {'default': 'Kempele'},
  'birthCountry': 'FIN'},
 {'id': 8480382,
  'headshot': 'https://assets.nhle.com/mugs/nhl/20242025/COL/8480382.png',
  'firstName': {'default': 'Alexandar',
   'cs': 'Alexandr',
   'fi': 'Aleksandar',
   'sk': 'Alexander'},
  'lastName': {'default': 'Georgiev',
   'cs': 'Georgijev',
   'fi': 'Georgijev',
   'sk': 'Georgijev'},
  'sweaterNumber': 40,
  'positionCode': 'G',
  'shootsCatches': 'L',
  'heightInInches': 73,
  'weightInPounds': 178,
  'heightInCentimeters': 185,
  'weightInKilograms': 81,
  'birthDate': '1996-02-10',
  'birthCity': {'default': 'Ruse'},
  'birthCountry':

We can combine all three values for defensemen, forwards and goales in a list and convert that into a dataframe for cleaning and analysis.

In [17]:
roster_data_combined = roster_data["forwards"] + roster_data["defensemen"] + roster_data ["goalies"]

In [18]:
df_team_roster = pd.DataFrame(roster_data_combined)

In [19]:
df_team_roster.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   24 non-null     int64 
 1   headshot             24 non-null     object
 2   firstName            24 non-null     object
 3   lastName             24 non-null     object
 4   sweaterNumber        24 non-null     int64 
 5   positionCode         24 non-null     object
 6   shootsCatches        24 non-null     object
 7   heightInInches       24 non-null     int64 
 8   weightInPounds       24 non-null     int64 
 9   heightInCentimeters  24 non-null     int64 
 10  weightInKilograms    24 non-null     int64 
 11  birthDate            24 non-null     object
 12  birthCity            24 non-null     object
 13  birthCountry         24 non-null     object
 14  birthStateProvince   15 non-null     object
dtypes: int64(6), object(9)
memory usage: 2.9+ KB


In [20]:
df_team_roster.head()

Unnamed: 0,id,headshot,firstName,lastName,sweaterNumber,positionCode,shootsCatches,heightInInches,weightInPounds,heightInCentimeters,weightInKilograms,birthDate,birthCity,birthCountry,birthStateProvince
0,8479525,https://assets.nhle.com/mugs/nhl/20242025/COL/...,{'default': 'Ross'},{'default': 'Colton'},20,C,L,72,194,183,88,1996-09-11,{'default': 'Robbinsville'},USA,{'default': 'NJ'}
1,8477494,https://assets.nhle.com/mugs/nhl/20242025/COL/...,{'default': 'Jonathan'},{'default': 'Drouin'},27,L,L,71,198,180,90,1995-03-28,{'default': 'Ste-Agathe'},CAN,{'default': 'QC'}
2,8483930,https://assets.nhle.com/mugs/nhl/20242025/COL/...,{'default': 'Ivan'},{'default': 'Ivan'},82,C,L,72,190,183,86,2002-08-20,{'default': 'Ostrava'},CZE,
3,8480448,https://assets.nhle.com/mugs/nhl/20242025/COL/...,{'default': 'Parker'},{'default': 'Kelly'},17,C,L,73,185,185,84,1999-05-14,{'default': 'Camrose'},CAN,{'default': 'AB'}
4,8481641,https://assets.nhle.com/mugs/nhl/20242025/COL/...,{'default': 'Joel'},{'default': 'Kiviranta'},94,L,L,71,185,180,84,1996-03-23,"{'default': 'Vantaa', 'sv': 'Vanda'}",FIN,


Data in some of the fields is represented as dictionaries with information like First and Last Names and Birth City can be presented in multiple languages. Let's inspect them to understand how we can transform the data to make analysis easy.   

In [21]:
df_team_roster.loc[ : ,["firstName", "lastName", "birthCity", "birthStateProvince", "birthCountry"]]

Unnamed: 0,firstName,lastName,birthCity,birthStateProvince,birthCountry
0,{'default': 'Ross'},{'default': 'Colton'},{'default': 'Robbinsville'},{'default': 'NJ'},USA
1,{'default': 'Jonathan'},{'default': 'Drouin'},{'default': 'Ste-Agathe'},{'default': 'QC'},CAN
2,{'default': 'Ivan'},{'default': 'Ivan'},{'default': 'Ostrava'},,CZE
3,{'default': 'Parker'},{'default': 'Kelly'},{'default': 'Camrose'},{'default': 'AB'},CAN
4,{'default': 'Joel'},{'default': 'Kiviranta'},"{'default': 'Vantaa', 'sv': 'Vanda'}",,FIN
5,{'default': 'Nikolai'},{'default': 'Kovalenko'},{'default': 'Raleigh'},{'default': 'NC'},USA
6,{'default': 'Artturi'},{'default': 'Lehkonen'},{'default': 'Piikkio'},,FIN
7,{'default': 'Nathan'},{'default': 'MacKinnon'},{'default': 'Halifax'},{'default': 'NS'},CAN
8,{'default': 'Casey'},{'default': 'Mittelstadt'},{'default': 'Eden Prairie'},{'default': 'MN'},USA
9,"{'default': 'Valeri', 'cs': 'Valerij', 'sk': '...","{'default': 'Nichushkin', 'cs': 'Ničuškin', 'f...","{'default': 'Chelyabinsk', 'cs': 'Čeljabinsk',...",,RUS


We can reuse the function we created earlier to extract values under "default" key, which is English spellings.

In [22]:
# Test the function on the Last Name column
get_default_value(df_team_roster["lastName"], "default").tail(10)


14       Girard
15    Kylington
16       Ludvig
17        Makar
18     Malinski
19       Manson
20        Toews
21      Annunen
22     Georgiev
23        Miner
Name: lastName, dtype: object

Now in the dataframe, we apply the function to the fields we want to be cleaned.

In [23]:
columns_to_change = ["firstName", "lastName", "birthCity", "birthStateProvince"]

for column in columns_to_change:
    df_team_roster[column] = get_default_value(df_team_roster[column], "default")

In [24]:
df_team_roster.loc[df_team_roster["lastName"] == "Georgiev"].T

Unnamed: 0,22
id,8480382
headshot,https://assets.nhle.com/mugs/nhl/20242025/COL/...
firstName,Alexandar
lastName,Georgiev
sweaterNumber,40
positionCode,G
shootsCatches,L
heightInInches,73
weightInPounds,178
heightInCentimeters,185


For the purpose of our project we don't need certain fields such as "birthCity", "birthStateProvince", "weightInPounds" etc.

In [25]:
df_team_roster.drop(
    columns=["heightInInches", "weightInPounds", "birthCity", "birthStateProvince"],
    axis=1,
    inplace=True
)

In [26]:
df_team_roster.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   24 non-null     int64 
 1   headshot             24 non-null     object
 2   firstName            24 non-null     object
 3   lastName             24 non-null     object
 4   sweaterNumber        24 non-null     int64 
 5   positionCode         24 non-null     object
 6   shootsCatches        24 non-null     object
 7   heightInCentimeters  24 non-null     int64 
 8   weightInKilograms    24 non-null     int64 
 9   birthDate            24 non-null     object
 10  birthCountry         24 non-null     object
dtypes: int64(4), object(7)
memory usage: 2.2+ KB


To be able to repeatedly clean data for all franchises, we create a function that will be reused in a loop.

In [27]:
def get_team_roster(team_abbr, season_id):
    
    # Load data
    data = client.teams.roster(team_abbr=team_abbr, season=season_id)
    data_combined = data["forwards"] + data["defensemen"] + data ["goalies"]
    df = pd.DataFrame(data_combined)

    if df.shape[1] > 0:
        # Clean columns
        columns_to_change = ["firstName", "lastName", "birthCity", "birthStateProvince"]
        for column in columns_to_change:
            df[column] = get_default_value(df[column], "default")

        # Drop columns
        df.drop(
            columns=["heightInInches", "weightInPounds", "birthCity", "birthStateProvince"],
            axis=1,
            inplace=True
        )

    return df

In [28]:
get_team_roster("DAL", "20242025").head()

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/DAL/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/DAL/20242025


Unnamed: 0,id,headshot,firstName,lastName,sweaterNumber,positionCode,shootsCatches,heightInCentimeters,weightInKilograms,birthDate,birthCountry
0,8473994,https://assets.nhle.com/mugs/nhl/20242025/DAL/...,Jamie,Benn,14,L,L,191,93,1989-07-18,CAN
1,8476278,https://assets.nhle.com/mugs/nhl/20242025/DAL/...,Colin,Blackwell,15,C,R,173,86,1993-03-28,USA
2,8482145,https://assets.nhle.com/mugs/nhl/20242025/DAL/...,Mavrik,Bourque,22,C,R,180,82,2002-01-08,CAN
3,8480840,https://assets.nhle.com/mugs/nhl/20242025/DAL/...,Oskar,Bäck,10,C,L,193,92,2000-03-12,SWE
4,8474149,https://assets.nhle.com/mugs/nhl/20242025/DAL/...,Evgenii,Dadonov,63,R,L,180,85,1989-03-12,RUS


In [29]:
season_id

20242025

In [30]:
list_of_roster_dfs = []

for index, team in s_teams.items():
    df = get_team_roster(team, season_id)
    list_of_roster_dfs.append(df)


INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/WPG/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/WPG/20242025
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/NJD/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/NJD/20242025
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/VGK/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/VGK/20242025
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/FLA/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/FLA/20242025
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/WSH/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/WSH/20242025
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/roster/DAL/20242025 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/roster/DAL/20242025
INFO:httpx:HTTP Request: GET

In [31]:
list_of_roster_dfs

[         id                                           headshot  firstName  \
 0   8478891  https://assets.nhle.com/mugs/nhl/20242025/WPG/...      Mason   
 1   8480289  https://assets.nhle.com/mugs/nhl/20242025/WPG/...     Morgan   
 2   8478398  https://assets.nhle.com/mugs/nhl/20242025/WPG/...       Kyle   
 3   8477940  https://assets.nhle.com/mugs/nhl/20242025/WPG/...    Nikolaj   
 4   8481019  https://assets.nhle.com/mugs/nhl/20242025/WPG/...      David   
 5   8480113  https://assets.nhle.com/mugs/nhl/20242025/WPG/...       Alex   
 6   8480845  https://assets.nhle.com/mugs/nhl/20242025/WPG/...     Rasmus   
 7   8476392  https://assets.nhle.com/mugs/nhl/20242025/WPG/...       Adam   
 8   8476480  https://assets.nhle.com/mugs/nhl/20242025/WPG/...  Vladislav   
 9   8475799  https://assets.nhle.com/mugs/nhl/20242025/WPG/...       Nino   
 10  8482149  https://assets.nhle.com/mugs/nhl/20242025/WPG/...       Cole   
 11  8476460  https://assets.nhle.com/mugs/nhl/20242025/WPG/... 

In [82]:
df_team_roster_combined = pd.concat(list_of_roster_dfs)

In [83]:
df_team_roster_combined.info() 

<class 'pandas.core.frame.DataFrame'>
Index: 766 entries, 0 to 21
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   id                   766 non-null    int64 
 1   headshot             766 non-null    object
 2   firstName            766 non-null    object
 3   lastName             766 non-null    object
 4   sweaterNumber        766 non-null    int64 
 5   positionCode         766 non-null    object
 6   shootsCatches        766 non-null    object
 7   heightInCentimeters  766 non-null    int64 
 8   weightInKilograms    766 non-null    int64 
 9   birthDate            766 non-null    object
 10  birthCountry         766 non-null    object
dtypes: int64(4), object(7)
memory usage: 71.8+ KB


In [84]:
df_team_roster_combined.loc[df_team_roster_combined["heightInCentimeters"] == df_team_roster_combined["heightInCentimeters"].max()]

Unnamed: 0,id,headshot,firstName,lastName,sweaterNumber,positionCode,shootsCatches,heightInCentimeters,weightInKilograms,birthDate,birthCountry
19,8474574,https://assets.nhle.com/mugs/nhl/20242025/VAN/...,Tyler,Myers,57,D,R,203,104,1990-02-01,USA
6,8483609,https://assets.nhle.com/mugs/nhl/20242025/CGY/...,Adam,Klapka,43,R,R,203,107,2000-09-14,CZE


In [85]:
df_team_roster_combined.loc[df_team_roster_combined["id"] == 8479639].T

Unnamed: 0,13
id,8479639
headshot,https://assets.nhle.com/mugs/nhl/20242025/WPG/...
firstName,Dylan
lastName,Coghlan
sweaterNumber,52
positionCode,D
shootsCatches,R
heightInCentimeters,188
weightInKilograms,94
birthDate,1998-02-19


In [86]:
df_team_roster_combined['positionCode'].unique()

array(['C', 'L', 'R', 'D', 'G'], dtype=object)

In [87]:
s_skater_ids = df_team_roster_combined.loc[df_team_roster_combined['positionCode'] != "G"]['id']

In [38]:
s_skater_ids.info()

s_skater_ids.head()

<class 'pandas.core.series.Series'>
Index: 696 entries, 0 to 19
Series name: id
Non-Null Count  Dtype
--------------  -----
696 non-null    int64
dtypes: int64(1)
memory usage: 10.9 KB


0    8478891
1    8480289
2    8478398
3    8477940
4    8481019
Name: id, dtype: int64

In [88]:
s_goalie_ids = df_team_roster_combined.loc[df_team_roster_combined['positionCode'] == "G"]['id']

In [89]:
s_goalie_ids.info()

s_goalie_ids.head()

<class 'pandas.core.series.Series'>
Index: 70 entries, 21 to 21
Series name: id
Non-Null Count  Dtype
--------------  -----
70 non-null     int64
dtypes: int64(1)
memory usage: 1.1 KB


21    8477480
22    8476945
22    8474596
23    8474593
21    8478499
Name: id, dtype: int64

## Game logs data prep

Game logs method requires player id, season id and game type arguments.
We can reuse the season id we declared earlier, while declaring the player id (Nathan MacKinnon) and game type (Regular Season) below.

In [41]:
player_id = 8477492
game_type = 2

In [42]:
game_logs_data = client.stats.player_game_log(player_id=player_id, season_id=season_id, game_type=game_type)

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8477492/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8477492/game-log/20242025/2


In [43]:
df_game_logs = pd.DataFrame(game_logs_data)

In [44]:
df_game_logs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 21 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   gameId              18 non-null     int64 
 1   teamAbbrev          18 non-null     object
 2   homeRoadFlag        18 non-null     object
 3   gameDate            18 non-null     object
 4   goals               18 non-null     int64 
 5   assists             18 non-null     int64 
 6   commonName          18 non-null     object
 7   opponentCommonName  18 non-null     object
 8   points              18 non-null     int64 
 9   plusMinus           18 non-null     int64 
 10  powerPlayGoals      18 non-null     int64 
 11  powerPlayPoints     18 non-null     int64 
 12  gameWinningGoals    18 non-null     int64 
 13  otGoals             18 non-null     int64 
 14  shots               18 non-null     int64 
 15  shifts              18 non-null     int64 
 16  shorthandedGoals    18 non-n

Inspecting the most recent record and transposing for better readability.

In [45]:
df_game_logs.head(1).T

Unnamed: 0,0
gameId,2024020268
teamAbbrev,COL
homeRoadFlag,H
gameDate,2024-11-15
goals,0
assists,0
commonName,{'default': 'Avalanche'}
opponentCommonName,{'default': 'Capitals'}
points,0
plusMinus,-2


There are a few columns we can drop as they are not required / could be found in the team info table.

In [46]:
df_game_logs.drop(columns=["commonName", "opponentCommonName"], inplace=True)

For TOI (Time on Ice) field, we convert the values to seconds to allow easier aggregations across environments.

In [47]:
# Check if the TOI is in MM:SS format
if len(df_game_logs["toi"][0].split(":")) == 2:
    print(int(df_game_logs["toi"][0].split(":")[0]) * 60 + int(df_game_logs["toi"][0].split(":")[1]))
else:
    print(int(df_game_logs["toi"][0].split(":")[0]) * 60 * 60 + int(df_game_logs["toi"][0].split(":")[1]) * 60 + int(df_game_logs["toi"][0].split(":")[2]))

1326


Translate the above if-else into a lambda function that can be applied to all rows

In [48]:
df_game_logs["toi"].apply(
    lambda x: 
        int(x.split(":")[0]) * 60 + int(x.split(":")[1])
        if len(x.split(":")) == 2
        else int(x.split(":")[0]) * 60 * 60 + int(x.split(":")[1]) * 60 + int(x.split(":")[2])
    )

0     1326
1     1485
2     1469
3     1579
4     1469
5     1461
6     1352
7     1498
8     1497
9     1394
10    1257
11    1311
12    1200
13    1428
14    1428
15    1380
16    1403
17    1293
Name: toi, dtype: int64

In [49]:
df_game_logs["toi"] = df_game_logs["toi"].apply(
    lambda x: 
        int(x.split(":")[0]) * 60 + int(x.split(":")[1])
        if len(x.split(":")) == 2
        else int(x.split(":")[0]) * 60 * 60 + int(x.split(":")[1]) * 60 + int(x.split(":")[2])
    )

As we are going to loop through players in each franchise, we assign player id and season id for further analysis as they aren't included.

In [50]:
df_game_logs["playerId"] = player_id
df_game_logs["seasonId"] = season_id

In [51]:
df_game_logs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   gameId             18 non-null     int64 
 1   teamAbbrev         18 non-null     object
 2   homeRoadFlag       18 non-null     object
 3   gameDate           18 non-null     object
 4   goals              18 non-null     int64 
 5   assists            18 non-null     int64 
 6   points             18 non-null     int64 
 7   plusMinus          18 non-null     int64 
 8   powerPlayGoals     18 non-null     int64 
 9   powerPlayPoints    18 non-null     int64 
 10  gameWinningGoals   18 non-null     int64 
 11  otGoals            18 non-null     int64 
 12  shots              18 non-null     int64 
 13  shifts             18 non-null     int64 
 14  shorthandedGoals   18 non-null     int64 
 15  shorthandedPoints  18 non-null     int64 
 16  opponentAbbrev     18 non-null     object
 17 

Create a function to load and clean game logs data.

In [52]:
def get_game_logs(player_id, season_id, game_type):
    
    # Get the data
    data = client.stats.player_game_log(player_id=player_id, season_id=season_id, game_type=game_type)
    df = pd.DataFrame(data)
    if df.shape[1] > 0:
        # Drop columns
        df.drop(columns=["commonName", "opponentCommonName"], inplace=True)
        
        # Format time on ice
        df["toiInSeconds"] = df["toi"].apply(
            lambda x: 
                int(x.split(":")[0]) * 60 + int(x.split(":")[1]) 
                if len(x.split(":")) == 2 
                else int(x.split(":")[0]) * 60 * 60 + int(x.split(":")[1]) * 60 + int(x.split(":")[2]))
        df.drop(columns=["toi"], inplace=True)

        # Append player and season ids
        df["playerId"] = player_id
        df["seasonId"] = season_id

    return df

In [53]:
get_game_logs(player_id, season_id, game_type)

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8477492/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8477492/game-log/20242025/2


Unnamed: 0,gameId,teamAbbrev,homeRoadFlag,gameDate,goals,assists,points,plusMinus,powerPlayGoals,powerPlayPoints,...,otGoals,shots,shifts,shorthandedGoals,shorthandedPoints,opponentAbbrev,pim,toiInSeconds,playerId,seasonId
0,2024020268,COL,H,2024-11-15,0,0,0,-2,0,0,...,0,0,24,0,0,WSH,0,1326,8477492,20242025
1,2024020255,COL,H,2024-11-13,0,3,3,3,0,0,...,0,3,27,0,0,LAK,0,1485,8477492,20242025
2,2024020243,COL,H,2024-11-11,0,1,1,1,0,0,...,0,5,30,0,0,NSH,0,1469,8477492,20242025
3,2024020233,COL,H,2024-11-09,1,3,4,3,0,1,...,0,7,28,0,0,CAR,0,1579,8477492,20242025
4,2024020217,COL,R,2024-11-07,0,0,0,0,0,0,...,0,4,27,0,0,WPG,0,1469,8477492,20242025
5,2024020202,COL,H,2024-11-05,0,5,5,3,0,2,...,0,4,24,0,0,SEA,0,1461,8477492,20242025
6,2024020182,COL,R,2024-11-02,1,1,2,-1,0,1,...,0,6,29,0,0,NSH,0,1352,8477492,20242025
7,2024020157,COL,H,2024-10-30,0,2,2,0,0,1,...,0,2,25,0,0,TBL,0,1498,8477492,20242025
8,2024020144,COL,H,2024-10-28,1,0,1,-4,1,1,...,0,1,25,0,0,CHI,0,1497,8477492,20242025
9,2024020139,COL,H,2024-10-27,1,1,2,1,0,0,...,0,5,27,0,0,OTT,0,1394,8477492,20242025


Goalies game logs stats are returned in a different format to skaters.

In [54]:
goalie_player_id = 8477480

In [55]:
game_logs_data = client.stats.player_game_log(player_id=goalie_player_id, season_id=season_id, game_type=game_type)

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8477480/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8477480/game-log/20242025/2


In [56]:
df_game_logs_goalies = pd.DataFrame(game_logs_data)

In [57]:
df_game_logs_goalies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 17 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   gameId              4 non-null      int64  
 1   teamAbbrev          4 non-null      object 
 2   homeRoadFlag        4 non-null      object 
 3   gameDate            4 non-null      object 
 4   goals               4 non-null      int64  
 5   assists             4 non-null      int64  
 6   commonName          4 non-null      object 
 7   opponentCommonName  4 non-null      object 
 8   gamesStarted        4 non-null      int64  
 9   decision            4 non-null      object 
 10  shotsAgainst        4 non-null      int64  
 11  goalsAgainst        4 non-null      int64  
 12  savePctg            4 non-null      float64
 13  shutouts            4 non-null      int64  
 14  opponentAbbrev      4 non-null      object 
 15  pim                 4 non-null      int64  
 16  toi         

In [58]:
df_game_logs_goalies.drop(columns=["commonName", "opponentCommonName"], inplace=True)

In [59]:
df_game_logs_goalies["playerId"] = goalie_player_id
df_game_logs_goalies["seasonId"] = season_id

In [60]:
df_game_logs_goalies.head().T

Unnamed: 0,0,1,2,3
gameId,2024020259,2024020171,2024020129,2024020088
teamAbbrev,WPG,WPG,WPG,WPG
homeRoadFlag,R,R,R,H
gameDate,2024-11-14,2024-11-01,2024-10-26,2024-10-20
goals,0,0,0,0
assists,0,0,0,0
gamesStarted,1,1,1,1
decision,L,W,W,W
shotsAgainst,28,22,29,42
goalsAgainst,3,2,3,3


In [61]:
get_game_logs(goalie_player_id, season_id, game_type)

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8477480/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8477480/game-log/20242025/2


Unnamed: 0,gameId,teamAbbrev,homeRoadFlag,gameDate,goals,assists,gamesStarted,decision,shotsAgainst,goalsAgainst,savePctg,shutouts,opponentAbbrev,pim,toiInSeconds,playerId,seasonId
0,2024020259,WPG,R,2024-11-14,0,0,1,L,28,3,0.892857,0,TBL,0,3478,8477480,20242025
1,2024020171,WPG,R,2024-11-01,0,0,1,W,22,2,0.909091,0,CBJ,0,3600,8477480,20242025
2,2024020129,WPG,R,2024-10-26,0,0,1,W,29,3,0.896552,0,CGY,0,3600,8477480,20242025
3,2024020088,WPG,H,2024-10-20,0,0,1,W,42,3,0.928571,0,PIT,0,3593,8477480,20242025


In [62]:
season_id

20242025

In [63]:
game_type

2

In [73]:
import time
import httpx

In [75]:
list_of_skaters_game_logs_dfs = []

for index, player in s_skater_ids.items():
    try:
        df = get_game_logs(player, season_id, game_type)
        list_of_skaters_game_logs_dfs.append(df)
        time.sleep(0.5)
    except httpx.RequestError as e:
        print(f"request failed for player {player}: {e}")

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8478891/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8478891/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8480289/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8480289/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8478398/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8478398/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8477940/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8477940/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8481019/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8481019/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api

In [71]:
list_of_goalie_game_logs_dfs = []

for index, player in s_goalie_ids.items():
    df = get_game_logs(player, season_id, game_type)
    list_of_goalie_game_logs_dfs.append(df)

INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8477480/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8477480/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8476945/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8476945/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8474596/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8474596/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8474593/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8474593/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api-web.nhle.com/v1/player/8478499/game-log/20242025/2 "HTTP/1.1 200 OK"
INFO:root:API URL: https://api-web.nhle.com/v1/player/8478499/game-log/20242025/2
INFO:httpx:HTTP Request: GET https://api

In [76]:
df_game_logs_goalies = pd.concat(list_of_goalie_game_logs_dfs)

In [77]:
df_game_logs_skaters = pd.concat(list_of_skaters_game_logs_dfs)

In [78]:
df_game_logs_skaters.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9462 entries, 0 to 16
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   gameId             9462 non-null   int64 
 1   teamAbbrev         9462 non-null   object
 2   homeRoadFlag       9462 non-null   object
 3   gameDate           9462 non-null   object
 4   goals              9462 non-null   int64 
 5   assists            9462 non-null   int64 
 6   points             9462 non-null   int64 
 7   plusMinus          9462 non-null   int64 
 8   powerPlayGoals     9462 non-null   int64 
 9   powerPlayPoints    9462 non-null   int64 
 10  gameWinningGoals   9462 non-null   int64 
 11  otGoals            9462 non-null   int64 
 12  shots              9462 non-null   int64 
 13  shifts             9462 non-null   int64 
 14  shorthandedGoals   9462 non-null   int64 
 15  shorthandedPoints  9462 non-null   int64 
 16  opponentAbbrev     9462 non-null   object
 17  pi

In [79]:
df_game_logs_skaters.describe()

Unnamed: 0,gameId,goals,assists,points,plusMinus,powerPlayGoals,powerPlayPoints,gameWinningGoals,otGoals,shots,shifts,shorthandedGoals,shorthandedPoints,pim,toiInSeconds,playerId,seasonId
count,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0,9462.0
mean,2024020000.0,0.175439,0.294547,0.469985,-0.004016,0.035722,0.104312,0.02695,0.00465,1.63866,20.963116,0.003805,0.007187,0.475269,995.699535,8478738.0,20242025.0
std,78.19471,0.430913,0.57261,0.7324,1.212705,0.19396,0.346215,0.161946,0.068037,1.518013,4.795425,0.061568,0.084473,1.279538,268.303023,2741.93,0.0
min,2024020000.0,0.0,0.0,0.0,-5.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,34.0,8470600.0,20242025.0
25%,2024020000.0,0.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,807.0,8476874.0,20242025.0
50%,2024020000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,21.0,0.0,0.0,0.0,989.0,8478483.0,20242025.0
75%,2024020000.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,24.0,0.0,0.0,0.0,1186.75,8480891.0,20242025.0
max,2024020000.0,3.0,5.0,5.0,5.0,2.0,3.0,1.0,1.0,12.0,40.0,1.0,1.0,20.0,1986.0,8484958.0,20242025.0


In [80]:
df_game_logs_goalies.info()

<class 'pandas.core.frame.DataFrame'>
Index: 553 entries, 0 to 3
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   gameId          553 non-null    int64  
 1   teamAbbrev      553 non-null    object 
 2   homeRoadFlag    553 non-null    object 
 3   gameDate        553 non-null    object 
 4   goals           553 non-null    int64  
 5   assists         553 non-null    int64  
 6   gamesStarted    553 non-null    int64  
 7   decision        526 non-null    object 
 8   shotsAgainst    553 non-null    int64  
 9   goalsAgainst    553 non-null    int64  
 10  savePctg        553 non-null    float64
 11  shutouts        553 non-null    int64  
 12  opponentAbbrev  553 non-null    object 
 13  pim             553 non-null    int64  
 14  toiInSeconds    553 non-null    int64  
 15  playerId        553 non-null    int64  
 16  seasonId        553 non-null    int64  
dtypes: float64(1), int64(11), object(5)
memory

In [81]:
df_game_logs_goalies.describe()

Unnamed: 0,gameId,goals,assists,gamesStarted,shotsAgainst,goalsAgainst,savePctg,shutouts,pim,toiInSeconds,playerId,seasonId
count,553.0,553.0,553.0,553.0,553.0,553.0,553.0,553.0,553.0,553.0,553.0,553.0
mean,2024020000.0,0.001808,0.018083,0.954792,27.481013,2.723327,0.891633,0.061483,0.025316,3424.417722,8478328.0,20242025.0
std,78.32131,0.042524,0.133373,0.207948,7.975512,1.594073,0.081112,0.240431,0.223791,588.219477,2244.951,0.0
min,2024020000.0,0.0,0.0,0.0,2.0,0.0,0.4,0.0,0.0,286.0,8470594.0,20242025.0
25%,2024020000.0,0.0,0.0,1.0,23.0,2.0,0.862069,0.0,0.0,3501.0,8476914.0,20242025.0
50%,2024020000.0,0.0,0.0,1.0,27.0,3.0,0.90625,0.0,0.0,3596.0,8478470.0,20242025.0
75%,2024020000.0,0.0,0.0,1.0,33.0,4.0,0.941176,0.0,0.0,3600.0,8480045.0,20242025.0
max,2024020000.0,1.0,1.0,1.0,50.0,8.0,1.0,1.0,2.0,3900.0,8482821.0,20242025.0


## Joining tables into an analytics ready state

We want to join game logs data with player attributes from the team roster dataframe. The output will allow us to perform more advances querying with player attributes in the same view as performance. 

In [93]:
df_skaters_performance = df_game_logs_skaters.merge(df_team_roster_combined, how="left", left_on="playerId", right_on="id")

df_goalies_performance = df_game_logs_goalies.merge(df_team_roster_combined, how="left", left_on="playerId", right_on="id")

In [91]:
df_skaters_performance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9462 entries, 0 to 9461
Data columns (total 32 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   gameId               9462 non-null   int64 
 1   teamAbbrev           9462 non-null   object
 2   homeRoadFlag         9462 non-null   object
 3   gameDate             9462 non-null   object
 4   goals                9462 non-null   int64 
 5   assists              9462 non-null   int64 
 6   points               9462 non-null   int64 
 7   plusMinus            9462 non-null   int64 
 8   powerPlayGoals       9462 non-null   int64 
 9   powerPlayPoints      9462 non-null   int64 
 10  gameWinningGoals     9462 non-null   int64 
 11  otGoals              9462 non-null   int64 
 12  shots                9462 non-null   int64 
 13  shifts               9462 non-null   int64 
 14  shorthandedGoals     9462 non-null   int64 
 15  shorthandedPoints    9462 non-null   int64 
 16  oppone

In [92]:
df_skaters_performance.head(1).T

Unnamed: 0,0
gameId,2024020259
teamAbbrev,WPG
homeRoadFlag,R
gameDate,2024-11-14
goals,0
assists,0
points,0
plusMinus,1
powerPlayGoals,0
powerPlayPoints,0


In [94]:
df_goalies_performance.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 553 entries, 0 to 552
Data columns (total 28 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   gameId               553 non-null    int64  
 1   teamAbbrev           553 non-null    object 
 2   homeRoadFlag         553 non-null    object 
 3   gameDate             553 non-null    object 
 4   goals                553 non-null    int64  
 5   assists              553 non-null    int64  
 6   gamesStarted         553 non-null    int64  
 7   decision             526 non-null    object 
 8   shotsAgainst         553 non-null    int64  
 9   goalsAgainst         553 non-null    int64  
 10  savePctg             553 non-null    float64
 11  shutouts             553 non-null    int64  
 12  opponentAbbrev       553 non-null    object 
 13  pim                  553 non-null    int64  
 14  toiInSeconds         553 non-null    int64  
 15  playerId             553 non-null    int

In [95]:
df_goalies_performance.head(1).T

Unnamed: 0,0
gameId,2024020259
teamAbbrev,WPG
homeRoadFlag,R
gameDate,2024-11-14
goals,0
assists,0
gamesStarted,1
decision,L
shotsAgainst,28
goalsAgainst,3


## Database preparation

With data preparation complete, we look into creating SQLite database to store the data.

In [96]:
import sqlite3

In [97]:
con = sqlite3.connect("hnl.db")

In [98]:
cur = con.cursor()

In [99]:
# declare table names
teams_info_table = "teams_info"
players_roster_table = "players_info"
skaters_game_logs_table = "skaters_game_logs"
goalies_game_logs_table = "goalies_game_logs"

In [None]:
# cur.execute(f"DROP TABLE IF EXISTS skaters_game_logs")

<sqlite3.Cursor at 0x18fc934f540>

In [147]:
cur.execute(f"""
  CREATE TABLE IF NOT EXISTS {teams_info_table} (
    id INTEGER PRIMARY KEY,
    abbr VARCHAR(16),
    name VARCHAR(32),
    logo VARCHAR(64),
    conference VARCHAR(16),
    division VARCHAR(16)
  )
""")

<sqlite3.Cursor at 0x18fc934f540>

In [148]:
cur.execute(f"""
  CREATE TABLE IF NOT EXISTS {players_roster_table} (
    id INTEGER PRIMARY KEY,
    headshot VARCHAR(64),
    firstName VARCHAR(64),
    lastName VARCHAR(64),
    sweaterNumber INTEGER,
    positionCode VARCHAR(8),
    shootsCatches VARCHAR(8),
    heightInCentimeters INTEGER,
    weightInKilograms INTEGER,
    birthDate VARCHAR(64),
    birthCountry VARCHAR(8)
  )      
""")

<sqlite3.Cursor at 0x18fc934f540>

In [149]:
cur.execute(f"""CREATE TABLE IF NOT EXISTS {skaters_game_logs_table} (
    gameId INTEGER,
    teamAbbrev VARCHAR(8),
    homeRoadFlag VARCHAR(8),
    gameDate STRING,
    goals INTEGER,        
    assists INTEGER,         
    points INTEGER,       
    plusMinus INTEGER,         
    powerPlayGoals INTEGER,         
    powerPlayPoints INTEGER,         
    gameWinningGoals INTEGER,         
    otGoals INTEGER,
    shots INTEGER,      
    shifts INTEGER,         
    shorthandedGoals INTEGER,         
    shorthandedPoints INTEGER,         
    opponentAbbrev VARCHAR(8),     
    pim INTEGER,
    toiInSeconds INTEGER,        
    playerId INTEGER,         
    seasonId INTEGER,
    PRIMARY KEY (gameId, playerId, seasonId)
    FOREIGN KEY (teamAbbrev) REFERENCES {teams_info_table}(abbr)
    FOREIGN KEY (playerId) REFERENCES {players_roster_table}(id)
  )
""")

<sqlite3.Cursor at 0x18fc934f540>

In [150]:
cur.execute(f"""CREATE TABLE IF NOT EXISTS {goalies_game_logs_table} (
    gameId INTEGER, 
    teamAbbrev VARCHAR(8),
    homeRoadFlag VARCHAR(8),
    gameDate STRING,
    goals INTEGER, 
    assists INTEGER,  
    gamesStarted INTEGER,  
    decision VARCHAR(8), 
    shotsAgainst INTEGER,  
    goalsAgainst INTEGER,  
    savePctg FLOAT,
    shutouts INTEGER,  
    opponentAbbrev VARCHAR(8),
    pim INTEGER,
    toiInSeconds INTEGER, 
    playerId INTEGER,  
    seasonId INTEGER,
    PRIMARY KEY (gameId, playerId, seasonId)
    FOREIGN KEY (teamAbbrev) REFERENCES {teams_info_table}(abbr)
    FOREIGN KEY (playerId) REFERENCES {players_roster_table}(id)
  )
""")

<sqlite3.Cursor at 0x18fc934f540>

In [151]:
res = cur.execute("SELECT name FROM sqlite_master")
res.fetchall()

[('teams_info',),
 ('goalies_game_logs',),
 ('sqlite_autoindex_goalies_game_logs_1',),
 ('players_info',),
 ('skaters_game_logs',),
 ('sqlite_autoindex_skaters_game_logs_1',)]

In [152]:
for row in df_team_info.itertuples(index=False, name="Team"):
    query = f"""
        INSERT OR REPLACE INTO {teams_info_table} (id, abbr, name, logo, conference, division)
        VALUES (?, ?, ?, ?, ?, ?)
    """
    cur.execute(query, (row.franchise_id, row.abbr, row.name, row.logo, row.conference, row.division))

In [153]:
res = cur.execute(f"PRAGMA table_info({teams_info_table})")

res.fetchall()

[(0, 'id', 'INTEGER', 0, None, 1),
 (1, 'abbr', 'VARCHAR(16)', 0, None, 0),
 (2, 'name', 'VARCHAR(32)', 0, None, 0),
 (3, 'logo', 'VARCHAR(64)', 0, None, 0),
 (4, 'conference', 'VARCHAR(16)', 0, None, 0),
 (5, 'division', 'VARCHAR(16)', 0, None, 0)]

In [154]:
res = cur.execute(f"""
        SELECT * FROM {teams_info_table} LIMIT 10
""")

res.fetchall()

[(1,
  'MTL',
  'Montréal Canadiens',
  'https://assets.nhle.com/logos/nhl/svg/MTL_light.svg',
  'Eastern',
  'Atlantic'),
 (5,
  'TOR',
  'Toronto Maple Leafs',
  'https://assets.nhle.com/logos/nhl/svg/TOR_light.svg',
  'Eastern',
  'Atlantic'),
 (6,
  'BOS',
  'Boston Bruins',
  'https://assets.nhle.com/logos/nhl/svg/BOS_light.svg',
  'Eastern',
  'Atlantic'),
 (10,
  'NYR',
  'New York Rangers',
  'https://assets.nhle.com/logos/nhl/svg/NYR_light.svg',
  'Eastern',
  'Metropolitan'),
 (11,
  'CHI',
  'Chicago Blackhawks',
  'https://assets.nhle.com/logos/nhl/svg/CHI_light.svg',
  'Western',
  'Central'),
 (12,
  'DET',
  'Detroit Red Wings',
  'https://assets.nhle.com/logos/nhl/svg/DET_light.svg',
  'Eastern',
  'Atlantic'),
 (14,
  'LAK',
  'Los Angeles Kings',
  'https://assets.nhle.com/logos/nhl/svg/LAK_light.svg',
  'Western',
  'Pacific'),
 (15,
  'DAL',
  'Dallas Stars',
  'https://assets.nhle.com/logos/nhl/svg/DAL_light.svg',
  'Western',
  'Central'),
 (16,
  'PHI',
  'Philad

In [155]:
for row in df_team_roster_combined.itertuples(index=False, name="Roster"):
    query = f"""
        INSERT OR REPLACE INTO {players_roster_table} (id, headshot, firstName, lastName, sweaterNumber, positionCode, shootsCatches, heightInCentimeters, weightInKilograms, birthDate, birthCountry)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    cur.execute(query, (row.id, row.headshot, row.firstName, row.lastName, row.sweaterNumber, row.positionCode, row.shootsCatches, row.heightInCentimeters, row.weightInKilograms, row.birthDate, row.birthCountry))

In [156]:
res = cur.execute(f"PRAGMA table_info({players_roster_table})")

res.fetchall()

[(0, 'id', 'INTEGER', 0, None, 1),
 (1, 'headshot', 'VARCHAR(64)', 0, None, 0),
 (2, 'firstName', 'VARCHAR(64)', 0, None, 0),
 (3, 'lastName', 'VARCHAR(64)', 0, None, 0),
 (4, 'sweaterNumber', 'INTEGER', 0, None, 0),
 (5, 'positionCode', 'VARCHAR(8)', 0, None, 0),
 (6, 'shootsCatches', 'VARCHAR(8)', 0, None, 0),
 (7, 'heightInCentimeters', 'INTEGER', 0, None, 0),
 (8, 'weightInKilograms', 'INTEGER', 0, None, 0),
 (9, 'birthDate', 'VARCHAR(64)', 0, None, 0),
 (10, 'birthCountry', 'VARCHAR(8)', 0, None, 0)]

In [157]:
res = cur.execute(f"SELECT * FROM {players_roster_table} LIMIT 10")

res.fetchall()

[(8470594,
  'https://assets.nhle.com/mugs/nhl/20242025/MIN/8470594.png',
  'Marc-Andre',
  'Fleury',
  29,
  'G',
  'L',
  188,
  84,
  '1984-11-28',
  'CAN'),
 (8470600,
  'https://assets.nhle.com/mugs/nhl/20242025/STL/8470600.png',
  'Ryan',
  'Suter',
  22,
  'D',
  'L',
  185,
  91,
  '1985-01-21',
  'USA'),
 (8470613,
  'https://assets.nhle.com/mugs/nhl/20242025/CAR/8470613.png',
  'Brent',
  'Burns',
  8,
  'D',
  'R',
  196,
  103,
  '1985-03-09',
  'CAN'),
 (8470621,
  'https://assets.nhle.com/mugs/nhl/20242025/EDM/8470621.png',
  'Corey',
  'Perry',
  90,
  'R',
  'R',
  191,
  94,
  '1985-05-16',
  'CAN'),
 (8471214,
  'https://assets.nhle.com/mugs/nhl/20242025/WSH/8471214.png',
  'Alex',
  'Ovechkin',
  8,
  'L',
  'R',
  191,
  108,
  '1985-09-17',
  'RUS'),
 (8471215,
  'https://assets.nhle.com/mugs/nhl/20242025/PIT/8471215.png',
  'Evgeni',
  'Malkin',
  71,
  'C',
  'L',
  196,
  95,
  '1986-07-31',
  'RUS'),
 (8471675,
  'https://assets.nhle.com/mugs/nhl/20242025/PIT/8

In [158]:
for row in df_game_logs_skaters.itertuples(index=False, name="GameLog"):
    query = f"""
        INSERT OR REPLACE INTO {skaters_game_logs_table} (gameId, teamAbbrev, homeRoadFlag, gameDate, goals, assists, points, plusMinus, powerPlayGoals, powerPlayPoints, gameWinningGoals, otGoals, shots, shifts, shorthandedGoals, shorthandedPoints, opponentAbbrev, pim, toiInSeconds, playerId, seasonId)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    cur.execute(query, (row.gameId, row.teamAbbrev, row.homeRoadFlag, row.gameDate, row.goals, row.assists, row.points, row.plusMinus, row.powerPlayGoals, row.powerPlayPoints, row.gameWinningGoals, row.otGoals, row.shots, row.shifts, row.shorthandedGoals, row.shorthandedPoints, row.opponentAbbrev, row.pim, row.toiInSeconds, row.playerId, row.seasonId))

In [159]:
res = cur.execute(f"PRAGMA table_info({skaters_game_logs_table})")

res.fetchall()

[(0, 'gameId', 'INTEGER', 0, None, 1),
 (1, 'teamAbbrev', 'VARCHAR(8)', 0, None, 0),
 (2, 'homeRoadFlag', 'VARCHAR(8)', 0, None, 0),
 (3, 'gameDate', 'STRING', 0, None, 0),
 (4, 'goals', 'INTEGER', 0, None, 0),
 (5, 'assists', 'INTEGER', 0, None, 0),
 (6, 'points', 'INTEGER', 0, None, 0),
 (7, 'plusMinus', 'INTEGER', 0, None, 0),
 (8, 'powerPlayGoals', 'INTEGER', 0, None, 0),
 (9, 'powerPlayPoints', 'INTEGER', 0, None, 0),
 (10, 'gameWinningGoals', 'INTEGER', 0, None, 0),
 (11, 'otGoals', 'INTEGER', 0, None, 0),
 (12, 'shots', 'INTEGER', 0, None, 0),
 (13, 'shifts', 'INTEGER', 0, None, 0),
 (14, 'shorthandedGoals', 'INTEGER', 0, None, 0),
 (15, 'shorthandedPoints', 'INTEGER', 0, None, 0),
 (16, 'opponentAbbrev', 'VARCHAR(8)', 0, None, 0),
 (17, 'pim', 'INTEGER', 0, None, 0),
 (18, 'toiInSeconds', 'INTEGER', 0, None, 0),
 (19, 'playerId', 'INTEGER', 0, None, 2),
 (20, 'seasonId', 'INTEGER', 0, None, 3)]

In [160]:
res = cur.execute(f"SELECT * FROM {skaters_game_logs_table} LIMIT 10")

res.fetchall()

[(2024020259,
  'WPG',
  'R',
  '2024-11-14',
  0,
  0,
  0,
  1,
  0,
  0,
  0,
  0,
  2,
  18,
  0,
  0,
  'TBL',
  0,
  850,
  8478891,
  20242025),
 (2024020247,
  'WPG',
  'R',
  '2024-11-12',
  0,
  0,
  0,
  -1,
  0,
  0,
  0,
  0,
  1,
  21,
  0,
  0,
  'NYR',
  0,
  851,
  8478891,
  20242025),
 (2024020230,
  'WPG',
  'H',
  '2024-11-09',
  0,
  0,
  0,
  -1,
  0,
  0,
  0,
  0,
  0,
  18,
  0,
  0,
  'DAL',
  0,
  866,
  8478891,
  20242025),
 (2024020217,
  'WPG',
  'H',
  '2024-11-07',
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  21,
  0,
  0,
  'COL',
  0,
  1098,
  8478891,
  20242025),
 (2024020201,
  'WPG',
  'H',
  '2024-11-05',
  0,
  1,
  1,
  2,
  0,
  0,
  0,
  0,
  1,
  20,
  0,
  0,
  'UTA',
  0,
  849,
  8478891,
  20242025),
 (2024020186,
  'WPG',
  'H',
  '2024-11-03',
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  2,
  19,
  0,
  0,
  'TBL',
  0,
  884,
  8478891,
  20242025),
 (2024020171,
  'WPG',
  'R',
  '2024-11-01',
  1,
  0,
  1,
  1,
  0,
  0,
  1,
  0,

In [162]:
for row in df_game_logs_goalies.itertuples(index=False, name="GameLog"):
    query = f"""
        INSERT OR REPLACE INTO {goalies_game_logs_table} (gameId, teamAbbrev, homeRoadFlag, gameDate, goals, assists, gamesStarted, decision, shotsAgainst, goalsAgainst, savePctg, shutouts, opponentAbbrev, pim, toiInSeconds, playerId, seasonId)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    cur.execute(query, (row.gameId, row.teamAbbrev, row.homeRoadFlag, row.gameDate, row.goals, row.assists, row.gamesStarted, row.decision, row.shotsAgainst, row.goalsAgainst, row.savePctg, row.shutouts, row.opponentAbbrev, row.pim, row.toiInSeconds, row.playerId, row.seasonId))

In [163]:
res = cur.execute(f"PRAGMA table_info({goalies_game_logs_table})")

res.fetchall()

[(0, 'gameId', 'INTEGER', 0, None, 1),
 (1, 'teamAbbrev', 'VARCHAR(8)', 0, None, 0),
 (2, 'homeRoadFlag', 'VARCHAR(8)', 0, None, 0),
 (3, 'gameDate', 'STRING', 0, None, 0),
 (4, 'goals', 'INTEGER', 0, None, 0),
 (5, 'assists', 'INTEGER', 0, None, 0),
 (6, 'gamesStarted', 'INTEGER', 0, None, 0),
 (7, 'decision', 'VARCHAR(8)', 0, None, 0),
 (8, 'shotsAgainst', 'INTEGER', 0, None, 0),
 (9, 'goalsAgainst', 'INTEGER', 0, None, 0),
 (10, 'savePctg', 'FLOAT', 0, None, 0),
 (11, 'shutouts', 'INTEGER', 0, None, 0),
 (12, 'opponentAbbrev', 'VARCHAR(8)', 0, None, 0),
 (13, 'pim', 'INTEGER', 0, None, 0),
 (14, 'toiInSeconds', 'INTEGER', 0, None, 0),
 (15, 'playerId', 'INTEGER', 0, None, 2),
 (16, 'seasonId', 'INTEGER', 0, None, 3)]

In [164]:
res = cur.execute(f"SELECT * FROM {goalies_game_logs_table} LIMIT 10")

res.fetchall()

[(2024020259,
  'WPG',
  'R',
  '2024-11-14',
  0,
  0,
  1,
  'L',
  28,
  3,
  0.892857,
  0,
  'TBL',
  0,
  3478,
  8477480,
  20242025),
 (2024020171,
  'WPG',
  'R',
  '2024-11-01',
  0,
  0,
  1,
  'W',
  22,
  2,
  0.909091,
  0,
  'CBJ',
  0,
  3600,
  8477480,
  20242025),
 (2024020129,
  'WPG',
  'R',
  '2024-10-26',
  0,
  0,
  1,
  'W',
  29,
  3,
  0.896552,
  0,
  'CGY',
  0,
  3600,
  8477480,
  20242025),
 (2024020088,
  'WPG',
  'H',
  '2024-10-20',
  0,
  0,
  1,
  'W',
  42,
  3,
  0.928571,
  0,
  'PIT',
  0,
  3593,
  8477480,
  20242025),
 (2024020247,
  'WPG',
  'R',
  '2024-11-12',
  0,
  0,
  1,
  'W',
  36,
  3,
  0.916667,
  0,
  'NYR',
  0,
  3599,
  8476945,
  20242025),
 (2024020230,
  'WPG',
  'H',
  '2024-11-09',
  0,
  0,
  1,
  'W',
  33,
  1,
  0.969697,
  0,
  'DAL',
  0,
  3600,
  8476945,
  20242025),
 (2024020217,
  'WPG',
  'H',
  '2024-11-07',
  0,
  0,
  1,
  'W',
  35,
  0,
  1.0,
  1,
  'COL',
  0,
  3585,
  8476945,
  20242025),
 (202402020

Commit changes and close database

In [165]:

con.commit()

In [166]:
con.close()