In [1]:
import pandas as pd
from io import StringIO
import os
from datetime import datetime, timedelta
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Union, Set
from enum import Enum
import pickle
import pandas as pd
import statsmodels.api as sm
import numpy as np

from nst_scraper import nst_on_ice_scraper
from db_utils import update_player_db, check_last_update, append_player_ids, get_player_full_name
from team_utils import get_most_recent_game_id

from dotenv import load_dotenv

pd.set_option('display.max_columns', None)
np.set_printoptions(legacy='1.25')

In [2]:
player_stats_df = nst_on_ice_scraper(fromseason=20242025, thruseason=20242025, startdate='', enddate='')
player_stats_df.head()

Unnamed: 0,player,team,position,gp,toi,goals,total_assists,first_assists,second_assists,total_points,ipp,shots,sh%,ixg,icf,iff,iscf,ihdcf,rush_attempts,rebounds_created,pim,total_penalties,minor,major,misconduct,penalties_drawn,giveaways,takeaways,hits,hits_taken,shots_blocked,faceoffs_won,faceoffs_lost,faceoffs_%
0,Ryan Suter,STL,D,25,491.1,1,3,1,2,4,17.39,22,4.55,0.92,58,34,15,2,3,5,14,7,7,0,0,1,25,5,13,23,29,0,0,-
1,Brent Burns,CAR,D,24,394.783333,1,5,2,3,6,33.33,38,2.63,1.49,102,63,28,2,4,7,4,2,2,0,0,2,31,11,3,16,18,0,0,-
2,Corey Perry,EDM,R,24,242.766667,4,0,0,0,4,50.0,21,19.05,2.35,44,29,27,12,0,1,16,5,3,2,0,8,6,3,8,17,5,2,2,50.00
3,Alex Ovechkin,WSH,L,18,215.25,9,7,4,3,16,72.73,34,26.47,3.27,69,49,40,16,0,5,6,3,3,0,0,0,7,4,41,8,3,0,0,-
4,Evgeni Malkin,PIT,C,26,384.183333,3,10,7,3,13,76.47,34,8.82,5.06,69,45,40,22,0,9,10,5,5,0,0,5,28,6,12,24,18,80,107,42.78


In [3]:
goalie_stats_df = nst_on_ice_scraper(fromseason=20242025, thruseason=20242025, startdate='', enddate='', pos='g')
goalie_stats_df.head()

Unnamed: 0,player,team,gp,toi,shots_against,saves,goals_against,sv%,gaa,gsaa,xg_against,hd_shots_against,hd_saves,hd_goals_against,hdsv%,hdgaa,hdgsaa,md_shots_against,md_saves,md_goals_against,mdsv%,mdgaa,mdgsaa,ld_shots_against,ld_saves,ld_goals_against,ldsv%,ldgaa,ldgsaa,rush_attempts_against,rebound_attempts_against,avg._shot_distance,avg._goal_distance
0,Marc-Andre Fleury,MIN,6,299.833333,138,128,10,0.928,2.0,2.17,8.5,22,18,4,0.818,0.8,-0.08,30,25,5,0.833,1.0,-1.42,77,77,0,1.0,0.0,2.54,9,17,39.61,30.4
1,Jonathan Quick,NYR,7,319.583333,166,158,8,0.952,1.5,6.64,13.86,44,42,2,0.955,0.38,5.84,46,41,5,0.891,0.94,0.49,72,71,1,0.986,0.19,1.37,13,31,33.71,23.38
2,James Reimer,"ANA, BUF",4,195.983333,89,81,8,0.91,2.45,-0.15,8.7,11,6,5,0.545,1.53,-3.04,41,38,3,0.927,0.92,1.89,36,36,0,1.0,0.0,1.19,6,17,32.03,16.88
3,Semyon Varlamov,NYI,10,499.45,211,192,19,0.91,2.28,-0.39,19.67,61,53,8,0.869,0.96,2.87,50,44,6,0.88,0.72,-0.03,82,77,5,0.939,0.6,-2.3,16,37,35.72,20.05
4,Jacob Markstrom,N.J,19,915.4,406,368,38,0.906,2.49,-2.18,33.69,90,70,20,0.778,1.31,-3.97,114,102,12,0.895,0.79,1.61,171,165,6,0.965,0.39,-0.36,55,75,40.54,21.32


In [4]:
class Position(Enum):
    C = 'C'
    L = 'L'
    R = 'R'
    D = 'D'
    G = 'G'
    
    @property
    def category(self) -> str:
        if self in {Position.C, Position.L, Position.R}:
            return 'F'
        elif self == Position.D:
            return 'D'
        elif self == Position.G:
            return 'G'
    
    def __str__(self) -> str:
        return self.value

In [5]:
@dataclass
class Player:
    name: str
    team: str
    position: Position
    player_id: Optional[int] = None

    def __str__(self) -> str:
        """
        Returns a string representation of the player.
        """
        return f"{self.name} ({self.position}) - {self.team}"

    def to_dict(self) -> Dict[str, Optional[str]]:
        """
        Converts the Player instance into a dictionary.
        
        Returns:
            Dict[str, Optional[str]]: A dictionary representation of the player.
        """
        return {
            'player_id': self.player_id,
            'name': self.name,
            'team': self.team,
            'position': self.position.value
        }

In [6]:
@dataclass
class Lineup:
    name: str
    forwards: List[Optional[Player]] = field(default_factory=lambda: [None] * 12)
    defense: List[Optional[Player]] = field(default_factory=lambda: [None] * 6)
    goalies: List[Optional[Player]] = field(default_factory=lambda: [None] * 2)
    
    ALLOWED_FORWARD_CATEGORIES = {'F'}
    ALLOWED_DEFENSE_CATEGORY = 'D'
    ALLOWED_GOALIE_CATEGORY = 'G'
    
    def __post_init__(self):
        self.validate_lineup_size()
    
    def validate_lineup_size(self):
        total_players = sum(player is not None for player in self.forwards + self.defense + self.goalies)
        if total_players > 20:
            raise ValueError(f"Total number of players ({total_players}) exceeds the hard limit of 20.")
    
    def add_player(
        self,
        category: str,
        player: Player,
        slot: int,
        allowed_categories: Union[str, Set[str]],
        max_slots: int
    ):
        """
        Adds a player to the specified category and slot after validating their position category.
        Ensures that the total number of players does not exceed 20.
        """
        if isinstance(allowed_categories, str):
            allowed_categories = {allowed_categories}
        elif isinstance(allowed_categories, set):
            allowed_categories = allowed_categories
        else:
            raise TypeError("allowed_categories must be a string or a set of strings.")
        
        if player.position.category not in allowed_categories:
            raise ValueError(
                f"Cannot add player '{player.name}' with position '{player.position.value}' "
                f"to {category}. Allowed categories: {', '.join(allowed_categories)}."
            )
        
        if not 0 <= slot < max_slots:
            raise IndexError(f"{category.capitalize()} slot must be between 0 and {max_slots - 1}.")
        
        current_category = getattr(self, category)
        if current_category[slot]:
            existing_player = current_category[slot].name
            print(f"Warning: Slot {slot + 1} in {category} is already occupied by '{existing_player}'. Overwriting.")
        
        # Check total players before adding
        total_players = sum(player is not None for player in self.forwards + self.defense + self.goalies)
        if current_category[slot] is None and total_players >= 20:
            raise ValueError("Cannot add more players. The lineup has reached the hard limit of 20 players.")
        
        current_category[slot] = player
        setattr(self, category, current_category)
        print(f"Added player '{player.name}' to {category.capitalize()} slot {slot + 1}.")
    
    def add_forward(self, player: Player, slot: int):
        self.add_player(
            category='forwards',
            player=player,
            slot=slot,
            allowed_categories=self.ALLOWED_FORWARD_CATEGORIES,
            max_slots=len(self.forwards)
        )
    
    def add_defense(self, player: Player, slot: int):
        self.add_player(
            category='defense',
            player=player,
            slot=slot,
            allowed_categories={self.ALLOWED_DEFENSE_CATEGORY},
            max_slots=len(self.defense)
        )
    
    def set_goalie(self, player: Player, slot: int):
        self.add_player(
            category='goalies',
            player=player,
            slot=slot,
            allowed_categories={self.ALLOWED_GOALIE_CATEGORY},
            max_slots=len(self.goalies)
        )
    
    def adjust_slots(self, category: str, delta: int):
        """
        Adjusts the number of slots in the specified category by delta.
        Allows ±1 adjustment only.
        
        Args:
            category (str): The category to adjust ('forwards' or 'defense').
            delta (int): The change in number of slots (+1 or -1).
        """
        if category not in {'forwards', 'defense'}:
            raise ValueError("Can only adjust 'forwards' or 'defense' categories.")
        if delta not in {-1, 1}:
            raise ValueError("Delta must be either +1 or -1.")
        
        current_slots = getattr(self, category)
        new_slot_count = len(current_slots) + delta
        
        if category == 'forwards':
            if not (11 <= new_slot_count <= 13):
                raise ValueError("Number of forwards can only vary by ±1 from the default of 12.")
        elif category == 'defense':
            if not (5 <= new_slot_count <= 7):
                raise ValueError("Number of defensemen can only vary by ±1 from the default of 6.")
        
        if delta == 1:
            current_slots.append(None)
        elif delta == -1:
            removed_player = current_slots.pop()
            if removed_player:
                print(f"Removed player '{removed_player.name}' from {category}.")
        
        setattr(self, category, current_slots)
        print(f"Adjusted {category} slots to {len(getattr(self, category))}.")
        self.validate_lineup_size()
    
    def display_lineup(self):
        """
        Prints the current lineup in a structured format.
        """
        print(f"Lineup: {self.name}\n")
        
        for category, title in [('forwards', 'Forwards'), ('defense', 'Defense'), ('goalies', 'Goalies')]:
            print(f"{title}:")
            for idx, player in enumerate(getattr(self, category), start=1):
                player_info = str(player) if player else 'Empty'
                print(f"  Slot {idx}: {player_info}")
            print()
    
    def to_dataframe(self) -> pd.DataFrame:
        """
        Converts the lineup into a pandas DataFrame.
        Conditionally includes player attributes if they are present.
        """
        data = []
        for category, pos in [('forwards', 'f'), ('defense', 'd'), ('goalies', 'g')]:
            for idx, player in enumerate(getattr(self, category), start=1):
                player_dict = {
                    'Position': f"{pos}{idx}",
                    'Player': player.name if player else 'Empty'
                }
                # Conditionally add 'player_id' if it exists
                if player and player.player_id is not None:
                    player_dict['Player ID'] = player.player_id
                data.append(player_dict)
        
        df = pd.DataFrame(data)
        
        # Optionally, remove columns where all values are NaN
        df.dropna(axis=1, how='all', inplace=True)
        
        return df
    
    def to_transposed_dataframe(self) -> pd.DataFrame:
        """
        Transposes the lineup DataFrame so that each column represents a position-slot combination
        and the row contains the corresponding player names.
        Conditionally includes additional player attributes if they are present.
        """
        df = self.to_dataframe()
        
        # Initialize dictionaries to hold player names and optional IDs
        player_data = {}
        id_data = {}
        
        for _, row in df.iterrows():
            pos = row['Position']
            player_name = row['Player']
            player_data[pos] = player_name
            
            # Handle 'Player ID' if it exists
            if 'Player ID' in row and pd.notna(row['Player ID']):
                id_data[f"{pos}_ID"] = row['Player ID']
        
        # Combine player names and IDs into a single dictionary
        transposed_data = {**player_data, **id_data}
        
        # Create the transposed DataFrame with a single row
        transposed_df = pd.DataFrame([transposed_data])
        
        return transposed_df

In [7]:
# Creating Player instances from the player_stats_df DataFrame
player_list = []
for _, row in player_stats_df.iterrows():
    try:
        # !! Take only first position if multiple are listed, this is a hack to deal with the fact that some players have multiple positions listed
        position = row['position'].split(',')[0].strip()
        position_enum = Position(position)  # Convert abbreviation to Position Enum
    except ValueError:
        print(f"Invalid position '{row['position']}' for player '{row['player']}'. Skipping.")
        continue
    
    player = Player(
        name=row['player'],
        team=row['team'],
        position=position_enum
        # player_id is not set initially
    )
    player_list.append(player)

# Convert player list to DataFrame to verify
pd.DataFrame([{
    'name': player.name,
    'team': player.team, 
    'position': player.position.value
} for player in player_list]).head()

Unnamed: 0,name,team,position
0,Ryan Suter,STL,D
1,Brent Burns,CAR,D
2,Corey Perry,EDM,R
3,Alex Ovechkin,WSH,L
4,Evgeni Malkin,PIT,C


In [8]:
goalie_stats_df.head()

Unnamed: 0,player,team,gp,toi,shots_against,saves,goals_against,sv%,gaa,gsaa,xg_against,hd_shots_against,hd_saves,hd_goals_against,hdsv%,hdgaa,hdgsaa,md_shots_against,md_saves,md_goals_against,mdsv%,mdgaa,mdgsaa,ld_shots_against,ld_saves,ld_goals_against,ldsv%,ldgaa,ldgsaa,rush_attempts_against,rebound_attempts_against,avg._shot_distance,avg._goal_distance
0,Marc-Andre Fleury,MIN,6,299.833333,138,128,10,0.928,2.0,2.17,8.5,22,18,4,0.818,0.8,-0.08,30,25,5,0.833,1.0,-1.42,77,77,0,1.0,0.0,2.54,9,17,39.61,30.4
1,Jonathan Quick,NYR,7,319.583333,166,158,8,0.952,1.5,6.64,13.86,44,42,2,0.955,0.38,5.84,46,41,5,0.891,0.94,0.49,72,71,1,0.986,0.19,1.37,13,31,33.71,23.38
2,James Reimer,"ANA, BUF",4,195.983333,89,81,8,0.91,2.45,-0.15,8.7,11,6,5,0.545,1.53,-3.04,41,38,3,0.927,0.92,1.89,36,36,0,1.0,0.0,1.19,6,17,32.03,16.88
3,Semyon Varlamov,NYI,10,499.45,211,192,19,0.91,2.28,-0.39,19.67,61,53,8,0.869,0.96,2.87,50,44,6,0.88,0.72,-0.03,82,77,5,0.939,0.6,-2.3,16,37,35.72,20.05
4,Jacob Markstrom,N.J,19,915.4,406,368,38,0.906,2.49,-2.18,33.69,90,70,20,0.778,1.31,-3.97,114,102,12,0.895,0.79,1.61,171,165,6,0.965,0.39,-0.36,55,75,40.54,21.32


In [9]:
# Creating Player instances from the goalie_stats_df DataFrame
goalie_list = []
for _, row in goalie_stats_df.iterrows():
    player = Player(
        name=row['player'],
        team=row['team'],
        position=Position.G  # Set position to 'G' for goalies
        # player_id is not set initially
    )
    goalie_list.append(player)

# Convert goalie list to DataFrame to verify
pd.DataFrame([{
    'name': player.name,
    'team': player.team, 
    'position': player.position.value
} for player in goalie_list]).head()

Unnamed: 0,name,team,position
0,Marc-Andre Fleury,MIN,G
1,Jonathan Quick,NYR,G
2,James Reimer,"ANA, BUF",G
3,Semyon Varlamov,NYI,G
4,Jacob Markstrom,N.J,G


In [10]:
today_datetime= datetime.now()
yesterday_datetime = today_datetime - timedelta(days=1, hours=6) # UTC offset
yesterday = yesterday_datetime.strftime('%Y-%m-%d')
yesterday

'2024-12-02'

In [11]:
# Load environment variables from .env file
load_dotenv()

# Construct the database configuration dictionary
db_config = {
    'dbname': os.getenv('DB_NAME'),
    'user': os.getenv('DB_USER'),
    'password': os.getenv('DB_PASSWORD'),
    'host': os.getenv('DB_HOST'),
    'port': os.getenv('DB_PORT')
}

In [12]:
# Check the last update time of the players database
last_update = check_last_update(db_config)

INFO:db_utils:Database connection established.
INFO:db_utils:Last database update was on: 2024-12-02
INFO:db_utils:Database connection closed.


In [13]:
# Convert last_update to datetime
last_update_dt = datetime.strptime(last_update, '%Y-%m-%d')
today_dt = datetime.strptime(today_datetime.strftime('%Y-%m-%d'), '%Y-%m-%d')
yesterday_dt = datetime.strptime(yesterday, '%Y-%m-%d')

# Only update if last update was before yesterday
if last_update_dt not in [today_dt, yesterday_dt]:
    # Update the player database from last update to yesterday
    update_player_db(last_update, yesterday, db_config, skip_existing=True)
else:
    print(f"No need to update the player database. Last update was on: {last_update}")


No need to update the player database. Last update was on: 2024-12-02


In [14]:
def add_player_to_lineup(lineup: Lineup, player: Player, category: str):
    """
    Adds a player to the lineup in the specified category, handling potential errors.
    
    Args:
        lineup (Lineup): The lineup object.
        player (Player): The player to add.
        category (str): The category ('forwards', 'defense', 'goalies').
    """
    try:
        if category == 'forwards':
            slot = next(i for i, p in enumerate(lineup.forwards) if p is None)
            lineup.add_forward(player, slot)
        elif category == 'defense':
            slot = next(i for i, p in enumerate(lineup.defense) if p is None)
            lineup.add_defense(player, slot)
        elif category == 'goalies':
            slot = next(i for i, p in enumerate(lineup.goalies) if p is None)
            lineup.set_goalie(player, slot)
        else:
            print(f"Unknown category '{category}'.")
    except StopIteration:
        print(f"No available slots to add player '{player.name}' in category '{category}'.")
    except ValueError as ve:
        print(ve)
    except IndexError as ie:
        print(ie)

In [15]:
# TODO this function just creates a lineup from the player_list and goalie_list
def create_lineup(team) -> Lineup:
    """
    Creates and displays a lineup consisting of players from the specified team.
    
    Args:
        team (str): The team name to filter players.
    """
    # Creating two lineup objects
    lineup1 = Lineup("Lineup 1")
    
    # Adding forwards to lineup1
    forward_count = 0
    for player in player_list:
        if player.team == team:
            try:
                lineup1.add_forward(player, forward_count)
                forward_count += 1
                if forward_count >= 12:
                    break
            except ValueError as e:
                print(f"Skipping player '{player.name}': {e}")
            except IndexError as e:
                print(f"Skipping player '{player.name}': {e}")
        else:
            continue  # Proceed to the next player if not in the specified team
    
    # Adding defense to lineup1
    defense_count = 0
    for player in player_list:
        if player.team == team:
            try:
                lineup1.add_defense(player, defense_count)
                defense_count += 1
                if defense_count >= 6:
                    break
            except ValueError as e:
                print(f"Skipping player '{player.name}': {e}")
            except IndexError as e:
                print(f"Skipping player '{player.name}': {e}")
        else:
            continue  # Proceed to the next player if not in the specified team
    
    # Adding goalies to lineup1
    goalie_count = 0
    for goalie in goalie_list:
        if goalie.team != team:
            continue  # Proceed to the next goalie if not in the specified team
        if goalie_count >= 2:
            print("Maximum of two goalies have been assigned.")
            break
        try:
            lineup1.set_goalie(goalie, goalie_count)
            goalie_count += 1
        except ValueError as e:
            print(f"Skipping goalie '{goalie.name}': {e}")
        except IndexError as e:
            print(f"Skipping goalie '{goalie.name}': {e}")
    
    # Display the lineup
    # lineup1.display_lineup()
    return lineup1

my_lineup = create_lineup('TOR')

Added player 'Ryan Reaves' to Forwards slot 1.
Added player 'Max Pacioretty' to Forwards slot 2.
Added player 'John Tavares' to Forwards slot 3.
Skipping player 'Oliver Ekman-Larsson': Cannot add player 'Oliver Ekman-Larsson' with position 'D' to forwards. Allowed categories: F.
Skipping player 'Chris Tanev': Cannot add player 'Chris Tanev' with position 'D' to forwards. Allowed categories: F.
Skipping player 'Jani Hakanpää': Cannot add player 'Jani Hakanpää' with position 'D' to forwards. Allowed categories: F.
Skipping player 'Morgan Rielly': Cannot add player 'Morgan Rielly' with position 'D' to forwards. Allowed categories: F.
Skipping player 'Jake McCabe': Cannot add player 'Jake McCabe' with position 'D' to forwards. Allowed categories: F.
Added player 'Max Domi' to Forwards slot 4.
Added player 'William Nylander' to Forwards slot 5.
Added player 'Mitch Marner' to Forwards slot 6.
Added player 'Steven Lorentz' to Forwards slot 7.
Skipping player 'Philippe Myers': Cannot add playe

In [16]:
def create_flexible_lineup(team: str) -> Lineup:
    """
    Creates a flexible lineup for the specified team, allowing for ±1 forward or defense spot.
    
    Args:
        team (str): The team name to filter players.
    
    Returns:
        Lineup: The configured lineup object.
    """
    lineup = Lineup(f"Flexible Lineup for {team}")
    
    # Example logic to adjust slots based on team strategy
    # Here, we simply allow flexibility; implement specific rules as needed
    allow_extra_forward = True  # Example condition
    allow_extra_defense = False  # Example condition
    
    if allow_extra_forward:
        lineup.adjust_slots('forwards', 1)  # Increase forwards to 13
    elif allow_extra_defense:
        lineup.adjust_slots('defense', 1)  # Increase defense to 7
    
    # Add players to the lineup
    forward_count = 0
    defense_count = 0
    goalie_count = 0
    
    for player in player_list:
        if player.team == team:
            if player.position.category == 'F' and forward_count < len(lineup.forwards):
                try:
                    lineup.add_forward(player, forward_count)
                    forward_count += 1
                except ValueError:
                    continue
            elif player.position.category == 'D' and defense_count < len(lineup.defense):
                try:
                    lineup.add_defense(player, defense_count)
                    defense_count += 1
                except ValueError:
                    continue
    
    for goalie in goalie_list:
        if goalie.team == team and goalie_count < len(lineup.goalies):
            try:
                lineup.set_goalie(goalie, goalie_count)
                goalie_count += 1
            except ValueError:
                continue
    
    return lineup

# Create and display a flexible lineup for 'TOR'
flexible_lineup = create_flexible_lineup('TOR')
flexible_lineup.display_lineup()

Adjusted forwards slots to 13.
Added player 'Ryan Reaves' to Forwards slot 1.
Added player 'Max Pacioretty' to Forwards slot 2.
Added player 'John Tavares' to Forwards slot 3.
Added player 'Oliver Ekman-Larsson' to Defense slot 1.
Added player 'Chris Tanev' to Defense slot 2.
Added player 'Jani Hakanpää' to Defense slot 3.
Added player 'Morgan Rielly' to Defense slot 4.
Added player 'Jake McCabe' to Defense slot 5.
Added player 'Max Domi' to Forwards slot 4.
Added player 'William Nylander' to Forwards slot 5.
Added player 'Mitch Marner' to Forwards slot 6.
Added player 'Steven Lorentz' to Forwards slot 7.
Added player 'Philippe Myers' to Defense slot 6.
Added player 'Auston Matthews' to Forwards slot 8.
Added player 'Alex Nylander' to Forwards slot 9.
Added player 'David Kampf' to Forwards slot 10.
Added player 'Connor Dewar' to Forwards slot 11.
Added player 'Pontus Holmberg' to Forwards slot 12.
Added player 'Nicholas Robertson' to Forwards slot 13.
Added player 'Anthony Stolarz' to 

In [17]:
my_lineup.to_dataframe()

Unnamed: 0,Position,Player
0,f1,Ryan Reaves
1,f2,Max Pacioretty
2,f3,John Tavares
3,f4,Max Domi
4,f5,William Nylander
5,f6,Mitch Marner
6,f7,Steven Lorentz
7,f8,Auston Matthews
8,f9,Alex Nylander
9,f10,David Kampf


In [18]:
# Convert the lineup to a transposed DataFrame
transposed_lineup_df = my_lineup.to_transposed_dataframe()

# Display the transposed DataFrame
transposed_lineup_df

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,d1,d2,d3,d4,d5,d6,g1,g2
0,Ryan Reaves,Max Pacioretty,John Tavares,Max Domi,William Nylander,Mitch Marner,Steven Lorentz,Auston Matthews,Alex Nylander,David Kampf,Connor Dewar,Pontus Holmberg,Oliver Ekman-Larsson,Chris Tanev,Jani Hakanpää,Morgan Rielly,Jake McCabe,Philippe Myers,Anthony Stolarz,Joseph Woll


In [19]:
def assign_player_ids_to_lineup(transposed_lineup_df, db_config):
    """
    Processes the transposed_lineup_df DataFrame to append player IDs next to each player's name.
    
    This function performs the following steps:
        1. Extracts all unique player names from the lineup DataFrame.
        2. Creates Player instances for each player name.
        3. Uses the append_player_ids function to assign player IDs to each Player object.
        4. Inserts new columns into the DataFrame with the corresponding player IDs.
    
    Args:
        transposed_lineup_df (pd.DataFrame): The transposed lineup DataFrame with player names.
        db_config (dict): Database configuration with keys: dbname, user, password, host, port.
    
    Returns:
        pd.DataFrame: The updated DataFrame with player IDs added.
    """
    import pandas as pd

    # Ensure the DataFrame has only one row
    if transposed_lineup_df.shape[0] != 1:
        raise ValueError("transposed_lineup_df should contain exactly one row representing the lineup.")

    # Extract player names from the DataFrame
    player_columns = transposed_lineup_df.columns
    player_names = transposed_lineup_df.iloc[0].tolist()

    # Create Player instances
    player_list = []
    for name in player_names:
        if name != 'Empty':
            player = Player(name=name, team=None, position=None)  # Team and position can be set if available
            player_list.append(player)

    # Append player IDs using the existing function
    append_player_ids(player_list, db_config)

    # Create a mapping from player name to player_id
    name_to_id = {player.name: player.player_id for player in player_list if player.player_id is not None}

    # Initialize a copy of the DataFrame to avoid modifying the original
    updated_df = transposed_lineup_df.copy()

    # Insert player_id columns adjacent to each player name column
    for col in player_columns:
        player_name = updated_df.at[0, col]
        if player_name != 'Empty':
            player_id = name_to_id.get(player_name, None)
            id_column = f"{col}_id"
            updated_df[id_column] = player_id
        else:
            id_column = f"{col}_id"
            updated_df[id_column] = None  # Assign None for empty slots

    return updated_df

In [20]:
# Assuming you have already created `transposed_lineup_df` and `db_config`
updated_lineup_df = assign_player_ids_to_lineup(transposed_lineup_df, db_config)

# Display the updated DataFrame
updated_lineup_df

INFO:db_utils:Database connection established.
INFO:db_utils:Assigned player_id 8471817 to Ryan Reaves.
INFO:db_utils:Assigned player_id 8474157 to Max Pacioretty.
INFO:db_utils:Assigned player_id 8475166 to John Tavares.
INFO:db_utils:Assigned player_id 8477503 to Max Domi.
INFO:db_utils:Assigned player_id 8477939 to William Nylander.
INFO:db_utils:Assigned player_id 8478483 to Mitch Marner.
INFO:db_utils:Assigned player_id 8478904 to Steven Lorentz.
INFO:db_utils:Assigned player_id 8479318 to Auston Matthews.
INFO:db_utils:Assigned player_id 8479423 to Alex Nylander.
INFO:db_utils:Assigned player_id 8480144 to David Kampf.
INFO:db_utils:Assigned player_id 8480980 to Connor Dewar.
INFO:db_utils:Assigned player_id 8480995 to Pontus Holmberg.
INFO:db_utils:Assigned player_id 8475171 to Oliver Ekman-Larsson.
INFO:db_utils:Assigned player_id 8475690 to Chris Tanev.
INFO:db_utils:Assigned player_id 8475825 to Jani Hakanpää.
INFO:db_utils:Assigned player_id 8476853 to Morgan Rielly.
INFO:db

Unnamed: 0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,d1,d2,d3,d4,d5,d6,g1,g2,f1_id,f2_id,f3_id,f4_id,f5_id,f6_id,f7_id,f8_id,f9_id,f10_id,f11_id,f12_id,d1_id,d2_id,d3_id,d4_id,d5_id,d6_id,g1_id,g2_id
0,Ryan Reaves,Max Pacioretty,John Tavares,Max Domi,William Nylander,Mitch Marner,Steven Lorentz,Auston Matthews,Alex Nylander,David Kampf,Connor Dewar,Pontus Holmberg,Oliver Ekman-Larsson,Chris Tanev,Jani Hakanpää,Morgan Rielly,Jake McCabe,Philippe Myers,Anthony Stolarz,Joseph Woll,8471817,8474157,8475166,8477503,8477939,8478483,8478904,8479318,8479423,8480144,8480980,8480995,8475171,8475690,8475825,8476853,8476931,8479026,8476932,8479361


In [21]:
# player_stats_df = nst_on_ice_scraper(fromseason=20242025, thruseason=20242025, startdate='', enddate='')
player_stats_df.head() 

Unnamed: 0,player,team,position,gp,toi,goals,total_assists,first_assists,second_assists,total_points,ipp,shots,sh%,ixg,icf,iff,iscf,ihdcf,rush_attempts,rebounds_created,pim,total_penalties,minor,major,misconduct,penalties_drawn,giveaways,takeaways,hits,hits_taken,shots_blocked,faceoffs_won,faceoffs_lost,faceoffs_%
0,Ryan Suter,STL,D,25,491.1,1,3,1,2,4,17.39,22,4.55,0.92,58,34,15,2,3,5,14,7,7,0,0,1,25,5,13,23,29,0,0,-
1,Brent Burns,CAR,D,24,394.783333,1,5,2,3,6,33.33,38,2.63,1.49,102,63,28,2,4,7,4,2,2,0,0,2,31,11,3,16,18,0,0,-
2,Corey Perry,EDM,R,24,242.766667,4,0,0,0,4,50.0,21,19.05,2.35,44,29,27,12,0,1,16,5,3,2,0,8,6,3,8,17,5,2,2,50.00
3,Alex Ovechkin,WSH,L,18,215.25,9,7,4,3,16,72.73,34,26.47,3.27,69,49,40,16,0,5,6,3,3,0,0,0,7,4,41,8,3,0,0,-
4,Evgeni Malkin,PIT,C,26,384.183333,3,10,7,3,13,76.47,34,8.82,5.06,69,45,40,22,0,9,10,5,5,0,0,5,28,6,12,24,18,80,107,42.78


In [22]:
def get_skater_stats(lineup: Lineup, player_stats_df: pd.DataFrame) -> pd.DataFrame:
    """
    Gets stats for players in the lineup, maintaining lineup order.
    
    Args:
        lineup (Lineup): The lineup containing players
        player_stats_df (pd.DataFrame): DataFrame with player statistics
        
    Returns:
        pd.DataFrame: Player statistics ordered according to lineup positions
    """
    # Create ordered list of players (forwards then defense)
    players = []
    # Add forwards in order
    players.extend([p for p in lineup.forwards if p])
    # Add defense in order 
    players.extend([p for p in lineup.defense if p])
    
    # Create ordered list of player names
    player_names = [player.name for player in players]
    
    # Filter stats and reorder to match lineup order
    stats_df = player_stats_df[player_stats_df['player'].isin(player_names)]
    
    # Create ordering dictionary mapping names to their position in lineup
    name_to_position = {name: idx for idx, name in enumerate(player_names)}
    
    # Sort stats DataFrame based on lineup order and reset index
    return stats_df.assign(
        lineup_order=stats_df['player'].map(name_to_position)
    ).sort_values('lineup_order').drop('lineup_order', axis=1).reset_index(drop=True)

# lineup_player_stats = get_skater_stats(my_lineup, player_stats_df)

In [23]:
def get_goalie_stats(lineup: Lineup, goalie_stats_df: pd.DataFrame) -> pd.DataFrame:
    """
    Gets stats for goalies in the lineup, maintaining lineup order.
    
    Args:
        lineup (Lineup): The lineup containing goalies
        goalie_stats_df (pd.DataFrame): DataFrame with goalie statistics
        
    Returns:
        pd.DataFrame: Goalie statistics ordered according to lineup positions
    """
    # Create ordered list of goalies
    goalies = [goalie for goalie in lineup.goalies if goalie]
    goalie_names = [goalie.name for goalie in goalies]
    
    # Filter stats and reorder to match lineup order
    stats_df = goalie_stats_df[goalie_stats_df['player'].isin(goalie_names)]
    
    # Create ordering dictionary mapping names to their position in lineup
    name_to_position = {name: idx for idx, name in enumerate(goalie_names)}
    
    # Sort stats DataFrame based on lineup order and reset index
    return stats_df.assign(
        lineup_order=stats_df['player'].map(name_to_position)
    ).sort_values('lineup_order').drop('lineup_order', axis=1).reset_index(drop=True)

In [24]:
lineup_goalie_stats = get_goalie_stats(my_lineup, goalie_stats_df)
lineup_goalie_stats

Unnamed: 0,player,team,gp,toi,shots_against,saves,goals_against,sv%,gaa,gsaa,xg_against,hd_shots_against,hd_saves,hd_goals_against,hdsv%,hdgaa,hdgsaa,md_shots_against,md_saves,md_goals_against,mdsv%,mdgaa,mdgsaa,ld_shots_against,ld_saves,ld_goals_against,ldsv%,ldgaa,ldgsaa,rush_attempts_against,rebound_attempts_against,avg._shot_distance,avg._goal_distance
0,Anthony Stolarz,TOR,14,640.55,308,294,14,0.955,1.31,13.17,26.33,65,56,9,0.862,0.84,2.58,77,72,5,0.935,0.47,4.19,146,146,0,1.0,0.0,4.81,38,62,39.19,16.93
1,Joseph Woll,TOR,8,391.166667,171,160,11,0.936,1.69,4.09,12.67,31,28,3,0.903,0.46,2.52,45,43,2,0.956,0.31,3.37,85,79,6,0.929,0.92,-3.2,15,29,40.03,31.55


In [25]:
yesterday

'2024-12-02'

In [26]:
get_most_recent_game_id('TOR', '2024-11-21')

2024020301

In [27]:
from game_utils import get_game_boxscore, display_boxscore

temp_data = get_game_boxscore(2024020274)

In [28]:
def extract_team_lineup(team: str, reference_date: Optional[str] = None) -> Lineup:
    """
    Extracts the most recent lineup for the specified team based on the latest game data.

    This function performs the following steps:
        1. Determines the reference date (defaults to yesterday if not provided).
        2. Retrieves the most recent game ID for the team using `get_most_recent_game_id`.
        3. Fetches the game boxscore data using `get_game_boxscore`.
        4. Processes the boxscore to obtain skaters and goalies using `display_boxscore`.
        5. Constructs and returns a `Lineup` object populated with the team's players.

    Args:
        team (str): The three-letter team code (e.g., 'TOR').
        reference_date (Optional[str]): The reference date in 'YYYY-MM-DD' format. Defaults to yesterday's date.

    Returns:
        Lineup: A `Lineup` object containing the team's players from the most recent game.

    Raises:
        ValueError: If no recent game is found for the team or if the team is not part of the retrieved game.
    """
    # Step 1: Determine the reference date
    if reference_date is None:
        today_datetime = datetime.now()
        yesterday_datetime = today_datetime - timedelta(days=1, hours=6)  # Adjust for UTC offset if necessary
        reference_date = yesterday_datetime.strftime('%Y-%m-%d')

    # Step 2: Retrieve the most recent game ID for the team
    game_id = get_most_recent_game_id(team, reference_date)
    if game_id is None:
        raise ValueError(f"No recent game found for team '{team}' before {reference_date}.")

    # Print the game_id
    print(f"Game ID: {game_id}")

    # Step 3: Fetch the game boxscore data
    game_data = get_game_boxscore(game_id)

    # Step 4: Process the boxscore to obtain skaters and goalies
    away_skaters, away_goalies, home_skaters, home_goalies = display_boxscore(game_data)

    # Extract team abbrevs to determine if the team is home or away
    away_team_code = game_data.get('awayTeam', {}).get('abbrev')
    home_team_code = game_data.get('homeTeam', {}).get('abbrev')

    if not away_team_code or not home_team_code:
        raise ValueError("Team abbreviations not found in game data.")

    if team.upper() == away_team_code.upper():
        team_side = 'Away'
        skaters = away_skaters
        goalies = away_goalies
    elif team.upper() == home_team_code.upper():
        team_side = 'Home'
        skaters = home_skaters
        goalies = home_goalies
    else:
        raise ValueError(f"Team '{team}' not found in game ID {game_id}.")

    # Step 5: Construct the Lineup object
    lineup = Lineup(name=f"{team.upper()} Lineup from Game {game_id}")

    # Add Skaters to the Lineup
    for _, skater in skaters.iterrows():
        try:
            position_enum = Position(skater['position'])  # Convert to Position Enum
        except ValueError:
            print(f"Invalid position '{skater['position']}' for player '{skater['name']}'. Skipping.")
            continue

        player = Player(
            player_id=skater['playerId'],
            name=get_player_full_name(skater['playerId'], db_config),
            team=team.upper(),
            position=position_enum
        )

        # Add player to the appropriate category in the lineup
        if player.position.category == 'F':
            try:
                empty_slot = next(i for i, p in enumerate(lineup.forwards) if p is None)
                lineup.add_forward(player, empty_slot)
            except StopIteration:
                print(f"No available forward slot to add player '{player.name}'.")
        elif player.position.category == 'D':
            try:
                empty_slot = next(i for i, p in enumerate(lineup.defense) if p is None)
                lineup.add_defense(player, empty_slot)
            except StopIteration:
                print(f"No available defense slot to add player '{player.name}'.")
        else:
            print(f"Player '{player.name}' has an unrecognized category '{player.position.category}'. Skipping.")

    # Add Goalies to the Lineup
    for _, goalie in goalies.iterrows():
        player = Player(
            player_id=goalie['playerId'],
            name=get_player_full_name(goalie['playerId'], db_config),
            team=team.upper(),
            position=Position.G
        )
        try:
            empty_slot = next(i for i, p in enumerate(lineup.goalies) if p is None)
            lineup.set_goalie(player, empty_slot)
        except StopIteration:
            print(f"No available goalie slot to add player '{player.name}'.")

    return lineup

In [29]:
chicago_lineup = extract_team_lineup('CHI', '2024-11-22')

Game ID: 2024020313


INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Ryan Donato' for player_id 8477987.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Craig Smith' for player_id 8475225.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Jason Dickinson' for player_id 8477450.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.


Added player 'Ryan Donato' to Forwards slot 1.
Added player 'Craig Smith' to Forwards slot 2.
Added player 'Jason Dickinson' to Forwards slot 3.


INFO:db_utils:Retrieved full name 'Nick Foligno' for player_id 8473422.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Joey Anderson' for player_id 8479315.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Tyler Bertuzzi' for player_id 8477479.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.


Added player 'Nick Foligno' to Forwards slot 4.
Added player 'Joey Anderson' to Forwards slot 5.
Added player 'Tyler Bertuzzi' to Forwards slot 6.


INFO:db_utils:Retrieved full name 'Taylor Hall' for player_id 8475791.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Lukas Reichel' for player_id 8482117.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Patrick Maroon' for player_id 8474034.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.


Added player 'Taylor Hall' to Forwards slot 7.
Added player 'Lukas Reichel' to Forwards slot 8.
Added player 'Patrick Maroon' to Forwards slot 9.


INFO:db_utils:Retrieved full name 'Teuvo Teravainen' for player_id 8476882.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Ilya Mikheyev' for player_id 8481624.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Connor Bedard' for player_id 8484144.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Connor Murphy' for player_id 8476473.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Alec Martinez' for player_id 8474166.
INFO:db_utils:Database connection closed.


Added player 'Teuvo Teravainen' to Forwards slot 10.
Added player 'Ilya Mikheyev' to Forwards slot 11.
Added player 'Connor Bedard' to Forwards slot 12.
Added player 'Connor Murphy' to Defense slot 1.
Added player 'Alec Martinez' to Defense slot 2.


INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Nolan Allan' for player_id 8482700.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Wyatt Kaiser' for player_id 8482176.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Alex Vlasic' for player_id 8481568.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'TJ Brodie' for player_id 8474673.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Petr Mrazek' for player_id 8475852.
INFO:db_utils:Database connection closed.


Added player 'Nolan Allan' to Defense slot 3.
Added player 'Wyatt Kaiser' to Defense slot 4.
Added player 'Alex Vlasic' to Defense slot 5.
Added player 'TJ Brodie' to Defense slot 6.
Added player 'Petr Mrazek' to Goalies slot 1.


INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Arvid Soderblom' for player_id 8482821.
INFO:db_utils:Database connection closed.


Added player 'Arvid Soderblom' to Goalies slot 2.


In [30]:
lineup_skater_stats = get_skater_stats(chicago_lineup, player_stats_df)
lineup_skater_stats

Unnamed: 0,player,team,position,gp,toi,goals,total_assists,first_assists,second_assists,total_points,ipp,shots,sh%,ixg,icf,iff,iscf,ihdcf,rush_attempts,rebounds_created,pim,total_penalties,minor,major,misconduct,penalties_drawn,giveaways,takeaways,hits,hits_taken,shots_blocked,faceoffs_won,faceoffs_lost,faceoffs_%
0,Ryan Donato,CHI,C,23,290.8,8,1,1,0,9,90.0,50,16.0,5.32,88,68,42,20,3,6,6,3,3,0,0,2,6,5,42,21,5,52,57,47.71
1,Craig Smith,CHI,C,23,235.25,5,4,4,0,9,75.0,44,11.36,2.78,66,54,30,12,3,8,16,4,3,0,1,3,13,6,18,11,14,2,7,22.22
2,Jason Dickinson,CHI,C,25,310.2,4,2,2,0,6,66.67,19,21.05,2.14,57,31,29,12,1,5,12,6,6,0,0,3,10,9,33,25,18,131,142,47.99
3,Nick Foligno,CHI,L,25,314.916667,4,2,1,1,6,60.0,25,16.0,3.03,45,36,33,19,1,8,9,3,2,1,0,7,10,3,72,18,9,90,86,51.14
4,Joey Anderson,CHI,R,12,150.833333,0,1,1,0,1,33.33,11,0.0,1.07,20,13,10,5,1,2,0,0,0,0,0,1,3,6,8,16,7,0,0,-
5,Tyler Bertuzzi,CHI,L,25,296.85,1,3,3,0,4,40.0,18,5.56,2.73,42,33,21,13,2,1,6,3,3,0,0,6,10,7,17,8,10,0,0,-
6,Taylor Hall,CHI,L,24,317.733333,4,3,1,2,7,70.0,43,9.3,3.37,82,57,41,14,4,2,8,4,4,0,0,4,15,13,15,29,18,11,20,35.48
7,Lukas Reichel,CHI,L,21,219.0,3,5,4,1,8,88.89,31,9.68,3.11,45,37,28,14,3,3,4,2,2,0,0,1,3,5,7,12,12,38,81,31.93
8,Patrick Maroon,CHI,L,22,228.716667,1,7,5,2,8,80.0,18,5.56,2.17,34,30,17,10,4,2,18,6,4,2,0,3,9,2,40,13,2,9,9,50.00
9,Teuvo Teravainen,CHI,C,25,318.95,2,2,1,1,4,44.44,35,5.71,1.84,56,43,23,3,4,3,4,2,2,0,0,2,22,5,2,5,9,5,2,71.43


In [31]:
lineup_goalie_stats = get_goalie_stats(chicago_lineup, goalie_stats_df)
lineup_goalie_stats

Unnamed: 0,player,team,gp,toi,shots_against,saves,goals_against,sv%,gaa,gsaa,xg_against,hd_shots_against,hd_saves,hd_goals_against,hdsv%,hdgaa,hdgsaa,md_shots_against,md_saves,md_goals_against,mdsv%,mdgaa,mdgsaa,ld_shots_against,ld_saves,ld_goals_against,ldsv%,ldgaa,ldgsaa,rush_attempts_against,rebound_attempts_against,avg._shot_distance,avg._goal_distance
0,Petr Mrazek,CHI,18,878.583333,428,393,35,0.918,2.39,2.76,38.98,120,98,22,0.817,1.5,-0.62,90,83,7,0.922,0.48,3.74,191,185,6,0.969,0.41,0.3,38,104,35.91,19.49
1,Arvid Soderblom,CHI,7,350.483333,170,157,13,0.924,2.23,2.0,14.55,43,36,7,0.837,1.2,0.66,43,39,4,0.907,0.68,1.13,72,70,2,0.972,0.34,0.37,16,34,37.35,21.92


In [32]:
def calculate_min_percentage(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates the average time on ice per game as a percentage of total game time (60 minutes).
    
    Args:
        df (pd.DataFrame): DataFrame containing 'toi' and 'gp' columns
        
    Returns:
        pd.DataFrame: Original DataFrame with new 'min%' column added
    """
    df_copy = df.copy()
    df_copy['min%'] = (df_copy['toi'] / df_copy['gp'] / 300 * 100).round(2)
    return df_copy

# # Apply the function to lineup_player_stats
# lineup_player_stats = calculate_min_percentage(lineup_player_stats)
# lineup_player_stats

In [33]:
def sum_min_percentage(df: pd.DataFrame) -> float:
    """
    Calculates the sum of the 'min%' column in the given DataFrame.
    
    Args:
        df (pd.DataFrame): DataFrame containing the 'min%' column.
        
    Returns:
        float: The total sum of the 'min%' values.
        
    Raises:
        KeyError: If the 'min%' column is not present in the DataFrame.
    """
    if 'min%' not in df.columns:
        raise KeyError("The DataFrame does not contain a 'min%' column.")
    
    total_min_percentage = df['min%'].sum()
    return total_min_percentage

# Example usage:
# total_min_percentage = sum_min_percentage(lineup_player_stats)
# print(f"Total min%: {total_min_percentage}")

In [34]:
def calculate_adj_min(df: pd.DataFrame, total_min_percentage: float) -> None:
    """
    Calculates the adjusted minimum (adj_min) for each player based on their min% and the total min%.
    
    The formula used is:
        adj_min = (min% / total_min_percentage) * 300
    
    Args:
        df (pd.DataFrame): DataFrame containing the 'min%' column.
        total_min_percentage (float): The total sum of the 'min%' column.
        
    Raises:
        KeyError: If the 'min%' column is not present in the DataFrame.
        ValueError: If total_min_percentage is not a positive number.
    """
    if 'min%' not in df.columns:
        raise KeyError("The DataFrame does not contain a 'min%' column.")
    
    if total_min_percentage <= 0:
        raise ValueError("total_min_percentage must be a positive number.")
    
    # Calculate and append the 'adj_min' column
    df['adj_min'] = ((df['min%'] / total_min_percentage) * 300).round(2)

# # Example usage:
# calculate_adj_min(lineup_player_stats, total_min_percentage)
# print(lineup_player_stats[['player', 'min%', 'adj_min']])

In [35]:
def calculate_ixg_per_60(df: pd.DataFrame) -> None:
    """
    Calculates the expected goals per 60 minutes (ixg_per_60) for each player.

    The formula used is:
        ixg_per_60 = (ixg / toi) * 60

    Args:
        df (pd.DataFrame): DataFrame containing the 'ixg' and 'toi' columns.
        
    Raises:
        KeyError: If either 'ixg' or 'toi' columns are not present in the DataFrame.
        ValueError: If 'toi' contains zero or negative values.
    """
    # Check if required columns exist
    required_columns = {'ixg', 'toi'}
    missing_columns = required_columns - set(df.columns)
    if missing_columns:
        raise KeyError(f"The DataFrame is missing the following required columns: {', '.join(missing_columns)}")
    
    # Check for non-positive 'toi' values to avoid division by zero or negative scaling
    if (df['toi'] <= 0).any():
        raise ValueError("The 'toi' column contains zero or negative values, which are invalid for calculating ixg_per_60.")
    
    # Calculate ixg_per_60 and append as a new column
    df['ixg_per_60'] = (df['ixg'] / df['toi']) * 60
    
    # Optionally, round the values for better readability
    df['ixg_per_60'] = df['ixg_per_60'].round(2)

# Example usage:
# calculate_ixg_per_60(lineup_player_stats)
# print(lineup_player_stats[['player', 'ixg', 'toi', 'ixg_per_60']])

In [36]:
# Define file paths to load the model and transformer
model_filepath = 'models/polynomial_model_degree_1.pkl'
poly_filepath = 'models/polynomial_features_degree_1.pkl'

# Load the regression model
with open(model_filepath, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
print(f"Model loaded from {model_filepath}")

# Load the PolynomialFeatures transformer
with open(poly_filepath, 'rb') as poly_file:
    loaded_poly = pickle.load(poly_file)
print(f"PolynomialFeatures transformer loaded from {poly_filepath}")

# Function to make predictions using the loaded model and transformer
def predict_gpm(new_ixg60_value, model, poly, x_col='ixg60'):
    """
    Predicts 'gpm' using the loaded model and polynomial transformer.
    
    Parameters:
        new_ixg60_value (float): The new ixg60 value for prediction.
        model (RegressionResults): The loaded regression model.
        poly (PolynomialFeatures): The loaded polynomial features transformer.
        x_col (str): The name of the independent variable column. Defaults to 'ixg60'.
        
    Returns:
        predicted_gpm (float): The predicted gpm value.
    """
    # Prepare the input data
    X_new = np.array([[new_ixg60_value]])
    X_new_poly = poly.transform(X_new)
    X_new_poly_const = sm.add_constant(X_new_poly, has_constant='add')
    
    # Create DataFrame with appropriate column names
    feature_names = ['const'] + poly.get_feature_names_out([x_col]).tolist()
    new_data = pd.DataFrame(X_new_poly_const, columns=feature_names)
    
    # Predict
    predicted_gpm = model.predict(new_data)
    return predicted_gpm.iloc[0]

# Example: Predicting 'gpm' for a new ixg60 value
# new_ixg60_value = 50
# predicted_gpm = predict_gpm(new_ixg60_value, loaded_model, loaded_poly)
# print(f"Predicted GPM for ixg60={new_ixg60_value}: {predicted_gpm:.4f}")

Model loaded from models/polynomial_model_degree_1.pkl
PolynomialFeatures transformer loaded from models/polynomial_features_degree_1.pkl


In [37]:
# Function to predict GPM for each player and add it to the DataFrame
def add_gpm_to_lineup(lineup_df, model, poly):
    """
    Adds a 'gpm' column to the lineup_player_stats DataFrame using the predict_gpm function.
    
    Args:
        lineup_df (pd.DataFrame): DataFrame containing 'ixg_per_60' column.
        model: Loaded regression model.
        poly: Loaded PolynomialFeatures transformer.
        
    Returns:
        pd.DataFrame: Updated DataFrame with 'gpm' column added.
    """
    # Define a helper function to handle potential missing or invalid values
    def safe_predict(ixg_per_60):
        if pd.isna(ixg_per_60):
            return np.nan
        try:
            return predict_gpm(ixg_per_60, model, poly)
        except Exception as e:
            print(f"Error predicting GPM for ixg_per_60={ixg_per_60}: {e}")
            return np.nan
    
    # Apply the predict_gpm function to each 'ixg_per_60' value
    lineup_df['gpm'] = lineup_df['ixg_per_60'].apply(safe_predict)
    return lineup_df

# # Apply the function to add 'gpm' to your DataFrame
# lineup_player_stats = add_gpm_to_lineup(lineup_player_stats, loaded_model, loaded_poly)

# # Display the updated DataFrame with 'gpm'
# print(lineup_player_stats[['player', 'ixg_per_60', 'gpm']])

In [38]:
def calculate_x_goals(lineup_stats_df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates 'x_goals' by multiplying 'gpm' with 'adj_min' for each player.
    
    Args:
        lineup_stats_df (pd.DataFrame): DataFrame containing 'gpm' and 'adj_min' columns.
        
    Returns:
        pd.DataFrame: DataFrame with the new 'x_goals' column added.
        
    Raises:
        KeyError: If 'gpm' or 'adj_min' columns are not present in the DataFrame.
        TypeError: If 'gpm' or 'adj_min' contain non-numeric data.
    """
    # Check if required columns exist
    required_columns = {'gpm', 'adj_min'}
    missing_columns = required_columns - set(lineup_stats_df.columns)
    if missing_columns:
        raise KeyError(f"The DataFrame is missing the following required columns: {', '.join(missing_columns)}")
    
    # Check if 'gpm' and 'adj_min' are numeric
    if not pd.api.types.is_numeric_dtype(lineup_stats_df['gpm']):
        raise TypeError("'gpm' column must be numeric.")
    if not pd.api.types.is_numeric_dtype(lineup_stats_df['adj_min']):
        raise TypeError("'adj_min' column must be numeric.")
    
    # Calculate 'x_goals'
    lineup_stats_df = lineup_stats_df.copy()
    lineup_stats_df['x_goals'] = lineup_stats_df['gpm'] * lineup_stats_df['adj_min']
    
    return lineup_stats_df

In [39]:
def calculate_xg_against_per_60(goalie_stats_df: pd.DataFrame) -> float:
    """
    Calculate the expected goals against (xg_against) per 60 minutes.

    Args:
        goalie_stats_df (pd.DataFrame): DataFrame containing goalie statistics with 'xg_against' and 'toi' columns.

    Returns:
        float: The league average xg_against per 60 minutes.

    Raises:
        KeyError: If required columns are missing from the DataFrame.
        ValueError: If no valid goalies with non-zero 'toi' are found.
    """
    # Ensure required columns are present
    required_columns = {'xg_against', 'toi'}
    missing_columns = required_columns - set(goalie_stats_df.columns)
    if missing_columns:
        raise KeyError(f"Missing columns in goalie_stats_df: {', '.join(missing_columns)}")

    # Drop rows with missing or zero 'toi' to avoid division errors
    valid_goalies = goalie_stats_df.dropna(subset=['xg_against', 'toi'])
    valid_goalies = valid_goalies[valid_goalies['toi'] > 0]

    if valid_goalies.empty:
        raise ValueError("No valid goalies with non-zero 'toi' found in goalie_stats_df.")

    # Calculate xg against per 60 minutes for each goalie
    valid_goalies['xg_against_per_60'] = (valid_goalies['xg_against'] / valid_goalies['toi']) * 60

    return valid_goalies

In [40]:
def calculate_league_avg_xg_against_per_60(goalie_stats_df: pd.DataFrame) -> float:
    """
    Calculate the league average expected goals against (xg_against) per 60 minutes.

    Args:
        goalie_stats_df (pd.DataFrame): DataFrame containing goalie statistics with 'xg_against' and 'toi' columns.

    Returns:
        float: The league average xg_against per 60 minutes.

    Raises:
        KeyError: If required columns are missing from the DataFrame.
        ValueError: If no valid goalies with non-zero 'toi' are found.
    """
    # Ensure required columns are present
    required_columns = {'xg_against', 'toi'}
    missing_columns = required_columns - set(goalie_stats_df.columns)
    if missing_columns:
        raise KeyError(f"Missing columns in goalie_stats_df: {', '.join(missing_columns)}")

    # Drop rows with missing or zero 'toi' to avoid division errors
    valid_goalies = goalie_stats_df.dropna(subset=['xg_against', 'toi'])
    valid_goalies = valid_goalies[valid_goalies['toi'] > 0]

    if valid_goalies.empty:
        raise ValueError("No valid goalies with non-zero 'toi' found in goalie_stats_df.")

    # Calculate xg against per 60 minutes for each goalie
    valid_goalies['xg_against_per_60'] = (valid_goalies['xg_against'] / valid_goalies['toi']) * 60

    # Calculate the league average
    league_avg_xg_against_per_60 = valid_goalies['xg_against_per_60'].mean()

    return league_avg_xg_against_per_60

In [41]:
def calculate_xg_against_adj_percentage(lineup_goalie_stats, goalie_avg_xg_against_per_60):
    """
    Calculate the expected goals against adjusted percentage.
    """
    lineup_goalie_stats['xg_against_adj_percentage'] = (lineup_goalie_stats['xg_against_per_60'] / goalie_avg_xg_against_per_60) * 100
    return lineup_goalie_stats

In [42]:
def process_team_lineup(input_date: str, team: str, db_config: Dict[str, str], model, poly) -> pd.DataFrame:
    """
    Processes the team's lineup for a given date and predicts GPM for each player.
    This function performs the following steps:
        1. Calls `nst_on_ice_scraper` for the input date minus one day.
        2. Extracts the team's lineup using `extract_team_lineup`.
        3. Retrieves skater statistics with `get_skater_stats`.
        4. Calculates `min%` using `calculate_min_percentage`.
        5. Computes `adj_min` using `calculate_adj_min`.
        6. Determines `ixg_per_60` using `calculate_ixg_per_60`.
        7. Predicts `gpm` using the loaded polynomial regression model.
        8. Calculcates x_goals by multiplying gpm by adj_min    
    Args:
        input_date (str): The reference date in 'YYYY-MM-DD' format.
        team (str): The three-letter team code (e.g., 'TOR').
        db_config (Dict[str, str]): Database configuration dictionary.
        model: Loaded regression model.
        poly: Loaded PolynomialFeatures transformer.
        
    Returns:
        pd.DataFrame: Updated DataFrame with calculated metrics and predicted GPM.
    
    Raises:
        ValueError: If any step in the data processing pipeline fails.
    """

    try:
        # Step 1: Calculate the date minus one day
        reference_datetime = datetime.strptime(input_date, '%Y-%m-%d') - timedelta(days=1)
        reference_date_str = reference_datetime.strftime('%Y-%m-%d')
        print(f"Fetching data for reference date: {reference_date_str}")

        # Step 2: Call nst_on_ice_scraper for player and goalie stats
        player_stats_df = nst_on_ice_scraper(
            fromseason=20242025,
            thruseason=20242025,
            startdate='',
            enddate=reference_date_str
        )
        goalie_stats_df = nst_on_ice_scraper(
            fromseason=20242025,
            thruseason=20242025,
            startdate='',
            enddate=reference_date_str,
            pos='g'
        )
        print("Player and goalie statistics fetched successfully.")

        # Step 3: Extract team lineup for the input date and team
        lineup = extract_team_lineup(team, input_date)
        print(f"Lineup extracted for team {team} on {input_date}.")

        # Step 4: Get skater statistics for the lineup
        lineup_skater_stats = get_skater_stats(lineup, player_stats_df)

        # Step 5: Calculate min%
        lineup_skater_stats = calculate_min_percentage(lineup_skater_stats)

        # Step 6: Calculate adj_min
        total_min_percentage = sum_min_percentage(lineup_skater_stats)
        calculate_adj_min(lineup_skater_stats, total_min_percentage)

        # Step 7: Calculate ixg_per_60
        calculate_ixg_per_60(lineup_skater_stats)

        # Step 8: Predict gpm using the polynomial regression model
        lineup_skater_stats = add_gpm_to_lineup(lineup_skater_stats, model, poly)

        # Step 9: Calculate x_goals
        lineup_skater_stats = calculate_x_goals(lineup_skater_stats)

        # Step 10: Get goalie stats for the lineup
        lineup_goalie_stats = get_goalie_stats(lineup, goalie_stats_df)

        # Step 11: Calculate xg_against_per_60
        lineup_goalie_stats = calculate_xg_against_per_60(lineup_goalie_stats)
        
        # Step 12: Calculate league average xg_against_per_60
        goalie_avg_xg_against_per_60 = calculate_league_avg_xg_against_per_60(goalie_stats_df)

        lineup_goalie_stats = calculate_xg_against_adj_percentage(lineup_goalie_stats, goalie_avg_xg_against_per_60)
        
        return lineup_skater_stats, lineup_goalie_stats

    except Exception as e:
        print(f"An error occurred during processing: {e}")
        raise

In [43]:
def load_models(model_filepath='models/polynomial_model_degree_1.pkl',
               poly_filepath='models/polynomial_features_degree_1.pkl'):
    """
    Loads the regression model and PolynomialFeatures transformer from the specified file paths.

    Args:
        model_filepath (str): Path to the saved regression model pickle file.
        poly_filepath (str): Path to the saved PolynomialFeatures transformer pickle file.

    Returns:
        tuple: A tuple containing the loaded regression model and PolynomialFeatures transformer.

    Raises:
        FileNotFoundError: If either of the specified files does not exist.
        pickle.UnpicklingError: If there is an error unpickling the files.
    """
    try:
        # Load the regression model
        with open(model_filepath, 'rb') as model_file:
            loaded_model = pickle.load(model_file)
        print(f"Model loaded from {model_filepath}")

        # Load the PolynomialFeatures transformer
        with open(poly_filepath, 'rb') as poly_file:
            loaded_poly = pickle.load(poly_file)
        print(f"PolynomialFeatures transformer loaded from {poly_filepath}")

        return loaded_model, loaded_poly

    except FileNotFoundError as fnf_error:
        print(f"Error: {fnf_error}")
        raise
    except pickle.UnpicklingError as pickle_error:
        print(f"Error loading pickle files: {pickle_error}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        raise

In [50]:
# Load the models using the new function
loaded_model, loaded_poly = load_models()

# Call the function with desired date and team
input_date = '2024-12-02'
team = 'TOR'
lineup_skater_stats, lineup_goalie_stats = process_team_lineup(input_date, team, db_config, loaded_model, loaded_poly)


Model loaded from models/polynomial_model_degree_1.pkl
PolynomialFeatures transformer loaded from models/polynomial_features_degree_1.pkl
Fetching data for reference date: 2024-12-01
Player and goalie statistics fetched successfully.
Game ID: 2024020378


INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Mitch Marner' for player_id 8478483.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Steven Lorentz' for player_id 8478904.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Matthew Knies' for player_id 8482720.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Connor Dewar' for player_id 8480980.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Pontus Holmberg' for player_id 8480995.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Auston Matthews' for player_id 8479318.
INFO:db_utils:Database connection closed.


Added player 'Mitch Marner' to Forwards slot 1.
Added player 'Steven Lorentz' to Forwards slot 2.
Added player 'Matthew Knies' to Forwards slot 3.
Added player 'Connor Dewar' to Forwards slot 4.
Added player 'Pontus Holmberg' to Forwards slot 5.
Added player 'Auston Matthews' to Forwards slot 6.


INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Fraser Minten' for player_id 8483489.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Nikita Grebenkin' for player_id 8483733.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'William Nylander' for player_id 8477939.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Nicholas Robertson' for player_id 8481582.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'John Tavares' for player_id 8475166.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Alex Nylander' for player_id 8479423.
INFO:db_utils:Database connection closed.
INFO:db_utils:Data

Added player 'Fraser Minten' to Forwards slot 7.
Added player 'Nikita Grebenkin' to Forwards slot 8.
Added player 'William Nylander' to Forwards slot 9.
Added player 'Nicholas Robertson' to Forwards slot 10.
Added player 'John Tavares' to Forwards slot 11.
Added player 'Alex Nylander' to Forwards slot 12.
Added player 'Simon Benoit' to Defense slot 1.


INFO:db_utils:Retrieved full name 'Chris Tanev' for player_id 8475690.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Jake McCabe' for player_id 8476931.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Conor Timmins' for player_id 8479982.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Morgan Rielly' for player_id 8476853.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Oliver Ekman-Larsson' for player_id 8475171.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.
INFO:db_utils:Retrieved full name 'Anthony Stolarz' for player_id 8476932.
INFO:db_utils:Database connection closed.
INFO:db_utils:Database connection established.


Added player 'Chris Tanev' to Defense slot 2.
Added player 'Jake McCabe' to Defense slot 3.
Added player 'Conor Timmins' to Defense slot 4.
Added player 'Morgan Rielly' to Defense slot 5.
Added player 'Oliver Ekman-Larsson' to Defense slot 6.
Added player 'Anthony Stolarz' to Goalies slot 1.


INFO:db_utils:Retrieved full name 'Joseph Woll' for player_id 8479361.
INFO:db_utils:Database connection closed.


Added player 'Joseph Woll' to Goalies slot 2.
Lineup extracted for team TOR on 2024-12-02.


In [51]:
lineup_skater_stats

Unnamed: 0,player,team,position,gp,toi,goals,total_assists,first_assists,second_assists,total_points,ipp,shots,sh%,ixg,icf,iff,iscf,ihdcf,rush_attempts,rebounds_created,pim,total_penalties,minor,major,misconduct,penalties_drawn,giveaways,takeaways,hits,hits_taken,shots_blocked,faceoffs_won,faceoffs_lost,faceoffs_%,min%,adj_min,ixg_per_60,gpm,x_goals
0,Mitch Marner,TOR,R,23,320.066667,3,9,5,4,12,80.00,35,8.57,3.16,65,41,37,10,3,9,2,1,1,0,0,3,21,11,10,9,9,1,3,25.00,4.64,18.15,0.59,0.009614,0.174491
1,Steven Lorentz,TOR,C,23,208.866667,2,1,0,1,3,50.00,18,11.11,2.82,45,32,19,10,1,3,0,0,0,0,0,3,5,6,55,21,18,5,6,45.45,3.03,11.85,0.81,0.01317,0.15607
2,Matthew Knies,TOR,L,21,283.083333,8,2,1,1,10,66.67,31,25.81,4.18,52,42,40,25,2,8,6,3,3,0,0,4,7,3,41,24,6,0,0,-,4.49,17.57,0.89,0.014464,0.254128
3,Connor Dewar,TOR,C,7,68.15,0,0,0,0,0,-,6,0.0,1.2,17,9,11,6,0,4,0,0,0,0,0,1,5,1,15,8,6,9,18,33.33,3.25,12.72,1.06,0.017212,0.218937
4,Pontus Holmberg,TOR,R,21,231.116667,0,3,1,2,3,75.00,14,0.0,1.34,38,23,19,8,1,1,10,5,5,0,0,5,8,3,8,28,9,43,39,52.44,3.67,14.36,0.35,0.005734,0.082338
5,Auston Matthews,TOR,C,14,191.616667,2,4,2,2,6,60.00,36,5.56,4.24,77,60,49,19,5,11,6,3,3,0,0,4,13,5,6,8,17,104,84,55.32,4.56,17.84,1.33,0.021577,0.384933
6,Fraser Minten,TOR,C,4,43.45,1,1,1,0,2,100.00,3,33.33,0.38,8,5,3,1,0,1,0,0,0,0,0,0,0,1,6,1,1,10,13,43.48,3.62,14.16,0.52,0.008482,0.120107
7,Nikita Grebenkin,TOR,R,4,34.583333,0,0,0,0,0,-,1,0.0,0.09,3,1,2,1,0,1,2,1,1,0,0,0,0,0,7,3,1,0,0,-,2.88,11.27,0.16,0.002662,0.030003
8,William Nylander,TOR,R,23,325.066667,8,4,3,1,12,66.67,59,13.56,5.11,106,74,46,22,6,6,8,3,3,0,0,2,12,5,1,17,6,32,29,52.46,4.71,18.43,0.94,0.015272,0.281464
9,Nicholas Robertson,TOR,L,20,212.083333,2,0,0,0,2,40.00,26,7.69,1.75,54,40,21,7,1,2,4,2,2,0,0,9,4,6,14,15,9,1,0,100.00,3.53,13.81,0.5,0.008159,0.112673


In [52]:
lineup_goalie_stats

Unnamed: 0,player,team,gp,toi,shots_against,saves,goals_against,sv%,gaa,gsaa,xg_against,hd_shots_against,hd_saves,hd_goals_against,hdsv%,hdgaa,hdgsaa,md_shots_against,md_saves,md_goals_against,mdsv%,mdgaa,mdgsaa,ld_shots_against,ld_saves,ld_goals_against,ldsv%,ldgaa,ldgsaa,rush_attempts_against,rebound_attempts_against,avg._shot_distance,avg._goal_distance,xg_against_per_60,xg_against_adj_percentage
0,Anthony Stolarz,TOR,13,592.8,282,269,13,0.954,1.32,11.93,23.5,56,48,8,0.857,0.81,2.02,73,68,5,0.932,0.51,3.71,134,134,0,1.0,0.0,4.43,34,56,39.54,17.77,2.378543,96.47843
1,Joseph Woll,TOR,8,391.166667,171,160,11,0.936,1.69,4.12,12.67,31,28,3,0.903,0.46,2.54,45,43,2,0.956,0.31,3.37,85,79,6,0.929,0.92,-3.19,15,29,40.03,31.55,1.943417,78.828876


In [160]:
def attach_xgoals_as_combined_object(lineup_stats_df: pd.DataFrame) -> dict:
    """
    Returns a dictionary containing the lineup stats DataFrame and the x_goals sum.
    
    Args:
        lineup_stats_df (pd.DataFrame): DataFrame containing player statistics, including 'x_goals'.
        
    Returns:
        dict: A dictionary with keys 'lineup_stats' and 'x_goals_sum'.
    """
    x_goals_sum = lineup_stats_df['x_goals'].sum()
    return {
        'lineup_stats': lineup_stats_df,
        'x_goals_sum': x_goals_sum
    }

# Usage Example:
final_lineup_stats_combined = attach_xgoals_as_combined_object(final_lineup_stats)
final_lineup_stats_combined['lineup_stats']
print(f"Total x_goals: {final_lineup_stats_combined['x_goals_sum']}")

Total x_goals: 2.4442916883969903


In [109]:
goalie_stats_df

Unnamed: 0,player,team,gp,toi,shots_against,saves,goals_against,sv%,gaa,gsaa,xg_against,hd_shots_against,hd_saves,hd_goals_against,hdsv%,hdgaa,hdgsaa,md_shots_against,md_saves,md_goals_against,mdsv%,mdgaa,mdgsaa,ld_shots_against,ld_saves,ld_goals_against,ldsv%,ldgaa,ldgsaa,rush_attempts_against,rebound_attempts_against,avg._shot_distance,avg._goal_distance
0,Marc-Andre Fleury,MIN,6,299.833333,138,128,10,0.928,2.00,2.17,8.50,22,18,4,0.818,0.80,-0.08,30,25,5,0.833,1.00,-1.42,77,77,0,1.000,0.00,2.54,9,17,39.61,30.40
1,Jonathan Quick,NYR,7,319.583333,166,158,8,0.952,1.50,6.64,13.86,44,42,2,0.955,0.38,5.84,46,41,5,0.891,0.94,0.49,72,71,1,0.986,0.19,1.37,13,31,33.71,23.38
2,James Reimer,"ANA, BUF",4,195.983333,89,81,8,0.910,2.45,-0.15,8.70,11,6,5,0.545,1.53,-3.04,41,38,3,0.927,0.92,1.89,36,36,0,1.000,0.00,1.19,6,17,32.03,16.88
3,Semyon Varlamov,NYI,10,499.450000,211,192,19,0.910,2.28,-0.39,19.67,61,53,8,0.869,0.96,2.87,50,44,6,0.880,0.72,-0.03,82,77,5,0.939,0.60,-2.30,16,37,35.72,20.05
4,Jacob Markstrom,N.J,19,915.400000,406,368,38,0.906,2.49,-2.18,33.69,90,70,20,0.778,1.31,-3.97,114,102,12,0.895,0.79,1.61,171,165,6,0.965,0.39,-0.36,55,75,40.54,21.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,Aleksei Kolosov,PHI,7,347.200000,145,130,15,0.897,2.59,-2.21,15.37,36,29,7,0.806,1.21,-0.59,42,36,6,0.857,1.04,-0.99,60,58,2,0.967,0.35,-0.02,16,41,33.49,18.07
75,Arvid Soderblom,CHI,7,350.483333,170,157,13,0.924,2.23,2.00,14.55,43,36,7,0.837,1.20,0.66,43,39,4,0.907,0.68,1.13,72,70,2,0.972,0.34,0.37,16,34,37.35,21.92
76,Jaxson Stauber,UTA,1,47.016667,22,22,0,1.000,0.00,1.94,2.39,6,6,0,1.000,0.00,1.07,5,5,0,1.000,0.00,0.60,10,10,0,1.000,0.00,0.33,3,5,36.64,-
77,Dennis Hildeby,TOR,2,88.400000,42,35,7,0.833,4.75,-3.29,4.29,8,5,3,0.625,2.04,-1.57,18,14,4,0.778,2.71,-1.85,16,16,0,1.000,0.00,0.53,4,12,31.90,21.57
