In [39]:
import pandas as pd
from io import StringIO
import os
from datetime import datetime, timedelta
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Union, Set, Any
from enum import Enum
import pickle
import pandas as pd
import statsmodels.api as sm
import numpy as np
import logging

from nst_scraper import nst_on_ice_scraper, nst_team_on_ice_scraper
from db_utils.nhl_db_utils import update_player_db, check_last_update, append_player_ids, get_player_full_name
from team_utils import get_most_recent_game_id, get_fullname_by_tricode
from game_utils import get_game_boxscore, display_boxscore
from pbp_utils import get_matchup_games
from db_utils.prop_odds_db_utils import get_player_shots_ou_odds

from dotenv import load_dotenv

pd.set_option('display.max_columns', None)
np.set_printoptions(legacy='1.25')

In [40]:
# player_stats_df = nst_on_ice_scraper(fromseason=20242025, thruseason=20242025, startdate='', enddate='', rate='y')
# player_stats_df.head()

In [41]:
# goalie_stats_df = nst_on_ice_scraper(fromseason=20242025, thruseason=20242025, startdate='', enddate='', pos='g', rate='y', stdoi='g')
# goalie_stats_df.head()

In [42]:
class Position(Enum):
    C = 'C'
    L = 'L'
    R = 'R'
    D = 'D'
    G = 'G'
    
    @property
    def category(self) -> str:
        if self in {Position.C, Position.L, Position.R}:
            return 'F'
        elif self == Position.D:
            return 'D'
        elif self == Position.G:
            return 'G'
    
    def __str__(self) -> str:
        return self.value

In [43]:
@dataclass
class Player:
    name: str
    team: str
    position: Position
    player_id: Optional[int] = None

    def __str__(self) -> str:
        """
        Returns a string representation of the player.
        """
        return f"{self.name} ({self.position}) - {self.team}"

    def to_dict(self) -> Dict[str, Optional[str]]:
        """
        Converts the Player instance into a dictionary.
        
        Returns:
            Dict[str, Optional[str]]: A dictionary representation of the player.
        """
        return {
            'player_id': self.player_id,
            'name': self.name,
            'team': self.team,
            'position': self.position.value
        }

In [44]:
@dataclass
class Lineup:
    name: str
    forwards: List[Optional[Player]] = field(default_factory=lambda: [None] * 12)
    defense: List[Optional[Player]] = field(default_factory=lambda: [None] * 6)
    goalies: List[Optional[Player]] = field(default_factory=lambda: [None] * 2)
    
    ALLOWED_FORWARD_CATEGORIES = {'F'}
    ALLOWED_DEFENSE_CATEGORY = 'D'
    ALLOWED_GOALIE_CATEGORY = 'G'
    
    def __post_init__(self):
        self.validate_lineup_size()
    
    def validate_lineup_size(self):
        total_players = sum(player is not None for player in self.forwards + self.defense + self.goalies)
        if total_players > 20:
            raise ValueError(f"Total number of players ({total_players}) exceeds the hard limit of 20.")
    
    def add_player(
        self,
        category: str,
        player: Player,
        slot: int,
        allowed_categories: Union[str, Set[str]],
        max_slots: int
    ):
        """
        Adds a player to the specified category and slot after validating their position category.
        Ensures that the total number of players does not exceed 20.
        """
        if isinstance(allowed_categories, str):
            allowed_categories = {allowed_categories}
        elif isinstance(allowed_categories, set):
            allowed_categories = allowed_categories
        else:
            raise TypeError("allowed_categories must be a string or a set of strings.")
        
        if player.position.category not in allowed_categories:
            raise ValueError(
                f"Cannot add player '{player.name}' with position '{player.position.value}' "
                f"to {category}. Allowed categories: {', '.join(allowed_categories)}."
            )
        
        if not 0 <= slot < max_slots:
            raise IndexError(f"{category.capitalize()} slot must be between 0 and {max_slots - 1}.")
        
        current_category = getattr(self, category)
        if current_category[slot]:
            existing_player = current_category[slot].name
            print(f"Warning: Slot {slot + 1} in {category} is already occupied by '{existing_player}'. Overwriting.")
        
        # Check total players before adding
        total_players = sum(player is not None for player in self.forwards + self.defense + self.goalies)
        if current_category[slot] is None and total_players >= 20:
            raise ValueError("Cannot add more players. The lineup has reached the hard limit of 20 players.")
        
        current_category[slot] = player
        setattr(self, category, current_category)
        print(f"Added player '{player.name}' to {category.capitalize()} slot {slot + 1}.")
    
    def add_forward(self, player: Player, slot: int):
        self.add_player(
            category='forwards',
            player=player,
            slot=slot,
            allowed_categories=self.ALLOWED_FORWARD_CATEGORIES,
            max_slots=len(self.forwards)
        )
    
    def add_defense(self, player: Player, slot: int):
        self.add_player(
            category='defense',
            player=player,
            slot=slot,
            allowed_categories={self.ALLOWED_DEFENSE_CATEGORY},
            max_slots=len(self.defense)
        )
    
    def set_goalie(self, player: Player, slot: int):
        self.add_player(
            category='goalies',
            player=player,
            slot=slot,
            allowed_categories={self.ALLOWED_GOALIE_CATEGORY},
            max_slots=len(self.goalies)
        )
    
    def adjust_slots(self, category: str, delta: int):
        """
        Adjusts the number of slots in the specified category by delta.
        Allows ±1 adjustment only.
        
        Args:
            category (str): The category to adjust ('forwards' or 'defense').
            delta (int): The change in number of slots (+1 or -1).
        """
        if category not in {'forwards', 'defense'}:
            raise ValueError("Can only adjust 'forwards' or 'defense' categories.")
        if delta not in {-1, 1}:
            raise ValueError("Delta must be either +1 or -1.")
        
        current_slots = getattr(self, category)
        new_slot_count = len(current_slots) + delta
        
        if category == 'forwards':
            if not (11 <= new_slot_count <= 13):
                raise ValueError("Number of forwards can only vary by ±1 from the default of 12.")
        elif category == 'defense':
            if not (5 <= new_slot_count <= 7):
                raise ValueError("Number of defensemen can only vary by ±1 from the default of 6.")
        
        if delta == 1:
            current_slots.append(None)
        elif delta == -1:
            removed_player = current_slots.pop()
            if removed_player:
                print(f"Removed player '{removed_player.name}' from {category}.")
        
        setattr(self, category, current_slots)
        print(f"Adjusted {category} slots to {len(getattr(self, category))}.")
        self.validate_lineup_size()
    
    def display_lineup(self):
        """
        Prints the current lineup in a structured format.
        """
        print(f"Lineup: {self.name}\n")
        
        for category, title in [('forwards', 'Forwards'), ('defense', 'Defense'), ('goalies', 'Goalies')]:
            print(f"{title}:")
            for idx, player in enumerate(getattr(self, category), start=1):
                player_info = str(player) if player else 'Empty'
                print(f"  Slot {idx}: {player_info}")
            print()
    
    def to_dataframe(self) -> pd.DataFrame:
        """
        Converts the lineup into a pandas DataFrame.
        Conditionally includes player attributes if they are present.
        """
        data = []
        for category, pos in [('forwards', 'f'), ('defense', 'd'), ('goalies', 'g')]:
            for idx, player in enumerate(getattr(self, category), start=1):
                player_dict = {
                    'Position': f"{pos}{idx}",
                    'Player': player.name if player else 'Empty'
                }
                # Conditionally add 'player_id' if it exists
                if player and player.player_id is not None:
                    player_dict['Player ID'] = player.player_id
                data.append(player_dict)
        
        df = pd.DataFrame(data)
        
        # Optionally, remove columns where all values are NaN
        df.dropna(axis=1, how='all', inplace=True)
        
        return df
    
    def to_transposed_dataframe(self) -> pd.DataFrame:
        """
        Transposes the lineup DataFrame so that each column represents a position-slot combination
        and the row contains the corresponding player names.
        Conditionally includes additional player attributes if they are present.
        """
        df = self.to_dataframe()
        
        # Initialize dictionaries to hold player names and optional IDs
        player_data = {}
        id_data = {}
        
        for _, row in df.iterrows():
            pos = row['Position']
            player_name = row['Player']
            player_data[pos] = player_name
            
            # Handle 'Player ID' if it exists
            if 'Player ID' in row and pd.notna(row['Player ID']):
                id_data[f"{pos}_ID"] = row['Player ID']
        
        # Combine player names and IDs into a single dictionary
        transposed_data = {**player_data, **id_data}
        
        # Create the transposed DataFrame with a single row
        transposed_df = pd.DataFrame([transposed_data])
        
        return transposed_df

In [45]:
today_datetime= datetime.now()
yesterday_datetime = today_datetime - timedelta(days=1, hours=6) # UTC offset
yesterday = yesterday_datetime.strftime('%Y-%m-%d')
yesterday

'2024-12-20'

In [46]:
# Load environment variables from .env file
load_dotenv()

db_prefix = 'NHL_DB_'

# # Construct the database configuration dictionary
# db_prefix = {
#     'dbname': os.getenv(f'{db_prefix}NAME'),
#     'user': os.getenv(f'{db_prefix}USER'),
#     'password': os.getenv(f'{db_prefix}PASSWORD'),
#     'host': os.getenv(f'{db_prefix}HOST'),
#     'port': os.getenv(f'{db_prefix}PORT')
# }

In [47]:
# Check the last update time of the players database
last_update = check_last_update(db_prefix)

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Last database update was on: 2024-12-20
INFO:db_utils.base_utils:Database connection closed.


In [48]:
# Convert last_update to datetime
last_update_dt = datetime.strptime(last_update, '%Y-%m-%d')
today_dt = datetime.strptime(today_datetime.strftime('%Y-%m-%d'), '%Y-%m-%d')
yesterday_dt = datetime.strptime(yesterday, '%Y-%m-%d')

# Only update if last update was before yesterday
if last_update_dt not in [today_dt, yesterday_dt]:
    # Update the player database from last update to yesterday
    update_player_db(last_update, yesterday, db_prefix, skip_existing=True)
else:
    print(f"No need to update the player database. Last update was on: {last_update}")


No need to update the player database. Last update was on: 2024-12-20


In [49]:
def add_player_to_lineup(lineup: Lineup, player: Player, category: str):
    """
    Adds a player to the lineup in the specified category, handling potential errors.
    
    Args:
        lineup (Lineup): The lineup object.
        player (Player): The player to add.
        category (str): The category ('forwards', 'defense', 'goalies').
    """
    try:
        if category == 'forwards':
            slot = next(i for i, p in enumerate(lineup.forwards) if p is None)
            lineup.add_forward(player, slot)
        elif category == 'defense':
            slot = next(i for i, p in enumerate(lineup.defense) if p is None)
            lineup.add_defense(player, slot)
        elif category == 'goalies':
            slot = next(i for i, p in enumerate(lineup.goalies) if p is None)
            lineup.set_goalie(player, slot)
        else:
            print(f"Unknown category '{category}'.")
    except StopIteration:
        print(f"No available slots to add player '{player.name}' in category '{category}'.")
    except ValueError as ve:
        print(ve)
    except IndexError as ie:
        print(ie)

In [50]:
# TODO this function just creates a lineup from the player_list and goalie_list
def create_lineup(team) -> Lineup:
    """
    Creates and displays a lineup consisting of players from the specified team.
    
    Args:
        team (str): The team name to filter players.
    """
    # Creating two lineup objects
    lineup1 = Lineup("Lineup 1")
    
    # Adding forwards to lineup1
    forward_count = 0
    for player in player_list:
        if player.team == team:
            try:
                lineup1.add_forward(player, forward_count)
                forward_count += 1
                if forward_count >= 12:
                    break
            except ValueError as e:
                print(f"Skipping player '{player.name}': {e}")
            except IndexError as e:
                print(f"Skipping player '{player.name}': {e}")
        else:
            continue  # Proceed to the next player if not in the specified team
    
    # Adding defense to lineup1
    defense_count = 0
    for player in player_list:
        if player.team == team:
            try:
                lineup1.add_defense(player, defense_count)
                defense_count += 1
                if defense_count >= 6:
                    break
            except ValueError as e:
                print(f"Skipping player '{player.name}': {e}")
            except IndexError as e:
                print(f"Skipping player '{player.name}': {e}")
        else:
            continue  # Proceed to the next player if not in the specified team
    
    # Adding goalies to lineup1
    goalie_count = 0
    for goalie in goalie_list:
        if goalie.team != team:
            continue  # Proceed to the next goalie if not in the specified team
        if goalie_count >= 2:
            print("Maximum of two goalies have been assigned.")
            break
        try:
            lineup1.set_goalie(goalie, goalie_count)
            goalie_count += 1
        except ValueError as e:
            print(f"Skipping goalie '{goalie.name}': {e}")
        except IndexError as e:
            print(f"Skipping goalie '{goalie.name}': {e}")
    
    # Display the lineup
    # lineup1.display_lineup()
    return lineup1

# my_lineup = create_lineup('TOR')

In [79]:
def get_skater_stats(lineup: Lineup, player_stats_df: pd.DataFrame, filter: Optional[str] = None) -> pd.DataFrame:
    """
    Gets stats for players in the lineup, maintaining lineup order.
    
    Args:
        lineup (Lineup): The lineup containing players
        player_stats_df (pd.DataFrame): DataFrame with player statistics
        filter (str, optional): Type of filter to apply on the stats.
            Defaults to None, which keeps all stats. If 'shots', only shot-related statistics are kept.
        
    Returns:
        pd.DataFrame: Player statistics ordered according to lineup positions
    """
    # Apply filter if specified
    if filter == 'shots':
        # Define shot-related columns to keep
        shot_columns = ['player', 'team', 'position', 'gp', 'toi', 'toi/gp','shots/60', 'icf/60', 'iff/60'] 
        player_stats_df = player_stats_df[shot_columns]
    
    # Create ordered list of players (forwards then defense)
    players = []
    # Add forwards in order
    players.extend([p for p in lineup.forwards if p])
    # Add defense in order 
    players.extend([p for p in lineup.defense if p])
    
    # Create ordered list of player names
    player_names = [player.name for player in players]
    
    # Filter stats and reorder to match lineup order
    stats_df = player_stats_df[player_stats_df['player'].isin(player_names)]
    
    # Ensure all players are from the same team
    team_codes = stats_df['team'].unique()
    if len(team_codes) > 1:
        # Assuming lineup is for a single team, get the team from the first player in the lineup
        team_code = None
        for player in lineup.forwards + lineup.defense + lineup.goalies:
            if player:
                team_code = player.team
                break
        if team_code:
            stats_df = stats_df[stats_df['team'] == team_code]
        else:
            raise ValueError("No players found in lineup to determine team.")
    
    # Create ordering dictionary mapping names to their position in lineup
    name_to_position = {name: idx for idx, name in enumerate(player_names)}
    
    # Sort stats DataFrame based on lineup order and reset index
    return stats_df.assign(
        lineup_order=stats_df['player'].map(name_to_position)
    ).sort_values('lineup_order').drop('lineup_order', axis=1).reset_index(drop=True)

# lineup_player_stats = get_skater_stats(my_lineup, player_stats_df)

In [80]:
def get_goalie_stats(lineup: Lineup, goalie_stats_df: pd.DataFrame) -> pd.DataFrame:
    """
    Gets stats for goalies in the lineup, maintaining lineup order.
    
    Args:
        lineup (Lineup): The lineup containing goalies
        goalie_stats_df (pd.DataFrame): DataFrame with goalie statistics
        
    Returns:
        pd.DataFrame: Goalie statistics ordered according to lineup positions
    """
    # Create ordered list of goalies
    goalies = [goalie for goalie in lineup.goalies if goalie]
    goalie_names = [goalie.name for goalie in goalies]
    
    # Filter stats and reorder to match lineup order
    stats_df = goalie_stats_df[goalie_stats_df['player'].isin(goalie_names)]
    
    # Create ordering dictionary mapping names to their position in lineup
    name_to_position = {name: idx for idx, name in enumerate(goalie_names)}
    
    # Sort stats DataFrame based on lineup order and reset index
    return stats_df.assign(
        lineup_order=stats_df['player'].map(name_to_position)
    ).sort_values('lineup_order').drop('lineup_order', axis=1).reset_index(drop=True)

In [53]:
def extract_team_lineup(team: str, reference_date: Optional[str] = None) -> Lineup:
    """
    Extracts the most recent lineup for the specified team based on the latest game data.

    This function performs the following steps:
        1. Determines the reference date (defaults to yesterday if not provided).
        2. Retrieves the most recent game ID for the team using `get_most_recent_game_id`.
        3. Fetches the game boxscore data using `get_game_boxscore`.
        4. Processes the boxscore to obtain skaters and goalies using `display_boxscore`.
        5. Constructs and returns a `Lineup` object populated with the team's players.

    Args:
        team (str): The three-letter team code (e.g., 'TOR').
        reference_date (Optional[str]): The reference date in 'YYYY-MM-DD' format. Defaults to yesterday's date.

    Returns:
        Lineup: A `Lineup` object containing the team's players from the most recent game.

    Raises:
        ValueError: If no recent game is found for the team or if the team is not part of the retrieved game.
    """
    # Step 1: Determine the reference date
    if reference_date is None:
        today_datetime = datetime.now()
        yesterday_datetime = today_datetime - timedelta(days=1, hours=6)  # Adjust for UTC offset if necessary
        reference_date = yesterday_datetime.strftime('%Y-%m-%d')

    # Step 2: Retrieve the most recent game ID for the team
    game_id = get_most_recent_game_id(team, reference_date)
    if game_id is None:
        raise ValueError(f"No recent game found for team '{team}' before {reference_date}.")

    # Print the game_id
    print(f"Game ID: {game_id}")

    # Step 3: Fetch the game boxscore data
    game_data = get_game_boxscore(game_id, clean=False)

    # Step 4: Process the boxscore to obtain skaters and goalies
    away_skaters, away_goalies, home_skaters, home_goalies = display_boxscore(game_data)

    # Extract team abbrevs to determine if the team is home or away
    away_team_code = game_data.get('awayTeam', {}).get('abbrev')
    home_team_code = game_data.get('homeTeam', {}).get('abbrev')

    if not away_team_code or not home_team_code:
        raise ValueError("Team abbreviations not found in game data.")

    if team.upper() == away_team_code.upper():
        team_side = 'Away'
        skaters = away_skaters
        goalies = away_goalies
    elif team.upper() == home_team_code.upper():
        team_side = 'Home'
        skaters = home_skaters
        goalies = home_goalies
    else:
        raise ValueError(f"Team '{team}' not found in game ID {game_id}.")

    # Step 5: Construct the Lineup object
    lineup = Lineup(name=f"{team.upper()} Lineup from Game {game_id}")

    # Add Skaters to the Lineup
    for _, skater in skaters.iterrows():
        try:
            position_enum = Position(skater['position'])  # Convert to Position Enum
        except ValueError:
            print(f"Invalid position '{skater['position']}' for player '{skater['name']}'. Skipping.")
            continue

        player = Player(
            player_id=skater['playerId'],
            name=get_player_full_name(skater['playerId'], db_prefix, suppress_log=True),
            team=team.upper(),
            position=position_enum
        )

        # Add player to the appropriate category in the lineup
        if player.position.category == 'F':
            try:
                empty_slot = next(i for i, p in enumerate(lineup.forwards) if p is None)
                lineup.add_forward(player, empty_slot)
            except StopIteration:
                print(f"No available forward slot to add player '{player.name}'.")
        elif player.position.category == 'D':
            try:
                empty_slot = next(i for i, p in enumerate(lineup.defense) if p is None)
                lineup.add_defense(player, empty_slot)
            except StopIteration:
                print(f"No available defense slot to add player '{player.name}'.")
        else:
            print(f"Player '{player.name}' has an unrecognized category '{player.position.category}'. Skipping.")

    # Add Goalies to the Lineup
    for _, goalie in goalies.iterrows():
        player = Player(
            player_id=goalie['playerId'],
            name=get_player_full_name(goalie['playerId'], db_prefix, suppress_log=True),
            team=team.upper(),
            position=Position.G
        )
        try:
            empty_slot = next(i for i, p in enumerate(lineup.goalies) if p is None)
            lineup.set_goalie(player, empty_slot)
        except StopIteration:
            print(f"No available goalie slot to add player '{player.name}'.")

    return lineup
# chicago_lineup = extract_team_lineup('CHI', '2024-11-22')

In [54]:
def calculate_min_percentage(df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates the average time on ice per game as a percentage of total game time (60 minutes).
    
    Args:
        df (pd.DataFrame): DataFrame containing 'toi' and 'gp' columns
        
    Returns:
        pd.DataFrame: Original DataFrame with new 'min%' column added
    """
    df_copy = df.copy()
    df_copy['min%'] = (df_copy['toi'] / df_copy['gp'] / 300 * 100).round(2)
    return df_copy

# # Apply the function to lineup_player_stats
# lineup_player_stats = calculate_min_percentage(lineup_player_stats)
# lineup_player_stats

In [55]:
def sum_min_percentage(df: pd.DataFrame) -> float:
    """
    Calculates the sum of the 'min%' column in the given DataFrame.
    
    Args:
        df (pd.DataFrame): DataFrame containing the 'min%' column.
        
    Returns:
        float: The total sum of the 'min%' values.
        
    Raises:
        KeyError: If the 'min%' column is not present in the DataFrame.
    """
    if 'min%' not in df.columns:
        raise KeyError("The DataFrame does not contain a 'min%' column.")
    
    total_min_percentage = df['min%'].sum()
    return total_min_percentage

# Example usage:
# total_min_percentage = sum_min_percentage(lineup_player_stats)
# print(f"Total min%: {total_min_percentage}")

In [56]:
def calculate_adj_min(df: pd.DataFrame, total_min_percentage: float) -> None:
    """
    Calculates the adjusted minimum (adj_min) for each player based on their min% and the total min%.
    
    The formula used is:
        adj_min = (min% / total_min_percentage) * 300
    
    Args:
        df (pd.DataFrame): DataFrame containing the 'min%' column.
        total_min_percentage (float): The total sum of the 'min%' column.
        
    Raises:
        KeyError: If the 'min%' column is not present in the DataFrame.
        ValueError: If total_min_percentage is not a positive number.
    """
    if 'min%' not in df.columns:
        raise KeyError("The DataFrame does not contain a 'min%' column.")
    
    if total_min_percentage <= 0:
        raise ValueError("total_min_percentage must be a positive number.")
    
    # Calculate and append the 'adj_min' column
    df['adj_min'] = ((df['min%'] / total_min_percentage) * 300).round(2)

# # Example usage:
# calculate_adj_min(lineup_player_stats, total_min_percentage)
# print(lineup_player_stats[['player', 'min%', 'adj_min']])

In [57]:
# Define file paths to load the model and transformer
model_filepath = 'models/polynomial_model_degree_1.pkl'
poly_filepath = 'models/polynomial_features_degree_1.pkl'

# Load the regression model
with open(model_filepath, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
print(f"Model loaded from {model_filepath}")

# Load the PolynomialFeatures transformer
with open(poly_filepath, 'rb') as poly_file:
    loaded_poly = pickle.load(poly_file)
print(f"PolynomialFeatures transformer loaded from {poly_filepath}")

# Function to make predictions using the loaded model and transformer
def predict_gpm(new_ixg60_value, model, poly, x_col='ixg60'):
    """
    Predicts 'gpm' using the loaded model and polynomial transformer.
    
    Parameters:
        new_ixg60_value (float): The new ixg60 value for prediction.
        model (RegressionResults): The loaded regression model.
        poly (PolynomialFeatures): The loaded polynomial features transformer.
        x_col (str): The name of the independent variable column. Defaults to 'ixg60'.
        
    Returns:
        predicted_gpm (float): The predicted gpm value.
    """
    # Prepare the input data
    X_new = np.array([[new_ixg60_value]])
    X_new_poly = poly.transform(X_new)
    X_new_poly_const = sm.add_constant(X_new_poly, has_constant='add')
    
    # Create DataFrame with appropriate column names
    feature_names = ['const'] + poly.get_feature_names_out([x_col]).tolist()
    new_data = pd.DataFrame(X_new_poly_const, columns=feature_names)
    
    # Predict
    predicted_gpm = model.predict(new_data)
    return predicted_gpm.iloc[0]

# Example: Predicting 'gpm' for a new ixg60 value
# new_ixg60_value = 50
# predicted_gpm = predict_gpm(new_ixg60_value, loaded_model, loaded_poly)
# print(f"Predicted GPM for ixg60={new_ixg60_value}: {predicted_gpm:.4f}")

Model loaded from models/polynomial_model_degree_1.pkl
PolynomialFeatures transformer loaded from models/polynomial_features_degree_1.pkl


In [58]:
# Function to predict GPM for each player and add it to the DataFrame
def add_gpm_to_lineup(lineup_df, model, poly):
    """
    Adds a 'gpm' column to the lineup_player_stats DataFrame using the predict_gpm function.
    
    Args:
        lineup_df (pd.DataFrame): DataFrame containing 'ixg/60' column.
        model: Loaded regression model.
        poly: Loaded PolynomialFeatures transformer.
        
    Returns:
        pd.DataFrame: Updated DataFrame with 'gpm' column added.
    """
    # Define a helper function to handle potential missing or invalid values
    def safe_predict(ixg_60):
        if pd.isna(ixg_60):
            return np.nan
        try:
            return predict_gpm(ixg_60, model, poly)
        except Exception as e:
            print(f"Error predicting GPM for ixg_60={ixg_60}: {e}")
            return np.nan
    
    # Apply the predict_gpm function to each 'ixg/60' value
    lineup_df['gpm'] = lineup_df['ixg/60'].apply(safe_predict)
    return lineup_df

# # Apply the function to add 'gpm' to your DataFrame
# lineup_player_stats = add_gpm_to_lineup(lineup_player_stats, loaded_model, loaded_poly)

# # Display the updated DataFrame with 'gpm'
# print(lineup_player_stats[['player', 'ixg/60', 'gpm']])

In [59]:
def calculate_x_goals(lineup_stats_df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates 'x_goals' by multiplying 'gpm' with 'adj_min' for each player.
    
    Args:
        lineup_stats_df (pd.DataFrame): DataFrame containing 'gpm' and 'adj_min' columns.
        
    Returns:
        pd.DataFrame: DataFrame with the new 'x_goals' column added.
        
    Raises:
        KeyError: If 'gpm' or 'adj_min' columns are not present in the DataFrame.
        TypeError: If 'gpm' or 'adj_min' contain non-numeric data.
    """
    # Check if required columns exist
    required_columns = {'gpm', 'adj_min'}
    missing_columns = required_columns - set(lineup_stats_df.columns)
    if missing_columns:
        raise KeyError(f"The DataFrame is missing the following required columns: {', '.join(missing_columns)}")
    
    # Check if 'gpm' and 'adj_min' are numeric
    if not pd.api.types.is_numeric_dtype(lineup_stats_df['gpm']):
        raise TypeError("'gpm' column must be numeric.")
    if not pd.api.types.is_numeric_dtype(lineup_stats_df['adj_min']):
        raise TypeError("'adj_min' column must be numeric.")
    
    # Calculate 'x_goals'
    lineup_stats_df = lineup_stats_df.copy()
    lineup_stats_df['x_goals'] = lineup_stats_df['gpm'] * lineup_stats_df['adj_min']
    
    return lineup_stats_df

In [60]:
def calculate_league_avg_xg_against_per_60(goalie_stats_df: pd.DataFrame) -> float:
    """
    Calculate the league average expected goals against (xg_against) per 60 minutes.

    Args:
        goalie_stats_df (pd.DataFrame): DataFrame containing goalie statistics with 'xg_against' and 'toi' columns.

    Returns:
        float: The league average xg_against per 60 minutes.

    Raises:
        KeyError: If required columns are missing from the DataFrame.
        ValueError: If no valid goalies with non-zero 'toi' are found.
    """
    # Ensure required columns are present
    required_columns = {'xg_against', 'toi'}
    missing_columns = required_columns - set(goalie_stats_df.columns)
    if missing_columns:
        raise KeyError(f"Missing columns in goalie_stats_df: {', '.join(missing_columns)}")

    # Drop rows with missing or zero 'toi' to avoid division errors
    valid_goalies = goalie_stats_df.dropna(subset=['xg_against', 'toi'])
    valid_goalies = valid_goalies[valid_goalies['toi'] > 0]

    if valid_goalies.empty:
        raise ValueError("No valid goalies with non-zero 'toi' found in goalie_stats_df.")

    # Calculate xg against per 60 minutes for each goalie
    valid_goalies['xg_against_per_60'] = (valid_goalies['xg_against'] / valid_goalies['toi']) * 60

    # Calculate the league average
    league_avg_xg_against_per_60 = valid_goalies['xg_against_per_60'].mean()

    return league_avg_xg_against_per_60

In [61]:
def calculate_xg_against_adj_percentage(lineup_goalie_stats, goalie_avg_xg_against_per_60):
    """
    Calculate the expected goals against adjusted percentage.
    """
    lineup_goalie_stats['adj%'] = (lineup_goalie_stats['xg_against/60'] / goalie_avg_xg_against_per_60) * 100
    return lineup_goalie_stats

In [62]:
def process_team_lineup(input_date: str, team: str, model, poly) -> pd.DataFrame:
    """
    Processes the team's lineup for a given date and predicts GPM for each player.
    This function performs the following steps:
        1. Calls `nst_on_ice_scraper` for the input date minus one day.
        2. Extracts the team's lineup using `extract_team_lineup`.
        3. Retrieves skater statistics with `get_skater_stats`.
        4. Calculates `min%` using `calculate_min_percentage`.
        5. Computes `adj_min` using `calculate_adj_min`.
        6. Determines `ixg_per_60` using `calculate_ixg_per_60`.
        7. Predicts `gpm` using the loaded polynomial regression model.
        8. Calculcates x_goals by multiplying gpm by adj_min    
    Args:
        input_date (str): The reference date in 'YYYY-MM-DD' format.
        team (str): The three-letter team code (e.g., 'TOR').
        model: Loaded regression model.
        poly: Loaded PolynomialFeatures transformer.
        
    Returns:
        pd.DataFrame: Updated DataFrame with calculated metrics and predicted GPM.
    
    Raises:
        ValueError: If any step in the data processing pipeline fails.
    """

    try:
        # Step 1: Calculate the date minus one day
        reference_datetime = datetime.strptime(input_date, '%Y-%m-%d') - timedelta(days=1)
        reference_date_str = reference_datetime.strftime('%Y-%m-%d')
        print(f"Fetching data for reference date: {reference_date_str}")

        # Step 2: Call nst_on_ice_scraper for player and goalie stats
        player_stats_df = nst_on_ice_scraper(
            fromseason=20242025,
            thruseason=20242025,
            startdate='',
            enddate=reference_date_str,
            rate='y',
            stdoi='std'
        )
        goalie_stats_df = nst_on_ice_scraper(
            fromseason=20242025,
            thruseason=20242025,
            startdate='',
            enddate=reference_date_str,
            pos='G',
            rate='y',
            stdoi='g'
        )
        print("Player and goalie statistics fetched successfully.")

        # Step 3: Extract team lineup for the input date and team
        lineup = extract_team_lineup(team, input_date)
        print(f"Lineup extracted for team {team} on {input_date}.")

        # Step 4: Get skater statistics for the lineup
        lineup_skater_stats = get_skater_stats(lineup, player_stats_df)

        # Step 5: Calculate min%
        lineup_skater_stats = calculate_min_percentage(lineup_skater_stats)

        # Step 6: Calculate adj_min
        total_min_percentage = sum_min_percentage(lineup_skater_stats)
        calculate_adj_min(lineup_skater_stats, total_min_percentage)

        # Step 7: Predict gpm using the polynomial regression model
        lineup_skater_stats = add_gpm_to_lineup(lineup_skater_stats, model, poly)

        # Step 8: Calculate x_goals
        lineup_skater_stats = calculate_x_goals(lineup_skater_stats)

        # Step 9: Get goalie stats for the lineup
        lineup_goalie_stats = get_goalie_stats(lineup, goalie_stats_df)
        
        # Step 10: Calculate league average xg_against_per_60
        goalie_avg_xg_against_per_60 = goalie_stats_df['xg_against/60'].mean()

        lineup_goalie_stats = calculate_xg_against_adj_percentage(lineup_goalie_stats, goalie_avg_xg_against_per_60)
        
        return lineup_skater_stats, lineup_goalie_stats

    except Exception as e:
        print(f"An error occurred during processing: {e}")
        raise

In [63]:
def load_models(model_filepath='models/polynomial_model_degree_1.pkl',
               poly_filepath='models/polynomial_features_degree_1.pkl'):
    """
    Loads the regression model and PolynomialFeatures transformer from the specified file paths.

    Args:
        model_filepath (str): Path to the saved regression model pickle file.
        poly_filepath (str): Path to the saved PolynomialFeatures transformer pickle file.

    Returns:
        tuple: A tuple containing the loaded regression model and PolynomialFeatures transformer.

    Raises:
        FileNotFoundError: If either of the specified files does not exist.
        pickle.UnpicklingError: If there is an error unpickling the files.
    """
    try:
        # Load the regression model
        with open(model_filepath, 'rb') as model_file:
            loaded_model = pickle.load(model_file)
        print(f"Model loaded from {model_filepath}")

        # Load the PolynomialFeatures transformer
        with open(poly_filepath, 'rb') as poly_file:
            loaded_poly = pickle.load(poly_file)
        print(f"PolynomialFeatures transformer loaded from {poly_filepath}")

        return loaded_model, loaded_poly

    except FileNotFoundError as fnf_error:
        print(f"Error: {fnf_error}")
        raise
    except pickle.UnpicklingError as pickle_error:
        print(f"Error loading pickle files: {pickle_error}")
        raise
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        raise

In [64]:
# Load the models using the new function
loaded_model, loaded_poly = load_models()

Model loaded from models/polynomial_model_degree_1.pkl
PolynomialFeatures transformer loaded from models/polynomial_features_degree_1.pkl


In [65]:
def calculate_x_shots(lineup_stats_df: pd.DataFrame) -> pd.DataFrame:
    """
    Calculates 'x_shots' by multiplying 'shots/60' divided by 60 with 'adj_min' for each player.
    
    Args:
        lineup_stats_df (pd.DataFrame): DataFrame containing 'shots/60' and 'adj_min' columns.
        
    Returns:
        pd.DataFrame: DataFrame with the new 'x_goals' column added.
        
    Raises:
        KeyError: If 'shots/60' or 'adj_min' columns are not present in the DataFrame.
        TypeError: If 'shots/60' or 'adj_min' contain non-numeric data.
    """
    # Check if required columns exist
    required_columns = {'shots/60', 'adj_min'}
    missing_columns = required_columns - set(lineup_stats_df.columns)
    if missing_columns:
        raise KeyError(f"The DataFrame is missing the following required columns: {', '.join(missing_columns)}")
    
    # Check if 'shots/60' and 'adj_min' are numeric
    if not pd.api.types.is_numeric_dtype(lineup_stats_df['shots/60']):
        raise TypeError("'shots/60' column must be numeric.")
    if not pd.api.types.is_numeric_dtype(lineup_stats_df['adj_min']):
        raise TypeError("'adj_min' column must be numeric.")
    
    # Calculate 'x_shots' formatted to 2 decimal points
    lineup_stats_df = lineup_stats_df.copy()
    lineup_stats_df['x_shots'] = ((lineup_stats_df['shots/60'] / 60) * lineup_stats_df['adj_min']).round(2)
    
    return lineup_stats_df

In [66]:
def process_team_lineup_shots(input_date: str, team: str) -> pd.DataFrame:
    """
    Processes the team's lineup for a given date and predicts shots for each player.
    This function performs the following steps:
        Calls `nst_on_ice_scraper` for the input date minus one day. Extracts the team's lineup using `extract_team_lineup`.
        Retrieves skater statistics with `get_skater_stats`. Calculates `min%` using `calculate_min_percentage`.
        Computes `adj_min` using `calculate_adj_min`. Calculcates x_shots by multiplying shots/60 by adj_min    
    Args:
        input_date (str): The reference date in 'YYYY-MM-DD' format.
        team (str): The three-letter team code (e.g., 'TOR').
        db_config (Dict[str, str]): Database configuration dictionary.
    Returns:
        pd.DataFrame: Updated DataFrame with calculated metrics and predicted shots (x_shots).
    
    Raises:
        ValueError: If any step in the data processing pipeline fails.
    """

    try:
        # Step 1: Calculate the date minus one day
        reference_datetime = datetime.strptime(input_date, '%Y-%m-%d') - timedelta(days=1)
        reference_date_str = reference_datetime.strftime('%Y-%m-%d')
        print(f"Fetching data for reference date: {reference_date_str}")

        # Step 2: Call nst_on_ice_scraper for player and goalie stats
        player_stats_df = nst_on_ice_scraper(
            fromseason=20242025,
            thruseason=20242025,
            startdate='',
            enddate=reference_date_str,
            rate='y'
        )
        print("Player and goalie statistics fetched successfully.")

        # Step 3: Extract team lineup for the input date and team
        lineup = extract_team_lineup(team, input_date)
        print(f"Lineup extracted for team {team} on {input_date}.")

        # Step 4: Get skater statistics for the lineup
        lineup_skater_stats = get_skater_stats(lineup, player_stats_df, filter='shots')

        # Step 5: Calculate min%
        lineup_skater_stats = calculate_min_percentage(lineup_skater_stats)

        # Step 6: Calculate adj_min
        total_min_percentage = sum_min_percentage(lineup_skater_stats)
        calculate_adj_min(lineup_skater_stats, total_min_percentage)

        # Step 7: Calculate x_shots
        lineup_skater_stats = calculate_x_shots(lineup_skater_stats)
        
        return lineup_skater_stats

    except Exception as e:
        print(f"An error occurred during processing: {e}")
        raise

In [67]:
shots = process_team_lineup_shots('2024-12-09', 'NYR')
shots

Fetching data for reference date: 2024-12-08
Player and goalie statistics fetched successfully.
Game ID: 2024020436
Added player 'Artemi Panarin' to Forwards slot 1.
Added player 'Alexis Lafrenière' to Forwards slot 2.
Added player 'Vincent Trocheck' to Forwards slot 3.
Added player 'Chris Kreider' to Forwards slot 4.
Added player 'Kaapo Kakko' to Forwards slot 5.
Added player 'Jimmy Vesey' to Forwards slot 6.
Added player 'Sam Carrick' to Forwards slot 7.
Added player 'Will Cuylle' to Forwards slot 8.
Added player 'Filip Chytil' to Forwards slot 9.
Added player 'Adam Edstrom' to Forwards slot 10.
Added player 'Reilly Smith' to Forwards slot 11.
Added player 'Mika Zibanejad' to Forwards slot 12.
Added player 'Braden Schneider' to Defense slot 1.
Added player 'Zac Jones' to Defense slot 2.
Added player 'Adam Fox' to Defense slot 3.
Added player 'Ryan Lindgren' to Defense slot 4.
Added player 'K'Andre Miller' to Defense slot 5.
Added player 'Victor Mancini' to Defense slot 6.
Added playe

Unnamed: 0,player,team,position,gp,toi,toi/gp,shots/60,icf/60,iff/60,min%,adj_min,x_shots
0,Artemi Panarin,NYR,L,26,396.22,15.24,8.03,17.11,12.72,5.08,19.06,2.55
1,Alexis Lafrenière,NYR,L,26,396.27,15.24,7.12,13.02,10.6,5.08,19.06,2.26
2,Vincent Trocheck,NYR,C,26,396.18,15.24,7.12,11.51,10.45,5.08,19.06,2.26
3,Chris Kreider,NYR,L,23,259.75,11.29,7.85,14.09,11.78,3.76,14.11,1.85
4,Kaapo Kakko,NYR,R,26,311.78,11.99,6.16,11.16,8.66,4.0,15.01,1.54
5,Jimmy Vesey,NYR,L,15,138.82,9.25,5.62,9.94,7.78,3.08,11.56,1.08
6,Sam Carrick,NYR,C,26,232.72,8.95,5.67,11.34,8.25,2.98,11.18,1.06
7,Will Cuylle,NYR,L,26,327.98,12.61,7.87,12.62,10.79,4.2,15.76,2.07
8,Filip Chytil,NYR,C,19,231.3,12.17,9.6,17.38,14.01,4.06,15.24,2.44
9,Adam Edstrom,NYR,C,26,225.52,8.67,5.59,11.71,8.78,2.89,10.85,1.01


In [68]:
def process_team_shots_and_opponent(input_date: str, team: str) -> Dict[str, Any]:
    """
    Processes a team's lineup shots and retrieves opponent's on-ice statistics for a given date.
    
    This function performs the following steps:
        1. Runs `process_team_lineup_shots` for the specified team and date.
        2. Retrieves the game_id where the team played on the specified date.
        3. Identifies the opponent team based on the game_id.
        4. Converts the opponent team's triCode to its full name.
        5. Uses `nst_team_on_ice_scraper` to fetch the opponent team's on-ice statistics.
    
    Args:
        input_date (str): The reference date in 'YYYY-MM-DD' format.
        team (str): The three-letter team code (e.g., 'TOR').
        db_config (Dict[str, str]): Database configuration dictionary.
        model: Loaded regression model.
        poly: Loaded PolynomialFeatures transformer.
    
    Returns:
        Dict[str, Any]: A dictionary containing the team's lineup shots DataFrame, game_id, opponent team code,
                        and opponent's on-ice statistics DataFrame.
    
    Raises:
        ValueError: If the team did not play on the specified date or if the opponent cannot be determined.
    """
    try:
        # Step 1: Determine the reference date, if input_date is None, use yesterday's date
        if input_date is None:
            today_datetime = datetime.now()
            yesterday_datetime = today_datetime - timedelta(days=1, hours=6)  # Adjust for UTC offset if necessary
            input_date = yesterday_datetime.strftime('%Y-%m-%d')
        else:
            input_datetime = datetime.strptime(input_date, '%Y-%m-%d')
            day_before_input_dt = input_datetime - timedelta(days=1, hours=6)
            day_before_input = day_before_input_dt.strftime('%Y-%m-%d')

        # Step 2: Run process_team_lineup_shots for the given team and date
        team_shots_df = process_team_lineup_shots(input_date, team)
        
        # Insert date as the first column
        team_shots_df.insert(0, 'date', input_date)
        
        print(f"Processed lineup shots for team {team} on {input_date}.")

        # Step 3: Retrieve matchup games for the input date
        print(f"Retrieving matchup games for date {input_date}.")
        temp_data = get_matchup_games(input_date, input_date)
        game_ids = temp_data.get('game_ids', {}).get('id', [])
        game_dates = temp_data.get('game_ids', {}).get('date', [])

        # Initialize variables
        game_id = None
        opponent_team_tricode = None

        # Step 4: Identify the game_id involving the specified team
        for gid, gdate in zip(game_ids, game_dates):
            print(f"Checking Game ID: {gid} on Date: {gdate}")
            boxscore = get_game_boxscore(gid, clean=True)
            away_team = boxscore.get('away_team')
            home_team = boxscore.get('home_team')
            print(f"Away Team: {away_team}, Home Team: {home_team}")
            
            if team.upper() == away_team.upper():
                opponent_team_tricode = home_team.upper()
                game_id = gid
                print(f"Team {team} found as Away Team in Game ID {gid}. Opponent TriCode: {opponent_team_tricode}")
                break
            elif team.upper() == home_team.upper():
                opponent_team_tricode = away_team.upper()
                game_id = gid
                print(f"Team {team} found as Home Team in Game ID {gid}. Opponent TriCode: {opponent_team_tricode}")
                break

        if not game_id or not opponent_team_tricode:
            raise ValueError(f"Team {team} did not play on {input_date} or could not determine opponent.")

        # Step 5: Convert opponent_team_tricode to full name
        opponent_team_fullname = get_fullname_by_tricode(opponent_team_tricode)
        if opponent_team_fullname is None:
            raise ValueError(f"Could not find full name for opponent TriCode '{opponent_team_tricode}'.")
        print(f"Opponent Team Full Name: {opponent_team_fullname}")

        # Step 6: Use nst_team_on_ice_scraper to fetch opponent's on-ice statistics
        print(f"Fetching on-ice statistics for opponent team {opponent_team_fullname}.")
        team_stats_df = nst_team_on_ice_scraper(
            fromseason=20242025,
            thruseason=20242025,
            startdate='',
            enddate=day_before_input,
            stype=2,
            sit='all'
        )
        
        # Assuming 'team' is the column name for team full names in the scraped DataFrame
        if 'team' not in team_stats_df.columns:
            raise KeyError("Column 'team' not found in opponent_stats_df. Please verify the scraped data.")
        
        team_stats_df = team_stats_df[team_stats_df['team'].str.lower() == opponent_team_fullname.lower()]
        if team_stats_df.empty:
            raise ValueError(f"No on-ice statistics found for opponent team '{opponent_team_fullname}'.")
        print(f"Retrieved on-ice statistics for opponent team {opponent_team_fullname}.")

        # Step 7: Extract opponent's Corsi For Percentage (CF%)
        # Assuming the column name for CF% is 'cf%'. Adjust if different.
        if 'cf%' not in team_stats_df.columns:
            raise KeyError("Column 'cf%' not found in opponent_stats_df. Please verify the scraped data.")

        opponent_cf_pct = team_stats_df['cf%'].iloc[0]
        print(f"Opponent's Corsi For Percentage (CF%): {opponent_cf_pct}")

        # Step 8: Apply CF% as a modifier to the team's projected shots (x_shots) using a 50% baseline
        baseline_cf_pct = 50  # Baseline CF% (average value)

        # Adjust x_shots based on opponent's CF%
        team_shots_df['x_shots_mod'] = (team_shots_df['x_shots'] * (baseline_cf_pct / opponent_cf_pct)).round(2)
        print("Applied opponent's CF% as a modifier to the team's projected shots (x_shots) based on 50% baseline CF%.")

        return {
            'team_shots': team_shots_df,
            'game_id': game_id,
            'opponent_team': opponent_team_fullname,
            'opponent_stats': team_stats_df
        }

    except Exception as e:
        print(f"An error occurred during processing: {e}")

In [69]:
# Example usage of the process_team_and_opponent function
input_date = '2024-12-10'
team = 'COL'  # Example team code
matchup_data = process_team_shots_and_opponent(input_date, team)

# Accessing the data
team_shots = matchup_data['team_shots']
game_id = matchup_data['game_id']
opponent_team = matchup_data['opponent_team']
opponent_stats = matchup_data['opponent_stats']

Fetching data for reference date: 2024-12-09
Player and goalie statistics fetched successfully.
Game ID: 2024020440
Added player 'Valeri Nichushkin' to Forwards slot 1.
Added player 'Chris Wagner' to Forwards slot 2.
Added player 'Parker Kelly' to Forwards slot 3.
Added player 'Ross Colton' to Forwards slot 4.
Added player 'Logan O'Connor' to Forwards slot 5.
Added player 'Nathan MacKinnon' to Forwards slot 6.
Added player 'Casey Mittelstadt' to Forwards slot 7.
Added player 'Artturi Lehkonen' to Forwards slot 8.
Added player 'Ivan Ivan' to Forwards slot 9.
Added player 'Nikita Prishchepov' to Forwards slot 10.
Added player 'Joel Kiviranta' to Forwards slot 11.
Added player 'Mikko Rantanen' to Forwards slot 12.
Added player 'Devon Toews' to Defense slot 1.
Added player 'Cale Makar' to Defense slot 2.
Added player 'Calvin de Haan' to Defense slot 3.
Added player 'Samuel Girard' to Defense slot 4.
Added player 'Keaton Middleton' to Defense slot 5.
Added player 'Sam Malinski' to Defense s

In [70]:
# team_shots

In [71]:
def add_sog_to_team_shots(team_shots_df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds shots on goal data to the team_shots DataFrame by parsing boxscores for each unique date and team.

    This function performs the following steps:
        1. Identifies unique combinations of 'date' and 'team' in the team_shots_df.
        2. For each unique combination, retrieves the most recent game_id using `get_most_recent_game_id`.
        3. Retrieves and processes the boxscore for each game_id using `display_boxscore` with `clean=False`.
        4. Extracts player-specific shots on goal from the processed boxscore.
        5. Merges the shots on goal data back into the original team_shots_df.

    Args:
        team_shots_df (pd.DataFrame): DataFrame containing at least 'date', 'team', and 'player' columns.

    Returns:
        pd.DataFrame: Updated DataFrame with an additional 'shots_on_goal' column.

    Raises:
        KeyError: If required columns are missing in the team_shots_df.
        Exception: For any unexpected errors during processing.
    """

    # Ensure required columns are present
    required_columns = {'date', 'team', 'player'}
    if not required_columns.issubset(team_shots_df.columns):
        missing = required_columns - set(team_shots_df.columns)
        raise KeyError(f"Missing required columns in team_shots_df: {', '.join(missing)}")

    # Find unique date and team combinations
    unique_combinations = team_shots_df[['date', 'team']].drop_duplicates()

    for _, row in unique_combinations.iterrows():
        game_date = row['date']
        team = row['team']

        try:
            # Retrieve the most recent game_id for the team on the given date
            game_id = get_most_recent_game_id(team, game_date)
            if game_id is None:
                print(f"No game found for team {team} on {game_date}.")
                shots_on_goal = None
            else:
                # Retrieve and process the boxscore using display_boxscore with clean=False
                processed_boxscore = display_boxscore(game_id)

                # Assuming processed_boxscore is a tuple:
                # (away_skaters_df, away_goalies_df, home_skaters_df, home_goalies_df)
                away_skaters = processed_boxscore[0]
                home_skaters = processed_boxscore[2]

                # Extract team abbreviations from skaters DataFrames
                # Assuming each skaters DataFrame has a 'team_abbrev' column
                away_team_abbrev = away_skaters['team_abbrev'].iloc[0] if not away_skaters.empty else None
                home_team_abbrev = home_skaters['team_abbrev'].iloc[0] if not home_skaters.empty else None

                if team.upper() == away_team_abbrev.upper():
                    # Add full names to away_skaters
                    away_skaters['full_name'] = away_skaters['playerId'].apply(lambda x: get_player_full_name(x, db_prefix))
                    
                    # Merge to get player-specific shots
                    merged_df = pd.merge(
                        team_shots_df[
                            (team_shots_df['date'] == game_date) &
                            (team_shots_df['team'] == team)
                        ],
                        away_skaters[['full_name', 'sog']],
                        left_on='player',
                        right_on='full_name',
                        how='left'
                    )
                    
                    # Assign the 'shots_on_goal' from 'sog', handle missing values
                    team_shots_df.loc[
                        (team_shots_df['date'] == game_date) &
                        (team_shots_df['team'] == team),
                        'sog'
                    ] = merged_df['sog']

                elif team.upper() == home_team_abbrev.upper():
                    # Add full names to home_skaters
                    home_skaters['full_name'] = home_skaters['playerId'].apply(lambda x: get_player_full_name(x, db_prefix))
                    
                    # Merge to get player-specific shots
                    merged_df = pd.merge(
                        team_shots_df[
                            (team_shots_df['date'] == game_date) &
                            (team_shots_df['team'] == team)
                        ],
                        home_skaters[['full_name', 'sog']],
                        left_on='player',
                        right_on='full_name',
                        how='left'
                    )
                    
                    # Assign the 'sog' from 'sog', handle missing values
                    team_shots_df.loc[
                        (team_shots_df['date'] == game_date) &
                        (team_shots_df['team'] == team),
                        'sog'
                    ] = merged_df['sog']

                else:
                    print(f"Team {team} does not match away ({away_team_abbrev}) or home ({home_team_abbrev}) team abbreviations.")
                    # sog = None
                    # Optionally, you can choose to assign None or another value
                    team_shots_df.loc[
                        (team_shots_df['date'] == game_date) &
                        (team_shots_df['team'] == team),
                        'sog'
                    ] = None

        except Exception as e:
            print(f"Error processing team {team} on date {game_date}: {e}")
            # Assign None to 'sog' in case of an error
            team_shots_df.loc[
                (team_shots_df['date'] == game_date) &
                (team_shots_df['team'] == team),
                'sog'
            ] = None

    return team_shots_df

In [72]:
team_shots = add_sog_to_team_shots(team_shots)
# team_shots

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Valeri Nichushkin' for player_id 8477501.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Chris Wagner' for player_id 8475780.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Parker Kelly' for player_id 8480448.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Ross Colton' for player_id 8479525.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Logan O'Connor' for player_id 8481186.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.

In [73]:
def process_team_shots(team_shots_df, sportsbook, line=True):
    """
    Process all players in team_shots DataFrame to retrieve their shot over/under odds and prepare data for merging.
    
    Each player will appear only once in the returned DataFrame, with separate columns for over_odds, under_odds,
    and handicap, provided that the handicap for both bets is equal.
    
    Args:
        team_shots_df (pd.DataFrame): DataFrame containing player shot data with columns:
            date, player, team, position, gp, toi, toi/gp, shots/60, etc.
        sportsbook (str): The name of the sportsbook to filter odds by.
        line (bool, optional): If True, filters odds to find those closest to +100. Defaults to True.
    
    Returns:
        pd.DataFrame: A DataFrame with player names, their respective over_odds, under_odds, and handicap,
                      ready to be merged with team_shots_df.
    
    Raises:
        KeyError: If required columns are missing in team_shots_df.
        ValueError: If multiple dates are present in team_shots_df.
    """
    # Ensure that 'date' and 'team' columns exist
    if 'date' not in team_shots_df.columns or 'team' not in team_shots_df.columns:
        raise KeyError("The DataFrame must contain 'date' and 'team' columns.")
    
    # Get the unique date(s) from the DataFrame
    unique_dates = team_shots_df['date'].unique()
    if len(unique_dates) != 1:
        raise ValueError("All players in the DataFrame must have the same 'date'.")
    
    query_date = unique_dates[0]
    
    # Get unique players and their teams from the DataFrame
    players = team_shots_df[['player', 'team']].drop_duplicates()
    
    # Initialize a list to collect odds data
    team_odds = []
    
    for _, row in players.iterrows():
        player = row['player']
        team_abbrev = row['team']
        team_fullname = get_fullname_by_tricode(team_abbrev)
        
        if not team_fullname:
            logging.warning(f"Could not find full name for team abbreviation '{team_abbrev}'. Skipping player '{player}'.")
            continue
        
        logging.info(f"Processing player: {player} from team: {team_fullname}")
        
        odds_list = get_player_shots_ou_odds(
            player_name=player,
            team_name=team_fullname,  # Pass the full team name
            query_date=query_date, 
            sportsbook=sportsbook,
            line=line
        )
        
        if odds_list:
            # Initialize dictionaries to hold Over and Under odds
            player_odds = {'player': player}
            handicap_set = set()
            handicap_value = None  # Initialize handicap_value
            
            for odds in odds_list:
                # Remove 'game_id' from odds if it exists
                odds_cleaned = {k: v for k, v in odds.items() if k != 'game_id'}
                
                # Collect handicap values to ensure they are equal
                handicap = odds_cleaned.get('handicap')
                handicap_set.add(handicap)
                
                ou_type = odds_cleaned.get('ou')
                odds_value = odds_cleaned.get('odds')
                
                if ou_type == 'Over':
                    player_odds['over_odds'] = odds_value
                elif ou_type == 'Under':
                    player_odds['under_odds'] = odds_value
            
            # Check if handicaps are equal
            if len(handicap_set) > 1:
                logging.warning(f"Handicaps for player '{player}' are not equal: {handicap_set}. Skipping player.")
                continue  # Skip this player due to unequal handicaps
            elif len(handicap_set) == 1:
                handicap_value = handicap_set.pop()
                player_odds['handicap'] = handicap_value  # Add handicap to player_odds
            else:
                logging.warning(f"No handicap information found for player '{player}'. Skipping player.")
                continue  # Skip if no handicap information
            
            team_odds.append(player_odds)
            logging.info(f"Retrieved and cleaned odds for {player}.")
        else:
            logging.warning(f"No odds found for {player}.")
    
    # Define the desired column order
    column_order = ['player','handicap', 'over_odds', 'under_odds']
    
    # Convert the list of odds dictionaries to a DataFrame with specified column order
    if team_odds:
        odds_df = pd.DataFrame(team_odds, columns=column_order)
    else:
        odds_df = pd.DataFrame(columns=column_order)
    
    return odds_df

In [74]:
# Retrieve odds DataFrame
odds_df = process_team_shots(team_shots, sportsbook='draftkings', line=True)

# Display the retrieved odds
# print(odds_df)

INFO:root:Processing player: Valeri Nichushkin from team: Colorado Avalanche
INFO:root:Retrieving player shots OU odds for player: Valeri Nichushkin, date: 2024-12-10, sportsbook: draftkings, team: Colorado Avalanche, line: True
INFO:root:Retrieving NHL games from DB for date: 2024-12-10
INFO:root:Retrieved 11 games from the database for date 2024-12-10.
INFO:root:Retrieved and cleaned odds for Valeri Nichushkin.
INFO:root:Processing player: Chris Wagner from team: Colorado Avalanche
INFO:root:Retrieving player shots OU odds for player: Chris Wagner, date: 2024-12-10, sportsbook: draftkings, team: Colorado Avalanche, line: True
INFO:root:Retrieving NHL games from DB for date: 2024-12-10
INFO:root:Retrieved 11 games from the database for date 2024-12-10.
INFO:root:Processing player: Parker Kelly from team: Colorado Avalanche
INFO:root:Retrieving player shots OU odds for player: Parker Kelly, date: 2024-12-10, sportsbook: draftkings, team: Colorado Avalanche, line: True
INFO:root:Retriev

In [75]:
# Merge the odds into team_shots_df. Since each player has only one row in odds_df, a simple left join suffices
merged_df = pd.merge(team_shots, odds_df, on='player', how='left')

# Display the merged DataFrame
merged_df

Unnamed: 0,date,player,team,position,gp,toi,toi/gp,shots/60,icf/60,iff/60,min%,adj_min,x_shots,x_shots_mod,sog,handicap,over_odds,under_odds
0,2024-12-10,Valeri Nichushkin,COL,R,12,163.48,13.62,6.24,15.78,9.18,4.54,16.85,1.75,1.76,1.0,2.5,-125.0,-105.0
1,2024-12-10,Chris Wagner,COL,R,18,111.53,6.2,9.15,18.83,15.06,2.07,7.68,1.17,1.18,0.0,,,
2,2024-12-10,Parker Kelly,COL,C,29,305.45,10.53,5.5,12.18,8.64,3.51,13.02,1.19,1.2,1.0,,,
3,2024-12-10,Ross Colton,COL,C,12,158.78,13.23,10.2,15.87,12.85,4.41,16.36,2.78,2.79,2.0,2.5,130.0,-182.0
4,2024-12-10,Logan O'Connor,COL,R,29,362.57,12.5,5.46,9.93,7.94,4.17,15.47,1.41,1.42,1.0,,,
5,2024-12-10,Nathan MacKinnon,COL,C,29,508.43,17.53,10.03,17.47,13.1,5.84,21.67,3.62,3.64,6.0,4.5,110.0,-142.0
6,2024-12-10,Casey Mittelstadt,COL,C,29,413.43,14.26,4.35,7.84,6.39,4.75,17.63,1.28,1.29,0.0,,,
7,2024-12-10,Artturi Lehkonen,COL,L,17,293.48,17.26,4.7,11.65,9.0,5.75,21.34,1.67,1.68,2.0,2.5,100.0,-130.0
8,2024-12-10,Ivan Ivan,COL,C,29,273.48,9.43,5.27,10.09,8.12,3.14,11.65,1.02,1.02,0.0,,,
9,2024-12-10,Nikita Prishchepov,COL,C,10,63.67,6.37,3.77,9.42,6.6,2.12,7.87,0.49,0.49,0.0,,,


In [76]:
def process_date_shots(date: str, sportsbook: str = 'draftkings') -> pd.DataFrame:
    """
    Processes team shots for all games on a given date by retrieving game IDs,
    extracting team information, processing shot data and opponent statistics,
    merging odds into the shot data, and compiling the results into a single DataFrame.
    
    This function performs the following steps:
        1. Retrieves the game IDs for the specified date using `get_matchup_games`.
        2. Extracts the teams involved in each game using `get_game_boxscore` with `clean=True`.
        3. For each team in each game, processes their shots and opponent's on-ice statistics using `process_team_shots_and_opponent`.
        4. Processes the team shots to retrieve odds using `process_team_shots`.
        5. Merges the odds into the team shots DataFrame.
        6. Appends all processed and merged results into a single DataFrame.
    
    Args:
        date (str): The reference date in 'YYYY-MM-DD' format.
        sportsbook (str): The name of the sportsbook to filter odds by. Defaults to 'draftkings'.
    
    Returns:
        pd.DataFrame: A concatenated DataFrame containing processed shot data for all teams
                      across all games on the specified date. The DataFrame excludes 'game_id'
                      and 'opponent_team' columns.
    
    Raises:
        ValueError: If no games are found on the specified date or if no team shot data is processed.
    """
    logging.info(f"Starting processing for date: {date}")
    
    # Step 1: Retrieve matchup games for the given date
    matchup = get_matchup_games(start_date=date, end_date=date)
    game_ids = matchup.get('game_ids', {}).get('id', [])
    
    if not game_ids:
        error_msg = f"No games found on date {date}."
        logging.error(error_msg)
        raise ValueError(error_msg)
    
    results = []
    
    # Step 2: Iterate through each game to extract team information
    for game_id in game_ids:
        logging.info(f"Processing game ID: {game_id}")
        boxscore = get_game_boxscore(game_id, clean=True)
        away_team = boxscore.get('away_team')
        home_team = boxscore.get('home_team')
        
        if not away_team or not home_team:
            logging.warning(f"Missing team information for game ID {game_id}. Skipping this game.")
            continue  # Skip this game if team information is incomplete
        
        # List of teams in the current game
        teams = [away_team, home_team]
        
        for team in teams:
            try:
                logging.info(f"Processing team: {team} in game ID: {game_id}")
                
                # Step 3: Process team shots and opponent statistics
                processed_data = process_team_shots_and_opponent(date, team)
                
                # Extract the team_shots DataFrame from the processed data
                team_shots_df = processed_data.get('team_shots')
                
                if team_shots_df is None or team_shots_df.empty:
                    logging.warning(f"No shot data found for team {team} on {date}. Skipping this team.")
                    continue  # Skip processing if no shot data is available

                # Step 4: Add sog (actual shots on goal) to the team_shots DataFrame
                team_shots_df = add_sog_to_team_shots(team_shots_df)

                # Step 5: Process the team shots DataFrame to retrieve odds
                processed_shots_df = process_team_shots(team_shots_df, sportsbook, line=True)
                
                if processed_shots_df.empty:
                    logging.warning(f"Processed shot data is empty for team {team} on {date}. Skipping this team.")
                    continue  # Skip appending if processing yields no data
                
                # Step 6: Merge the odds into the team_shots DataFrame
                merged_df = pd.merge(team_shots_df, processed_shots_df, on='player', how='left')
                
                # Optional: Add team information if not already present
                if 'team' not in merged_df.columns:
                    merged_df['team'] = team
                
                # Step 7: Append the merged DataFrame to the results list
                results.append(merged_df)
                logging.info(f"Successfully processed and merged data for team {team} in game ID {game_id}.")
            
            except Exception as e:
                # Handle exceptions for individual teams to prevent entire function from failing
                logging.error(f"Error processing team '{team}' in game '{game_id}': {e}")
                continue  # Proceed to the next team
    
    if results:
        # Step 7: Concatenate all individual DataFrames into a single DataFrame
        final_df = pd.concat(results, ignore_index=True)
        logging.info("Successfully concatenated all processed and merged team shot data.")
        
        # Step 8: Ensure 'game_id' and 'opponent_team' columns are excluded
        columns_to_exclude = ['game_id', 'opponent_team']
        existing_columns_to_exclude = [col for col in columns_to_exclude if col in final_df.columns]
        
        if existing_columns_to_exclude:
            final_df = final_df.drop(columns=existing_columns_to_exclude)
            logging.info(f"Excluded columns: {existing_columns_to_exclude} from the final DataFrame.")
        
        logging.info("Processing completed successfully.")
        return final_df
    else:
        error_msg = f"No team shot data processed for date {date}."
        logging.error(error_msg)
        raise ValueError(error_msg)

In [40]:
# Specify the date and sportsbook you want to process
date_to_process = '2024-12-09'
selected_sportsbook = 'draftkings'  # Replace with actual sportsbook as needed

try:
    combined_shots_df = process_date_shots(date=date_to_process, sportsbook=selected_sportsbook)
    combined_shots_df.head()
except ValueError as ve:
    print(f"Processing failed: {ve}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

INFO:root:Starting processing for date: 2024-12-09
INFO:root:Processing game ID: 2024020443
INFO:root:Processing team: DET in game ID: 2024020443


Fetching data for reference date: 2024-12-08
Player and goalie statistics fetched successfully.
Game ID: 2024020432
Added player 'Vladimir Tarasenko' to Forwards slot 1.
Added player 'Tyler Motte' to Forwards slot 2.
Added player 'Andrew Copp' to Forwards slot 3.
Added player 'Lucas Raymond' to Forwards slot 4.
Added player 'Michael Rasmussen' to Forwards slot 5.
Added player 'J.T. Compher' to Forwards slot 6.
Added player 'Jonatan Berggren' to Forwards slot 7.
Added player 'Dylan Larkin' to Forwards slot 8.
Added player 'Patrick Kane' to Forwards slot 9.
Added player 'Joe Veleno' to Forwards slot 10.
Added player 'Marco Kasper' to Forwards slot 11.
Added player 'Alex DeBrincat' to Forwards slot 12.
Added player 'Justin Holl' to Defense slot 1.
Added player 'Ben Chiarot' to Defense slot 2.
Added player 'Jeff Petry' to Defense slot 3.
Added player 'Moritz Seider' to Defense slot 4.
Added player 'Erik Gustafsson' to Defense slot 5.
Added player 'Simon Edvinsson' to Defense slot 6.
Added 

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Vladimir Tarasenko' for player_id 8475765.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Tyler Motte' for player_id 8477353.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Andrew Copp' for player_id 8477429.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Lucas Raymond' for player_id 8482078.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Michael Rasmussen' for player_id 8479992.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_ut

Fetching data for reference date: 2024-12-08
Player and goalie statistics fetched successfully.
Game ID: 2024020426
Added player 'Zach Benson' to Forwards slot 1.
Added player 'Jason Zucker' to Forwards slot 2.
Added player 'Peyton Krebs' to Forwards slot 3.
Added player 'Jiri Kulich' to Forwards slot 4.
Added player 'Dylan Cozens' to Forwards slot 5.
Added player 'Beck Malenstyn' to Forwards slot 6.
Added player 'Tyson Kozak' to Forwards slot 7.
Added player 'Ryan McLeod' to Forwards slot 8.
Added player 'Tage Thompson' to Forwards slot 9.
Added player 'JJ Peterka' to Forwards slot 10.
Added player 'Alex Tuch' to Forwards slot 11.
Added player 'Nicolas Aube-Kubel' to Forwards slot 12.
Added player 'Bowen Byram' to Defense slot 1.
Added player 'Dennis Gilbert' to Defense slot 2.
Added player 'Henri Jokiharju' to Defense slot 3.
Added player 'Owen Power' to Defense slot 4.
Added player 'Connor Clifton' to Defense slot 5.
Added player 'Jacob Bryson' to Defense slot 6.
Added player 'Ukko-

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Zach Benson' for player_id 8484145.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Jason Zucker' for player_id 8475722.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Peyton Krebs' for player_id 8481522.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Jiri Kulich' for player_id 8483468.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Dylan Cozens' for player_id 8481528.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_uti

Fetching data for reference date: 2024-12-08
Player and goalie statistics fetched successfully.
Game ID: 2024020427
Added player 'Ryan Donato' to Forwards slot 1.
Added player 'Craig Smith' to Forwards slot 2.
Added player 'Jason Dickinson' to Forwards slot 3.
Added player 'Nick Foligno' to Forwards slot 4.
Added player 'Philipp Kurashev' to Forwards slot 5.
Added player 'Tyler Bertuzzi' to Forwards slot 6.
Added player 'Taylor Hall' to Forwards slot 7.
Added player 'Lukas Reichel' to Forwards slot 8.
Added player 'Patrick Maroon' to Forwards slot 9.
Added player 'Teuvo Teravainen' to Forwards slot 10.
Added player 'Ilya Mikheyev' to Forwards slot 11.
Added player 'Connor Bedard' to Forwards slot 12.
Added player 'Connor Murphy' to Defense slot 1.
Added player 'Alec Martinez' to Defense slot 2.
Added player 'Nolan Allan' to Defense slot 3.
Added player 'Wyatt Kaiser' to Defense slot 4.
Added player 'Alex Vlasic' to Defense slot 5.
Added player 'TJ Brodie' to Defense slot 6.
Added playe

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Ryan Donato' for player_id 8477987.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Craig Smith' for player_id 8475225.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Jason Dickinson' for player_id 8477450.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Nick Foligno' for player_id 8473422.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Philipp Kurashev' for player_id 8480798.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.b

Fetching data for reference date: 2024-12-08
Player and goalie statistics fetched successfully.
Game ID: 2024020436
Added player 'Artemi Panarin' to Forwards slot 1.
Added player 'Alexis Lafrenière' to Forwards slot 2.
Added player 'Vincent Trocheck' to Forwards slot 3.
Added player 'Chris Kreider' to Forwards slot 4.
Added player 'Kaapo Kakko' to Forwards slot 5.
Added player 'Jimmy Vesey' to Forwards slot 6.
Added player 'Sam Carrick' to Forwards slot 7.
Added player 'Will Cuylle' to Forwards slot 8.
Added player 'Filip Chytil' to Forwards slot 9.
Added player 'Adam Edstrom' to Forwards slot 10.
Added player 'Reilly Smith' to Forwards slot 11.
Added player 'Mika Zibanejad' to Forwards slot 12.
Added player 'Braden Schneider' to Defense slot 1.
Added player 'Zac Jones' to Defense slot 2.
Added player 'Adam Fox' to Defense slot 3.
Added player 'Ryan Lindgren' to Defense slot 4.
Added player 'K'Andre Miller' to Defense slot 5.
Added player 'Victor Mancini' to Defense slot 6.
Added playe

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Artemi Panarin' for player_id 8478550.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Alexis Lafrenière' for player_id 8482109.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Vincent Trocheck' for player_id 8476389.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Chris Kreider' for player_id 8475184.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Kaapo Kakko' for player_id 8481554.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_u

Fetching data for reference date: 2024-12-08
Player and goalie statistics fetched successfully.
Game ID: 2024020424
Added player 'Ryan Strome' to Forwards slot 1.
Added player 'Alex Killorn' to Forwards slot 2.
Added player 'Troy Terry' to Forwards slot 3.
Added player 'Brett Leason' to Forwards slot 4.
Added player 'Isac Lundestrom' to Forwards slot 5.
Added player 'Mason McTavish' to Forwards slot 6.
Added player 'Brock McGinn' to Forwards slot 7.
Added player 'Jansen Harkins' to Forwards slot 8.
Added player 'Ross Johnston' to Forwards slot 9.
Added player 'Cutter Gauthier' to Forwards slot 10.
Added player 'Sam Colangelo' to Forwards slot 11.
Added player 'Frank Vatrano' to Forwards slot 12.
Added player 'Jackson LaCombe' to Defense slot 1.
Added player 'Cam Fowler' to Defense slot 2.
Added player 'Brian Dumoulin' to Defense slot 3.
Added player 'Radko Gudas' to Defense slot 4.
Added player 'Drew Helleson' to Defense slot 5.
Added player 'Olen Zellweger' to Defense slot 6.
Added pl

ERROR:root:Error processing team 'ANA' in game '2024020445': 'NoneType' object has no attribute 'get'
INFO:root:Processing team: MTL in game ID: 2024020445


An error occurred during processing: No on-ice statistics found for opponent team 'Montréal Canadiens'.
Fetching data for reference date: 2024-12-08
Player and goalie statistics fetched successfully.
Game ID: 2024020430
Added player 'Brendan Gallagher' to Forwards slot 1.
Added player 'Cole Caufield' to Forwards slot 2.
Added player 'Nick Suzuki' to Forwards slot 3.
Added player 'Alex Newhook' to Forwards slot 4.
Added player 'Josh Anderson' to Forwards slot 5.
Added player 'Juraj Slafkovsky' to Forwards slot 6.
Added player 'Christian Dvorak' to Forwards slot 7.
Added player 'Joel Armia' to Forwards slot 8.
Added player 'Emil Heineman' to Forwards slot 9.
Added player 'Jake Evans' to Forwards slot 10.
Added player 'Kirby Dach' to Forwards slot 11.
Added player 'Patrik Laine' to Forwards slot 12.
Added player 'Mike Matheson' to Defense slot 1.
Added player 'Kaiden Guhle' to Defense slot 2.
Added player 'Jayden Struble' to Defense slot 3.
Added player 'Lane Hutson' to Defense slot 4.
Ad

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Brendan Gallagher' for player_id 8475848.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Cole Caufield' for player_id 8481540.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Nick Suzuki' for player_id 8480018.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Alex Newhook' for player_id 8481618.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Josh Anderson' for player_id 8476981.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.

In [81]:
# Specify the date and sportsbook you want to process
date_to_process = '2024-12-11'
selected_sportsbook = 'draftkings'  # Replace with actual sportsbook as needed

try:
    combined_shots_df = process_date_shots(date=date_to_process, sportsbook=selected_sportsbook)
    combined_shots_df.head()
except ValueError as ve:
    print(f"Processing failed: {ve}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

INFO:root:Starting processing for date: 2024-12-11
INFO:root:Processing game ID: 2024020457
INFO:root:Processing team: NYR in game ID: 2024020457


Fetching data for reference date: 2024-12-10
Player and goalie statistics fetched successfully.
Game ID: 2024020444
Added player 'Artemi Panarin' to Forwards slot 1.
Added player 'Alexis Lafrenière' to Forwards slot 2.
Added player 'Vincent Trocheck' to Forwards slot 3.
Added player 'Chris Kreider' to Forwards slot 4.
Added player 'Kaapo Kakko' to Forwards slot 5.
Added player 'Sam Carrick' to Forwards slot 6.
Added player 'Will Cuylle' to Forwards slot 7.
Added player 'Brett Berard' to Forwards slot 8.
Added player 'Filip Chytil' to Forwards slot 9.
Added player 'Adam Edstrom' to Forwards slot 10.
Added player 'Reilly Smith' to Forwards slot 11.
Added player 'Mika Zibanejad' to Forwards slot 12.
Added player 'Braden Schneider' to Defense slot 1.
Added player 'Zac Jones' to Defense slot 2.
Added player 'Adam Fox' to Defense slot 3.
Added player 'Ryan Lindgren' to Defense slot 4.
Added player 'K'Andre Miller' to Defense slot 5.
Added player 'Victor Mancini' to Defense slot 6.
Added play

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Artemi Panarin' for player_id 8478550.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Alexis Lafrenière' for player_id 8482109.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Vincent Trocheck' for player_id 8476389.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Chris Kreider' for player_id 8475184.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Kaapo Kakko' for player_id 8481554.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_u

Fetching data for reference date: 2024-12-10
Player and goalie statistics fetched successfully.
Game ID: 2024020443
Added player 'Zach Benson' to Forwards slot 1.
Added player 'Jordan Greenway' to Forwards slot 2.
Added player 'Jason Zucker' to Forwards slot 3.
Added player 'Peyton Krebs' to Forwards slot 4.
Added player 'Dylan Cozens' to Forwards slot 5.
Added player 'Beck Malenstyn' to Forwards slot 6.
Added player 'Tyson Kozak' to Forwards slot 7.
Added player 'Ryan McLeod' to Forwards slot 8.
Added player 'Tage Thompson' to Forwards slot 9.
Added player 'JJ Peterka' to Forwards slot 10.
Added player 'Alex Tuch' to Forwards slot 11.
Added player 'Nicolas Aube-Kubel' to Forwards slot 12.
Added player 'Bowen Byram' to Defense slot 1.
Added player 'Henri Jokiharju' to Defense slot 2.
Added player 'Owen Power' to Defense slot 3.
Added player 'Ryan Johnson' to Defense slot 4.
Added player 'Connor Clifton' to Defense slot 5.
Added player 'Jacob Bryson' to Defense slot 6.
Added player 'Ukk

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Zach Benson' for player_id 8484145.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Jordan Greenway' for player_id 8478413.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Jason Zucker' for player_id 8475722.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Peyton Krebs' for player_id 8481522.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Dylan Cozens' for player_id 8481528.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base

Fetching data for reference date: 2024-12-10
Player and goalie statistics fetched successfully.
Game ID: 2024020445
Added player 'Ryan Strome' to Forwards slot 1.
Added player 'Alex Killorn' to Forwards slot 2.
Added player 'Troy Terry' to Forwards slot 3.
Added player 'Brett Leason' to Forwards slot 4.
Added player 'Isac Lundestrom' to Forwards slot 5.
Added player 'Mason McTavish' to Forwards slot 6.
Added player 'Brock McGinn' to Forwards slot 7.
Added player 'Jansen Harkins' to Forwards slot 8.
Added player 'Ross Johnston' to Forwards slot 9.
Added player 'Cutter Gauthier' to Forwards slot 10.
Added player 'Sam Colangelo' to Forwards slot 11.
Added player 'Frank Vatrano' to Forwards slot 12.
Added player 'Jackson LaCombe' to Defense slot 1.
Added player 'Cam Fowler' to Defense slot 2.
Added player 'Brian Dumoulin' to Defense slot 3.
Added player 'Radko Gudas' to Defense slot 4.
Added player 'Olen Zellweger' to Defense slot 5.
Added player 'Jacob Trouba' to Defense slot 6.
Added pla

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Ryan Strome' for player_id 8476458.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Alex Killorn' for player_id 8473986.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Troy Terry' for player_id 8478873.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Brett Leason' for player_id 8481517.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Isac Lundestrom' for player_id 8480806.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_u

Fetching data for reference date: 2024-12-10
Player and goalie statistics fetched successfully.
Game ID: 2024020438
Added player 'Brady Tkachuk' to Forwards slot 1.
Added player 'Josh Norris' to Forwards slot 2.
Added player 'Shane Pinto' to Forwards slot 3.
Added player 'Tim Stützle' to Forwards slot 4.
Added player 'Drake Batherson' to Forwards slot 5.
Added player 'Nick Cousins' to Forwards slot 6.
Added player 'Michael Amadio' to Forwards slot 7.
Added player 'Claude Giroux' to Forwards slot 8.
Added player 'Zack Ostapchuk' to Forwards slot 9.
Added player 'Ridly Greig' to Forwards slot 10.
Added player 'Noah Gregor' to Forwards slot 11.
Added player 'Adam Gaudette' to Forwards slot 12.
Added player 'Nick Jensen' to Defense slot 1.
Added player 'Travis Hamonic' to Defense slot 2.
Added player 'Jacob Bernard-Docker' to Defense slot 3.
Added player 'Tyler Kleven' to Defense slot 4.
Added player 'Thomas Chabot' to Defense slot 5.
Added player 'Jake Sanderson' to Defense slot 6.
Added 

INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Brady Tkachuk' for player_id 8480801.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Josh Norris' for player_id 8480064.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Shane Pinto' for player_id 8481596.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Tim Stützle' for player_id 8482116.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_utils:Database connection established.
INFO:db_utils.nhl_db_utils:Retrieved full name 'Drake Batherson' for player_id 8480208.
INFO:db_utils.base_utils:Database connection closed.
INFO:db_utils.base_

In [82]:
combined_shots_df

Unnamed: 0,date,player,team,position,gp,toi,toi/gp,shots/60,icf/60,iff/60,min%,adj_min,x_shots,x_shots_mod,sog,handicap,over_odds,under_odds
0,2024-12-11,Artemi Panarin,NYR,L,27,416.42,15.42,8.36,17.43,12.97,5.14,19.09,2.66,2.58,7.0,3.5,110.0,-142.0
1,2024-12-11,Alexis Lafrenière,NYR,L,27,413.53,15.32,7.11,13.49,10.88,5.11,18.98,2.25,2.18,2.0,2.5,115.0,-148.0
2,2024-12-11,Vincent Trocheck,NYR,C,27,412.22,15.27,6.84,11.21,10.19,5.09,18.90,2.15,2.08,0.0,2.5,-110.0,-120.0
3,2024-12-11,Chris Kreider,NYR,L,24,270.23,11.26,7.99,14.21,11.77,3.75,13.93,1.86,1.80,3.0,2.5,-110.0,-120.0
4,2024-12-11,Kaapo Kakko,NYR,R,27,325.10,12.04,5.91,11.07,8.31,4.01,14.89,1.47,1.42,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,2024-12-11,Travis Hamonic,OTT,D,26,386.95,14.88,4.96,12.25,7.91,4.96,18.75,1.55,1.68,1.0,,,
68,2024-12-11,Jacob Bernard-Docker,OTT,D,17,224.12,13.18,4.02,10.71,6.96,4.39,16.60,1.11,1.20,0.0,,,
69,2024-12-11,Tyler Kleven,OTT,D,27,380.63,14.10,3.78,9.62,5.04,4.70,17.77,1.12,1.22,4.0,,,
70,2024-12-11,Thomas Chabot,OTT,D,27,478.40,17.72,7.02,15.80,9.53,5.91,22.34,2.61,2.83,2.0,2.5,-110.0,-120.0
