In [None]:
import pandas as pd
from io import StringIO
from datetime import date, timedelta
from random import randint
from time import sleep
import os
import numpy as np
import pymysql
from sqlalchemy import create_engine
import joblib
import requests
from bs4 import BeautifulSoup
import pulp

## TODO:
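- [ ] Expanded week (Feb 20th to March 2nd): set the `timedelta` used for `end_date` in `filter_dates_for_week` so the window covers the whole period.
- [ ] After the expanded week, change `end_date = start_date + timedelta(days=x)` back to `end_date = start_date + timedelta(days=6)`.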

In [None]:
# Constants
# Rows dated on/after this day are treated as "current season" data
# (see create_player_features).
SEASON_START = '2024-10-04'
# Season year interpolated into the hockey-reference.com schedule and
# standings URLs.
CURRENT_YEAR = 2025

# Optimization Rules
MAX_COST = 63.00  # upper bound on the summed 'pv' of the selected lineup
MIN_COST = MAX_COST * 0.99  # lower bound: the lineup must use at least 99% of MAX_COST
NUM_FORWARDS = 6  # exact number of 'F' players in the lineup
NUM_DEFENSEMEN = 4  # exact number of 'D' players in the lineup
NUM_GOALIES = 2  # exact number of 'G' entries in the lineup
MAX_PLAYERS_PER_TEAM = 5  # cap on lineup entries sharing one 'Team' value

# TEAM NAMES
# Maps the full team names used by the schedule/standings/salary sources to
# the abbreviations used as 'Team' keys everywhere else in this notebook.
# NOTE(review): the dotted forms ('L.A', 'N.J', 'S.J', 'T.B') presumably match
# the abbreviations stored in the player_data table - confirm before changing.
team_abbreviations = {
 'Anaheim Ducks':'ANA',
 'Boston Bruins':'BOS',
 'Buffalo Sabres':'BUF',
 'Calgary Flames':'CGY',
 'Carolina Hurricanes':'CAR',
 'Chicago Blackhawks':'CHI',
 'Colorado Avalanche':'COL',
 'Columbus Blue Jackets':'CBJ',
 'Dallas Stars':'DAL',
 'Detroit Red Wings':'DET',
 'Edmonton Oilers':'EDM',
 'Florida Panthers':'FLA',
 'Los Angeles Kings':'L.A',
 'Minnesota Wild':'MIN',
 'Montreal Canadiens':'MTL',
 'Nashville Predators':'NSH',
 'New Jersey Devils':'N.J',
 'New York Islanders':'NYI',
 'New York Rangers':'NYR',
 'Ottawa Senators':'OTT',
 'Philadelphia Flyers':'PHI',
 'Pittsburgh Penguins':'PIT',
 'San Jose Sharks':'S.J',
 'Seattle Kraken':'SEA',
 'St. Louis Blues':'STL',
 'Tampa Bay Lightning':'T.B',
 'Toronto Maple Leafs':'TOR',
 'Utah Hockey Club':'UTA',
 'Vancouver Canucks':'VAN',
 'Vegas Golden Knights':'VGK',
 'Washington Capitals':'WSH',
 'Winnipeg Jets':'WPG'
}

In [None]:
# Database connection settings.
# Every value can be overridden through an environment variable so that
# credentials do not have to be stored in (or committed with) the notebook.
username = os.environ.get("NHL_DB_USER", "root")
# FIXME(security): the literal fallback below is kept only so existing setups
# keep working. Export NHL_DB_PASSWORD, rotate this password and delete the
# fallback. Note: the value is interpolated into the URL as-is, so a password
# containing characters such as '@' or '/' must be URL-encoded first.
password = os.environ.get("NHL_DB_PASSWORD", "Sp1d3rman")
host = os.environ.get("NHL_DB_HOST", "localhost")
port = os.environ.get("NHL_DB_PORT", "3306")
database = os.environ.get("NHL_DB_NAME", "nhl_optimizer")

# Shared SQLAlchemy engine (MySQL through the PyMySQL driver) used by the
# data-loading functions below.
engine = create_engine(f"mysql+pymysql://{username}:{password}@{host}:{port}/{database}")

# Functions

## Helpers

In [None]:
def clean_player_name(name):
    """
    Extract the full player name from a raw CBS Sports name cell.

    The raw cell is expected to hold the short and the long form of the name
    glued together (e.g. "C. McDavidConnor McDavid"). The surname is taken
    from the end of the string and everything after its FIRST occurrence is
    returned as the full name.

    Parameters:
    - name: Raw player name from CBS Sports

    Returns:
    - Cleaned player name. Non-string values (e.g. NaN from an empty cell)
      are returned unchanged, and a name that does not contain a duplicated
      short form is returned as-is (stripped).
    """
    # Blank table cells arrive as NaN/None; there is nothing to clean.
    if not isinstance(name, str):
        return name

    parts = name.split()
    if not parts:
        return name.strip()

    # Handle names with short prefixes (van, de, etc.): treat
    # "<prefix> <last word>" as the surname when the second-to-last word has
    # at most three characters.
    has_prefix = len(parts) > 2 and len(parts[-2]) <= 3
    surname = parts[-2] + " " + parts[-1] if has_prefix else parts[-1]

    # Everything after the first occurrence of the surname is the long form.
    # partition() never raises, unlike split(...)[1], when the surname cannot
    # be found verbatim (e.g. irregular whitespace inside the raw cell).
    _, found, remainder = name.partition(surname)
    full_name = remainder.strip() if found else ""

    # No duplicated short form: keep the whole name rather than reducing it
    # to the surname, so it can still match full names in other data.
    return full_name if full_name else name.strip()


def format_lineup(lineup_df):
    """
    Returns a cleaned and formatted DataFrame of the optimal lineup.

    Parameters:
    - lineup_df: DataFrame holding the selected players, or None when no
      lineup could be generated.

    Returns:
    - DataFrame restricted to the display columns, ordered goalies first,
      then defensemen, then forwards, with the highest projection first
      inside each position, and numeric values rounded to 2 decimals.
      When lineup_df is None an EMPTY frame that still carries the display
      columns is returned, so callers can keep selecting/summing columns
      without a KeyError.
    """
    display_columns = ['Team', 'Injured', 'Player', 'Position',
                      'games_this_week', 'proj_fantasy_pts', 'pv']

    if lineup_df is None:
        return pd.DataFrame(columns=display_columns)

    # Lower value sorts first: G (0), D (1), F (2). Positions missing from
    # this map get NaN and therefore end up at the bottom.
    position_order = {'F': 2, 'D': 1, 'G': 0}

    sorted_lineup = (lineup_df[display_columns]
                    .assign(pos_order=lineup_df['Position'].map(position_order))
                    .sort_values(['pos_order', 'proj_fantasy_pts'],
                               ascending=[True, False])
                    .drop('pos_order', axis=1)
                    .round(2))

    return sorted_lineup

## Player Data

In [None]:
def get_player_data(start_date='2017-10-01'):
    """
    Fetches player data from the database with error handling.

    Parameters:
    - start_date: Date from which to fetch data (default: '2017-10-01')

    Returns:
    - DataFrame with player data or empty DataFrame if error occurs
    """
    try:
        # start_date is sent as a bound parameter instead of being formatted
        # into the SQL text, so the driver takes care of quoting/escaping.
        # %(name)s is the "pyformat" placeholder style understood by PyMySQL,
        # the driver behind the module-level `engine`.
        query = """
        SELECT *
        FROM player_data
        WHERE Date >= %(start_date)s
        """
        df = pd.read_sql(query, engine, params={"start_date": start_date})
        print(f"Successfully fetched {len(df)} player records")
        return df
    except Exception as e:
        # Best-effort by design: callers test `.empty` rather than catching.
        print(f"Error fetching player data: {e}")
        return pd.DataFrame()
    
def estimate_team_goaltending_points(weekly_multipliers, games_this_week, win_points=2, ot_loss_points=1, shutout_bonus=2, avg_ot_loss_freq=0.1, avg_shutout_freq=0.05):
    """
    Project each team's goaltending fantasy points for the week.

    Parameters:
    - weekly_multipliers: dict of team -> multiplier; wins are projected as
      games / multiplier
    - games_this_week: dict of team -> number of games (missing teams count as 0)
    - win_points, ot_loss_points, shutout_bonus: points awarded per event
    - avg_ot_loss_freq, avg_shutout_freq: assumed per-game event frequencies

    Returns:
    - dict of team -> (projected_points, games) for every team present in
      weekly_multipliers
    """
    def _project(team_name, team_multiplier):
        n_games = games_this_week.get(team_name, 0)

        # Inverse relation: a larger multiplier yields fewer projected wins.
        wins = n_games / team_multiplier
        ot_losses = n_games * avg_ot_loss_freq
        shutouts = n_games * avg_shutout_freq

        points = (wins * win_points +
                  ot_losses * ot_loss_points +
                  shutouts * shutout_bonus)
        return points, n_games

    return {team_name: _project(team_name, team_multiplier)
            for team_name, team_multiplier in weekly_multipliers.items()}

## Injuries

In [None]:
def get_current_injuries():
    """
    Scrapes and processes current NHL injuries from CBS Sports.

    Returns:
    - DataFrame with columns: Player, Team, Position, Updated, Injury, Injury Status.
      On any failure an EMPTY DataFrame carrying those column names is
      returned, so callers can still select 'Player' / 'Injury Status'.
    """
    # Schema handed back on failure; mirrors the columns documented above.
    expected_columns = ['Player', 'Team', 'Position', 'Updated', 'Injury', 'Injury Status']

    try:
        # Fetch the webpage content. Without a timeout a stalled connection
        # would block the notebook indefinitely.
        url = "https://www.cbssports.com/nhl/injuries/"
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes

        soup = BeautifulSoup(response.content, 'html.parser')

        # One DataFrame per team table found on the page
        injury_dataframes = []

        # Iterate over each TableBaseWrapper
        for wrapper in soup.find_all('div', class_='TableBaseWrapper'):
            # Wrappers without a team name or without a table are skipped
            team_name_tag = wrapper.find('span', class_='TeamName')
            if not team_name_tag:
                continue

            table = wrapper.find('table', class_='TableBase-table')
            if not table:
                continue

            # Use pandas to read the table
            df = pd.read_html(StringIO(str(table)))[0]
            df['Team'] = team_name_tag.get_text(strip=True)  # Add a column for the team name
            injury_dataframes.append(df)

        # Concatenate all dataframes
        if not injury_dataframes:
            raise ValueError("No injury data found")

        injuries_df = pd.concat(injury_dataframes, ignore_index=True)

        # Clean player names
        injuries_df['Player'] = injuries_df['Player'].apply(clean_player_name)

        return injuries_df

    except requests.RequestException as e:
        print(f"Error fetching injury data: {e}")
        return pd.DataFrame(columns=expected_columns)  # Empty, but with the expected schema
    except Exception as e:
        print(f"Error processing injury data: {e}")
        return pd.DataFrame(columns=expected_columns)  # Empty, but with the expected schema



## Schedule & Multipliers

In [None]:
def fetch_game_data(year: int):
    """
    Download the NHL schedule for one season from Hockey Reference.

    Parameters:
    - year: Season year used in the URL (NHL_{year}_games.html).

    Returns:
    - DataFrame with the columns Date (datetime.date), Visitor and Home,
      taken from the first table on the page.
    """
    schedule_url = f"https://www.hockey-reference.com/leagues/NHL_{year}_games.html"

    # Only the first table found on the page is used.
    schedule = pd.read_html(schedule_url)[0]

    # Turn the textual dates into plain datetime.date objects
    schedule["Date"] = pd.to_datetime(schedule["Date"]).dt.date

    return schedule[["Date", "Visitor", "Home"]]

def filter_dates_for_week(dates, start_date, days=10):
    """
    Filters the provided dates for those that fall within the window starting at start_date.

    Parameters:
    - dates: List of datetime.date objects.
    - start_date: The starting date of the window, either a datetime.date or
      an ISO 'YYYY-MM-DD' string.
    - days: Number of days added to start_date to obtain the (inclusive) end
      of the window. Defaults to 10, the value currently in use for the
      expanded week; pass 6 for a regular 7-day week.

    Returns:
    - A list of the dates with start_date <= d <= start_date + days, in
      their original order.
    """
    # Convert start_date to a date object if it's a string
    if isinstance(start_date, str):
        start_date = date.fromisoformat(start_date)

    # Calculate the (inclusive) end date of the window
    end_date = start_date + timedelta(days=days)

    # Filter the dates
    return [d for d in dates if start_date <= d <= end_date]

def opponents_for_team_for_week(year, start_date):
    """
    Fetches the opponents each team faces within a specified week.

    Parameters:
    - year: The year for which you want to check.
    - start_date: The starting date of the week.

    Returns:
    - A dictionary with teams as keys and lists of opponents as values.
      Every distinct opponent faced in the window is listed once, in order
      of first appearance in the schedule.
    """
    # Fetch game data for the year
    game_data = fetch_game_data(year)

    # Filter the game data for the desired week
    week_dates = filter_dates_for_week(game_data["Date"].tolist(), start_date)
    week_games = game_data[game_data["Date"].isin(week_dates)]

    # Walk over every game and record the matchup for both sides. Building
    # the mapping through set_index(...).to_dict() would keep only the LAST
    # away game and the LAST home game of each team and silently drop all
    # other opponents.
    opponents = {}
    for visitor, home in zip(week_games["Visitor"], week_games["Home"]):
        for team, opponent in ((visitor, home), (home, visitor)):
            faced = opponents.setdefault(team, [])
            if opponent not in faced:
                faced.append(opponent)

    return opponents


#-------#

def calculate_weekly_team_multipliers(year, start_date, points_df):
    """
    Calculate the weekly multipliers for teams based on their opponents for a given week.

    Parameters:
    - year: Season year passed on to opponents_for_team_for_week.
    - start_date: First day of the schedule window.
    - points_df: Standings DataFrame with the columns 'Abbreviation' and
      'multiplier' (as produced by get_team_standings). It may be empty,
      which is what get_team_standings returns on failure.

    Returns:
    - dict of team abbreviation -> mean multiplier of that team's opponents.
      Teams without any opponent that has a known multiplier get the
      neutral value 1.
    """
    # Opponents keyed by full team name
    games_week = opponents_for_team_for_week(year, start_date)

    # Convert full names to abbreviations (names without a mapping are dropped)
    games_week = {team_abbreviations[team]: [team_abbreviations[opp]
                                           for opp in opps
                                           if opp in team_abbreviations]
                 for team, opps in games_week.items()
                 if team in team_abbreviations}

    # Build the abbreviation -> multiplier lookup once instead of filtering
    # the DataFrame for every single opponent. drop_duplicates keeps the
    # first row per abbreviation, i.e. the row .iloc[0] would have selected.
    has_standings = (points_df is not None
                     and not points_df.empty
                     and {'Abbreviation', 'multiplier'}.issubset(points_df.columns))
    if has_standings:
        multiplier_by_team = (points_df
                              .drop_duplicates(subset='Abbreviation')
                              .set_index('Abbreviation')['multiplier']
                              .to_dict())
    else:
        # No usable standings: every team falls back to the neutral value.
        multiplier_by_team = {}

    weekly_multipliers = {}
    for team, opponents in games_week.items():
        multipliers = [multiplier_by_team[opponent]
                       for opponent in opponents
                       if opponent in multiplier_by_team]

        weekly_multipliers[team] = sum(multipliers) / len(multipliers) if multipliers else 1

    return weekly_multipliers

# Now, you can create a new column in your players DataFrame to store the composite multiplier for the week:


def games_count_for_team_for_week(year, start_date):
    """
    Count how many games every team plays inside the schedule window.

    Parameters:
    - year: Season year passed on to fetch_game_data.
    - start_date: First day of the window (see filter_dates_for_week).

    Returns:
    - dict of team name -> number of games (home and away combined) in the
      window. Teams without a game in the window are absent.
    """
    schedule = fetch_game_data(year)

    # Keep only the games whose date falls inside the window
    dates_in_window = filter_dates_for_week(schedule["Date"].tolist(), start_date)
    games_in_window = schedule[schedule["Date"].isin(dates_in_window)]

    # A team appears once per game, either as visitor or as home side, so
    # counting the stacked columns gives its total number of games.
    appearances = pd.concat([games_in_window["Visitor"], games_in_window["Home"]])

    return appearances.value_counts().to_dict()


def get_team_standings(year):
    """
    Download the conference standings from Hockey Reference and derive a
    multiplier for every team.

    Parameters:
    - year: Current NHL season year (e.g., 2025 for 2024-25 season)

    Returns:
    - DataFrame with the columns 'Team Name', 'PTS%', 'Abbreviation' and
      'multiplier' (0.5 / PTS%, or 1.8 when PTS% is 0). An empty DataFrame
      is returned when anything goes wrong.
    """
    def _tidy_conference(table):
        # Move the team names out of the index and remove the rows at
        # positions 0 and 9.
        # NOTE(review): those two rows are presumably the division header
        # rows of the page layout - confirm if the table format changes.
        tidy = (table
                .reset_index()
                .rename(columns={'index': 'Team Name'})
                .drop([0, 9], axis=0)
                .reset_index(drop=True))
        # Strip the '*' marker from the team names
        tidy['Team Name'] = tidy['Team Name'].str.replace('*', '', regex=False)
        return tidy[['Team Name', 'PTS%']]

    def _to_multiplier(pts_pct):
        # assuming weaker teams = higher multiplier
        if pts_pct != 0:
            return 0.5 / pts_pct
        return 1.8

    try:
        standings_url = f"https://www.hockey-reference.com/leagues/NHL_{year}_standings.html"
        conference_tables = pd.read_html(standings_url, index_col=0)

        # Tables 0 and 1 are handled as the Eastern and Western Conference
        eastern = _tidy_conference(conference_tables[0])
        western = _tidy_conference(conference_tables[1])

        points_df = pd.concat([eastern, western], ignore_index=True)

        points_df['PTS%'] = points_df['PTS%'].astype(float)
        points_df['Abbreviation'] = points_df['Team Name'].map(team_abbreviations)
        points_df['multiplier'] = points_df['PTS%'].apply(_to_multiplier)

        return points_df

    except Exception as e:
        print(f"Error fetching team standings: {e}")
        return pd.DataFrame()


# Use existing functions but let's wrap them together
def get_weekly_schedule_info(start_date=None):
    """
    Get all schedule related information for the week.

    NOTE: a second function with this same name is defined further down in
    this notebook; once that cell has run it replaces this definition.

    Parameters:
    - start_date: First day of the schedule window. Defaults to today's
      date, resolved at CALL time. (A `date.today()` default in the
      signature is evaluated only once, when the cell defining the function
      runs, and goes stale in a long-lived kernel.)

    Returns:
    - (games_count, multipliers): dict of team abbreviation -> number of
      games, and dict of team abbreviation -> opponent-based multiplier.
    """
    if start_date is None:
        start_date = date.today()

    # Get games per team
    games_count_full = games_count_for_team_for_week(CURRENT_YEAR, start_date)

    # Convert full team names to abbreviations in games_count
    games_count = {team_abbreviations[team]: count
                  for team, count in games_count_full.items()
                  if team in team_abbreviations}

    # Get team standings
    points_df = get_team_standings(CURRENT_YEAR)

    # Opponents per team; only used for the debug output below
    opponents_full = opponents_for_team_for_week(CURRENT_YEAR, start_date)

    # Convert opponents dict to use abbreviations and remove empty strings
    opponents = {}
    for team, opp_list in opponents_full.items():
        if team in team_abbreviations:
            team_abbrev = team_abbreviations[team]
            opponents[team_abbrev] = [team_abbreviations[opp]
                                    for opp in opp_list
                                    if opp in team_abbreviations]

    # Calculate multipliers using abbreviated team names
    multipliers = calculate_weekly_team_multipliers(CURRENT_YEAR, start_date, points_df)

    print("\nDebug (after conversion):")
    print("\nGames count with abbreviations:")
    print(games_count)
    print("\nOpponents with abbreviations:")
    print(opponents)

    return games_count, multipliers

## Pipeline

In [None]:
def create_player_features(df, season_start=SEASON_START):
    """
    Process player stats and create prediction features with validation.

    Parameters:
    - df: DataFrame of player records. Must contain 'Date', 'Player', 'Team'
      and the per-60 / usage stats listed in `required_columns`. It is NOT
      modified; all work happens on a copy.
    - season_start: Rows with Date >= this value form the current season.

    Returns:
    - DataFrame with one row per player holding the last available value of
      every column for the current season, plus '<stat>_rolling_5' and
      '<stat>_rolling_10' columns. An empty DataFrame is returned when the
      input is empty, a required column is missing or processing fails.
    """
    # Input validation
    if df.empty:
        print("No player data provided")
        return pd.DataFrame()

    required_columns = ['Date', 'Player', 'Team', 'Goals/60', 'Total Assists/60',
                       'Shots/60', 'ixG/60', 'TOI/GP', 'IPP', 'iHDCF/60']

    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"Missing required columns: {missing_columns}")
        return pd.DataFrame()

    try:
        # Work on a copy so the caller's DataFrame keeps its original 'Date'
        # values (the column is overwritten below).
        df = df.copy()

        # Handle different date formats
        def convert_season_format(date_str):
            try:
                # Try parsing as regular date first
                return pd.to_datetime(date_str)
            except Exception:
                # Exception rather than a bare except, so KeyboardInterrupt /
                # SystemExit are not swallowed.
                # If that fails, assume it's season format (e.g., "20172018")
                if len(str(date_str)) == 8:  # Season format
                    year = int(str(date_str)[:4])
                    return pd.to_datetime(f"{year}-01-01")
                return pd.NaT

        # Convert dates
        df['Date'] = df['Date'].apply(convert_season_format)

        # Remove any rows where date conversion failed
        invalid_dates = df['Date'].isna()
        if invalid_dates.any():
            print(f"Warning: Removed {invalid_dates.sum()} rows with invalid dates")
            df = df[~invalid_dates]

        # Chronological order per player is required by the rolling windows
        df = df.sort_values(['Player', 'Date'])

        print(f"Processing data for {df['Player'].nunique()} players")

        # Create recent performance metrics (only for current season)
        current_season = df[df['Date'] >= season_start].copy()
        print(f"Found {len(current_season)} records for current season")

        # Group by player and calculate rolling averages
        grouped = current_season.groupby('Player')

        # List of stats to create rolling averages for
        stats = ['Goals/60', 'Total Assists/60', 'Shots/60', 'ixG/60',
                'TOI/GP', 'IPP', 'iHDCF/60']

        # Calculate rolling averages with progress updates
        for stat in stats:
            print(f"Calculating rolling averages for {stat}")
            current_season[f'{stat}_rolling_5'] = grouped[stat].transform(
                lambda x: x.rolling(5, min_periods=1).mean()
            )
            current_season[f'{stat}_rolling_10'] = grouped[stat].transform(
                lambda x: x.rolling(10, min_periods=1).mean()
            )

        # Get most recent values for each player
        recent_stats = current_season.groupby('Player').last().reset_index()
        print(f"Generated features for {len(recent_stats)} players")

        return recent_stats

    except Exception as e:
        print(f"Error processing player features: {e}")
        return pd.DataFrame()

def calculate_projections(player_stats, games_count, multipliers):
    """
    Calculate final fantasy projections

    Parameters:
    - player_stats: DataFrame with one row per player containing 'Team',
      'TOI/GP' and, for both 'Goals/60' and 'Total Assists/60', the raw
      value plus its '_rolling_5' and '_rolling_10' companions.
    - games_count: dict of team abbreviation -> games in the window.
    - multipliers: dict of team abbreviation -> schedule multiplier.

    Returns:
    - Copy of player_stats with the added columns 'games_this_week',
      'schedule_multiplier', 'proj_goals_per_game', 'proj_assists_per_game'
      and 'proj_fantasy_pts'. The input frame is not modified.
    """
    df = player_stats.copy()

    # Add schedule info
    df['games_this_week'] = df['Team'].map(games_count).fillna(0)
    # Teams without a known multiplier get the neutral value 1.0 (the same
    # fallback calculate_weekly_team_multipliers uses). The previous 999.0
    # placeholder multiplied the projections of such players by 999.
    df['schedule_multiplier'] = df['Team'].map(multipliers).fillna(1.0)

    # Calculate base projections per game: blend of the latest value (40%)
    # and the 5- and 10-row rolling means (30% each), converted from a
    # per-60-minutes rate to a per-game figure through TOI/GP.
    df['proj_goals_per_game'] = (
        0.4 * df['Goals/60'] +
        0.3 * df['Goals/60_rolling_5'] +
        0.3 * df['Goals/60_rolling_10']
    ) * (df['TOI/GP'] / 60)

    df['proj_assists_per_game'] = (
        0.4 * df['Total Assists/60'] +
        0.3 * df['Total Assists/60_rolling_5'] +
        0.3 * df['Total Assists/60_rolling_10']
    ) * (df['TOI/GP'] / 60)

    # Fill NaN values with 0
    df['proj_goals_per_game'] = df['proj_goals_per_game'].fillna(0)
    df['proj_assists_per_game'] = df['proj_assists_per_game'].fillna(0)

    # Calculate final fantasy points: 2 per goal, 1 per assist
    df['proj_fantasy_pts'] = (
        (df['proj_goals_per_game'] * 2 +
         df['proj_assists_per_game'] * 1) *
        df['games_this_week'] *
        df['schedule_multiplier']
    )

    print("\nProjection Stats:")
    print(f"Non-zero fantasy points: {(df['proj_fantasy_pts'] > 0).sum()}")
    print(f"Mean fantasy points: {df['proj_fantasy_pts'].mean():.2f}")
    print(f"Max fantasy points: {df['proj_fantasy_pts'].max():.2f}")

    # Check for any remaining NaN values
    null_values = df[df['proj_fantasy_pts'].isnull()]
    if not null_values.empty:
        print("\nRows with null projections:")
        print(null_values[['Player', 'Team', 'Position', 'games_this_week', 'schedule_multiplier',
                          'proj_goals_per_game', 'proj_assists_per_game']].head())

    return df

def get_preseason_weights():
    """Return the blend weights used before the season has started.

    Only historical components are weighted: the last 20 games, the last
    season and the career average.
    """
    return dict(
        last_20_games=0.4,
        last_season=0.35,
        career=0.25,
    )

def get_early_season_weights(weeks_into_season):
    """Return blend weights that shift from historical to current-season data.

    The current-season share grows by 0.175 per week and is capped at 0.7;
    the remainder is split 70/30 between last season and career. The rolling
    windows are given no weight at this stage.
    """
    ramp = weeks_into_season * 0.175
    # Cap the current-season share at 0.7
    current_share = ramp if ramp < 0.7 else 0.7
    historical_share = 1 - current_share

    weights = {'current_season': current_share}
    weights['rolling_5'] = 0  # Not enough games yet
    weights['rolling_10'] = 0  # Not enough games yet
    weights['last_season'] = historical_share * 0.7
    weights['career'] = historical_share * 0.3
    return weights

def get_midseason_weights():
    """Return the blend weights used once the season is well under way.

    Only current-season components are weighted: the season average and the
    5- and 10-row rolling means.
    """
    return dict(
        current_season=0.4,
        rolling_5=0.3,
        rolling_10=0.3,
    )

def calculate_weighted_projections(df, weights, season_start):
    """
    Calculate projections based on provided weights

    Parameters:
    - df: DataFrame of player records with the columns 'Player', 'Team',
      'Position', 'TOI/GP', 'Date', 'Goals/60' and 'Total Assists/60'.
    - weights: dict of component name -> weight. Recognised components are
      'current_season', 'rolling_5', 'rolling_10' (computed from rows with
      Date >= season_start) and 'last_season', 'career', 'last_20_games'
      (computed from rows with Date < season_start). Other keys are ignored.
    - season_start: Boundary between historical and current-season rows.

    Returns:
    - DataFrame with one row per distinct ('Player', 'Team', 'Position',
      'TOI/GP') combination, the blended 'Goals/60' / 'Total Assists/60'
      rates and the derived 'proj_goals_per_game' / 'proj_assists_per_game'.

    A player without data for one component simply gets no contribution
    from it. (Adding the per-player Series directly aligns on the index and
    yields NaN for every player missing from either side, which wiped out
    the whole projection of such players.)
    """
    # Keep key identifying columns
    key_columns = ['Player', 'Team', 'Position', 'TOI/GP']
    base_df = df[key_columns].drop_duplicates()

    print("\nProjection Debug:")
    print(f"Total players: {len(base_df)}")

    # Split data into current and historical
    current_season = df[df['Date'] >= season_start]
    historical = df[df['Date'] < season_start]

    print(f"Players with current season data: {len(current_season['Player'].unique())}")
    print(f"Players with historical data: {len(historical['Player'].unique())}")

    players = base_df['Player'].unique()
    # Current-season components are only built when they are asked for
    use_current = 'current_season' in weights and not current_season.empty
    use_history = not historical.empty

    stats = ['Goals/60', 'Total Assists/60']
    projections = {}

    for stat in stats:
        # Component name (as used in `weights`) -> per-player Series
        components = {}

        if use_current:
            current_by_player = current_season.groupby('Player')[stat]
            components['current_season'] = current_by_player.mean()
            # Last value of the per-player rolling means
            components['rolling_5'] = current_by_player.transform(
                lambda x: x.rolling(5, min_periods=1).mean()
            ).groupby(current_season['Player']).last()
            components['rolling_10'] = current_by_player.transform(
                lambda x: x.rolling(10, min_periods=1).mean()
            ).groupby(current_season['Player']).last()

        if use_history:
            history_by_player = historical.groupby('Player')[stat]
            components['last_season'] = history_by_player.last()
            components['career'] = history_by_player.mean()
            # Mean of each player's last 20 historical rows
            components['last_20_games'] = history_by_player.transform(
                lambda x: x.tail(20).mean()
            ).groupby(historical['Player']).last()

        # Start from zero and add every available weighted component
        projection = pd.Series(0.0, index=players)
        for weight_type, weight in weights.items():
            component = components.get(weight_type)
            if component is None:
                continue
            # Align to the full player list first; missing players count as 0
            projection = projection + weight * component.reindex(players).fillna(0)

        projections[stat] = projection

    # Create final projections DataFrame
    proj_df = pd.DataFrame(projections)

    # Merge projections with base information
    final_df = base_df.merge(proj_df, left_on='Player', right_index=True, how='left')

    # Calculate per-game projections (per-60 rate scaled by time on ice)
    final_df['proj_goals_per_game'] = (final_df['Goals/60'] * (final_df['TOI/GP'] / 60)).fillna(0)
    final_df['proj_assists_per_game'] = (final_df['Total Assists/60'] * (final_df['TOI/GP'] / 60)).fillna(0)

    print("\nProjection Results:")
    print(f"Players with goal projections: {(final_df['proj_goals_per_game'] > 0).sum()}")
    print(f"Players with assist projections: {(final_df['proj_assists_per_game'] > 0).sum()}")
    print(f"Players with zero projections: {((final_df['proj_goals_per_game'] == 0) & (final_df['proj_assists_per_game'] == 0)).sum()}")

    return final_df

def get_projection_weights():
    """
    Pick the projection weights matching today's position in the season.

    Returns:
    - get_preseason_weights() when today is before SEASON_START,
      get_early_season_weights(weeks) during the first 4 weeks of the
      season, and get_midseason_weights() afterwards.
    """
    current_date = date.today()

    # SEASON_START is configured as an ISO 'YYYY-MM-DD' string; it has to be
    # a date object before it can be compared with / subtracted from a date
    # (date - str raises TypeError).
    season_start_date = SEASON_START
    if isinstance(season_start_date, str):
        season_start_date = date.fromisoformat(season_start_date)

    if current_date < season_start_date:
        return get_preseason_weights()

    # Completed weeks since the season started
    weeks_into_season = (current_date - season_start_date).days // 7

    if weeks_into_season < 4:
        return get_early_season_weights(weeks_into_season)
    return get_midseason_weights()

## Optimize Lineup

In [None]:
def select_best_team(df, max_cost, min_cost, num_forwards, num_defensemen, num_goalies, max_players_per_team):
    """
    Pick the lineup with the highest projected points using a 0/1 linear program.

    Parameters:
    - df: Candidate pool with the columns 'proj_fantasy_pts', 'pv', 'Position'
      ('F', 'D' or 'G') and 'Team'. Its index labels identify the decision
      variables (assumed to be unique - TODO confirm for new callers).
    - max_cost, min_cost: Inclusive bounds on the summed 'pv' of the lineup.
    - num_forwards, num_defensemen, num_goalies: Exact number of players to
      select per position.
    - max_players_per_team: Upper bound on selected rows sharing a 'Team'.

    Additional built-in rule: at most one defenseman per team.

    Returns:
    - The selected rows of df. When the solver does not reach an optimal
      solution (e.g. the constraints are infeasible) a warning is printed
      and an empty frame with df's columns is returned instead of whatever
      variable values the solver left behind.
    """
    # Create a linear programming problem
    prob = pulp.LpProblem("FantasyHockeyTeam", pulp.LpMaximize)

    # Create decision variables (binary: 1 if player i is selected, 0 otherwise)
    player_vars = pulp.LpVariable.dicts("player", df.index, cat="Binary")

    points = df['proj_fantasy_pts']
    salary = df['pv']

    # Objective: Maximize total fantasy points
    prob += pulp.lpSum(points.loc[i] * player_vars[i] for i in df.index), "Total Fantasy Points"

    # Salary cap constraints
    total_salary = pulp.lpSum(salary.loc[i] * player_vars[i] for i in df.index)
    prob += total_salary <= max_cost, "Total Salary Cost"
    prob += total_salary >= min_cost, "Minimum Salary Cost"

    # Positional constraints
    is_forward = df['Position'] == 'F'
    is_defense = df['Position'] == 'D'
    is_goalie = df['Position'] == 'G'
    prob += pulp.lpSum(player_vars[i] for i in df[is_forward].index) == num_forwards, "Number of Forwards"
    prob += pulp.lpSum(player_vars[i] for i in df[is_defense].index) == num_defensemen, "Number of Defensemen"
    prob += pulp.lpSum(player_vars[i] for i in df[is_goalie].index) == num_goalies, "Number of Goalies"

    # Rows without a team cannot match any team filter, so they are left out
    # of the per-team constraints.
    teams = df['Team'].dropna().unique()
    for team in teams:
        on_team = df['Team'] == team

        # Limit players per team
        prob += pulp.lpSum(player_vars[i] for i in df[on_team].index) <= max_players_per_team, f"MaxPlayers_{team}"

        # At most 1 defenseman per team
        prob += pulp.lpSum(player_vars[i] for i in df[on_team & is_defense].index) <= 1, f"Max1D_{team}"

    # Solve the problem
    prob.solve(pulp.PULP_CBC_CMD())

    # Only an optimal status guarantees that the variable values describe a
    # lineup satisfying every constraint.
    if prob.status != pulp.LpStatusOptimal:
        print(f"Warning: optimizer finished with status '{pulp.LpStatus[prob.status]}'; no valid lineup found")
        return df.iloc[0:0]

    # Extract selected players. The solver reports floats, so use a
    # threshold rather than an exact == 1 comparison; an unset value (None)
    # counts as not selected.
    selected_players = [i for i in df.index if (player_vars[i].varValue or 0) > 0.5]
    best_team = df.loc[selected_players]

    return best_team

## Generate Weekly Projections

In [None]:
def get_weekly_schedule_info(start_date=None):
    """
    Get all schedule related information for the week, only considering future games

    Parameters:
    - start_date: First day of the schedule window. Defaults to today's
      date, resolved at CALL time. (A `date.today()` default in the
      signature is evaluated only once, when the cell defining the function
      runs, and goes stale in a long-lived kernel.)

    Returns:
    - (games_count, multipliers): dict of team abbreviation -> number of
      games in the window, and dict of team abbreviation -> that team's
      'multiplier' from the standings (only for teams with at least one game).

    Raises:
    - ValueError when no team standings are available.
    """
    print("\nGetting schedule information for remaining games this week...")

    if start_date is None:
        start_date = date.today()

    # Get games per team
    games_count_full = games_count_for_team_for_week(CURRENT_YEAR, start_date)

    # Convert full team names to abbreviations in games_count
    games_count = {team_abbreviations[team]: count
                  for team, count in games_count_full.items()
                  if team in team_abbreviations}

    # Get team standings
    points_df = get_team_standings(CURRENT_YEAR)
    if points_df.empty:
        # get_team_standings returns an empty, column-less frame on failure;
        # fail with a clear message instead of a KeyError on 'Abbreviation'.
        raise ValueError("Team standings unavailable; cannot compute schedule multipliers")

    # Abbreviation -> multiplier lookup; the first row wins if an
    # abbreviation appears more than once.
    multiplier_by_team = (points_df
                          .drop_duplicates(subset='Abbreviation')
                          .set_index('Abbreviation')['multiplier']
                          .to_dict())

    # Calculate multipliers only for teams with remaining games
    multipliers = {team: multiplier_by_team[team]
                   for team, count in games_count.items()
                   if count > 0 and team in multiplier_by_team}

    return games_count, multipliers


def generate_weekly_projections():
    """
    Generate weekly fantasy projections and optimize lineup

    Pipeline: load player data -> schedule info -> features and projections
    -> join salary data ('nhl_players.csv') -> flag injured players ->
    add one team-goaltending entry per team -> run the optimizer.

    Returns:
    - DataFrame with the selected lineup, or None when any step fails (the
      reason is printed).
    """
    try:
        print("\n=== Starting Weekly Projections ===")

        print("\n1. Fetching player data...")
        player_data = get_player_data()
        if player_data.empty:
            raise ValueError("No player data available")

        print("\n2. Getting schedule information...")
        games_count, multipliers = get_weekly_schedule_info()
        if not games_count:
            raise ValueError("Failed to get schedule information")

        # Check if there are any remaining games this week
        remaining_games = sum(games_count.values())
        if remaining_games == 0:
            raise ValueError("No remaining games this week to optimize")

        print("\n3. Creating features and projections...")
        player_features = create_player_features(player_data)
        if player_features.empty:
            raise ValueError("Failed to create player features")

        projections = calculate_projections(player_features, games_count, multipliers)

        # Filter out players from teams with no remaining games
        active_teams = [team for team, count in games_count.items() if count > 0]
        projections = projections[projections['Team'].isin(active_teams)]

        # Add salary information
        print("\n4. Loading salary data...")
        try:
            salary_df = pd.read_csv('nhl_players.csv')
            salary_df['Team'] = salary_df['Team'].map(team_abbreviations)
            salary_df['Player_upper'] = salary_df['Player'].str.upper()
            print(f"Loaded salary data for {len(salary_df)} players")
        except Exception as e:
            print(f"Error loading salary data: {e}")
            return None

        # Merge player data (case-insensitive on the player name)
        print("\n5. Preparing final dataset...")
        projections['Player_upper'] = projections['Player'].str.upper()

        temp_df = projections.merge(
            salary_df[['Player_upper', 'Team', 'Position', 'pv']],
            on=['Player_upper', 'Team'],
            how='inner',
            suffixes=('_orig', '')
        )

        # Convert detailed positions to generic F/D
        temp_df['Position'] = temp_df['Position'].map(
            lambda x: 'F' if x in ['C', 'L', 'R', 'LW', 'RW'] else x
        )

        # Drop unnecessary columns
        temp_df = temp_df.drop(['Player_upper', 'Position_orig'], axis=1)

        # Add injury information
        print("\n6. Getting injury information...")
        injuries_df = get_current_injuries()
        if {'Player', 'Injury Status'}.issubset(injuries_df.columns):
            # One row per player, otherwise the left merge below would
            # duplicate skaters that are listed more than once.
            injury_status = (injuries_df[['Player', 'Injury Status']]
                             .drop_duplicates(subset='Player'))
        else:
            # A failed scrape should not abort the whole projection run.
            print("Warning: injury data unavailable; treating all players as healthy")
            injury_status = pd.DataFrame(columns=['Player', 'Injury Status'])

        final_df = temp_df.merge(injury_status, on='Player', how='left')
        final_df['Injured'] = final_df['Injury Status'].notna()
        # Injured players stay in the pool but are worth nothing
        final_df.loc[final_df['Injured'], 'proj_fantasy_pts'] = 0

        print("\n7. Adding goaltending projections...")
        goalie_data = estimate_team_goaltending_points(multipliers, games_count)

        # Create goalie DataFrame: one synthetic "<team> Goaltending" entry per team
        goalie_rows = []
        for team, (points, games) in goalie_data.items():
            goalie_rows.append({
                'Player': f"{team} Goaltending",
                'Team': team,
                'Position': 'G',
                'games_this_week': games,
                'proj_fantasy_pts': points,
                'pv': 0.0,  # Goalies don't count against salary cap
                'Injured': False
            })

        goalie_df = pd.DataFrame(goalie_rows)

        # Combine skater and goalie data
        final_df = pd.concat([final_df, goalie_df], ignore_index=True)
        # Remove rows the optimizer cannot use. The result has to be assigned
        # (a bare dropna() call has no effect), and the check is limited to
        # the columns the optimizer reads: healthy players have no
        # 'Injury Status' and goalie entries have no skater stats, so an
        # unrestricted dropna() would discard almost every row.
        final_df = (final_df
                    .dropna(subset=['Team', 'Position', 'proj_fantasy_pts', 'pv'])
                    .reset_index(drop=True))

        print("\nFinal dataset summary:")
        print(f"Total players: {len(final_df)}")
        print(f"Skaters: {len(final_df[final_df['Position'] != 'G'])}")
        print(f"Goalies: {len(final_df[final_df['Position'] == 'G'])}")
        print(f"Injured players: {final_df['Injured'].sum()}")

        print("\n8. Running optimization...")
        optimal_lineup = select_best_team(
            final_df,
            MAX_COST,
            MIN_COST,
            NUM_FORWARDS,
            NUM_DEFENSEMEN,
            NUM_GOALIES,
            MAX_PLAYERS_PER_TEAM
        )

        # Printed before returning; placed after the return statement it
        # could never be reached.
        print("\n=== Projection Complete ===")

        return optimal_lineup

    except Exception as e:
        print(f"\nError generating projections: {e}")
        return None

# Weekly Lineup

In [None]:
# Generate optimal lineup
# Runs the whole pipeline (database, web scraping, salary CSV, optimizer).
# The result is None when any step failed; the reason is printed above.
optimal_lineup = generate_weekly_projections()

# Lineup

In [None]:
# Format the lineup
clean_lineup = format_lineup(optimal_lineup)

# Print totals
print("\nTeam Summary:")
if clean_lineup.empty:
    # No lineup was produced (failed pipeline or infeasible optimization);
    # selecting the summary columns could raise a KeyError in that case.
    print("No lineup available - see the messages printed by generate_weekly_projections().")
else:
    print(f"Total Projected Points: {clean_lineup['proj_fantasy_pts'].sum():.2f}")
    print(f"Total Cost: ${clean_lineup['pv'].sum():.2f}M")

In [None]:
# Display the formatted lineup as the cell output
clean_lineup

### Captains (Not needed for now)

In [None]:
# For every position, pick the lineup row with the highest projected points:
# groupby(...).idxmax() yields one index label per position, .loc selects them.
captains = clean_lineup.loc[clean_lineup.groupby('Position')['proj_fantasy_pts'].idxmax()]
# Display the captains as the cell output
captains