In [41]:

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
import os
import pandas as pd
import re 

from utility.constants import *

# Utility: Load files
def load_file(folder, filename):
    """Read the CSV named ``filename`` inside ``folder`` into a DataFrame.

    Args:
        folder: Directory that should contain the file.
        filename: Name of the CSV file within ``folder``.

    Returns:
        pd.DataFrame: The parsed contents of the CSV file.

    Raises:
        FileNotFoundError: If the joined path does not exist.
    """
    csv_path = os.path.join(folder, filename)
    # Guard first so the caller gets the project-specific message
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"File {filename} not found in folder {folder}")
    return pd.read_csv(csv_path)

def extract_years(folder_path):
    """Return the sorted years that prefix the CSV file names in ``folder_path``.

    A file contributes a year when its name starts with four digits
    (e.g. ``2023ADP.csv`` -> ``2023``); other ``*.csv`` files are ignored.
    The scan is not recursive.

    Args:
        folder_path (str | os.PathLike): Directory to scan. Plain strings are
            accepted as well as ``pathlib.Path`` objects.

    Returns:
        list[int]: Years in ascending order, one entry per matching file.
    """
    from pathlib import Path  # local import so the block stays self-contained

    year_prefix = re.compile(r"^\d{4}")
    years = []
    for file in Path(folder_path).glob("*.csv"):
        # Match once per file instead of once to filter and once to extract
        match = year_prefix.match(file.name)
        if match:
            years.append(int(match.group()))
    return sorted(years)

# Load ADP file
def load_adp_file(adp_dir, given_year=None):
    """Load one season's ADP CSV from ``adp_dir`` and tag it with its year.

    Args:
        adp_dir: Directory holding the ``<year>ADP.csv`` files. When
            ``given_year`` is None it is also passed to ``extract_years``
            to discover the available seasons.
        given_year: Season to load. When None, a year is picked at random
            from the years found in ``adp_dir`` and printed.

    Returns:
        pd.DataFrame: Contents of ``<year>ADP.csv`` with an added ``year``
        column holding the selected year (stored exactly as supplied).

    Raises:
        FileNotFoundError: If ``<year>ADP.csv`` does not exist in ``adp_dir``.
        IndexError: If no year is given and ``adp_dir`` has no dated CSV files.
    """
    if given_year is None:
        year = random.choice(extract_years(adp_dir))
        print("Randomly selected ADP year of:", year)
    else:
        year = given_year

    # Read from the directory that was passed in rather than the ADP_DIR
    # global, so the discovery step and the load step use the same folder.
    adp_df = load_file(adp_dir, f"{year}ADP.csv")
    adp_df['year'] = year
    return adp_df

# Load stats
def load_seasonal_stats(seasonal_stats_dir, year):
    """Read ``player_stats_<year>.csv`` from ``seasonal_stats_dir`` via ``load_file``."""
    stats_filename = f"player_stats_{year}.csv"
    return load_file(seasonal_stats_dir, stats_filename)

def load_defensive_stats(defensive_stats_dir, year):
    """Read ``seasonal_defensive_stats_<year>.csv`` from ``defensive_stats_dir`` via ``load_file``."""
    stats_filename = f"seasonal_defensive_stats_{year}.csv"
    return load_file(defensive_stats_dir, stats_filename)

# Merge stats into ADP
def merge_stats(adp_df, seasonal_stats_df, defensive_stats_df):
    """Attach season fantasy points to the ADP rows.

    Player rows get ``fppr`` from the seasonal stats and team-defense rows get
    ``def_fpts`` from the defensive stats; both are left-joined on
    ``player_id`` so ADP rows without stats are kept with NaN values.

    Args:
        adp_df (pd.DataFrame): ADP rows with ``player_id`` and ``POSITION``.
        seasonal_stats_df (pd.DataFrame): Needs ``player_id`` and ``fppr``.
        defensive_stats_df (pd.DataFrame): Needs ``pa_team`` (joined against
            ``player_id``) and ``fpts``. The frame itself is not modified.

    Returns:
        pd.DataFrame: A new frame with the added columns ``fppr``,
        ``def_fpts`` and ``fpts``, where ``fpts`` is ``def_fpts`` for rows
        whose ``POSITION`` is ``"DST"`` and ``fppr`` otherwise.
    """
    merged = adp_df.merge(
        seasonal_stats_df[["player_id", "fppr"]], on="player_id", how="left"
    )
    # Rename on a copy so the defensive key/points line up with the ADP columns
    defense = defensive_stats_df.rename(
        columns={"pa_team": "player_id", "fpts": "def_fpts"}
    )
    merged = merged.merge(
        defense[["player_id", "def_fpts"]], on="player_id", how="left"
    )
    # Vectorised pick: keep def_fpts on DST rows, fall back to fppr elsewhere.
    # Unlike a row-wise apply this also works when the frame has no rows.
    is_dst = merged["POSITION"] == "DST"
    merged["fpts"] = merged["def_fpts"].where(is_dst, merged["fppr"])
    return merged

In [2]:
def get_min_player_count_by_position(adp_dir, dstats_dir):
    """
    Find, for every position, the smallest player pool seen in any season.

    Every year discovered in ``adp_dir`` is loaded; players are counted per
    ``POSITION`` in the ADP file and unique ``pa_team`` values are counted in
    that year's defensive stats file.

    Args:
        adp_dir (Path): Directory containing ADP data files.
        dstats_dir (Path): Directory containing defensive stats files.

    Returns:
        dict: Maps each position to its minimum yearly player count (taken
              over the years in which the position appears), plus a 'DST'
              entry holding the minimum yearly team count. The 'DST' entry
              always comes from the defensive stats, replacing any 'DST'
              count derived from the ADP files.
    """
    yearly_counts = {}      # position -> list of per-season player counts
    teams_per_season = []   # unique defensive teams found in each season

    for season in extract_years(adp_dir):
        adp = load_adp_file(adp_dir, season)
        players_by_position = adp.groupby("POSITION").size()

        defense = load_defensive_stats(dstats_dir, season)
        teams_per_season.append(defense['pa_team'].nunique())

        for pos, n_players in players_by_position.items():
            yearly_counts.setdefault(pos, []).append(n_players)

    result = {}
    for pos, counts in yearly_counts.items():
        result[pos] = min(counts)

    # Team defenses are sized from the defensive stats, not the ADP sheet
    result["DST"] = min(teams_per_season)
    return result


In [7]:
# Smallest per-position pool seen across all available seasons ('DST' uses the team count).
min_player_dict = get_min_player_count_by_position(ADP_DIR, DEFENSIVE_STATS_DIR)

In [68]:
# Load the 2023 ADP sheet. The year is passed as a string, so load_adp_file
# stores the text "2023" in the 'year' column.
adp_df = load_adp_file(ADP_DIR, "2023")
# Read the year back from the frame so both stats files are loaded for the same season.
year = adp_df['year'].iloc[0]
seasonal_stats_df = load_seasonal_stats(SEASONAL_STATS_DIR, year)
defensive_stats_df = load_defensive_stats(DEFENSIVE_STATS_DIR, year)
data_df = merge_stats(adp_df, seasonal_stats_df, defensive_stats_df)

# Sort players by FPPRAVG (ascending) and renumber the rows from 0
data_df = data_df.sort_values(by="FPPRAVG").reset_index(drop=True)

# Initialize draft setup: manager slots 1..NUM_MANAGERS in shuffled order.
# NOTE(review): no random seed is set in the visible cells, so the order changes on every run.
DRAFT_ORDER = list(range(1, NUM_MANAGERS + 1))
random.shuffle(DRAFT_ORDER)

# Empty pick log and pick counter; neither is used in the cells shown here.
results = []
pick_order = 1

In [69]:
# (rows, columns) of the merged, sorted player frame.
data_df.shape

(593, 13)

In [70]:
def filter_top_players_by_position(data_df, top_players):
    """
    Filter the top players for each position based on FPPRAVG (lower is better).

    Args:
    - data_df (pd.DataFrame): The original data frame containing player data;
      must have 'POSITION' and 'FPPRAVG' columns.
    - top_players (dict): Dictionary specifying the number of top players to retain for each position.
      Positions missing from this dict are dropped entirely.

    Returns:
    - pd.DataFrame: Rows for the top players of each listed position, sorted by
      FPPRAVG ascending and keeping their original index labels. When nothing
      is selected, an empty frame with the columns of ``data_df`` is returned.
    """
    selected = []
    for position, top_n in top_players.items():
        position_data = data_df[data_df["POSITION"] == position]
        # Sort by FPPRAVG (ascending) and select the top N players
        top_position_data = position_data.sort_values("FPPRAVG").head(top_n)
        if not top_position_data.empty:
            selected.append(top_position_data)

    if not selected:
        # pd.concat rejects an empty list; hand back an empty frame with the input's schema
        return data_df.iloc[0:0].copy()

    # Concatenate and sort once instead of on every loop iteration
    return pd.concat(selected).sort_values(by='FPPRAVG')

In [71]:
# Trim every position to the pool size recorded in min_player_dict (lowest FPPRAVG first).
data_df2 = filter_top_players_by_position(data_df, min_player_dict)

In [72]:
def assign_tiers(data_df, metric_col, original_breakpoints):
    """
    Rank players by a metric and label consecutive blocks of rows with tiers.

    The breakpoints are treated as relative tier sizes: they are rescaled so
    that together they cover every row, each scaled size is truncated to an
    integer, and whatever is left over is added to the last tier. The scaled
    sizes are printed.

    Args:
    - data_df (pd.DataFrame): Dataframe containing player data (not modified).
    - metric_col (str): Column name for the metric to tier by (e.g., "FPPRAVG").
    - original_breakpoints (list): Relative size of each tier, first tier first.

    Returns:
    - pd.DataFrame: Copy sorted ascending by ``metric_col`` with a fresh
      0-based index and an additional 'Tier' column (1 = first tier).
    """
    n_rows = len(data_df)
    ratio = n_rows / sum(original_breakpoints)

    scaled_breakpoints = []
    for weight in original_breakpoints:
        scaled_breakpoints.append(int(weight * ratio))

    # Truncation can leave rows uncovered; give the remainder to the last tier
    leftover = n_rows - sum(scaled_breakpoints)
    scaled_breakpoints[-1] = scaled_breakpoints[-1] + leftover

    print("Scaled_breakpoints:", scaled_breakpoints)

    ranked = data_df.sort_values(by=metric_col).reset_index(drop=True)
    ranked['Tier'] = None

    first_row = 0
    tier = 1
    for tier_size in scaled_breakpoints:
        last_row = first_row + tier_size - 1
        # .loc slices by label and includes both ends, hence the "- 1" above
        ranked.loc[first_row:last_row, 'Tier'] = tier
        first_row = last_row + 1
        tier += 1

    return ranked

# Example usage
# Relative tier sizes; assign_tiers rescales them so that they cover every row.
tier_breakpoints_proportion = [3, 7, 12, 20, 30]

# Rank the filtered pool by FPPRAVG and attach a 'Tier' column (1 = first tier)
data_df3 = assign_tiers(data_df2, metric_col='FPPRAVG', original_breakpoints=tier_breakpoints_proportion)


Scaled_breakpoints: [14, 33, 58, 96, 147]


In [75]:
# Inspect the tiered player pool.
data_df3

Unnamed: 0,player_name,player_id,POSITION,FPPRPOS,FPPRAVG,HPPRPOS,HPPRAVG,STRDPOS,STRDAVG,year,fppr,def_fpts,fpts,Tier
0,Justin Jefferson,00-0036322,WR,WR1,1,WR1,1.0,WR1,2.0,2023,202.200001,,202.200001,1
1,Christian McCaffrey,00-0033280,RB,RB1,2,RB1,2.0,RB1,1.0,2023,391.300011,,391.300011,1
2,Ja'Marr Chase,00-0036900,WR,WR2,3,WR2,3.0,WR2,4.0,2023,262.719998,,262.719998,1
3,Austin Ekeler,00-0033699,RB,RB2,4,RB2,4.0,RB2,3.0,2023,184.200001,,184.200001,1
4,Travis Kelce,00-0030506,TE,TE1,5,TE1,6.0,TE1,5.0,2023,219.400002,,219.400002,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
343,Le'Veon Bell,00-0030496,RB,RB95,404,,,,,2023,,,,5
344,Israel Abanikanda,00-0038389,RB,RB82,413,RB77,266.0,RB78,261.0,2023,16.300000,,16.300000,5
345,Derek Watt,00-0032897,RB,RB96,414,,,RB83,274.0,2023,,,,5
346,D'Vonte Price,00-0037400,RB,RB97,426,,,,,2023,,,,5


In [79]:
def calculate_scarcity(data_df, tier_weights, position_col='POSITION', tier_col='Tier'):
    """
    Calculate a scarcity score for each position from its tier distribution.

    For every position the score is the sum, over the tiers present in that
    position, of ``(players in tier / players in position) / tier weight``.
    A larger weight therefore shrinks that tier's contribution.

    Args:
    - data_df (pd.DataFrame): One row per available player, with a position
      column and a tier column.
    - tier_weights (dict): Dictionary of weights for each tier (e.g., {1: 10, 2: 7, ...}).
      Must contain every tier value that occurs in ``data_df``.
    - position_col (str): Name of the position column.
    - tier_col (str): Name of the tier column.

    Returns:
    - dict: Scarcity scores for each position.

    Raises:
    - KeyError: If a tier found in the data has no entry in ``tier_weights``.
    """
    scarcity_scores = {}

    for position in data_df[position_col].unique():
        # Only the tier labels of this position's players are needed
        position_tiers = data_df.loc[data_df[position_col] == position, tier_col]
        total_players = len(position_tiers)
        scarcity = 0

        for tier in position_tiers.unique():
            players_in_tier = int((position_tiers == tier).sum())
            # Share of the position held by this tier, damped by the tier weight
            scarcity += (players_in_tier / total_players) / tier_weights[tier]

        scarcity_scores[position] = scarcity

    return scarcity_scores

# Example setup
tier_weights = {1: 10, 2: 7, 3: 5, 4: 3, 5: 2}  # Weight per tier; calculate_scarcity divides each tier's share by its weight


In [80]:
# Scarcity score per position for the tiered pool.
# NOTE(review): this literal repeats the tier_weights dict defined in the previous cell.
calculate_scarcity(data_df3, tier_weights = {1: 10, 2: 7, 3: 5, 4: 3, 5: 2})

{'WR': 0.32630173564753007,
 'RB': 0.3394207167403044,
 'TE': 0.38906926406926406,
 'QB': 0.34505494505494505,
 'DST': 0.409375,
 'K': 0.403448275862069}