In [2]:

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
import os
import pandas as pd
import re 

from utility.constants import *

# Utility: Load files
def load_file(folder, filename):
    """Function to read csv files"""
    file_path = os.path.join(folder, filename)
    if os.path.exists(file_path):
        return pd.read_csv(file_path)
    raise FileNotFoundError(f"File {filename} not found in folder {folder}")

def extract_years(folder_path):
    """Extract years from CSV file names."""
    years = [int(re.match(r"^\d{4}", file.name).group()) 
                for file in folder_path.glob("*.csv") if re.match(r"^\d{4}", file.name)]
    return sorted(years)

# Load ADP file
def load_adp_file(adp_dir, given_year=None):
    """Load ADP file at Random year; else use given year"""

    if given_year == None:
        years = extract_years(adp_dir)
        year = random.choice(years)
        print(f"Randomly selected ADP year of:", year)

    else: 
        year = given_year

    file_name = f"{year}ADP.csv"
    adp_df = load_file(ADP_DIR, file_name)
    adp_df['year'] = year   
    return adp_df

# Load stats
def load_seasonal_stats(seasonal_stats_dir, year):
    return load_file(seasonal_stats_dir, f"player_stats_{year}.csv")

def load_defensive_stats(defensive_stats_dir, year):
    return load_file(defensive_stats_dir, f"seasonal_defensive_stats_{year}.csv")

# Merge stats into ADP
def merge_stats(adp_df, seasonal_stats_df, defensive_stats_df):
    adp_df = adp_df.merge(
        seasonal_stats_df[["player_id", "fppr"]], on="player_id", how="left"
    )
    defensive_stats_df = defensive_stats_df.rename(columns={"pa_team": "player_id", "fpts": "def_fpts"})
    adp_df = adp_df.merge(
        defensive_stats_df[["player_id", "def_fpts"]], on="player_id", how="left"
    )
    adp_df["fpts"] = adp_df.apply(
        lambda row: row["def_fpts"] if row["POSITION"] == "DST" else row["fppr"], axis=1
    )
    return adp_df

In [55]:
def get_min_player_count_by_position(adp_dir, dstats_dir):
    """
    Get the minimum count of players for each position across all ADP season data
    and the minimum count of teams (DST) across all years.

    Args:
        adp_dir (Path): Directory containing ADP data files.
        dstats_dir (Path): Directory containing defensive stats files.

    Returns:
        dict: A dictionary where keys are positions and 'DST',
              and values are the minimum number of players or teams available across all years.
    """
    years = extract_years(adp_dir)
    position_counts = {}
    dst_counts = []  # List to store the count of teams (DST) for each year

    for year in years:
        # Load ADP data for the year
        df = load_adp_file(adp_dir, year)
        position_grouped = df.groupby("POSITION").size()  # Count players per position

        # Load defensive stats (DST) data for the year
        ddf = load_defensive_stats(dstats_dir, year)
        team_count = ddf['pa_team'].nunique()  # Count unique teams in defensive stats
        dst_counts.append(team_count)

        # Collect position-specific counts
        for position, count in position_grouped.items():
            if position not in position_counts:
                position_counts[position] = []
            position_counts[position].append(count)

    # Get the minimum count for each position across all years
    min_counts = {position: min(counts) for position, counts in position_counts.items()}

    # Add DST (team count) to the results
    min_counts["DST"] = min(dst_counts)

    return min_counts


In [56]:
min_player_dict = get_min_player_count_by_position(ADP_DIR, DEFENSIVE_STATS_DIR)

In [192]:
adp_df = load_adp_file(ADP_DIR, "2023")
year = adp_df['year'].iloc[0]
seasonal_stats_df = load_seasonal_stats(SEASONAL_STATS_DIR, year)
defensive_stats_df = load_defensive_stats(DEFENSIVE_STATS_DIR, year)
data_df = merge_stats(adp_df, seasonal_stats_df, defensive_stats_df)

# Sort players by FPPRAVG
data_df = data_df.sort_values(by="FPPRAVG").reset_index(drop=True)

# Initialize draft setup
DRAFT_ORDER = list(range(1, NUM_MANAGERS + 1))
random.shuffle(DRAFT_ORDER)

results = []
pick_order = 1

In [193]:
data_df.shape

(593, 13)

In [194]:
def filter_top_players_by_position(data_df, top_players):
    """
    Filter the top players for each position based on FPPRAVG (lower is better).
    
    Args:
    - data_df (pd.DataFrame): The original data frame containing player data.
    - top_players (dict): Dictionary specifying the number of top players to retain for each position.
    
    Returns:
    - pd.DataFrame: Filtered data containing only the top players for each position.
    """
    filtered_data = pd.DataFrame()
    for position, top_n in top_players.items():
        position_data = data_df[data_df["POSITION"] == position]
        # Sort by FPPRAVG (ascending) and select the top N players
        top_position_data = position_data.sort_values("FPPRAVG").head(top_n)
        filtered_data = pd.concat([filtered_data, top_position_data])
        filtered_data = filtered_data.sort_values(by='FPPRAVG')
    return filtered_data

In [195]:
data_df2 = filter_top_players_by_position(data_df, min_player_dict)

In [196]:
def calculate_tiers_by_metric_geo(data_df, metric_col='FPPRAVG', num_tiers=5, r=2):
    """
    Calculate tier proportions using corrected geometric progression for ascending metric.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data.
    - metric_col (str): Column name for the performance metric (e.g., 'FPPRAVG').
    - num_tiers (int): Number of tiers to create.
    - r (float): Common ratio for geometric progression (default=2).

    Returns:
    - pd.DataFrame: DataFrame with an additional "Tier" column.
    - list: Number of players in each tier.
    """
    # Sort the dataframe by the metric column in ascending order (better performance first)
    sorted_data = data_df.sort_values(by=metric_col, ascending=True).reset_index(drop=True)
    
    # Calculate reversed geometric progression weights
    weights = [r ** (i - 1) for i in range(1, num_tiers + 1)]
    
    # Normalize weights to sum to 1
    normalized_weights = [w / sum(weights) for w in weights]
    
    # Calculate the total number of players
    total_players = len(sorted_data)
    
    # Allocate players to tiers based on normalized weights
    players_per_tier = [int(total_players * w) for w in normalized_weights]
    
    # Adjust to ensure the total number of players matches exactly
    players_per_tier[-1] += total_players - sum(players_per_tier)
    
    # Assign tiers to players
    tier_assignments = []
    start_idx = 0
    for tier, count in enumerate(players_per_tier, start=1):
        end_idx = start_idx + count
        tier_assignments.extend([tier] * count)
        start_idx = end_idx

    sorted_data['Tier'] = tier_assignments
    return sorted_data, players_per_tier


data_df3, players_per_tier_geo = calculate_tiers_by_metric_geo(data_df2, metric_col='FPPRAVG', r=2)

print("Players per Tier (Geometric Progression):", players_per_tier_geo)


Players per Tier (Geometric Progression): [11, 22, 44, 89, 182]


In [197]:
data_df3.head(30)

Unnamed: 0,player_name,player_id,POSITION,FPPRPOS,FPPRAVG,HPPRPOS,HPPRAVG,STRDPOS,STRDAVG,year,fppr,def_fpts,fpts,Tier
0,Justin Jefferson,00-0036322,WR,WR1,1,WR1,1.0,WR1,2.0,2023,202.200001,,202.200001,1
1,Christian McCaffrey,00-0033280,RB,RB1,2,RB1,2.0,RB1,1.0,2023,391.300011,,391.300011,1
2,Ja'Marr Chase,00-0036900,WR,WR2,3,WR2,3.0,WR2,4.0,2023,262.719998,,262.719998,1
3,Austin Ekeler,00-0033699,RB,RB2,4,RB2,4.0,RB2,3.0,2023,184.200001,,184.200001,1
4,Travis Kelce,00-0030506,TE,TE1,5,TE1,6.0,TE1,5.0,2023,219.400002,,219.400002,1
5,Tyreek Hill,00-0033040,WR,WR3,6,WR3,5.0,WR3,7.0,2023,376.400009,,376.400009,1
6,Saquon Barkley,00-0034844,RB,RB3,7,RB5,9.0,RB4,8.0,2023,223.200005,,223.200005,1
7,Bijan Robinson,00-0038542,RB,RB4,8,RB3,7.0,RB3,6.0,2023,246.299999,,246.299999,1
8,Stefon Diggs,00-0031588,WR,WR4,9,WR4,10.0,WR4,10.0,2023,273.800003,,273.800003,1
9,Nick Chubb,00-0034791,RB,RB5,10,RB4,8.0,RB5,9.0,2023,23.1,,23.1,1


In [198]:

print("Players per Tier (Geometric Progression):", players_per_tier_geo)


Players per Tier (Geometric Progression): [11, 22, 44, 89, 182]


In [199]:
data_df3.groupby('Tier')['fpts'].agg({'mean', 'std', 'max', 'median', 'min'})

Unnamed: 0_level_0,mean,min,max,std,median
Tier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,242.547276,23.1,391.300011,98.011968,246.299999
2,244.083637,135.400002,403.200009,76.464482,229.450001
3,183.263638,11.7,331.219994,75.531928,201.950001
4,138.98427,0.0,342.84,69.083621,130.0
5,92.434641,-1.3,319.060001,66.844582,78.0


In [200]:
data_df3.groupby(['Tier', 'POSITION'])['fpts'].agg({'mean', 'std', 'max', 'median', 'min','count'})

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,min,max,std,count,median
Tier,POSITION,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,RB,213.620003,23.1,391.300011,132.186754,5,223.200005
1,TE,219.400002,219.400002,219.400002,,1,219.400002
1,WR,276.104003,202.200001,376.400009,62.889001,5,265.400002
2,QB,343.193325,280.119991,392.639992,57.484365,3,356.819992
2,RB,222.882502,145.700003,282.400005,46.099529,8,232.35
2,TE,135.400002,135.400002,135.400002,,1,135.400002
2,WR,242.180003,137.599998,403.200009,79.152187,10,226.5
3,QB,214.676666,84.799998,331.219994,86.870243,6,232.170001
3,RB,159.458825,11.7,290.500004,82.588968,17,179.200003
3,TE,161.800001,113.200001,219.0,46.363401,5,137.300005


In [208]:
def calculate_tier_weights(data_df, position_col='POSITION', tier_col='Tier', metric_col='fpts'):
    """
    Calculate tier weights for each position based on mean fpts and tier.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data with tiers.
    - position_col (str): Column name for positions (e.g., 'POSITION').
    - tier_col (str): Column name for tier information (e.g., 'Tier').
    - metric_col (str): Column name for performance metric (e.g., 'fpts').

    Returns:
    - dict: Tier weights for each position.
    """
    tier_weights = {}

    # Group by position and tier, and calculate mean fpts
    tier_stats = data_df.groupby([position_col, tier_col])[metric_col].mean()

    # Normalize weights for each position
    for position in data_df[position_col].unique():
        position_tiers = tier_stats.loc[position] if position in tier_stats.index else []
        max_fpts = position_tiers.max() if len(position_tiers) > 0 else 0
        weights = {
            tier: round((fpts / max_fpts) ** 1.2, 2) if max_fpts > 0 else 0
            for tier, fpts in position_tiers.items()
        }
        tier_weights[position] = weights

    return tier_weights


In [209]:
calculate_tier_weights(data_df3)

{'WR': {1: np.float64(1.0),
  2: np.float64(0.85),
  3: np.float64(0.69),
  4: np.float64(0.42),
  5: np.float64(0.27)},
 'RB': {1: np.float64(0.95),
  2: np.float64(1.0),
  3: np.float64(0.67),
  4: np.float64(0.52),
  5: np.float64(0.25)},
 'TE': {1: np.float64(1.0),
  2: np.float64(0.56),
  3: np.float64(0.69),
  4: np.float64(0.64),
  5: np.float64(0.27)},
 'QB': {2: np.float64(1.0),
  3: np.float64(0.57),
  4: np.float64(0.5),
  5: np.float64(0.4)},
 'DST': {4: np.float64(1.0), 5: np.float64(0.66)},
 'K': {4: np.float64(0.96), 5: np.float64(1.0)}}

In [211]:
data_df3.groupby('POSITION').size().to_dict()

{'DST': 32, 'K': 29, 'QB': 39, 'RB': 97, 'TE': 44, 'WR': 107}

In [212]:
def calculate_adjusted_scarcity(data_df, position_limits, required_positions, total_pool_sizes, tier_weights, position_col='POSITION', tier_col='Tier', vorp_col='VORP'):
    """
    Calculate scarcity with normalization for positional pool sizes and adjusted tier weights.

    Args:
    - data_df (pd.DataFrame): DataFrame with player data, including VORP and tiers.
    - position_limits (dict): Maximum players allowed per position per team.
    - required_positions (dict): Remaining required starters across all teams.
    - total_pool_sizes (dict): Total player pool size for each position.
    - tier_weights (dict): Tier weights for each tier.
    - position_col (str): Column name for positions (e.g., 'POSITION').
    - tier_col (str): Column name for tier information (e.g., 'Tier').
    - vorp_col (str): Column name for VORP values (e.g., 'VORP').

    Returns:
    - dict: Adjusted scarcity scores for each position.
    """
    scarcity_scores = {}

    for position, required in required_positions.items():
        position_data = data_df[data_df[position_col] == position]
        remaining_players = len(position_data)
        
        # Handle positions with no remaining players
        if remaining_players == 0:
            scarcity_scores[position] = float('inf')
            continue

        # Normalize remaining players by total pool size
        normalized_remaining = remaining_players / total_pool_sizes[position]

        # Adjust tier weights based on pool size
        adjusted_tier_weights = {
            tier: tier_weights.get(tier, 1) / (total_pool_sizes[position] ** 0.5)
            for tier in position_data[tier_col].unique()
        }

        # Calculate weighted scarcity
        weighted_scarcity = 0
        for tier, weight in adjusted_tier_weights.items():
            tier_data = position_data[position_data[tier_col] == tier]
            weighted_scarcity += len(tier_data) * weight

        # Final scarcity calculation
        scarcity = (weighted_scarcity * required) / normalized_remaining
        scarcity_scores[position] = round(scarcity, 2)

    return scarcity_scores


In [213]:
calculate_adjusted_scarcity(data_df3, POSITION_LIMITS, STARTER_POSITIONS, data_df3.groupby('POSITION').size().to_dict(), calculate_tier_weights(data_df3) )

{'QB': 6.24, 'K': 5.39, 'DST': 5.66, 'RB': 19.7, 'WR': 20.69, 'TE': 6.63}

In [215]:
data_df3.groupby('POSITION').size().to_dict()

{'DST': 32, 'K': 29, 'QB': 39, 'RB': 97, 'TE': 44, 'WR': 107}

In [202]:
def calculate_dynamic_scarcity_v2(data_df, position_limits, required_positions, position_col='POSITION', vorp_col='VORP', tier_col='Tier'):
    """
    Calculate dynamic scarcity based on remaining VORP, positional depth, and team constraints.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data with VORP and tiers.
    - position_limits (dict): Maximum number of players allowed per position.
    - required_positions (dict): Minimum starters required per position.
    - position_col (str): Column name for positions (e.g., 'POSITION').
    - vorp_col (str): Column name for VORP values (e.g., 'VORP').
    - tier_col (str): Column name for tier information (e.g., 'Tier').

    Returns:
    - dict: Dynamic scarcity scores for each position.
    """
    scarcity_scores = {}

    for position in position_limits.keys():
        position_data = data_df[data_df[position_col] == position]

        # Total remaining VORP
        total_vorp = position_data[vorp_col].sum()

        # Normalize by remaining player count for positional depth
        remaining_count = len(position_data)
        normalized_vorp = total_vorp / max(remaining_count, 1)  # Avoid division by zero

        # Factor in required starters and position limits
        remaining_slots = position_limits[position] - len(position_data)  # Remaining total slots
        required_starters = required_positions.get(position, 0)
        filled_starters = max(required_starters - len(position_data[position_data[tier_col] == 1]), 0)  # Starters needed

        # Scarcity formula
        scarcity = normalized_vorp * (1 + filled_starters / max(remaining_slots, 1))
        scarcity_scores[position] = round(scarcity, 2)
    return scarcity_scores


In [203]:
data_df3 = calculate_vorp_by_tier(data_df3)

In [204]:

# Step 2: Add VORP
calculate_dynamic_scarcity_v2(data_df3, POSITION_LIMITS, STARTER_POSITIONS)

{'QB': np.float64(45.81),
 'RB': np.float64(20.91),
 'WR': np.float64(25.57),
 'TE': np.float64(21.77),
 'K': np.float64(-3.32),
 'DST': np.float64(21.58)}

In [214]:
def calculate_position_specific_tier_weights(data_stats):
    """
    Calculate position-specific tier weights without normalization or scaling.

    Args:
    - data_stats (pd.DataFrame): DataFrame with tier-position statistics (mean, std, count, etc.).

    Returns:
    - dict: Nested dictionary of position-specific tier weights.
    """
    position_tier_weights = {}

    # Loop through each tier
    for tier in data_stats.index.levels[0]:  # Access tiers
        tier_data = data_stats.loc[tier]  # Data for the current tier

        # Calculate weights for each position in this tier
        tier_weights = {}
        for position, row in tier_data.iterrows():
            mean = row['mean']
            std = row['std'] if not np.isnan(row['std']) else 0  # Handle NaN std as 0
            count = row['count']

            # Avoid invalid weights for missing or zero data
            if mean == 0 or count == 0:
                tier_weights[position] = 0
                continue

            # Calculate weight using mean and variance
            weight = mean / (1 + (std / mean))  # Penalize variability
            tier_weights[position] = round(weight, 2)

        position_tier_weights[tier] = tier_weights

    return position_tier_weights

# Example usage
data_stats = data_df3.groupby(['Tier', 'POSITION'])['fpts'].agg({'mean', 'std', 'max', 'median', 'min','count'})
position_tier_weights = calculate_position_specific_tier_weights(data_stats)

# Output the calculated weights
print("Position-Specific Tier Weights:", position_tier_weights)


Position-Specific Tier Weights: {1: {'RB': np.float64(131.96), 'TE': np.float64(219.4), 'WR': np.float64(224.88)}, 2: {'QB': np.float64(293.96), 'RB': np.float64(184.68), 'TE': np.float64(135.4), 'WR': np.float64(182.52)}, 3: {'QB': np.float64(152.83), 'RB': np.float64(105.05), 'TE': np.float64(125.76), 'WR': np.float64(153.62)}, 4: {'DST': np.float64(86.98), 'K': np.float64(102.07), 'QB': np.float64(122.43), 'RB': np.float64(84.44), 'TE': np.float64(110.43), 'WR': np.float64(90.21)}, 5: {'DST': np.float64(59.36), 'K': np.float64(110.93), 'QB': np.float64(96.88), 'RB': np.float64(36.97), 'TE': np.float64(43.45), 'WR': np.float64(53.8)}}


In [217]:
def calculate_scarcity_from_weights_and_remaining(
    position_weights, remaining_players, position_col='POSITION', tier_col='Tier'
):
    """
    Calculate scarcity for each position based on tier weights and remaining players.

    Args:
    - position_weights (dict): Nested dictionary of position-specific tier weights.
    - remaining_players (dict): Dictionary of remaining players by position and tier.
      Example: {'RB': {1: 5, 2: 10, ...}, 'WR': {1: 8, 2: 12, ...}}
    - position_col (str): Column name for positions (default='POSITION').
    - tier_col (str): Column name for tiers (default='Tier').

    Returns:
    - dict: Scarcity scores for each position.
    """
    scarcity_scores = {}

    for position, tiers in position_weights.items():
        # Initialize scarcity score for this position
        scarcity = 0

        for tier, weight in tiers.items():
            # Get remaining players for this position and tier
            remaining = remaining_players.get(position, {}).get(tier, 0)

            # Add the weighted contribution to scarcity
            scarcity += remaining * weight

        # Assign scarcity score for the position
        scarcity_scores[position] = round(scarcity, 2)

    return scarcity_scores


In [226]:
def calculate_remaining_players(data_df, position_col='POSITION', tier_col='Tier'):
    """
    Calculate the number of remaining players for each position and tier.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data with positions and tiers.
    - position_col (str): Column name for positions (e.g., 'POSITION').
    - tier_col (str): Column name for tier information (e.g., 'Tier').

    Returns:
    - dict: Remaining players by position and tier.
    """
    # Group by position and tier, and count the number of players
    remaining_counts = (
        data_df.groupby([position_col, tier_col])
        .size()
        .unstack(fill_value=0)
        .to_dict('index')
    )

    # Convert nested dictionary structure
    remaining_players = {
        position: {tier: count for tier, count in tier_counts.items()}
        for position, tier_counts in remaining_counts.items()
    }
    
    return remaining_players


In [227]:
remaining_players = calculate_remaining_players(data_df3)

In [230]:
position_tier_weights

{1: {'RB': np.float64(131.96),
  'TE': np.float64(219.4),
  'WR': np.float64(224.88)},
 2: {'QB': np.float64(293.96),
  'RB': np.float64(184.68),
  'TE': np.float64(135.4),
  'WR': np.float64(182.52)},
 3: {'QB': np.float64(152.83),
  'RB': np.float64(105.05),
  'TE': np.float64(125.76),
  'WR': np.float64(153.62)},
 4: {'DST': np.float64(86.98),
  'K': np.float64(102.07),
  'QB': np.float64(122.43),
  'RB': np.float64(84.44),
  'TE': np.float64(110.43),
  'WR': np.float64(90.21)},
 5: {'DST': np.float64(59.36),
  'K': np.float64(110.93),
  'QB': np.float64(96.88),
  'RB': np.float64(36.97),
  'TE': np.float64(43.45),
  'WR': np.float64(53.8)}}

In [235]:
remaining_players

{'DST': {1: 0, 2: 0, 3: 0, 4: 11, 5: 21},
 'K': {1: 0, 2: 0, 3: 0, 4: 11, 5: 18},
 'QB': {1: 0, 2: 3, 3: 6, 4: 12, 5: 18},
 'RB': {1: 5, 2: 8, 3: 17, 4: 18, 5: 49},
 'TE': {1: 1, 2: 1, 3: 5, 4: 11, 5: 26},
 'WR': {1: 5, 2: 10, 3: 16, 4: 26, 5: 50}}

In [232]:
def calculate_scarcity(
    position_weights, 
    remaining_players, 
    position_col='POSITION', 
    tier_col='Tier'
):
    """
    Calculate scarcity scores for each position using tier weights and remaining players.

    Args:
    - position_weights (dict): Nested dictionary of position-specific tier weights.
      Example: {1: {'RB': 131.96, 'WR': 224.88}, ...}
    - remaining_players (dict): Remaining players by position and tier.
      Example: {'RB': {1: 5, 2: 8, ...}, 'WR': {1: 5, 2: 10, ...}}

    Returns:
    - dict: Scarcity scores for each position.
    """
    scarcity_scores = {}

    # Loop through each position in remaining players
    for position, tiers in remaining_players.items():
        scarcity = 0

        # Loop through each tier for the position
        for tier, count in tiers.items():
            # Get the weight for the current position and tier
            weight = position_weights.get(tier, {}).get(position, 0)

            # Calculate contribution to scarcity
            scarcity += count * weight

        # Assign total scarcity score for the position
        scarcity_scores[position] = round(scarcity, 2)

    return scarcity_scores


In [233]:
calculate_scarcity_from_weights_and_remaining(position_tier_weights, remaining_players)

{1: np.float64(0.0),
 2: np.float64(0.0),
 3: np.float64(0.0),
 4: np.float64(0.0),
 5: np.float64(0.0)}

In [93]:
def calculate_scaled_tier_weights_with_std(data_df, tier_col='Tier', metric_col='fpts', max_weight=10, scaling_factor=1.2):
    """
    Calculate and scale tier weights using mean and std deviation for tiers.

    Args:
    - data_df (pd.DataFrame): DataFrame containing tier and performance data.
    - tier_col (str): Column name for tier information (e.g., 'Tier').
    - metric_col (str): Column name for the performance metric (e.g., 'fpts').
    - max_weight (float): Maximum weight for Tier 1.
    - scaling_factor (float): Factor to control non-linearity.

    Returns:
    - dict: Scaled tier weights with a maximum of 10.
    """
    # Group by tier and calculate mean and std deviation
    tier_stats = data_df.groupby(tier_col)[metric_col].agg(['mean', 'std'])

    # Normalize mean to Tier 1
    tier_1_mean = tier_stats['mean'].max()
    normalized_means = tier_stats['mean'] / tier_1_mean

    # Adjust weights based on normalized mean and penalize by std deviation
    adjusted_weights = {
        tier: (normalized_means[tier] ** scaling_factor) * (1 / (1 + tier_stats.loc[tier, 'std'] / 100)) * max_weight
        for tier in tier_stats.index
    }
    
    # Scale weights so that the maximum is 10
    max_calculated_weight = max(adjusted_weights.values())
    scaled_weights = {tier: round((weight / max_calculated_weight) * 10, 2) for tier, weight in adjusted_weights.items()}
    
    return scaled_weights

# Example usage
tier_weights = calculate_scaled_tier_weights_with_std(
    data_df3, tier_col='Tier', metric_col='fpts', max_weight=10, scaling_factor=1.2
)

print("Scaled Tier Weights with Mean and Std Adjustment:", tier_weights)


Scaled Tier Weights with Mean and Std Adjustment: {1: np.float64(10.0), 2: np.float64(8.22), 3: np.float64(7.43), 4: np.float64(5.45), 5: np.float64(3.15)}


In [98]:
def calculate_tier_weights(data_df, tier_col='Tier', metric_col='fpts'):
    """
    Calculate tier weights based on normalized means of fpts.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data.
    - tier_col (str): Column name for tier information (e.g., 'Tier').
    - metric_col (str): Column name for performance metric (e.g., 'fpts').

    Returns:
    - dict: Tier weights normalized to a maximum of 1.
    """
    # Group by tier and calculate mean
    tier_stats = data_df.groupby(tier_col)[metric_col].mean()

    # Normalize weights (divide by max mean to ensure Tier 1 gets the highest weight of 1)
    max_mean = tier_stats.max()
    normalized_weights = {tier: round(mean / max_mean, 4) for tier, mean in tier_stats.items()}
    
    return normalized_weights

# Example usage
tier_weights = calculate_tier_weights(data_df3, tier_col='Tier', metric_col='fpts')
print("Tier Weights:", tier_weights)


Tier Weights: {1: np.float64(1.0), 2: np.float64(0.9768), 3: np.float64(0.8733), 4: np.float64(0.6189), 5: np.float64(0.3703)}


In [92]:
def calculate_scarcity_dynamic(data_df, tier_weights, position_col='POSITION', tier_col='Tier', use_top_tier=True):
    """
    Calculate scarcity for each position based on remaining players and tier weights, using top-tier or total weights.

    Args:
    - data_df (pd.DataFrame): DataFrame with columns for position, tier, and remaining players.
    - tier_weights (dict): Weights assigned to each tier (e.g., {1: 10, 2: 7, ...}).
    - position_col (str): Column name for position information.
    - tier_col (str): Column name for tier information.
    - use_top_tier (bool): Whether to divide by the top-tier weight only or total tier weights.

    Returns:
    - dict: Scarcity scores for each position.
    """
    scarcity_scores = {}
    max_tier_weight = max(tier_weights.values())  # Weight for the top tier
    total_tier_weight = sum(tier_weights.values())  # Sum of all tier weights

    for position in data_df[position_col].unique():
        position_data = data_df[data_df[position_col] == position]
        scarcity = 0

        for tier in position_data[tier_col].unique():
            tier_data = position_data[position_data[tier_col] == tier]
            remaining_players = len(tier_data)  # Count remaining players in this tier
            tier_weight = tier_weights[tier]

            # Add the tier contribution to scarcity
            scarcity += remaining_players * tier_weight

        # Normalize scarcity by the chosen reference weight
        reference_weight = max_tier_weight if use_top_tier else total_tier_weight
        scarcity_scores[position] = scarcity / reference_weight

    return scarcity_scores

# Example setup
# tier_weights = {1: 10, 2: 7, 3: 5, 4: 3, 5: 2}  # Define weights for each tier


# Calculate scarcity using top-tier normalization
scarcity_top_tier = calculate_scarcity_dynamic(data_df3, tier_weights, use_top_tier=True)

# Calculate scarcity using total tier weights normalization
scarcity_total_tier = calculate_scarcity_dynamic(data_df3, tier_weights, use_top_tier=False)

print("Scarcity (Top Tier Reference):", scarcity_top_tier)
print("Scarcity (Total Tier Reference):", scarcity_total_tier)

Scarcity (Top Tier Reference): {'RB': np.float64(50.663), 'WR': np.float64(55.165), 'TE': np.float64(19.603), 'QB': np.float64(19.285000000000004), 'DST': np.float64(12.379999999999999), 'K': np.float64(10.515)}
Scarcity (Total Tier Reference): {'RB': np.float64(14.792116788321168), 'WR': np.float64(16.106569343065694), 'TE': np.float64(5.723503649635036), 'QB': np.float64(5.63065693430657), 'DST': np.float64(3.6145985401459853), 'K': np.float64(3.07007299270073)}


In [91]:
tier_weights

{1: np.float64(10.0),
 2: np.float64(8.22),
 3: np.float64(7.43),
 4: np.float64(5.45),
 5: np.float64(3.15)}

In [94]:
def calculate_vor_scarcity(data_df, scarcity_scores, position_col='POSITION', metric_col='fpts', num_teams=12):
    """
    Adjust scarcity scores using VOR (Value Over Replacement).

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data with fpts and positions.
    - scarcity_scores (dict): Initial scarcity scores by position.
    - position_col (str): Column name for player positions.
    - metric_col (str): Column name for performance metric (e.g., 'fpts').
    - num_teams (int): Number of teams in the league.

    Returns:
    - dict: Adjusted scarcity scores using VOR.
    """
    # Define the number of starters for each position
    starters = {'QB': num_teams, 'RB': num_teams * 2, 'WR': num_teams * 3, 'TE': num_teams, 'DST': num_teams, 'K': num_teams}
    
    # Calculate replacement-level fpts for each position
    replacement_fpts = {}
    for position in starters:
        position_data = data_df[data_df[position_col] == position]
        replacement_level = starters[position]
        replacement_fpts[position] = position_data[metric_col].nlargest(replacement_level).min()

    # Calculate VOR for each position
    vor_totals = {}
    for position, replacement_value in replacement_fpts.items():
        position_data = data_df[data_df[position_col] == position]
        position_data['VOR'] = position_data[metric_col] - replacement_value
        vor_totals[position] = position_data['VOR'].sum()

    # Scale scarcity scores using VOR
    max_vor = max(vor_totals.values())
    adjusted_scarcity = {
        position: round(scarcity_scores[position] * (vor_totals[position] / max_vor), 2)
        for position in scarcity_scores
    }

    return adjusted_scarcity, vor_totals, replacement_fpts

# Example usage
adjusted_scarcity, vor_totals, replacement_fpts = calculate_vor_scarcity(data_df3, scarcity_top_tier)

print("Adjusted Scarcity Scores by Position:", adjusted_scarcity)
print("VOR Totals by Position:", vor_totals)
print("Replacement-Level fpts by Position:", replacement_fpts)


Adjusted Scarcity Scores by Position: {'RB': np.float64(1437.91), 'WR': np.float64(671.86), 'TE': np.float64(219.53), 'QB': np.float64(205.11), 'DST': np.float64(32.43), 'K': np.float64(10.52)}
VOR Totals by Position: {'QB': np.float64(-1733.6401011273265), 'RB': np.float64(-4626.240337438881), 'WR': np.float64(-1985.1802344694734), 'TE': np.float64(-1825.3799797818065), 'DST': np.float64(-427.0), 'K': np.float64(-163.0)}
Replacement-Level fpts by Position: {'QB': np.float64(268.36000061035156), 'RB': np.float64(173.90000534057617), 'WR': np.float64(165.40000343322754), 'TE': np.float64(159.0), 'DST': np.float64(84.0), 'K': np.float64(125.0)}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_data['VOR'] = position_data[metric_col] - replacement_value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_data['VOR'] = position_data[metric_col] - replacement_value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_data['VOR'] = position_data[metric_col] - replacement_valu