In [2]:

import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
import os
import pandas as pd
import re 

from utility.constants import *

# Utility: Load files
def load_file(folder, filename):
    """Function to read csv files"""
    file_path = os.path.join(folder, filename)
    if os.path.exists(file_path):
        return pd.read_csv(file_path)
    raise FileNotFoundError(f"File {filename} not found in folder {folder}")

def extract_years(folder_path):
    """Extract years from CSV file names."""
    years = [int(re.match(r"^\d{4}", file.name).group()) 
                for file in folder_path.glob("*.csv") if re.match(r"^\d{4}", file.name)]
    return sorted(years)

# Load ADP file
def load_adp_file(adp_dir, given_year=None):
    """Load ADP file at Random year; else use given year"""

    if given_year == None:
        years = extract_years(adp_dir)
        year = random.choice(years)
        print(f"Randomly selected ADP year of:", year)

    else: 
        year = given_year

    file_name = f"{year}ADP.csv"
    adp_df = load_file(ADP_DIR, file_name)
    adp_df['year'] = year   
    return adp_df

# Load stats
def load_seasonal_stats(seasonal_stats_dir, year):
    return load_file(seasonal_stats_dir, f"player_stats_{year}.csv")

def load_defensive_stats(defensive_stats_dir, year):
    return load_file(defensive_stats_dir, f"seasonal_defensive_stats_{year}.csv")

# Merge stats into ADP
def merge_stats(adp_df, seasonal_stats_df, defensive_stats_df):
    adp_df = adp_df.merge(
        seasonal_stats_df[["player_id", "fppr"]], on="player_id", how="left"
    )
    defensive_stats_df = defensive_stats_df.rename(columns={"pa_team": "player_id", "fpts": "def_fpts"})
    adp_df = adp_df.merge(
        defensive_stats_df[["player_id", "def_fpts"]], on="player_id", how="left"
    )
    adp_df["fpts"] = adp_df.apply(
        lambda row: row["def_fpts"] if row["POSITION"] == "DST" else row["fppr"], axis=1
    )
    return adp_df

In [55]:
def get_min_player_count_by_position(adp_dir, dstats_dir):
    """
    Get the minimum count of players for each position across all ADP season data
    and the minimum count of teams (DST) across all years.

    Args:
        adp_dir (Path): Directory containing ADP data files.
        dstats_dir (Path): Directory containing defensive stats files.

    Returns:
        dict: A dictionary where keys are positions and 'DST',
              and values are the minimum number of players or teams available across all years.
    """
    years = extract_years(adp_dir)
    position_counts = {}
    dst_counts = []  # List to store the count of teams (DST) for each year

    for year in years:
        # Load ADP data for the year
        df = load_adp_file(adp_dir, year)
        position_grouped = df.groupby("POSITION").size()  # Count players per position

        # Load defensive stats (DST) data for the year
        ddf = load_defensive_stats(dstats_dir, year)
        team_count = ddf['pa_team'].nunique()  # Count unique teams in defensive stats
        dst_counts.append(team_count)

        # Collect position-specific counts
        for position, count in position_grouped.items():
            if position not in position_counts:
                position_counts[position] = []
            position_counts[position].append(count)

    # Get the minimum count for each position across all years
    min_counts = {position: min(counts) for position, counts in position_counts.items()}

    # Add DST (team count) to the results
    min_counts["DST"] = min(dst_counts)

    return min_counts


In [56]:
min_player_dict = get_min_player_count_by_position(ADP_DIR, DEFENSIVE_STATS_DIR)

In [57]:
adp_df = load_adp_file(ADP_DIR, "2021")
year = adp_df['year'].iloc[0]
seasonal_stats_df = load_seasonal_stats(SEASONAL_STATS_DIR, year)
defensive_stats_df = load_defensive_stats(DEFENSIVE_STATS_DIR, year)
data_df = merge_stats(adp_df, seasonal_stats_df, defensive_stats_df)

# Sort players by FPPRAVG
data_df = data_df.sort_values(by="FPPRAVG").reset_index(drop=True)

# Initialize draft setup
DRAFT_ORDER = list(range(1, NUM_MANAGERS + 1))
random.shuffle(DRAFT_ORDER)

results = []
pick_order = 1

In [58]:
data_df.shape

(527, 13)

In [59]:
def filter_top_players_by_position(data_df, top_players):
    """
    Filter the top players for each position based on FPPRAVG (lower is better).
    
    Args:
    - data_df (pd.DataFrame): The original data frame containing player data.
    - top_players (dict): Dictionary specifying the number of top players to retain for each position.
    
    Returns:
    - pd.DataFrame: Filtered data containing only the top players for each position.
    """
    filtered_data = pd.DataFrame()
    for position, top_n in top_players.items():
        position_data = data_df[data_df["POSITION"] == position]
        # Sort by FPPRAVG (ascending) and select the top N players
        top_position_data = position_data.sort_values("FPPRAVG").head(top_n)
        filtered_data = pd.concat([filtered_data, top_position_data])
        filtered_data = filtered_data.sort_values(by='FPPRAVG')
    return filtered_data

In [176]:
data_df2 = filter_top_players_by_position(data_df, min_player_dict)

In [177]:
def calculate_tiers_by_metric_geo(data_df, metric_col='FPPRAVG', num_tiers=5, r=2):
    """
    Calculate tier proportions using corrected geometric progression for ascending metric.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data.
    - metric_col (str): Column name for the performance metric (e.g., 'FPPRAVG').
    - num_tiers (int): Number of tiers to create.
    - r (float): Common ratio for geometric progression (default=2).

    Returns:
    - pd.DataFrame: DataFrame with an additional "Tier" column.
    - list: Number of players in each tier.
    """
    # Sort the dataframe by the metric column in ascending order (better performance first)
    sorted_data = data_df.sort_values(by=metric_col, ascending=True).reset_index(drop=True)
    
    # Calculate reversed geometric progression weights
    weights = [r ** (i - 1) for i in range(1, num_tiers + 1)]
    
    # Normalize weights to sum to 1
    normalized_weights = [w / sum(weights) for w in weights]
    
    # Calculate the total number of players
    total_players = len(sorted_data)
    
    # Allocate players to tiers based on normalized weights
    players_per_tier = [int(total_players * w) for w in normalized_weights]
    
    # Adjust to ensure the total number of players matches exactly
    players_per_tier[-1] += total_players - sum(players_per_tier)
    
    # Assign tiers to players
    tier_assignments = []
    start_idx = 0
    for tier, count in enumerate(players_per_tier, start=1):
        end_idx = start_idx + count
        tier_assignments.extend([tier] * count)
        start_idx = end_idx

    sorted_data['Tier'] = tier_assignments
    return sorted_data, players_per_tier


data_df3, players_per_tier_geo = calculate_tiers_by_metric_geo(data_df2, metric_col='FPPRAVG', r=2)

print("Players per Tier (Geometric Progression):", players_per_tier_geo)


Players per Tier (Geometric Progression): [11, 22, 44, 89, 182]


In [178]:
data_df3.head(30)

Unnamed: 0,player_name,player_id,FPPRPOS,FPPRAVG,HPPRPOS,HPPRAVG,STRDPOS,STRDAVG,POSITION,year,fppr,def_fpts,fpts,Tier
0,Christian McCaffrey,00-0033280,RB1,1.0,RB1,1.0,RB1,1.0,RB,2021,127.5,,127.5,1
1,Dalvin Cook,00-0033893,RB2,2.0,RB2,2.0,RB2,2.0,RB,2021,206.300001,,206.300001,1
2,Alvin Kamara,00-0033906,RB3,3.0,RB4,4.0,RB4,4.0,RB,2021,234.700005,,234.700005,1
3,Derrick Henry,00-0032764,RB4,4.0,RB3,3.0,RB3,3.0,RB,2021,193.300005,,193.300005,1
4,Ezekiel Elliott,00-0033045,RB5,5.0,RB5,5.0,RB5,5.0,RB,2021,250.660006,,250.660006,1
5,Davante Adams,00-0031381,WR1,6.0,WR1,8.0,WR1,8.0,WR,2021,344.300003,,344.300003,1
6,Travis Kelce,00-0030506,TE1,7.0,TE1,12.0,TE1,12.0,TE,2021,262.8,,262.8,1
7,Aaron Jones,00-0033293,RB6,8.0,RB7,7.0,RB7,7.0,RB,2021,229.000004,,229.000004,1
8,Saquon Barkley,00-0034844,RB7,9.0,RB9,10.0,RB9,10.0,RB,2021,148.6,,148.6,1
9,Nick Chubb,00-0034791,RB10,10.0,RB6,6.0,RB6,6.0,RB,2021,215.300001,,215.300001,1


In [179]:

print("Players per Tier (Geometric Progression):", players_per_tier_geo)


Players per Tier (Geometric Progression): [11, 22, 44, 89, 182]


In [180]:
data_df3.groupby('Tier')['fpts'].agg({'mean', 'std', 'max', 'median', 'min'})

Unnamed: 0_level_0,mean,min,max,std,median
Tier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,232.387275,127.5,344.300003,68.362919,229.000004
2,226.997275,48.100001,402.580002,99.249043,230.950004
3,202.94381,2.0,439.499997,92.65111,201.149999
4,143.823371,8.6,338.159997,73.716104,133.0
5,86.06443,0.0,259.100003,62.112826,81.0


In [181]:
data_df3.groupby(['Tier', 'POSITION'])['fpts'].agg({'mean', 'std', 'max', 'median', 'min','count'})

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,min,max,std,count,median
Tier,POSITION,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,RB,216.573336,127.5,343.800003,62.284396,9,215.300001
1,TE,262.8,262.8,262.8,,1,262.8
1,WR,344.300003,344.300003,344.300003,,1,344.300003
2,QB,382.120001,361.66,402.580002,28.934811,2,382.120001
2,RB,221.600002,48.100001,373.100006,112.028911,7,229.100004
2,TE,165.75,133.5,198.0,45.608387,2,165.75
2,WR,213.363639,71.1,330.400006,84.119253,11,232.800004
3,QB,312.919997,239.980003,380.759998,56.692055,7,319.059992
3,RB,155.925002,2.0,226.000004,61.433026,12,168.500002
3,TE,207.666668,145.299999,301.100006,82.415193,3,176.599998


In [182]:
def calculate_vorp_by_tier(data_df, position_col='POSITION', metric_col='fpts', tier_col='Tier'):
    """
    Calculate VORP for each player based on the next available tier's mean fpts.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data.
    - position_col (str): Column name for positions (e.g., 'POSITION').
    - metric_col (str): Column name for performance metric (e.g., 'fpts').
    - tier_col (str): Column name for tier information (e.g., 'Tier').

    Returns:
    - pd.DataFrame: DataFrame with an added 'VORP' column.
    """
    vorp_data = data_df.copy()

    # Calculate mean fpts for each position and tier
    tier_means = vorp_data.groupby([position_col, tier_col])[metric_col].mean()

    # Assign next tier's mean fpts as replacement level
    def get_next_tier_replacement(row):
        try:
            # Replacement = mean fpts of the next tier
            next_tier = row[tier_col] + 1
            return tier_means.loc[row[position_col], next_tier]
        except KeyError:
            # No next tier, replacement = minimum value in current tier
            return tier_means.loc[row[position_col], row[tier_col]]

    vorp_data['Replacement'] = vorp_data.apply(get_next_tier_replacement, axis=1)

    # Calculate VORP
    vorp_data['VORP'] = round(vorp_data[metric_col] - vorp_data['Replacement'], 2)

    return vorp_data


In [189]:
def calculate_dynamic_scarcity_v2(data_df, position_limits, required_positions, position_col='POSITION', vorp_col='VORP', tier_col='Tier'):
    """
    Calculate dynamic scarcity based on remaining VORP, positional depth, and team constraints.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data with VORP and tiers.
    - position_limits (dict): Maximum number of players allowed per position.
    - required_positions (dict): Minimum starters required per position.
    - position_col (str): Column name for positions (e.g., 'POSITION').
    - vorp_col (str): Column name for VORP values (e.g., 'VORP').
    - tier_col (str): Column name for tier information (e.g., 'Tier').

    Returns:
    - dict: Dynamic scarcity scores for each position.
    """
    scarcity_scores = {}

    for position in position_limits.keys():
        position_data = data_df[data_df[position_col] == position]

        # Total remaining VORP
        total_vorp = position_data[vorp_col].sum()

        # Normalize by remaining player count for positional depth
        remaining_count = len(position_data)
        normalized_vorp = total_vorp / max(remaining_count, 1)  # Avoid division by zero

        # Factor in required starters and position limits
        remaining_slots = position_limits[position] - len(position_data)  # Remaining total slots
        required_starters = required_positions.get(position, 0)
        filled_starters = max(required_starters - len(position_data[position_data[tier_col] == 1]), 0)  # Starters needed

        # Scarcity formula
        scarcity = normalized_vorp * (1 + filled_starters / max(remaining_slots, 1))
        scarcity_scores[position] = round(scarcity, 2)
    return scarcity_scores


In [190]:
data_df3 = calculate_vorp_by_tier(data_df3)

In [191]:

# Step 2: Add VORP
calculate_dynamic_scarcity_v2(data_df3, POSITION_LIMITS, REQUIRED_POSITIONS)

{'QB': np.float64(84.71),
 'RB': np.float64(22.05),
 'WR': np.float64(52.89),
 'TE': np.float64(19.22),
 'K': np.float64(8.95),
 'DST': np.float64(11.49)}

In [188]:
data_df3.head(20)

Unnamed: 0,player_name,player_id,FPPRPOS,FPPRAVG,HPPRPOS,HPPRAVG,STRDPOS,STRDAVG,POSITION,year,fppr,def_fpts,fpts,Tier,Replacement,VORP
0,Christian McCaffrey,00-0033280,RB1,1.0,RB1,1.0,RB1,1.0,RB,2021,127.5,,127.5,1,221.600002,-94.1
1,Dalvin Cook,00-0033893,RB2,2.0,RB2,2.0,RB2,2.0,RB,2021,206.300001,,206.300001,1,221.600002,-15.3
2,Alvin Kamara,00-0033906,RB3,3.0,RB4,4.0,RB4,4.0,RB,2021,234.700005,,234.700005,1,221.600002,13.1
3,Derrick Henry,00-0032764,RB4,4.0,RB3,3.0,RB3,3.0,RB,2021,193.300005,,193.300005,1,221.600002,-28.3
4,Ezekiel Elliott,00-0033045,RB5,5.0,RB5,5.0,RB5,5.0,RB,2021,250.660006,,250.660006,1,221.600002,29.06
5,Davante Adams,00-0031381,WR1,6.0,WR1,8.0,WR1,8.0,WR,2021,344.300003,,344.300003,1,213.363639,130.94
6,Travis Kelce,00-0030506,TE1,7.0,TE1,12.0,TE1,12.0,TE,2021,262.8,,262.8,1,165.75,97.05
7,Aaron Jones,00-0033293,RB6,8.0,RB7,7.0,RB7,7.0,RB,2021,229.000004,,229.000004,1,221.600002,7.4
8,Saquon Barkley,00-0034844,RB7,9.0,RB9,10.0,RB9,10.0,RB,2021,148.6,,148.6,1,221.600002,-73.0
9,Nick Chubb,00-0034791,RB10,10.0,RB6,6.0,RB6,6.0,RB,2021,215.300001,,215.300001,1,221.600002,-6.3


In [None]:
calculate_dynamic_scarcity(data_df3)

In [144]:
data_df3.groupby(['Tier', 'POSITION'])['VORP'].agg({'mean', 'std', 'max', 'median', 'min','count'})

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,min,max,std,count,median
Tier,POSITION,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,RB,-5.026667,-94.1,122.2,62.284395,9,-6.3
1,TE,97.05,97.05,97.05,,1,97.05
1,WR,130.94,130.94,130.94,,1,130.94
2,QB,69.2,48.74,89.66,28.934809,2,69.2
2,RB,65.672857,-107.83,217.18,112.031276,7,73.18
2,TE,-41.92,-74.17,-9.67,45.608387,2,-41.92
2,WR,21.410909,-120.86,138.45,84.121588,11,40.85
3,QB,94.86,21.92,162.7,56.692056,7,101.0
3,RB,28.445,-125.48,98.52,61.433025,12,41.02
3,TE,77.79,15.42,171.23,82.420857,3,46.72


In [145]:
def calculate_scarcity_with_vorp(remaining_players_df, position_col='POSITION', vorp_col='VORP'):
    """
    Calculate scarcity for each position using dynamic VORP values.

    Args:
    - remaining_players_df (pd.DataFrame): DataFrame of remaining players with updated VORP values.
    - position_col (str): Column name for positions (e.g., 'POSITION').
    - vorp_col (str): Column name for VORP values (e.g., 'VORP').

    Returns:
    - dict: Scarcity scores for each position.
    """
    # Group by position and calculate total VORP for remaining players
    position_vorp = remaining_players_df.groupby(position_col)[vorp_col].sum()

    # Count remaining players for each position
    remaining_counts = remaining_players_df[position_col].value_counts()

    # Calculate scarcity as total VORP normalized by remaining player count
    scarcity_scores = {
        position: round(position_vorp[position] / remaining_counts[position], 2)
        for position in position_vorp.index
    }

    return scarcity_scores


In [147]:
scarcity_scores = calculate_scarcity_with_vorp(data_df3)


In [141]:
scarcity_scores

{'QB': np.float64(1651.89),
 'RB': np.float64(2138.39),
 'WR': np.float64(2829.48),
 'TE': np.float64(845.6),
 'K': np.float64(129.82),
 'DST': np.float64(183.88)}

In [99]:
def calculate_position_specific_tier_weights(data_stats):
    """
    Calculate position-specific tier weights without normalization or scaling.

    Args:
    - data_stats (pd.DataFrame): DataFrame with tier-position statistics (mean, std, count, etc.).

    Returns:
    - dict: Nested dictionary of position-specific tier weights.
    """
    position_tier_weights = {}

    # Loop through each tier
    for tier in data_stats.index.levels[0]:  # Access tiers
        tier_data = data_stats.loc[tier]  # Data for the current tier

        # Calculate weights for each position in this tier
        tier_weights = {}
        for position, row in tier_data.iterrows():
            mean = row['mean']
            std = row['std'] if not np.isnan(row['std']) else 0  # Handle NaN std as 0
            count = row['count']

            # Avoid invalid weights for missing or zero data
            if mean == 0 or count == 0:
                tier_weights[position] = 0
                continue

            # Calculate weight using mean and variance
            weight = mean / (1 + (std / mean))  # Penalize variability
            tier_weights[position] = round(weight, 2)

        position_tier_weights[tier] = tier_weights

    return position_tier_weights

# Example usage
data_stats = data_df3.groupby(['Tier', 'POSITION'])['fpts'].agg({'mean', 'std', 'max', 'median', 'min','count'})
position_tier_weights = calculate_position_specific_tier_weights(data_stats)

# Output the calculated weights
print("Position-Specific Tier Weights:", position_tier_weights)


Position-Specific Tier Weights: {1: {'RB': np.float64(168.2), 'TE': np.float64(262.8), 'WR': np.float64(344.3)}, 2: {'QB': np.float64(355.22), 'RB': np.float64(147.19), 'TE': np.float64(129.98), 'WR': np.float64(153.03)}, 3: {'QB': np.float64(264.92), 'RB': np.float64(111.86), 'TE': np.float64(148.67), 'WR': np.float64(129.36)}, 4: {'DST': np.float64(62.93), 'K': np.float64(117.51), 'QB': np.float64(157.33), 'RB': np.float64(84.19), 'TE': np.float64(94.56), 'WR': np.float64(100.6)}, 5: {'DST': np.float64(45.31), 'K': np.float64(78.4), 'QB': np.float64(99.59), 'RB': np.float64(31.22), 'TE': np.float64(48.1), 'WR': np.float64(53.73)}}


In [100]:
def calculate_scarcity(data_df, position_tier_weights, position_col='POSITION', tier_col='Tier'):
    """
    Calculate scarcity for each position using tier weights and player counts.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data with tiers and positions.
    - position_tier_weights (dict): Nested dictionary of position-specific tier weights.
    - position_col (str): Column name for player positions (e.g., 'POSITION').
    - tier_col (str): Column name for tier information (e.g., 'Tier').

    Returns:
    - dict: Scarcity scores for each position.
    """
    scarcity_scores = {}

    # Group data by position
    for position in data_df[position_col].unique():
        position_data = data_df[data_df[position_col] == position]
        scarcity = 0

        # Sum scarcity contributions from each tier
        for tier in position_data[tier_col].unique():
            tier_count = len(position_data[position_data[tier_col] == tier])  # Count remaining players
            weight = position_tier_weights[tier].get(position, 0)  # Get weight for the position-tier
            scarcity += tier_count * weight

        scarcity_scores[position] = round(scarcity, 2)

    return scarcity_scores

# Example usage
scarcity_scores = calculate_scarcity(data_df3, position_tier_weights, position_col='POSITION', tier_col='Tier')
print("Scarcity Scores by Position:", scarcity_scores)


Scarcity Scores by Position: {'RB': np.float64(7233.64), 'WR': np.float64(9985.7), 'TE': np.float64(3354.09), 'QB': np.float64(6303.2), 'DST': np.float64(1626.12), 'K': np.float64(2508.26)}


In [93]:
def calculate_scaled_tier_weights_with_std(data_df, tier_col='Tier', metric_col='fpts', max_weight=10, scaling_factor=1.2):
    """
    Calculate and scale tier weights using mean and std deviation for tiers.

    Args:
    - data_df (pd.DataFrame): DataFrame containing tier and performance data.
    - tier_col (str): Column name for tier information (e.g., 'Tier').
    - metric_col (str): Column name for the performance metric (e.g., 'fpts').
    - max_weight (float): Maximum weight for Tier 1.
    - scaling_factor (float): Factor to control non-linearity.

    Returns:
    - dict: Scaled tier weights with a maximum of 10.
    """
    # Group by tier and calculate mean and std deviation
    tier_stats = data_df.groupby(tier_col)[metric_col].agg(['mean', 'std'])

    # Normalize mean to Tier 1
    tier_1_mean = tier_stats['mean'].max()
    normalized_means = tier_stats['mean'] / tier_1_mean

    # Adjust weights based on normalized mean and penalize by std deviation
    adjusted_weights = {
        tier: (normalized_means[tier] ** scaling_factor) * (1 / (1 + tier_stats.loc[tier, 'std'] / 100)) * max_weight
        for tier in tier_stats.index
    }
    
    # Scale weights so that the maximum is 10
    max_calculated_weight = max(adjusted_weights.values())
    scaled_weights = {tier: round((weight / max_calculated_weight) * 10, 2) for tier, weight in adjusted_weights.items()}
    
    return scaled_weights

# Example usage
tier_weights = calculate_scaled_tier_weights_with_std(
    data_df3, tier_col='Tier', metric_col='fpts', max_weight=10, scaling_factor=1.2
)

print("Scaled Tier Weights with Mean and Std Adjustment:", tier_weights)


Scaled Tier Weights with Mean and Std Adjustment: {1: np.float64(10.0), 2: np.float64(8.22), 3: np.float64(7.43), 4: np.float64(5.45), 5: np.float64(3.15)}


In [98]:
def calculate_tier_weights(data_df, tier_col='Tier', metric_col='fpts'):
    """
    Calculate tier weights based on normalized means of fpts.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data.
    - tier_col (str): Column name for tier information (e.g., 'Tier').
    - metric_col (str): Column name for performance metric (e.g., 'fpts').

    Returns:
    - dict: Tier weights normalized to a maximum of 1.
    """
    # Group by tier and calculate mean
    tier_stats = data_df.groupby(tier_col)[metric_col].mean()

    # Normalize weights (divide by max mean to ensure Tier 1 gets the highest weight of 1)
    max_mean = tier_stats.max()
    normalized_weights = {tier: round(mean / max_mean, 4) for tier, mean in tier_stats.items()}
    
    return normalized_weights

# Example usage
tier_weights = calculate_tier_weights(data_df3, tier_col='Tier', metric_col='fpts')
print("Tier Weights:", tier_weights)


Tier Weights: {1: np.float64(1.0), 2: np.float64(0.9768), 3: np.float64(0.8733), 4: np.float64(0.6189), 5: np.float64(0.3703)}


In [92]:
def calculate_scarcity_dynamic(data_df, tier_weights, position_col='POSITION', tier_col='Tier', use_top_tier=True):
    """
    Calculate scarcity for each position based on remaining players and tier weights, using top-tier or total weights.

    Args:
    - data_df (pd.DataFrame): DataFrame with columns for position, tier, and remaining players.
    - tier_weights (dict): Weights assigned to each tier (e.g., {1: 10, 2: 7, ...}).
    - position_col (str): Column name for position information.
    - tier_col (str): Column name for tier information.
    - use_top_tier (bool): Whether to divide by the top-tier weight only or total tier weights.

    Returns:
    - dict: Scarcity scores for each position.
    """
    scarcity_scores = {}
    max_tier_weight = max(tier_weights.values())  # Weight for the top tier
    total_tier_weight = sum(tier_weights.values())  # Sum of all tier weights

    for position in data_df[position_col].unique():
        position_data = data_df[data_df[position_col] == position]
        scarcity = 0

        for tier in position_data[tier_col].unique():
            tier_data = position_data[position_data[tier_col] == tier]
            remaining_players = len(tier_data)  # Count remaining players in this tier
            tier_weight = tier_weights[tier]

            # Add the tier contribution to scarcity
            scarcity += remaining_players * tier_weight

        # Normalize scarcity by the chosen reference weight
        reference_weight = max_tier_weight if use_top_tier else total_tier_weight
        scarcity_scores[position] = scarcity / reference_weight

    return scarcity_scores

# Example setup
# tier_weights = {1: 10, 2: 7, 3: 5, 4: 3, 5: 2}  # Define weights for each tier


# Calculate scarcity using top-tier normalization
scarcity_top_tier = calculate_scarcity_dynamic(data_df3, tier_weights, use_top_tier=True)

# Calculate scarcity using total tier weights normalization
scarcity_total_tier = calculate_scarcity_dynamic(data_df3, tier_weights, use_top_tier=False)

print("Scarcity (Top Tier Reference):", scarcity_top_tier)
print("Scarcity (Total Tier Reference):", scarcity_total_tier)

Scarcity (Top Tier Reference): {'RB': np.float64(50.663), 'WR': np.float64(55.165), 'TE': np.float64(19.603), 'QB': np.float64(19.285000000000004), 'DST': np.float64(12.379999999999999), 'K': np.float64(10.515)}
Scarcity (Total Tier Reference): {'RB': np.float64(14.792116788321168), 'WR': np.float64(16.106569343065694), 'TE': np.float64(5.723503649635036), 'QB': np.float64(5.63065693430657), 'DST': np.float64(3.6145985401459853), 'K': np.float64(3.07007299270073)}


In [91]:
tier_weights

{1: np.float64(10.0),
 2: np.float64(8.22),
 3: np.float64(7.43),
 4: np.float64(5.45),
 5: np.float64(3.15)}

In [94]:
def calculate_vor_scarcity(data_df, scarcity_scores, position_col='POSITION', metric_col='fpts', num_teams=12):
    """
    Adjust scarcity scores using VOR (Value Over Replacement).

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data with fpts and positions.
    - scarcity_scores (dict): Initial scarcity scores by position.
    - position_col (str): Column name for player positions.
    - metric_col (str): Column name for performance metric (e.g., 'fpts').
    - num_teams (int): Number of teams in the league.

    Returns:
    - dict: Adjusted scarcity scores using VOR.
    """
    # Define the number of starters for each position
    starters = {'QB': num_teams, 'RB': num_teams * 2, 'WR': num_teams * 3, 'TE': num_teams, 'DST': num_teams, 'K': num_teams}
    
    # Calculate replacement-level fpts for each position
    replacement_fpts = {}
    for position in starters:
        position_data = data_df[data_df[position_col] == position]
        replacement_level = starters[position]
        replacement_fpts[position] = position_data[metric_col].nlargest(replacement_level).min()

    # Calculate VOR for each position
    vor_totals = {}
    for position, replacement_value in replacement_fpts.items():
        position_data = data_df[data_df[position_col] == position]
        position_data['VOR'] = position_data[metric_col] - replacement_value
        vor_totals[position] = position_data['VOR'].sum()

    # Scale scarcity scores using VOR
    max_vor = max(vor_totals.values())
    adjusted_scarcity = {
        position: round(scarcity_scores[position] * (vor_totals[position] / max_vor), 2)
        for position in scarcity_scores
    }

    return adjusted_scarcity, vor_totals, replacement_fpts

# Example usage
adjusted_scarcity, vor_totals, replacement_fpts = calculate_vor_scarcity(data_df3, scarcity_top_tier)

print("Adjusted Scarcity Scores by Position:", adjusted_scarcity)
print("VOR Totals by Position:", vor_totals)
print("Replacement-Level fpts by Position:", replacement_fpts)


Adjusted Scarcity Scores by Position: {'RB': np.float64(1437.91), 'WR': np.float64(671.86), 'TE': np.float64(219.53), 'QB': np.float64(205.11), 'DST': np.float64(32.43), 'K': np.float64(10.52)}
VOR Totals by Position: {'QB': np.float64(-1733.6401011273265), 'RB': np.float64(-4626.240337438881), 'WR': np.float64(-1985.1802344694734), 'TE': np.float64(-1825.3799797818065), 'DST': np.float64(-427.0), 'K': np.float64(-163.0)}
Replacement-Level fpts by Position: {'QB': np.float64(268.36000061035156), 'RB': np.float64(173.90000534057617), 'WR': np.float64(165.40000343322754), 'TE': np.float64(159.0), 'DST': np.float64(84.0), 'K': np.float64(125.0)}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_data['VOR'] = position_data[metric_col] - replacement_value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_data['VOR'] = position_data[metric_col] - replacement_value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  position_data['VOR'] = position_data[metric_col] - replacement_valu