In [2]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
import pandas as pd
pd.set_option('display.max_columns', None)


In [3]:
from pathlib import Path
from typing import Dict, List

import os
import pandas as pd
import re 

from utility.constants import *

# Utility: Load files
def load_file(folder, filename):
    """Function to read csv files"""
    file_path = os.path.join(folder, filename)
    if os.path.exists(file_path):
        return pd.read_csv(file_path)
    raise FileNotFoundError(f"File {filename} not found in folder {folder}")

def extract_years(folder_path: Path) -> List[int]:
    """Extract years from CSV file names."""
    years = [int(re.match(r"^\d{4}", file.name).group()) 
                for file in folder_path.glob("*.csv") if re.match(r"^\d{4}", file.name)]
    return sorted(years)

# Load ADP file
def load_adp_file(adp_dir, given_year=None):
    """Load ADP file at Random year; else use given year"""

    if given_year == None:
        years = extract_years(adp_dir)
        year = random.choice(years)
        print(f"Randomly selected ADP year of:", year)

    else: 
        year = given_year

    file_name = f"{year}ADP.csv"
    adp_df = load_file(ADP_DIR, file_name)
    adp_df['year'] = year   
    return adp_df

# Load stats
def load_seasonal_stats(seasonal_stats_dir, year):
    return load_file(seasonal_stats_dir, f"player_stats_{year}.csv")

def load_defensive_stats(defensive_stats_dir, year):
    return load_file(defensive_stats_dir, f"seasonal_defensive_stats_{year}.csv")

# Merge stats into ADP
def merge_stats(adp_df, seasonal_stats_df, defensive_stats_df):
    adp_df = adp_df.merge(
        seasonal_stats_df[["player_id", "fppr"]], on="player_id", how="left"
    )
    defensive_stats_df = defensive_stats_df.rename(columns={"pa_team": "player_id", "fpts": "def_fpts"})
    adp_df = adp_df.merge(
        defensive_stats_df[["player_id", "def_fpts"]], on="player_id", how="left"
    )
    adp_df["fpts"] = adp_df.apply(
        lambda row: row["def_fpts"] if row["POSITION"] == "DST" else row["fppr"], axis=1
    )
    return adp_df

# Select player with weighted probabilities
def select_player_with_weights(players, weights):
    """
    Randomly select a player based on weighted probabilities.
    """
    return random.choices(players.to_dict("records"), weights=weights[:len(players)], k=1)[0]


In [4]:
def get_min_player_count_by_position(adp_dir, dstats_dir):
    """
    Get the minimum count of players for each position across all ADP season data
    and the minimum count of teams (DST) across all years.

    Args:
        adp_dir (Path): Directory containing ADP data files.
        dstats_dir (Path): Directory containing defensive stats files.

    Returns:
        dict: A dictionary where keys are positions and 'DST',
              and values are the minimum number of players or teams available across all years.
    """
    years = extract_years(adp_dir)
    position_counts = {}
    dst_counts = []  # List to store the count of teams (DST) for each year

    for year in years:
        # Load ADP data for the year
        df = load_adp_file(adp_dir, year)
        position_grouped = df.groupby("POSITION").size()  # Count players per position

        # Load defensive stats (DST) data for the year
        ddf = load_defensive_stats(dstats_dir, year)
        team_count = ddf['pa_team'].nunique()  # Count unique teams in defensive stats
        dst_counts.append(team_count)

        # Collect position-specific counts
        for position, count in position_grouped.items():
            if position not in position_counts:
                position_counts[position] = []
            position_counts[position].append(count)

    # Get the minimum count for each position across all years
    min_counts = {position: min(counts) for position, counts in position_counts.items()}

    # Add DST (team count) to the results
    min_counts["DST"] = min(dst_counts)

    return min_counts


In [7]:
def filter_top_players_by_position(data_df, top_players):
    """
    Filter the top players for each position based on FPPRAVG (lower is better).
    
    Args:
    - data_df (pd.DataFrame): The original data frame containing player data.
    - top_players (dict): Dictionary specifying the number of top players to retain for each position.
    
    Returns:
    - pd.DataFrame: Filtered data containing only the top players for each position.
    """
    filtered_data = pd.DataFrame()
    for position, top_n in top_players.items():
        position_data = data_df[data_df["POSITION"] == position]
        # Sort by FPPRAVG (ascending) and select the top N players
        top_position_data = position_data.sort_values("FPPRAVG").head(top_n)
        filtered_data = pd.concat([filtered_data, top_position_data])
        filtered_data = filtered_data.sort_values(by='FPPRAVG')
    return filtered_data

In [11]:
adp_df = load_adp_file(ADP_DIR)
year = adp_df['year'].iloc[0]
seasonal_stats_df = load_seasonal_stats(SEASONAL_STATS_DIR, year)
defensive_stats_df = load_defensive_stats(DEFENSIVE_STATS_DIR, year)
data_df = merge_stats(adp_df, seasonal_stats_df, defensive_stats_df)

# Sort players by FPPRAVG
data_df = data_df.sort_values(by="FPPRAVG").reset_index(drop=True)
min_player_dict = get_min_player_count_by_position(ADP_DIR, DEFENSIVE_STATS_DIR)
data_df2 = filter_top_players_by_position(data_df, min_player_dict)

Randomly selected ADP year of: 2021


In [12]:
def calculate_tiers_by_metric_geo(data_df, metric_col='FPPRAVG', num_tiers=5, r=2):
    """
    Calculate tier proportions using corrected geometric progression for ascending metric.

    Args:
    - data_df (pd.DataFrame): DataFrame containing player data.
    - metric_col (str): Column name for the performance metric (e.g., 'FPPRAVG').
    - num_tiers (int): Number of tiers to create.
    - r (float): Common ratio for geometric progression (default=2).

    Returns:
    - pd.DataFrame: DataFrame with an additional "Tier" column.
    - list: Number of players in each tier.
    """
    # Sort the dataframe by the metric column in ascending order (better performance first)
    sorted_data = data_df.sort_values(by=metric_col, ascending=True).reset_index(drop=True)
    
    # Calculate reversed geometric progression weights
    weights = [r ** (i - 1) for i in range(1, num_tiers + 1)]
    
    # Normalize weights to sum to 1
    normalized_weights = [w / sum(weights) for w in weights]
    
    # Calculate the total number of players
    total_players = len(sorted_data)
    
    # Allocate players to tiers based on normalized weights
    players_per_tier = [int(total_players * w) for w in normalized_weights]
    
    # Adjust to ensure the total number of players matches exactly
    players_per_tier[-1] += total_players - sum(players_per_tier)
    
    # Assign tiers to players
    tier_assignments = []
    start_idx = 0
    for tier, count in enumerate(players_per_tier, start=1):
        end_idx = start_idx + count
        tier_assignments.extend([tier] * count)
        start_idx = end_idx

    sorted_data['Tier'] = tier_assignments
    return sorted_data, players_per_tier


data_df3, players_per_tier_geo = calculate_tiers_by_metric_geo(data_df2, metric_col='FPPRAVG', r=2)

print("Players per Tier (Geometric Progression):", players_per_tier_geo)


Players per Tier (Geometric Progression): [11, 22, 44, 89, 182]


In [13]:
def calculate_scaled_tier_weights_with_std(data_df, tier_col='Tier', metric_col='fpts', max_weight=10, scaling_factor=1.2):
    """
    Calculate and scale tier weights using mean and std deviation for tiers.

    Args:
    - data_df (pd.DataFrame): DataFrame containing tier and performance data.
    - tier_col (str): Column name for tier information (e.g., 'Tier').
    - metric_col (str): Column name for the performance metric (e.g., 'fpts').
    - max_weight (float): Maximum weight for Tier 1.
    - scaling_factor (float): Factor to control non-linearity.

    Returns:
    - dict: Scaled tier weights with a maximum of 10.
    """
    # Group by tier and calculate mean and std deviation
    tier_stats = data_df.groupby(tier_col)[metric_col].agg(['mean', 'std'])

    # Normalize mean to Tier 1
    tier_1_mean = tier_stats['mean'].max()
    normalized_means = tier_stats['mean'] / tier_1_mean

    # Adjust weights based on normalized mean and penalize by std deviation
    adjusted_weights = {
        tier: (normalized_means[tier] ** scaling_factor) * (1 / (1 + tier_stats.loc[tier, 'std'] / 100)) * max_weight
        for tier in tier_stats.index
    }
    
    # Scale weights so that the maximum is 10
    max_calculated_weight = max(adjusted_weights.values())
    scaled_weights = {tier: round((weight / max_calculated_weight) * 10, 2) for tier, weight in adjusted_weights.items()}
    
    return scaled_weights

# Example usage
tier_weights = calculate_scaled_tier_weights_with_std(
    data_df3, tier_col='Tier', metric_col='fpts', max_weight=10, scaling_factor=1.2
)

print("Scaled Tier Weights with Mean and Std Adjustment:", tier_weights)


Scaled Tier Weights with Mean and Std Adjustment: {1: np.float64(10.0), 2: np.float64(8.22), 3: np.float64(7.43), 4: np.float64(5.45), 5: np.float64(3.15)}


In [14]:
def calculate_scarcity_dynamic(data_df, tier_weights, position_col='POSITION', tier_col='Tier', use_top_tier=True):
    """
    Calculate scarcity for each position based on remaining players and tier weights, using top-tier or total weights.

    Args:
    - data_df (pd.DataFrame): DataFrame with columns for position, tier, and remaining players.
    - tier_weights (dict): Weights assigned to each tier (e.g., {1: 10, 2: 7, ...}).
    - position_col (str): Column name for position information.
    - tier_col (str): Column name for tier information.
    - use_top_tier (bool): Whether to divide by the top-tier weight only or total tier weights.

    Returns:
    - dict: Scarcity scores for each position.
    """
    scarcity_scores = {}
    max_tier_weight = max(tier_weights.values())  # Weight for the top tier
    total_tier_weight = sum(tier_weights.values())  # Sum of all tier weights

    for position in data_df[position_col].unique():
        position_data = data_df[data_df[position_col] == position]
        scarcity = 0

        for tier in position_data[tier_col].unique():
            tier_data = position_data[position_data[tier_col] == tier]
            remaining_players = len(tier_data)  # Count remaining players in this tier
            tier_weight = tier_weights[tier]

            # Add the tier contribution to scarcity
            scarcity += remaining_players * tier_weight

        # Normalize scarcity by the chosen reference weight
        reference_weight = max_tier_weight if use_top_tier else total_tier_weight
        scarcity_scores[position] = scarcity / reference_weight

    return scarcity_scores

# Example setup
# tier_weights = {1: 10, 2: 7, 3: 5, 4: 3, 5: 2}  # Define weights for each tier


# Calculate scarcity using top-tier normalization
scarcity_top_tier = calculate_scarcity_dynamic(data_df3, tier_weights, use_top_tier=True)

# Calculate scarcity using total tier weights normalization
scarcity_total_tier = calculate_scarcity_dynamic(data_df3, tier_weights, use_top_tier=False)

print("Scarcity (Top Tier Reference):", scarcity_top_tier)
print("Scarcity (Total Tier Reference):", scarcity_total_tier)

Scarcity (Top Tier Reference): {'RB': np.float64(50.663), 'WR': np.float64(55.165), 'TE': np.float64(19.603), 'QB': np.float64(19.285000000000004), 'DST': np.float64(12.379999999999999), 'K': np.float64(10.515)}
Scarcity (Total Tier Reference): {'RB': np.float64(14.792116788321168), 'WR': np.float64(16.106569343065694), 'TE': np.float64(5.723503649635036), 'QB': np.float64(5.63065693430657), 'DST': np.float64(3.6145985401459853), 'K': np.float64(3.07007299270073)}


In [3]:
class DraftEnvironment(gym.Env):
    def __init__(self, adp_dir, stats_dir, dstats_dir, num_teams=NUM_MANAGERS, num_rounds=TOTAL_NUM_ROUNDS):
        """
        Initialize the Draft Environment.

        Args:
            adp_dir (Path): Directory containing ADP data files.
            stats_dir (Path): Directory containing player stats files.
            dstats_dir (Path): Directory containing defense stats files.
            num_teams (int): Number of teams participating in the draft.
            roster_size (int): Number of players each team can draft.

        """
        super().__init__() # Initilize the parent gym.Env class

        # Save input arguments as instance attributes
        self.adp_dir = adp_dir
        self.stats_dir = stats_dir
        self.dstats_dir = dstats_dir  
        self.num_teams = num_teams
        self.num_rounds = num_rounds
        
        # Load and process data
        self.adp_data = load_adp_file(adp_dir)
        self.year = self.adp_data["year"].iloc[0]
        self.offensive_stats = load_seasonal_stats(stats_dir, self.year)
        self.deffensive_stats = load_defensive_stats(dstats_dir, self.year)
        
        self.player_pool = merge_stats(self.adp_data, self.offensive_stats, self.deffensive_stats)
        
        # Sort players by FPPRAVG
        self.player_pool = self.player_pool.sort_values(by="FPPRAVG").reset_index(drop=True)
        # Round FFPRAVG to 2 decimal
        self.player_pool["fpts"] = self.player_pool["fpts"].round(2)

        # Action space: The agent can pick any available player (index-based)
        self.action_space = spaces.Discrete(len(self.player_pool))  # Placeholder, update with actual player pool size. [UPDATED] Action Space should be updated based on the amount of players avaialble. Therefore, I used self.player_pool for now.

        # Observation space: Binary mask for available players + agent roster counts
        self.observation_space = spaces.Dict({
            "available_player_name": spaces.MultiBinary(self.player_pool["Player_Name"]),  # [TODO] Agent needs to see current Draft Board with all player name, player ID, Scarcity, Player_Tier
            "available_player_id": ,
            "Scarcity": ,
            "Player_Tier" ,
            "agent_roster": spaces.MultiDiscrete(),  # QB, RB, WR, TE max counts
            "current_round": spaces.Discrete(self.round_num), #self.current_round needs to be defined.
            "current_pick" : spaces.Discrete(self.current_pick), #self.current_pick needs to be defined.
        })

        # State variables (initialized in reset())
        self.state = pd.DataFrame(columns=["trial_number", "round", "Overall_pick", "Player_name", "player_id", "position", "fpts", "year"])
        self.Trial_num = 0
        self.round_num = 0
        self.pick_num = 0
        self.required_positions = {}
        self.team_counts = {}
        self.draft_order = []
    # Randomize Draft Order
    def randomize_draft_order(self):
        self.draft_order = list(range(1, self.num_teams + 1))
        random.shuffle(self.draft_order)        

    #Simulate one round of the draft
    def simulate_round(self):
        current_order = self.draft_order if self.round_num % 2 != 0 else self.draft_order[::-1]
        
        for manager in current_order:
            self.team_name = f"Team_{manager}"
            if self.team_name == "Team_1":
                # Agent's pick
                action = self.current_action  # Provided externally during step
                self.selected_player = self.select_player(action)
            else:
                # Bot's pick
                self.selected_player = self.select_bot_player()
            # Update team and player pool after the pick
            self.process_pick()

    #Select a player based on the action index
    def select_player(self, action):
        assert 0 <= action < len(self.player_pool),
        return self.player_pool.iloc[action]

    #Selects a player for the bot based on constraints and weighted logic
    def select_bot_player(self):

        unmet_positions = [pos for pos, count in self.required_positions[self.team_name].items() if count > 0]
        available_players = self.player_pool[
            self.player_pool["POSITION"].apply(lambda pos: self.team_counts[self.team_name][pos] < POSITION_LIMITS[pos])
        ]

        # Apply constraints
        if unmet_positions:
            available_players = available_players[available_players["POSITION"].isin(unmet_positions)]
        
        # Weighted selection logic
        if self.round_num <= 3:
            top_players = available_players.head(5)
            return select_player_with_weights(top_players, ROUND_1_3_WEIGHTS)
        else:
            top_players = available_players.head(6)
            return select_player_with_weights(top_players, ROUND_4_16_WEIGHTS)
    
    # Processes a pick by updating the team state and player pool
    def process_pick(self):
        position = self.selected_player["POSITION"]
        self.team_counts[self.team_name][position] += 1

        if self.required_positions[self.team_name][position] > 0:
            self.required_positions[self.team_name][position] -= 1

        self.state.append({
            "trial_number": self.trial_number,
            "round": self.round_num,
            "overall_pick": self.pick_num,
            "team_name": self.team_name,
            "Player_name": self.selected_player["player_name"],
            "player_id": self.selected_player["player_id"],
            "position": position,
            "fpts": self.selected_player["fpts"],
            "year": self.year
        })

        # Remove the selected player from the player pool
        self.player_pool = self.player_pool[self.player_pool["player_id"] != self.selected_player["player_id"]].reset_index(drop=True)

        # Increment the pick number
        self.pick_num += 1

    
    def step(self, action):
        """Executes one step (one pick) of the draft."""
        # Set the current action for the agent
        self.current_action = action

        # Simulate a single pick for the current round and team
        self.team_name = f"Team_{self.draft_order[self.pick_num - 1]}"
        if self.team_name == "Team_1":  # Agent's turn
            self.selected_player = self.select_player(action)
        else:  # Bot's turn
            self.selected_player = self.select_bot_player()

        # Process the pick
        self.process_pick()

        # Check if we need to move to the next round
        if self.pick_num > self.num_teams:
            self.reset_round()

        # Check if the draft is complete
        done = self.round_num > self.num_rounds

        # Calculate reward only at the end of the draft
        reward = self.calculate_final_reward() if done else 0

        # Return observation, reward, done, and info
        return self.get_observation(), reward, done, {}



    
        # Calculate the reward - ONLY AFTER 16 ROUND HAS BEEN COMPLETED VS AFTER EACH ROUND

            # [Placeholder] Based on the player selected, need to bring in Year and fpts for the pick.
            # [Placeholder] Add in Starting Roster Update
            # [Placedholer] Add in Waiver Updates
        
        reward = 1 if 37 <= self.state <= 39 else -1

        # Determine if the episode is done
        terminated = False  # Use this for goal or failure logic
        truncated = self.shower_length <= 0  # Use this for time limit logic

        # Add noise to the state
        self.state += random.randint(-1, 1)

        # Return the observation, reward, terminated, truncated, and info
        info = {}
        return np.array([self.state], dtype=np.float32), reward, terminated, truncated, info
    
    def reset_round(self):
        self.pick_num = 1
        self.round_num += 1

    
    def render(self):
        # implement viz
        print(f"Current temperature: {self.state}")

    def close(self):
        pass

In [4]:
env = TestEnv()

In [6]:
episodes = 10
for ep in range(1, episodes + 1):
    state, info = env.reset()  # Unpack the tuple from reset
    terminated = False
    truncated = False
    score = 0

    while not (terminated or truncated):  # Episode ends if either is True
        action = env.action_space.sample()
        n_state, reward, terminated, truncated, info = env.step(action)  # Unpack all values
        score += reward

    print(f"Episodes: {ep}, Score: {score}")

Episodes: 1, Score: -34
Episodes: 2, Score: -32
Episodes: 3, Score: -48
Episodes: 4, Score: -58
Episodes: 5, Score: -50
Episodes: 6, Score: -48
Episodes: 7, Score: -54
Episodes: 8, Score: -22
Episodes: 9, Score: -54
Episodes: 10, Score: -36


## RL

In [7]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

In [10]:
env = TestEnv()

In [11]:
# Check the environment for compatibility with Stable-Baselines3
check_env(env, warn=True)

In [12]:
import os
import time

models = "PPO"
models_dir = f"models/{models}-{int(time.time())}"
logdir = f"logs/{models}-{int(time.time())}"
ITER_COUNT = 100

if not os.path.exists(models_dir):
    os.makedirs(models_dir)

if not os.path.exists(logdir):
    os.makedirs(logdir)

In [13]:

# Define the PPO model
model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=logdir)

# Training parameters
TIMESTEPS = 100000
ITER_COUNT = 5  # Number of iterations for training

# Train and save models
for i in range(1, ITER_COUNT + 1):
    model.learn(total_timesteps=TIMESTEPS, reset_num_timesteps=False, tb_log_name=f"PPO-{TIMESTEPS}")
    model.save(f"{models_dir}/PPO_{TIMESTEPS * i}")

# Test the trained model
episodes = 10
for ep in range(1, episodes + 1):
    state, info = env.reset()
    terminated = False
    truncated = False
    score = 0

    while not (terminated or truncated):
        action, _ = model.predict(state)
        state, reward, terminated, truncated, info = env.step(action)
        score += reward
        env.render()  # Optional: Render the environment for debugging

    print(f"Episode: {ep}, Score: {score}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs/PPO-1735042104/PPO-100000_0
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 60       |
|    ep_rew_mean     | -27.9    |
| time/              |          |
|    fps             | 7715     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 60          |
|    ep_rew_mean          | -35.6       |
| time/                   |             |
|    fps                  | 4903        |
|    iterations           | 2           |
|    time_elapsed         | 0           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.012675095 |
|    clip_fraction        | 0.0888      |
|    clip_range   