need to add year variability and add some variability around who is available (i.e. drafts don't perfectly follow ADP, some people slip and some positions are over and under targeted in certain drafts, move the ADP cutoff up and down randomly)

In [1]:
import pandas as pd
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import random

from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

# uncomment when SB3 & wandb are working together again
# import wandb
# from wandb.integration.sb3 import WandbCallback
# wandb.login()

In [70]:
position_map = {0: 'QB', 1: 'RB', 2: 'WR', 3: 'TE'}

class FantasyFootballEnv(gym.Env):
    def __init__(self, teams=12, rounds=7, year=None, first_round_pick=None, data_first_year=2018, data_last_year=2022):
        super(FantasyFootballEnv, self).__init__()
        
        # Action space: 0: QB, 1: RB, 2: WR, 3: TE
        self.action_space = spaces.Discrete(4)
        
        self.observation_space = spaces.Box(low=0, high=rounds, shape=(5,), dtype=np.int32)
        
        self.data_first_year = data_first_year
        self.data_last_year = data_last_year
        self.teams = teams
        self.rounds = rounds
        if year is not None:
            self.year = year
        else:
            self.year = random.randint(2018, 2022)
        if first_round_pick is not None:
            self.first_round_pick = first_round_pick
        else:
            self.first_round_pick = random.randint(1, teams)
            
        self.player_df = self.create_player_df()
        
        self.flex_count = 0
        
        # Other initializations
        self.current_round = 1
        self.roster = {'QB': [], 'RB': [], 'WR': [], 'TE': []}
        self.position_counts = {
            'QB': 1,
            'RB': 2,
            'WR': 2,
            'TE': 1,
            'FLEX': 1
        }
        self.flex_positions = ['RB', 'WR', 'TE']
        self.drafted_players = []
        
        print(f'Year: {self.year}, First Round Pick: {self.first_round_pick}, Teams: {self.teams}, Rounds: {self.rounds}')
        
    def reset(self, seed=None, year=None, first_round_pick=None):
        
        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        # We need the following line to seed self.np_random
        super().reset(seed=seed)
        
        if year is not None:
            self.year = year
        else:
            self.year = random.randint(2018, 2022)
        if first_round_pick is not None:
            self.first_round_pick = first_round_pick
        else:
            self.first_round_pick = random.randint(1, self.teams)
            
        self.flex_count = 0

        self.current_round = 1
        self.roster = {'QB': [], 'RB': [], 'WR': [], 'TE': []}
        self.drafted_players = []
        
        # Create the initial observation with the current round and counts for each position
        observation = [self.current_round, len(self.roster['QB']), len(self.roster['RB']), len(self.roster['WR']), len(self.roster['TE'])]
                
        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        info = {}
        
        print(f'Year: {self.year}, First Round Pick: {self.first_round_pick}, Teams: {self.teams}, Rounds: {self.rounds}')

                
        return observation, info
    
    def step(self, action):
        
        penalty = False
        done = False
        reward = 0
        penalty_amt = -10
        
        selected_position = position_map[action]
        pick = self.snake_draft_pick(self.teams, self.current_round, self.first_round_pick)
        selected_player, selected_player_points = self.draft_player(selected_position, pick)
        self.roster[selected_position].append(selected_player)
        if selected_position in self.flex_positions:
            if len(self.roster[selected_position]) >= self.position_counts[selected_position]:
                if self.flex_count >= self.position_counts['FLEX']:
                    penalty = True
                self.flex_count += 1
            # for example qb which isn't a flex
            else:
                if len(self.roster[selected_position]) >= self.position_counts[selected_position]:
                    penalty = True
                    
        if penalty:
            reward += penalty_amt
        
        observation = [self.current_round, len(self.roster['QB']), len(self.roster['RB']), len(self.roster['WR']), len(self.roster['TE'])]
        
        info = {}
        info.update({
            'round': self.current_round,
            'pick': pick,
            'adp_adj': self.adp_adj,
            'selected_position': selected_position,
            'selected_player': selected_player,
            'selected_player_points': selected_player_points,
        })
        
        self.current_round += 1
        
        if self.current_round > self.rounds:
            done = True
            total_points = self.calculate_total_points()
            reward += total_points
                
        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        return observation, reward, done, False, info
    
    def create_player_df(self):

        player_adp_df = pd.read_csv(f'adp_{self.data_first_year}_{self.data_last_year}.csv')
        player_adp_df = player_adp_df[player_adp_df['Player'].notna()]
        player_adp_df = player_adp_df[player_adp_df['Year'] == self.year]
        player_adp_df['Player'] = player_adp_df['Player'].apply(lambda x: ' '.join(str(x).split()[:2]))
        
        player_performance_df = pd.read_csv(f'player_performance_{self.data_first_year}_{self.data_last_year}.csv')
        player_performance_df = player_performance_df[player_performance_df['Player'].notna()]
        player_performance_df = player_performance_df[player_performance_df['year'] == self.year]
        player_performance_df['Player'] = player_performance_df['Player'].apply(lambda x: ' '.join(str(x).split()[:2]))
        
        player_df = pd.merge(player_adp_df, player_performance_df, on='Player', how='inner')
        player_df['Position'] = player_df['Position'].apply(lambda x: x[:2])
        player_df= player_df[['Player', 'Position', 'AVG', 'FPTS']]
        
        return player_df
    
    def snake_draft_pick(self, teams, round, first_round_pick):
        if round % 2 == 1:
            return (round - 1) * teams + first_round_pick
        else:
            return round * teams - first_round_pick + 1


    def draft_player(self, selected_position, pick):
    
        if self.current_round == 1:
            adp_adj = random.randint(int(-(self.first_round_pick - 1)/2), int((self.first_round_pick - 1)/2))
        elif self.current_round == 2:
            adp_adj = random.randint(int(-self.teams/2), int(self.teams/2))
        else:
            adp_adj = random.randint(-self.teams, self.teams)
        
        available_players = self.player_df[(self.player_df['Position'].str.startswith(selected_position)) & (self.player_df['AVG'] >= (pick + adp_adj)) & (~self.player_df['Player'].isin(self.drafted_players))]  # Filter out players who have already been drafted
        
        selected_player_df = available_players.nsmallest(1, 'AVG')
        selected_player = selected_player_df['Player'].iloc[0]
        selected_player_points = selected_player_df['FPTS'].iloc[0]
        
        self.drafted_players.append(selected_player)
        self.adp_adj = adp_adj
        
        return selected_player, selected_player_points
    
    # def custom_policy(self):
        

    
    def calculate_total_points(self):
        
        # note this method assumes you play your best player not the order they are drafted
        
        total_points = 0
        
        for pos in self.roster.keys():
            pos_points_list = [self.player_df[self.player_df['Player'] == player]['FPTS'].iloc[0] for player in self.roster[pos]]
            total_points += sum(sorted(pos_points_list, reverse=True)[:self.position_counts[pos]])
            print(f"{pos} Points: {pos_points_list} {sum(sorted(pos_points_list, reverse=True)[:self.position_counts[pos]])}")
            
        flex_points_list = []
        for pos in self.flex_positions:
            pos_points_list = [self.player_df[self.player_df['Player'] == player]['FPTS'].iloc[0] for player in self.roster[pos]]
            pos_points_list = sorted(pos_points_list, reverse=True)[self.position_counts[pos]:]
            flex_points_list+=pos_points_list
        total_points+=sum(flex_points_list)
        
        return total_points

In [4]:
def run_training_job(model_type
                    , use_wandb = 'y', wandb_verbose=2
                    , timesteps=1_000_000
                    # , policy='MultiInputPolicy'
                    , policy='MlpPolicy'
                    # should look into mandating that each pick position is considered
                    , n_eval_episodes=12
                    , vec_envs='n', n_envs=4
                    , sb3_model_verbose=0
                    # DQN
                    , dqn_exploration_final_eps=0.025, dqn_exploration_fraction=0.5
                    # PPO
                    # https://colab.research.google.com/drive/1GI0WpThwRHbl-Fu2RHfczq6dci5GBDVE#scrollTo=FMdJRrZ4n7xp
                    , ppo_n_steps = 1024, ppo_batch_size = 64, ppo_n_epochs = 4, ppo_gamma = 0.999, ppo_gae_lambda = 0.98, ppo_ent_coef = 0.01,
                    ):
    
    config = {
    "policy_type": policy,
    "total_timesteps": timesteps,
    # "env_id": "NflEnv",
    "env_id": "FantasyFootballEnv",
    }

    # https://stable-baselines3.readthedocs.io/en/master/guide/integrations.html
    if use_wandb == 'y':
        run = wandb.init(
            # project="sb3_nfl_2",
            project="sb3_FantasyFootballEnv",
            config=config,
            sync_tensorboard=True
        )

    # when using multiple environments, the total number of steps taken in counts each step taken in each environment
    # if using 4 environments and 400_000 TIMESTEPS, the agent will take a total of 100_000 steps in each environment.
    if vec_envs == 'y':
        # https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/sb3/5_custom_gym_env.ipynb
        # received 'ValueError: high is out of bounds for int32' without the seed
        # env = make_vec_env(env_id = NflEnv, n_envs=n_envs, seed=1)
        # eval_env = make_vec_env(env_id = NflEnv, n_envs=1, seed=1)
        env = make_vec_env(env_id = FantasyFootballEnv, n_envs=n_envs, seed=1)
        eval_env = make_vec_env(env_id = FantasyFootballEnv, n_envs=1, seed=1)

    elif vec_envs == 'n':
        # env = NflEnv()
        # eval_env = NflEnv()
        env = FantasyFootballEnv()
        eval_env = FantasyFootballEnv()
    
    if use_wandb == 'y':
        if model_type == 'DQN':
            # default values for these parameters are exploration_final_eps=0.05 and exploration_fraction=0.1
            # with the default values, the exploration rate will linearly decrease to 0.05 over the first 10% of the timesteps
            model = DQN(config["policy_type"], env, verbose=sb3_model_verbose, tensorboard_log=f"runs/{run.id}"
                        , exploration_final_eps=dqn_exploration_final_eps, exploration_fraction = dqn_exploration_fraction)
        elif model_type == 'PPO':
            model = PPO(config["policy_type"], env, verbose=sb3_model_verbose, tensorboard_log=f"runs/{run.id}"
                        , n_steps = ppo_n_steps, batch_size = ppo_batch_size, n_epochs = ppo_n_epochs, gamma = ppo_gamma
                        , gae_lambda = ppo_gae_lambda, ent_coef = ppo_ent_coef)
        elif model_type == 'A2C':
            model = A2C(config["policy_type"], env, verbose=sb3_model_verbose, tensorboard_log=f"runs/{run.id}")
    else:
        if model_type == 'DQN':
            model = DQN(config["policy_type"], env, verbose=sb3_model_verbose, exploration_final_eps=dqn_exploration_final_eps
                        , exploration_fraction = dqn_exploration_fraction)
        elif model_type == 'PPO':
            model = PPO(config["policy_type"], env, verbose=sb3_model_verbose
                        , n_steps = ppo_n_steps, batch_size = ppo_batch_size, n_epochs = ppo_n_epochs, gamma = ppo_gamma
                        , gae_lambda = ppo_gae_lambda, ent_coef = ppo_ent_coef)
        elif model_type == 'A2C':
            model = A2C(config["policy_type"], env, verbose=sb3_model_verbose)

    mean_reward, std_reward = evaluate_policy(model=model, env=eval_env, n_eval_episodes=n_eval_episodes)
    print(f"mean_reward before training:{mean_reward:.2f} +/- {std_reward:.2f}")

    if use_wandb == 'y':
        model.learn(
            total_timesteps=config["total_timesteps"],
            callback=WandbCallback(
                model_save_path=f"models/{run.id}",
                verbose=wandb_verbose,
            ),
        )
        run.finish()
    else:
        model.learn(total_timesteps=config["total_timesteps"])

    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=n_eval_episodes)
    print(f"mean_reward after training:{mean_reward:.2f} +/- {std_reward:.2f}")

    # parameters_saved = model.get_parameters()
    
    if vec_envs == 'y':
        model.save(f"models/{model_type}_{timesteps}_vecEnv")
    else:
        model.save(f"models/{model_type}_{timesteps}")
    
    return model

In [11]:
model_type = 'PPO'
time_steps = 250_000

In [12]:
model = run_training_job(model_type,timesteps=time_steps, use_wandb='n')

Year: 2018, First Round Pick: 10, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 9, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 11, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 5, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 4, Teams: 12, Rounds: 7
Year: 2020, First Round Pick: 1, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 12, Teams: 12, Rounds: 7
Year: 2019, First Round Pick: 6, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 10, Teams: 12, Rounds: 7




Year: 2020, First Round Pick: 12, Teams: 12, Rounds: 7
Year: 2022, First Round Pick: 4, Teams: 12, Rounds: 7
Year: 2022, First Round Pick: 4, Teams: 12, Rounds: 7
Year: 2020, First Round Pick: 7, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 1, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 9, Teams: 12, Rounds: 7
mean_reward before training:598.15 +/- 62.25
Year: 2022, First Round Pick: 2, Teams: 12, Rounds: 7
Year: 2020, First Round Pick: 9, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 11, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 4, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 10, Teams: 12, Rounds: 7
Year: 2022, First Round Pick: 3, Teams: 12, Rounds: 7
Year: 2020, First Round Pick: 3, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 5, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 1, Teams: 12, Rounds: 7
Year: 2019, First Round Pick: 9, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 9, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 12, 



Year: 2021, First Round Pick: 5, Teams: 12, Rounds: 7
Year: 2019, First Round Pick: 11, Teams: 12, Rounds: 7
Year: 2019, First Round Pick: 5, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 7, Teams: 12, Rounds: 7
Year: 2019, First Round Pick: 11, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 7, Teams: 12, Rounds: 7
Year: 2019, First Round Pick: 8, Teams: 12, Rounds: 7
Year: 2020, First Round Pick: 7, Teams: 12, Rounds: 7
Year: 2018, First Round Pick: 1, Teams: 12, Rounds: 7
Year: 2020, First Round Pick: 11, Teams: 12, Rounds: 7
Year: 2022, First Round Pick: 10, Teams: 12, Rounds: 7
Year: 2021, First Round Pick: 1, Teams: 12, Rounds: 7
Year: 2020, First Round Pick: 6, Teams: 12, Rounds: 7
mean_reward after training:1260.19 +/- 81.83


In [7]:
model = PPO.load(f'models/{model_type}_{time_steps}')

In [43]:
def agent_draft(teams, rounds, model, first_round_pick=None, year=None):
    env = FantasyFootballEnv(teams=teams, rounds=rounds, first_round_pick=first_round_pick, year=year)
    state, info = env.reset(first_round_pick=first_round_pick, year=year)
    for i in range(rounds):
        action, _states = model.predict(state)
        action = int(action)  # If action is an array with a single value that can be directly converted to int
        new_state, reward, done, placeholder, info = env.step(action)
        # print(f'Round {info["round"]}, Pick {info["pick"]}, Selected Position {info["selected_position"]}, Selected Player {info["selected_player"]}')
        print(f'Round {info["round"]}, Pick {info["pick"]}, adp_adj {info["adp_adj"]}, Selected Position {info["selected_position"]}, Selected Player {info["selected_player"]}')
        state = new_state
        if done:
            print(f'Reward: {reward}')

def manual_draft(teams, rounds, actions, first_round_pick= None, year=None):
    env = FantasyFootballEnv(teams=teams, rounds=rounds, first_round_pick=first_round_pick, year=year)
    state, info = env.reset(first_round_pick=first_round_pick, year=year)
    for action in actions:
        new_state, reward, done, placeholder, info = env.step(action)
        # print(f'Round {info["round"]}, Pick {info["pick"]}, Selected Position {info["selected_position"]}, Selected Player {info["selected_player"]}')
        print(f'Round {info["round"]}, Pick {info["pick"]}, adp_adj {info["adp_adj"]}, Selected Position {info["selected_position"]}, Selected Player {info["selected_player"]}')
        state = new_state
        if done:
            print(f'Reward: {reward}')
            
def draft(teams, rounds, first_round_pick=None, year=None, model=None, actions=None):
    env = FantasyFootballEnv(teams=teams, rounds=rounds, first_round_pick=first_round_pick, year=year)
    state, info = env.reset(first_round_pick=first_round_pick, year=year)
    for i in range(rounds):
        if model is not None:
            action, _states = model.predict(state)
            action = int(action)  # If action is an array with a single value that can be directly converted to int
        elif actions is not None:
            action = actions[i]
        new_state, reward, done, placeholder, info = env.step(action)
        print(f'Round {info["round"]}, Pick {info["pick"]}, adp_adj {info["adp_adj"]}, Selected Position {info["selected_position"]}, Selected Player {info["selected_player"]}, points {info["selected_player_points"]}')
        state = new_state
        if done:
            print(f'total score: {reward}')

teams=12
rounds=7

In [71]:
# you're here testing because pick adjust broke things I think

year = 2022
first_round_pick=1

print(f'first round pick: {first_round_pick}\n')
print('run first manual')
# rb first
actions = [1, 1, 1, 2, 2, 3, 0]
draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, year=year, actions=actions)
print('agent')
draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, year=year, model=model)

first round pick: 1

run first manual
Year: 2022, First Round Pick: 1, Teams: 12, Rounds: 7
Year: 2022, First Round Pick: 1, Teams: 12, Rounds: 7
Round 1, Pick 1, adp_adj 0, Selected Position RB, Selected Player Jonathan Taylor, points 132.4
Round 2, Pick 24, adp_adj 5, Selected Position RB, Selected Player James Conner, points 177.2
Round 3, Pick 25, adp_adj 3, Selected Position RB, Selected Player Travis Etienne, points 187.6
Round 4, Pick 48, adp_adj -11, Selected Position WR, Selected Player DJ Moore, points 167.6
Round 5, Pick 49, adp_adj 7, Selected Position WR, Selected Player Jerry Jeudy, points 170.7
Round 6, Pick 72, adp_adj -3, Selected Position TE, Selected Player Dallas Goedert, points 113.7
QB Points: [213.6] 213.6
RB Points: [132.4, 177.2, 187.6] 364.79999999999995
WR Points: [167.6, 170.7] 338.29999999999995
TE Points: [113.7] 113.7
Round 7, Pick 73, adp_adj 4, Selected Position QB, Selected Player Dak Prescott, points 213.6
total score: 1162.8
