In [84]:
import pandas as pd
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import random

from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

# uncomment when SB3 & wandb are working together again
# import wandb
# from wandb.integration.sb3 import WandbCallback
# wandb.login()

In [134]:
position_map = {0: 'QB', 1: 'RB', 2: 'WR', 3: 'TE'}

player_adp_df = pd.read_csv('adp_2022.csv')
player_performance_df = pd.read_csv('player_performance_2022.csv')
player_adp_df['Player'] = player_adp_df['Player'].apply(lambda x: ' '.join(x.split()[:2]))
player_performance_df['Player'] = player_performance_df['Player'].apply(lambda x: ' '.join(x.split()[:2]))
player_df = pd.merge(player_adp_df, player_performance_df, on='Player', how='inner')
player_df['Position'] = player_df['Position'].apply(lambda x: x[:2])
player_df= player_df[['Player', 'Position', 'AVG', 'FPTS']]


class FantasyFootballEnv(gym.Env):
    def __init__(self, teams=12, rounds=7, first_round_pick=None):
        super(FantasyFootballEnv, self).__init__()
        
        # Action space: 0: QB, 1: RB, 2: WR, 3: TE
        self.action_space = spaces.Discrete(4)
        
        self.observation_space = spaces.Box(low=0, high=rounds, shape=(5,), dtype=np.int32)
        
        self.teams = teams
        self.rounds = rounds
        if first_round_pick is not None:
            self.first_round_pick = first_round_pick
        else:
            self.first_round_pick = random.randint(1, teams)
        
        self.flex_count = 0
        
        # Other initializations
        self.current_round = 1
        self.roster = {'QB': [], 'RB': [], 'WR': [], 'TE': []}
        self.drafted_players = []
            
    # def reset(self):
    def reset(self, seed=None, first_round_pick=None):
        
        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        # We need the following line to seed self.np_random
        super().reset(seed=seed)
        
        if first_round_pick is not None:
            self.first_round_pick = first_round_pick
        else:
            self.first_round_pick = random.randint(1, self.teams)

        self.current_round = 1
        self.roster = {'QB': [], 'RB': [], 'WR': [], 'TE': []}
        self.drafted_players = []
        
        # Create the initial observation with the current round and counts for each position
        observation = [self.current_round, len(self.roster['QB']), len(self.roster['RB']), len(self.roster['WR']), len(self.roster['TE'])]
                
        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        info = {}
                
        return observation, info
    
    def step(self, action):
        
        penalty = False
        done = False
        reward = 0
        penalty_amt = -10
        
        selected_position = position_map[action]
        pick = self.snake_draft_pick(self.teams, self.current_round, self.first_round_pick)
        selected_player = self.draft_player(selected_position, pick)
        self.roster[selected_position].append(selected_player)
        position_counts = {
            'QB': 1,
            'RB': 2,
            'WR': 2,
            'TE': 1,
            'FLEX': 1
        }
        flex_positions = ['RB', 'WR', 'TE']
        if selected_position in flex_positions:
            if len(self.roster[selected_position]) >= position_counts[selected_position]:
                if self.flex_count >= position_counts['FLEX']:
                    penalty = True
                self.flex_count += 1
            # for example qb which isn't a flex
            else:
                if len(self.roster[selected_position]) >= position_counts[selected_position]:
                    penalty = True
                    
        if penalty:
            reward += penalty_amt
        
        # Create the new observation with the current round and counts for each position
        observation = [self.current_round, len(self.roster['QB']), len(self.roster['RB']), len(self.roster['WR']), len(self.roster['TE'])]
        
        info = {}
        info.update({
            'round': self.current_round,
            'pick': pick,
            'selected_position': selected_position,
            'selected_player': selected_player,
        })
        
        # Update the current round
        self.current_round += 1
        
        if self.current_round > self.rounds:
            done = True
            total_points = self.calculate_total_points()
            reward += total_points
                
        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        return observation, reward, done, False, info
    
    # def snake_draft_pick(teams, round, first_round_pick):
    def snake_draft_pick(self, teams, round, first_round_pick):
        if round % 2 == 1:
            return (round - 1) * teams + first_round_pick
        else:
            return round * teams - first_round_pick + 1


    def draft_player(self, selected_position, pick):
    
        # available_players = player_df[(player_df['Position'].str.startswith(selected_position)) & (player_df['AVG'] >= pick)]
        available_players = player_df[(player_df['Position'].str.startswith(selected_position)) & (player_df['AVG'] >= pick) & (~player_df['Player'].isin(self.drafted_players))]  # Filter out players who have already been drafted

        
        selected_player = available_players.nsmallest(1, 'AVG')
        selected_player = selected_player['Player'].iloc[0]
        
        self.drafted_players.append(selected_player)
        
        return selected_player
    
    def calculate_total_points(self):
        
        # note this method assumes you play your best player not the order they are drafted
        
        total_points = 0

        # Select the highest-scoring QB
        qb_points = [player_df[player_df['Player'] == player]['FPTS'].iloc[0] for player in self.roster['QB']]
        total_points += max(qb_points, default=0)
        # Select the two highest-scoring RBs
        rb_points = [player_df[player_df['Player'] == player]['FPTS'].iloc[0] for player in self.roster['RB']]
        total_points += sum(sorted(rb_points, reverse=True)[:2])
        # Select the two highest-scoring WRs
        wr_points = [player_df[player_df['Player'] == player]['FPTS'].iloc[0] for player in self.roster['WR']]
        total_points += sum(sorted(wr_points, reverse=True)[:2])
        # Select the highest-scoring TE
        te_points = [player_df[player_df['Player'] == player]['FPTS'].iloc[0] for player in self.roster['TE']]
        total_points += max(te_points, default=0)
        # Select the highest-scoring player for the Flex position from the remaining RBs, WRs, and TEs
        remaining_rbs = sorted(rb_points[2:], reverse=True)
        remaining_wrs = sorted(wr_points[2:], reverse=True)
        remaining_tes = sorted(te_points[1:], reverse=True)
        remaining_flex = remaining_rbs + remaining_wrs + remaining_tes
        # total_points += max(remaining_flex, default=0)
        flex_points = max(remaining_flex, default=0)
        total_points += flex_points
        return total_points

In [95]:
def run_training_job(model_type
                    , use_wandb = 'y', wandb_verbose=2
                    , timesteps=1_000_000
                    # , policy='MultiInputPolicy'
                    , policy='MlpPolicy'
                    # should look into mandating that each pick position is considered
                    , n_eval_episodes=12
                    , vec_envs='n', n_envs=4
                    , sb3_model_verbose=0
                    # DQN
                    , dqn_exploration_final_eps=0.025, dqn_exploration_fraction=0.5
                    # PPO
                    # https://colab.research.google.com/drive/1GI0WpThwRHbl-Fu2RHfczq6dci5GBDVE#scrollTo=FMdJRrZ4n7xp
                    , ppo_n_steps = 1024, ppo_batch_size = 64, ppo_n_epochs = 4, ppo_gamma = 0.999, ppo_gae_lambda = 0.98, ppo_ent_coef = 0.01,
                    ):
    
    config = {
    "policy_type": policy,
    "total_timesteps": timesteps,
    # "env_id": "NflEnv",
    "env_id": "FantasyFootballEnv",
    }

    # https://stable-baselines3.readthedocs.io/en/master/guide/integrations.html
    if use_wandb == 'y':
        run = wandb.init(
            # project="sb3_nfl_2",
            project="sb3_FantasyFootballEnv",
            config=config,
            sync_tensorboard=True
        )

    # when using multiple environments, the total number of steps taken in counts each step taken in each environment
    # if using 4 environments and 400_000 TIMESTEPS, the agent will take a total of 100_000 steps in each environment.
    if vec_envs == 'y':
        # https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/sb3/5_custom_gym_env.ipynb
        # received 'ValueError: high is out of bounds for int32' without the seed
        # env = make_vec_env(env_id = NflEnv, n_envs=n_envs, seed=1)
        # eval_env = make_vec_env(env_id = NflEnv, n_envs=1, seed=1)
        env = make_vec_env(env_id = FantasyFootballEnv, n_envs=n_envs, seed=1)
        eval_env = make_vec_env(env_id = FantasyFootballEnv, n_envs=1, seed=1)

    elif vec_envs == 'n':
        # env = NflEnv()
        # eval_env = NflEnv()
        env = FantasyFootballEnv()
        eval_env = FantasyFootballEnv()
    
    if use_wandb == 'y':
        if model_type == 'DQN':
            # default values for these parameters are exploration_final_eps=0.05 and exploration_fraction=0.1
            # with the default values, the exploration rate will linearly decrease to 0.05 over the first 10% of the timesteps
            model = DQN(config["policy_type"], env, verbose=sb3_model_verbose, tensorboard_log=f"runs/{run.id}"
                        , exploration_final_eps=dqn_exploration_final_eps, exploration_fraction = dqn_exploration_fraction)
        elif model_type == 'PPO':
            model = PPO(config["policy_type"], env, verbose=sb3_model_verbose, tensorboard_log=f"runs/{run.id}"
                        , n_steps = ppo_n_steps, batch_size = ppo_batch_size, n_epochs = ppo_n_epochs, gamma = ppo_gamma
                        , gae_lambda = ppo_gae_lambda, ent_coef = ppo_ent_coef)
        elif model_type == 'A2C':
            model = A2C(config["policy_type"], env, verbose=sb3_model_verbose, tensorboard_log=f"runs/{run.id}")
    else:
        if model_type == 'DQN':
            model = DQN(config["policy_type"], env, verbose=sb3_model_verbose, exploration_final_eps=dqn_exploration_final_eps
                        , exploration_fraction = dqn_exploration_fraction)
        elif model_type == 'PPO':
            model = PPO(config["policy_type"], env, verbose=sb3_model_verbose
                        , n_steps = ppo_n_steps, batch_size = ppo_batch_size, n_epochs = ppo_n_epochs, gamma = ppo_gamma
                        , gae_lambda = ppo_gae_lambda, ent_coef = ppo_ent_coef)
        elif model_type == 'A2C':
            model = A2C(config["policy_type"], env, verbose=sb3_model_verbose)

    mean_reward, std_reward = evaluate_policy(model=model, env=eval_env, n_eval_episodes=n_eval_episodes)
    print(f"mean_reward before training:{mean_reward:.2f} +/- {std_reward:.2f}")

    if use_wandb == 'y':
        model.learn(
            total_timesteps=config["total_timesteps"],
            callback=WandbCallback(
                model_save_path=f"models/{run.id}",
                verbose=wandb_verbose,
            ),
        )
        run.finish()
    else:
        model.learn(total_timesteps=config["total_timesteps"])

    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=n_eval_episodes)
    print(f"mean_reward after training:{mean_reward:.2f} +/- {std_reward:.2f}")

    # parameters_saved = model.get_parameters()
    
    if vec_envs == 'y':
        model.save(f"models/{model_type}_{timesteps}_vecEnv")
    else:
        model.save(f"models/{model_type}_{timesteps}")
    
    return model

In [96]:
model = run_training_job('PPO',timesteps=1_000_000, use_wandb='n')



mean_reward before training:873.94 +/- 71.96




mean_reward after training:1517.58 +/- 99.54


In [130]:
def agent_draft(teams, rounds, first_round_pick, model):
    env = FantasyFootballEnv(teams=teams, rounds=rounds, first_round_pick=first_round_pick)
    state, info = env.reset(first_round_pick=first_round_pick)
    for i in range(rounds):
        action, _states = model.predict(state)
        action = int(action)  # If action is an array with a single value that can be directly converted to int
        new_state, reward, done, placeholder, info = env.step(action)
        print(f'Round {info["round"]}, Pick {info["pick"]}, Selected Position {info["selected_position"]}, Selected Player {info["selected_player"]}')
        state = new_state
        if done:
            print(f'Reward: {reward}')

def manual_draft(teams, rounds, first_round_pick, actions):
    env = FantasyFootballEnv(teams=teams, rounds=rounds, first_round_pick=first_round_pick)
    state, info = env.reset()
    for action in actions:
        new_state, reward, done, placeholder, info = env.step(action)
        print(f'Round {info["round"]}, Pick {info["pick"]}, Selected Position {info["selected_position"]}, Selected Player {info["selected_player"]}')
        state = new_state
        if done:
            print(f'Reward: {reward}')

teams=12
rounds=7
# rb first
actions = [1, 1, 1, 2, 2, 3, 0]
# manual_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, actions=actions)
# agent_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, model=model)

In [142]:
first_round_pick=1
print(f'first round pick: {first_round_pick}\n')
print('run first manual')
manual_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, actions=actions)
print()
print('agent')
agent_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, model=model)

first round pick: 1

run first manual
Round 1, Pick 4, Selected Position RB, Selected Player Derrick Henry
Round 2, Pick 21, Selected Position RB, Selected Player Leonard Fournette
Round 3, Pick 28, Selected Position RB, Selected Player James Conner
Round 4, Pick 45, Selected Position WR, Selected Player Diontae Johnson
Round 5, Pick 52, Selected Position WR, Selected Player DK Metcalf
Round 6, Pick 69, Selected Position TE, Selected Player Dallas Goedert
Round 7, Pick 76, Selected Position QB, Selected Player Dak Prescott
Reward: 1300.9

agent
Round 1, Pick 1, Selected Position TE, Selected Player Travis Kelce
Round 2, Pick 24, Selected Position WR, Selected Player Michael Pittman
Round 3, Pick 25, Selected Position QB, Selected Player Patrick Mahomes
Round 4, Pick 48, Selected Position RB, Selected Player Josh Jacobs
Round 5, Pick 49, Selected Position RB, Selected Player AJ Dillon
Round 6, Pick 72, Selected Position WR, Selected Player Adam Thielen
Round 7, Pick 73, Selected Positio

In [143]:
first_round_pick=7
print(f'first round pick: {first_round_pick}\n')
print('run first manual')
manual_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, actions=actions)
print()
print('agent')
agent_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, model=model)

first round pick: 7

run first manual
Round 1, Pick 1, Selected Position RB, Selected Player Jonathan Taylor
Round 2, Pick 24, Selected Position RB, Selected Player Ezekiel Elliott
Round 3, Pick 25, Selected Position RB, Selected Player James Conner
Round 4, Pick 48, Selected Position WR, Selected Player Diontae Johnson
Round 5, Pick 49, Selected Position WR, Selected Player Jaylen Waddle
Round 6, Pick 72, Selected Position TE, Selected Player Dawson Knox
Round 7, Pick 73, Selected Position QB, Selected Player Dak Prescott
Reward: 1216.4

agent
Round 1, Pick 7, Selected Position TE, Selected Player Travis Kelce
Round 2, Pick 18, Selected Position WR, Selected Player Tyreek Hill
Round 3, Pick 31, Selected Position QB, Selected Player Patrick Mahomes
Round 4, Pick 42, Selected Position RB, Selected Player David Montgomery
Round 5, Pick 55, Selected Position WR, Selected Player Jerry Jeudy
Round 6, Pick 66, Selected Position RB, Selected Player Clyde Edwards-Helaire
Round 7, Pick 79, Sele

In [144]:
first_round_pick=12
print(f'first round pick: {first_round_pick}\n')
print('run first manual')
manual_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, actions=actions)
print()
print('agent')
agent_draft(teams=teams, rounds=rounds, first_round_pick=first_round_pick, model=model)

first round pick: 12

run first manual
Round 1, Pick 1, Selected Position RB, Selected Player Jonathan Taylor
Round 2, Pick 24, Selected Position RB, Selected Player Ezekiel Elliott
Round 3, Pick 25, Selected Position RB, Selected Player James Conner
Round 4, Pick 48, Selected Position WR, Selected Player Diontae Johnson
Round 5, Pick 49, Selected Position WR, Selected Player Jaylen Waddle
Round 6, Pick 72, Selected Position TE, Selected Player Dawson Knox
Round 7, Pick 73, Selected Position QB, Selected Player Dak Prescott
Reward: 1216.4

agent
Round 1, Pick 12, Selected Position TE, Selected Player Travis Kelce
Round 2, Pick 13, Selected Position WR, Selected Player CeeDee Lamb
Round 3, Pick 36, Selected Position QB, Selected Player Justin Herbert
Round 4, Pick 37, Selected Position RB, Selected Player Travis Etienne
Round 5, Pick 60, Selected Position WR, Selected Player Brandin Cooks
Round 6, Pick 61, Selected Position RB, Selected Player Antonio Gibson
Round 7, Pick 84, Selected P