TODO: add year variability, and add some variability around who is available — drafts don't perfectly follow ADP: some players slip, and some positions are over- or under-targeted in certain drafts. One option is to move the ADP cutoff up and down randomly.

In [95]:
import pandas as pd
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import random

from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

# uncomment when SB3 & wandb are working together again
# import wandb
# from wandb.integration.sb3 import WandbCallback
# wandb.login()

In [96]:
# Maps each discrete action index (0-3) to the roster position that action drafts.
position_map = {0: 'QB', 1: 'RB', 2: 'WR', 3: 'TE'}

class FantasyFootballEnv(gym.Env):
    """Gymnasium environment in which an agent drafts one fantasy-football roster.

    One episode is a snake draft of ``rounds`` picks for a single team. On each
    step the agent chooses a position (see ``position_map``) and receives the
    undrafted player at that position with the smallest ``AVG`` value
    (presumably the average draft position) that is at or after a randomly
    jittered cutoff around the current overall pick number.

    Observation: ``[round, n_QB, n_RB, n_WR, n_TE]`` as an int32 array.
    Reward: 0 on intermediate steps (plus ``penalty_amt`` for every pick that
    cannot fit a starter or FLEX slot when ``apply_penalty`` is set); on the
    final round the summed ``FPTS`` of the best starting lineup is added.

    NOTE: all randomness is drawn from the global ``random`` module, so the
    ``seed`` given to ``reset`` does not by itself make episodes reproducible.
    """

    def __init__(self, teams=12, rounds=7, year=None, first_round_pick=None, data_first_year=2018, data_last_year=2022
                , apply_penalty=False, penalty_amt=-10):
        """Configure the league and start the first episode.

        Args:
            teams: number of teams in the snake draft.
            rounds: number of picks the agent makes per episode.
            year: season to draft from; random within the data range if None.
            first_round_pick: draft slot (1..teams); random if None.
            data_first_year, data_last_year: year range used both in the CSV
                file names and as bounds for the random season.
            apply_penalty: whether picks that fit no roster slot are penalised.
            penalty_amt: reward added for each such pick (negative to punish).
        """
        super(FantasyFootballEnv, self).__init__()

        # Action space: 0: QB, 1: RB, 2: WR, 3: TE
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(low=0, high=rounds, shape=(5,), dtype=np.int32)

        self.data_first_year = data_first_year
        self.data_last_year = data_last_year
        self.teams = teams
        self.rounds = rounds
        self.apply_penalty = apply_penalty
        self.penalty_amt = penalty_amt

        # Starting-lineup slots; FLEX can be filled by any of flex_positions.
        self.position_counts = {
            'QB': 1,
            'RB': 2,
            'WR': 2,
            'TE': 1,
            'FLEX': 1
        }
        self.flex_positions = ['RB', 'WR', 'TE']

        if year is not None:
            self.year = year
        else:
            # Respect the configured data range instead of a hard-coded one.
            self.year = random.randint(self.data_first_year, self.data_last_year)
        if first_round_pick is not None:
            self.first_round_pick = first_round_pick
        else:
            self.first_round_pick = random.randint(1, teams)

        # Per-year cache so that changing the season on reset() does not
        # re-read the CSV files more than once per year.
        self._player_df_cache = {}
        self._load_player_df()
        self._start_episode()

    def _start_episode(self):
        """Clear all per-episode draft state."""
        self.pick = self.first_round_pick
        self.flex_count = 0
        self.current_round = 1
        self.roster = {'QB': [], 'RB': [], 'WR': [], 'TE': []}
        self.drafted_players = []
        self.adp_adj = 0

    def _load_player_df(self):
        """Point ``self.player_df`` at the player table for ``self.year``."""
        if self.year not in self._player_df_cache:
            self._player_df_cache[self.year] = self.create_player_df()
        self.player_df = self._player_df_cache[self.year]

    def _get_observation(self):
        """Return ``[round, n_QB, n_RB, n_WR, n_TE]`` in the declared int32 dtype."""
        return np.array(
            [self.current_round] + [len(self.roster[pos]) for pos in ('QB', 'RB', 'WR', 'TE')],
            dtype=np.int32,
        )

    def _player_points(self, player):
        """Look up the ``FPTS`` value of ``player`` in the current season table."""
        return self.player_df.loc[self.player_df['Player'] == player, 'FPTS'].iloc[0]

    def reset(self, seed=None, year=None, first_round_pick=None, options=None):
        """Start a new draft and return ``(observation, info)``.

        Args:
            seed: forwarded to ``gym.Env.reset`` to seed ``self.np_random``.
            year: season to draft from; random within the data range if None.
            first_round_pick: draft slot (1..teams); random if None.
            options: optional dict (Gymnasium's standard keyword); its
                ``'year'`` / ``'first_round_pick'`` entries are used when the
                corresponding explicit argument is None.
        """
        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        # We need the following line to seed self.np_random
        super().reset(seed=seed)

        if options:
            if year is None:
                year = options.get('year')
            if first_round_pick is None:
                first_round_pick = options.get('first_round_pick')

        if year is not None:
            self.year = year
        else:
            self.year = random.randint(self.data_first_year, self.data_last_year)
        if first_round_pick is not None:
            self.first_round_pick = first_round_pick
        else:
            self.first_round_pick = random.randint(1, self.teams)

        # The season may have changed, so the player table must follow it;
        # otherwise the draft would silently keep using the year from __init__.
        self._load_player_df()
        self._start_episode()

        info = {}
        return self._get_observation(), info

    def step(self, action):
        """Draft one player at the chosen position and advance one round.

        Returns the Gymnasium 5-tuple ``(observation, reward, terminated,
        truncated, info)``; ``truncated`` is always False.
        """
        done = False
        reward = 0

        # int() also accepts numpy integers / 0-d arrays produced by a model.
        selected_position = position_map[int(action)]
        self.pick = self.snake_draft_pick(self.teams, self.current_round, self.first_round_pick)
        selected_player, selected_player_points = self.draft_player(selected_position)
        self.roster[selected_position].append(selected_player)

        pos_max_exceeded = len(self.roster[selected_position]) > self.position_counts[selected_position]
        flex_pos_selected = selected_position in self.flex_positions
        # A surplus RB/WR/TE is counted against the FLEX slot.
        if pos_max_exceeded and flex_pos_selected:
            self.flex_count += 1

        if self.apply_penalty:
            flex_max_exceeded = self.flex_count > self.position_counts['FLEX']
            # Penalise a surplus pick that FLEX cannot absorb: either FLEX is
            # already over capacity or the position (QB) is not FLEX-eligible.
            if pos_max_exceeded and (flex_max_exceeded or not flex_pos_selected):
                reward += self.penalty_amt

        # The round in the observation is the round just completed; it is
        # incremented only after the observation and info are built.
        observation = self._get_observation()

        if self.current_round >= self.rounds:
            done = True
            reward += self.calculate_total_points()

        info = {
            'round': self.current_round,
            'pick': self.pick,
            'adp_adj': self.adp_adj,
            'selected_position': selected_position,
            'selected_player': selected_player,
            'selected_player_points': selected_player_points,
            'reward': reward,
        }

        self.current_round += 1

        # https://gymnasium.farama.org/tutorials/gymnasium_basics/environment_creation/#sphx-glr-tutorials-gymnasium-basics-environment-creation-py
        return observation, reward, done, False, info

    def create_player_df(self):
        """Build the player table for ``self.year`` from the two CSV files.

        Reads ``adp_<first>_<last>.csv`` and ``player_performance_<first>_<last>.csv``
        from the working directory, keeps rows for ``self.year``, normalises
        player names to their first two words, and inner-joins on ``Player``.

        Returns:
            DataFrame with columns ``Player``, ``Position``, ``AVG``, ``FPTS``.
        """
        def first_two_words(name):
            # Drops suffixes/team tags so names match across both sources.
            return ' '.join(str(name).split()[:2])

        player_adp_df = pd.read_csv(f'adp_{self.data_first_year}_{self.data_last_year}.csv')
        player_adp_df = player_adp_df[player_adp_df['Player'].notna()]
        # .copy() so the column assignment below does not write into a slice.
        player_adp_df = player_adp_df[player_adp_df['Year'] == self.year].copy()
        player_adp_df['Player'] = player_adp_df['Player'].apply(first_two_words)

        player_performance_df = pd.read_csv(f'player_performance_{self.data_first_year}_{self.data_last_year}.csv')
        player_performance_df = player_performance_df[player_performance_df['Player'].notna()]
        player_performance_df = player_performance_df[player_performance_df['year'] == self.year].copy()
        player_performance_df['Player'] = player_performance_df['Player'].apply(first_two_words)

        player_df = pd.merge(player_adp_df, player_performance_df, on='Player', how='inner')
        # Keep only the two-letter position code (e.g. 'RB12' -> 'RB').
        player_df['Position'] = player_df['Position'].str[:2]
        player_df = player_df[['Player', 'Position', 'AVG', 'FPTS']]

        return player_df

    def snake_draft_pick(self, teams, round, first_round_pick):
        """Return the overall pick number for ``first_round_pick`` in ``round``.

        Odd rounds run in slot order, even rounds in reverse (snake draft).
        """
        if round % 2 == 1:
            return (round - 1) * teams + first_round_pick
        else:
            return round * teams - first_round_pick + 1

    def calc_adp_adj(self):
        """Draw a random integer offset applied to the availability cutoff.

        The spread grows with the round: up to half the draft slot in round 1,
        half the league size in round 2, and the full league size afterwards.
        """
        if self.current_round == 1:
            adp_adj = random.randint(int(-(self.first_round_pick - 1)/2), int((self.first_round_pick - 1)/2))
        elif self.current_round == 2:
            adp_adj = random.randint(int(-self.teams/2), int(self.teams/2))
        else:
            adp_adj = random.randint(-self.teams, self.teams)
        return adp_adj

    def draft_player(self, selected_position):
        """Draft the best-ranked available player at ``selected_position``.

        A player is available when ``AVG`` is at or after ``self.pick`` plus a
        fresh random adjustment and the player is not already on the roster.

        Returns:
            ``(player_name, player_FPTS)``.

        Raises:
            IndexError: if no player satisfies the availability filter.
        """
        adp_adj = self.calc_adp_adj()
        cutoff = self.pick + adp_adj

        is_available = (
            (self.player_df['Position'] == selected_position)
            & (self.player_df['AVG'] >= cutoff)
            & (~self.player_df['Player'].isin(self.drafted_players))  # not already drafted
        )
        available_players = self.player_df[is_available]
        if available_players.empty:
            raise IndexError(
                f'no undrafted {selected_position} with AVG >= {cutoff} in the {self.year} player table'
            )

        selected_player_df = available_players.nsmallest(1, 'AVG')
        selected_player = selected_player_df['Player'].iloc[0]
        selected_player_points = selected_player_df['FPTS'].iloc[0]

        self.drafted_players.append(selected_player)
        self.adp_adj = adp_adj

        return selected_player, selected_player_points

    def custom_policy(self):
        """Heuristic baseline: choose the position of the best-ranked available
        player among positions that still have an open starter or FLEX slot.

        Call this before ``step`` for the round being decided. It draws its own
        random cutoff adjustment, independent of the one ``step`` will draw.

        Returns:
            The action index (key of ``position_map``) to pass to ``step``.

        Raises:
            IndexError: if no eligible player is available.
            ValueError: if the chosen player's position is not in ``position_map``.
        """
        eligible_pos = [pos for pos in self.roster if len(self.roster[pos]) < self.position_counts[pos]]
        if self.flex_count < self.position_counts['FLEX']:
            eligible_pos += self.flex_positions

        # self.pick still holds the previous round's pick at this point, so
        # compute the pick for the round that is about to be played.
        upcoming_pick = self.snake_draft_pick(self.teams, self.current_round, self.first_round_pick)
        cutoff = upcoming_pick + self.calc_adp_adj()

        is_available = (
            (self.player_df['Position'].isin(eligible_pos))
            & (self.player_df['AVG'] >= cutoff)
            & (~self.player_df['Player'].isin(self.drafted_players))  # not already drafted
        )
        available_players = self.player_df[is_available]
        if available_players.empty:
            raise IndexError(
                f'no undrafted player at {eligible_pos} with AVG >= {cutoff} in the {self.year} player table'
            )

        selected_player_position = available_players.nsmallest(1, 'AVG')['Position'].iloc[0]

        for key, value in position_map.items():
            if value == selected_player_position:
                return key
        raise ValueError(f'position {selected_player_position!r} has no action in position_map')

    def calculate_total_points(self):
        """Score the roster assuming the best players start at every slot.

        Starters are the top ``position_counts[pos]`` scorers per position; the
        FLEX slot(s) take the best remaining RB/WR/TE. Draft order is ignored.
        """
        total_points = 0
        flex_candidates = []

        for pos, players in self.roster.items():
            ranked_points = sorted((self._player_points(player) for player in players), reverse=True)
            starters = self.position_counts[pos]
            total_points += sum(ranked_points[:starters])
            if pos in self.flex_positions:
                # Everyone beyond the starters competes for FLEX.
                flex_candidates += ranked_points[starters:]

        flex_candidates.sort(reverse=True)
        # Size the FLEX slot from its own entry, not from whichever position
        # the loop happened to visit last.
        total_points += sum(flex_candidates[:self.position_counts['FLEX']])

        return total_points

In [97]:
def run_training_job(model_type
                    , use_wandb = 'y', wandb_verbose=2
                    , timesteps=1_000_000
                    # , policy='MultiInputPolicy'
                    , policy='MlpPolicy'
                    # should look into mandating that each pick position is considered
                    , n_eval_episodes=12
                    , vec_envs='n', n_envs=4
                    , sb3_model_verbose=0
                    # DQN
                    , dqn_exploration_final_eps=0.025, dqn_exploration_fraction=0.5
                    # PPO
                    # https://colab.research.google.com/drive/1GI0WpThwRHbl-Fu2RHfczq6dci5GBDVE#scrollTo=FMdJRrZ4n7xp
                    , ppo_n_steps = 1024, ppo_batch_size = 64, ppo_n_epochs = 4, ppo_gamma = 0.999, ppo_gae_lambda = 0.98, ppo_ent_coef = 0.01
                    , name_suffix = ''
                    ):
    """Train an SB3 agent on ``FantasyFootballEnv``, evaluate it, and save it.

    Args:
        model_type: one of ``'DQN'``, ``'PPO'``, ``'A2C'``.
        use_wandb: ``'y'`` to log the run to Weights & Biases; any other value
            trains without logging.
        wandb_verbose: verbosity passed to ``WandbCallback``.
        timesteps: total training timesteps (summed over all vectorised envs).
        policy: SB3 policy name.
        n_eval_episodes: episodes used for the before/after evaluations.
        vec_envs: ``'y'`` to train on ``n_envs`` vectorised envs, ``'n'`` for one.
        n_envs: number of training environments when ``vec_envs == 'y'``.
        sb3_model_verbose: verbosity passed to the SB3 model.
        dqn_*: DQN exploration schedule (only used for ``'DQN'``).
        ppo_*: PPO hyper-parameters (only used for ``'PPO'``).
        name_suffix: appended to the saved model's file name.

    Returns:
        The trained model. It is also saved under ``models/``.

    Raises:
        ValueError: if ``model_type`` or ``vec_envs`` is not a supported value.
        RuntimeError: if ``use_wandb == 'y'`` but wandb has not been imported.
    """
    # Validate up front so a typo fails before any run/env/model is created.
    supported_models = ('DQN', 'PPO', 'A2C')
    if model_type not in supported_models:
        raise ValueError(f"model_type must be one of {supported_models}, got {model_type!r}")
    if vec_envs not in ('y', 'n'):
        raise ValueError(f"vec_envs must be 'y' or 'n', got {vec_envs!r}")

    log_to_wandb = use_wandb == 'y'
    if log_to_wandb and not all(name in globals() for name in ('wandb', 'WandbCallback')):
        raise RuntimeError(
            "use_wandb='y' requires `wandb` and `WandbCallback` to be imported; "
            "import them first or pass use_wandb='n'"
        )

    config = {
    "policy_type": policy,
    "total_timesteps": timesteps,
    # "env_id": "NflEnv",
    "env_id": "FantasyFootballEnv",
    }

    # https://stable-baselines3.readthedocs.io/en/master/guide/integrations.html
    run = None
    if log_to_wandb:
        run = wandb.init(
            # project="sb3_nfl_2",
            project="sb3_FantasyFootballEnv",
            config=config,
            sync_tensorboard=True
        )

    # when using multiple environments, the total number of steps taken in counts each step taken in each environment
    # if using 4 environments and 400_000 TIMESTEPS, the agent will take a total of 100_000 steps in each environment.
    if vec_envs == 'y':
        # https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/sb3/5_custom_gym_env.ipynb
        # received 'ValueError: high is out of bounds for int32' without the seed
        env = make_vec_env(env_id = FantasyFootballEnv, n_envs=n_envs, seed=1)
        eval_env = make_vec_env(env_id = FantasyFootballEnv, n_envs=1, seed=1)
    else:
        env = FantasyFootballEnv()
        eval_env = FantasyFootballEnv()

    # Build the keyword arguments once; the wandb case only adds the
    # tensorboard directory that wandb syncs from.
    model_kwargs = {'verbose': sb3_model_verbose}
    if log_to_wandb:
        model_kwargs['tensorboard_log'] = f"runs/{run.id}"

    if model_type == 'DQN':
        # default values for these parameters are exploration_final_eps=0.05 and exploration_fraction=0.1
        # with the default values, the exploration rate will linearly decrease to 0.05 over the first 10% of the timesteps
        model_cls = DQN
        model_kwargs.update(exploration_final_eps=dqn_exploration_final_eps
                            , exploration_fraction=dqn_exploration_fraction)
    elif model_type == 'PPO':
        model_cls = PPO
        model_kwargs.update(n_steps=ppo_n_steps, batch_size=ppo_batch_size, n_epochs=ppo_n_epochs
                            , gamma=ppo_gamma, gae_lambda=ppo_gae_lambda, ent_coef=ppo_ent_coef)
    else:
        model_cls = A2C

    model = model_cls(config["policy_type"], env, **model_kwargs)

    mean_reward, std_reward = evaluate_policy(model=model, env=eval_env, n_eval_episodes=n_eval_episodes)
    print(f"mean_reward before training:{mean_reward:.2f} +/- {std_reward:.2f}")

    if log_to_wandb:
        try:
            model.learn(
                total_timesteps=config["total_timesteps"],
                callback=WandbCallback(
                    model_save_path=f"models/{run.id}",
                    verbose=wandb_verbose,
                ),
            )
        finally:
            # Close the wandb run even when training is interrupted.
            run.finish()
    else:
        model.learn(total_timesteps=config["total_timesteps"])

    mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=n_eval_episodes)
    print(f"mean_reward after training:{mean_reward:.2f} +/- {std_reward:.2f}")

    # parameters_saved = model.get_parameters()

    if vec_envs == 'y':
        model.save(f"models/{model_type}_{timesteps}_vecEnv{name_suffix}")
    else:
        model.save(f"models/{model_type}_{timesteps}{name_suffix}")

    return model

In [98]:
# model_type = 'PPO'
# time_steps = 250_000
# model = run_training_job(model_type,timesteps=time_steps, use_wandb='n', ppo_gamma=1)



mean_reward before training:908.66 +/- 63.18




mean_reward after training:1274.97 +/- 87.12


In [99]:
# model_type = 'PPO'
# time_steps = 500_000
# model = run_training_job(model_type,timesteps=time_steps, use_wandb='n', ppo_gamma=1)

In [103]:
def draft(teams=12, rounds=7, first_round_pick=None, year=None, model=None, actions=None, custom_policy=False):
    """Run one draft on a fresh ``FantasyFootballEnv`` and print every pick.

    Exactly one action source is used, in this order of precedence:
    ``model`` (its ``predict`` output), ``actions`` (one action index per
    round), or the environment's ``custom_policy`` heuristic.

    Args:
        teams: number of teams in the league.
        rounds: number of rounds to draft.
        first_round_pick: draft slot; chosen by the environment if None.
        year: season to draft from; chosen by the environment if None.
        model: object with a ``predict(observation)`` method, or None.
        actions: sequence of at least ``rounds`` action indices, or None.
        custom_policy: use the environment's heuristic when True.

    Raises:
        ValueError: if no action source is given, or ``actions`` is the
            source and has fewer than ``rounds`` entries.
    """
    # Fail before building the environment instead of hitting an unbound
    # `action` (or an IndexError) part-way through the draft.
    if model is None and actions is None and not custom_policy:
        raise ValueError('provide a model, a list of actions, or custom_policy=True')
    if model is None and actions is not None and len(actions) < rounds:
        raise ValueError(f'actions has {len(actions)} entries but the draft has {rounds} rounds')

    env = FantasyFootballEnv(teams=teams, rounds=rounds, first_round_pick=first_round_pick, year=year)
    state, info = env.reset(first_round_pick=first_round_pick, year=year)
    for i in range(rounds):
        if model is not None:
            action, _states = model.predict(state)
            action = int(action)  # If action is an array with a single value that can be directly converted to int
        elif actions is not None:
            action = actions[i]
        else:
            action = env.custom_policy()

        state, reward, done, _truncated, info = env.step(action)
        print(f'Round {info["round"]}, Pick {info["pick"]}, adp_adj {info["adp_adj"]}, Pos {info["selected_position"]}, Player {info["selected_player"]}, points {info["selected_player_points"]}, reward {info["reward"]}')
        if done:
            print(f'total score: {reward}\n')
            break

In [109]:
# Fixed scenario so both strategies below draft from the same slot and season.
year, first_round_pick = 2022, 1
print(f'first round pick: {first_round_pick}\n')

# Scripted RB-first draft: RB, RB, WR, WR, WR, TE, QB (indices per position_map).
print('run first manual')
actions = [1, 1, 2, 2, 2, 3, 0]
draft(year=year, first_round_pick=first_round_pick, actions=actions)

# Same scenario, driven by the environment's built-in heuristic.
print('custom_policy')
draft(year=year, first_round_pick=first_round_pick, custom_policy=True)

first round pick: 1

run first manual
Round 1, Pick 1, adp_adj 0, Pos RB, Player Jonathan Taylor, points 132.4, reward 0
Round 2, Pick 24, adp_adj -6, Pos RB, Player Leonard Fournette, points 190.6, reward 0
Round 3, Pick 25, adp_adj -1, Pos WR, Player Michael Pittman, points 167.0, reward 0
Round 4, Pick 48, adp_adj -7, Pos WR, Player Terry McLaurin, points 190.5, reward 0
Round 5, Pick 49, adp_adj 10, Pos WR, Player Brandin Cooks, points 117.1, reward 0
Round 6, Pick 72, adp_adj 5, Pos TE, Player Dawson Knox, points 111.7, reward 0
Round 7, Pick 73, adp_adj -4, Pos QB, Player Tom Brady, points 280.5, reward 1189.8
total score: 1189.8

custom_policy
Round 1, Pick 1, adp_adj 0, Pos RB, Player Jonathan Taylor, points 132.4, reward 0
Round 2, Pick 24, adp_adj 5, Pos RB, Player James Conner, points 177.2, reward 0
Round 3, Pick 25, adp_adj 2, Pos QB, Player Patrick Mahomes, points 428.9, reward 0
Round 4, Pick 48, adp_adj 11, Pos WR, Player Brandin Cooks, points 117.1, reward 0
Round 5, P

In [156]:
# Random scenario shared by both saved agents.
year, first_round_pick = random.randint(2018, 2022), random.randint(1, 12)
model_type = 'PPO'

print(f'year: {year}, first round pick: {first_round_pick}\n')

# Load each saved checkpoint in turn and let it draft in the same scenario.
for agent_label, time_steps in (('agent 1', 250_000), ('agent 2', 500_000)):
    print(agent_label)
    model = PPO.load(f'models/{model_type}_{time_steps}')
    draft(year=year, first_round_pick=first_round_pick, model=model)

year: 2020, first round pick: 8

agent 1
Round 1, Pick 8, adp_adj -3, Pos WR, Player Michael Thomas, points 63.9, reward 0
Round 2, Pick 17, adp_adj 3, Pos TE, Player George Kittle, points 101.1, reward 0
Round 3, Pick 32, adp_adj 8, Pos QB, Player Dak Prescott, points 139.1, reward 0
Round 4, Pick 41, adp_adj 0, Pos RB, Player Le'Veon Bell, points 66.6, reward 0
Round 5, Pick 56, adp_adj 5, Pos RB, Player D'Andre Swift, points 166.8, reward 0
Round 6, Pick 65, adp_adj 9, Pos RB, Player Ronald Jones, points 172.3, reward 0
Round 7, Pick 80, adp_adj 4, Pos WR, Player Julian Edelman, points 45.7, reward 755.5000000000001
total score: 755.5000000000001

agent 2
Round 1, Pick 8, adp_adj 1, Pos QB, Player Patrick Mahomes, points 380.3, reward 0
Round 2, Pick 17, adp_adj 6, Pos TE, Player Mark Andrews, points 141.1, reward 0
Round 3, Pick 32, adp_adj 5, Pos RB, Player Jonathan Taylor, points 234.8, reward 0
Round 4, Pick 41, adp_adj -6, Pos WR, Player A.J. Brown, points 212.5, reward 0
Round

In [163]:
# Random scenario shared by the heuristic and the trained agent.
year, first_round_pick = random.randint(2018, 2022), random.randint(1, 12)
model_type = 'PPO'

print(f'year: {year}, first round pick: {first_round_pick}\n')

# Baseline: the environment's built-in heuristic.
print('custom_policy')
draft(year=year, first_round_pick=first_round_pick, custom_policy=True)

# Trained agent loaded from the saved 500k-timestep checkpoint.
print('agent')
time_steps = 500_000
model = PPO.load(f'models/{model_type}_{time_steps}')
draft(year=year, first_round_pick=first_round_pick, model=model)

year: 2020, first round pick: 9

custom_policy
Round 1, Pick 9, adp_adj 4, Pos WR, Player Julio Jones, points 120.6, reward 0
Round 2, Pick 16, adp_adj 0, Pos RB, Player Austin Ekeler, points 138.3, reward 0
Round 3, Pick 33, adp_adj -3, Pos WR, Player Odell Beckham, points 75.3, reward 0
Round 4, Pick 40, adp_adj -3, Pos WR, Player A.J. Brown, points 212.5, reward 0
Round 5, Pick 57, adp_adj -8, Pos RB, Player Raheem Mostert, points 91.7, reward 0
Round 6, Pick 64, adp_adj -1, Pos QB, Player Matt Ryan, points 293.3, reward 0
Round 7, Pick 81, adp_adj -1, Pos TE, Player Rob Gronkowski, points 126.8, reward 1058.5
total score: 1058.5

agent
Round 1, Pick 9, adp_adj -2, Pos QB, Player Patrick Mahomes, points 380.3, reward 0
Round 2, Pick 16, adp_adj 2, Pos TE, Player Travis Kelce, points 260.3, reward 0
Round 3, Pick 33, adp_adj -3, Pos RB, Player Chris Carson, points 169.3, reward 0
Round 4, Pick 40, adp_adj 8, Pos WR, Player Tyler Lockett, points 215.4, reward 0
Round 5, Pick 57, adp_a