<a href="https://colab.research.google.com/github/jimjoseph51000/fpl-simulator/blob/main/simulator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /gdrive so the notebook can reach files kept under /gdrive/MyDrive.
# Colab-only: `google.colab` is not importable outside a Colab runtime.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
% cd /gdrive/MyDrive/CSE_519_DSF/gitcode/fpl-simulator-main/

/gdrive/MyDrive/CSE_519_DSF/gitcode/fpl-simulator-main


In [3]:
% ls

'CSE_519_DSF_Proposal___Fantasy_Premier_League (1).pdf'   player_types.csv
'CSE_519___FPL_Progress_Report (1).pdf'                  [0m[01;34m'Progress Report'[0m/
 LICENSE                                                  [01;34m__pycache__[0m/
 model.py                                                 README.md
 [01;34mmodels[0m/                                                  readme_plot.png
 [01;34mPlayer_Cost_Weekwise[0m/                                   'Sanity Checker.ipynb'
'Player Embedding.ipynb'                                  scout.py
'Player Profiles.ipynb'                                   simulator.ipynb
 [01;34mPlayer_Rankings[0m/                                         simulator.py
 [01;34mPlayers_Weekwise[0m/


In [4]:
import numpy as np
import pandas as pd
import os
from matplotlib import pyplot as plt

from simulator import *
from model import *
from scout import * 
# from util import *

In [9]:

import math
import glob
import io
import base64
import time
from IPython.display import HTML
from IPython import display as ipythondisplay
from collections import namedtuple
from itertools import count

# Colab comes with PyTorch
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import torch.nn.functional as F

import datetime
import copy

def random_action():
  """Draw one action uniformly at random.

  Reads the notebook-level globals `N_actions` and `possible_actions`.

  Returns:
    (action, index): the selected row of `possible_actions` and its position.
  """
  picked = np.random.randint(N_actions)
  action = possible_actions[picked]
  return action, picked

def epsilon_greedy_action(env, state, model, epsilon, t):
  """Choose an action for step `t` with an epsilon-greedy policy over `possible_actions`.

  Every candidate action is scored by `model` on the concatenation
  [state, one-hot(t), action]. The highest-scoring action is sampled with
  probability (1 - epsilon) + epsilon / N_actions; every other action with
  probability epsilon / N_actions.

  Reads the notebook-level globals `N_states`, `N_actions`, `current_week`
  and `possible_actions`.

  Args:
    env: unused; kept so existing callers keep working.
    state: 1-D observation of length `N_states`.
    model: network mapping a (N_actions, features) batch to a (N_actions, 1) output.
    epsilon: exploration rate.
    t: index of the current step, used for the one-hot week vector.

  Returns:
    (action, index): the selected row of `possible_actions` and its position.

  Side effects:
    Leaves `model` in training mode on return.
  """
  state = np.array(state) # (N_states,)
  assert(state.ndim == 1)
  # Checked against N_states (not a literal 620) so it agrees with the input-shape check below.
  assert(state.shape == (N_states,))

  week_vector = np.zeros(current_week)
  week_vector[t] = 1.0
  state = np.append(state, week_vector) # (N_states + current_week,)

  # Repeat the state once per candidate action and append each action's vector,
  # so that all candidates are scored in a single forward pass.
  tiled_state = np.broadcast_to(state, (possible_actions.shape[0], state.shape[0]))
  net_input = np.hstack((tiled_state, possible_actions))
  assert(net_input.shape == (N_actions, possible_actions.shape[1] + N_states + current_week))

  # Same device as before on a CUDA machine; falls back to CPU instead of crashing otherwise.
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  X = torch.from_numpy(net_input).float().to(device)
  model.eval()
  with torch.no_grad(): # scoring only: no autograd graph is needed here
    Y = model(X)
  assert(Y.shape == (N_actions,1))
  Y = Y.cpu().numpy()
  best_idx = np.argmax(Y.reshape(-1))

  # Soft selection: the greedy action is not chosen outright, exploration mass is shared evenly.
  # Builtin float replaces the np.float alias, which NumPy 1.24 removed.
  selection_prob_arr = np.full(N_actions, epsilon / N_actions, dtype=float)
  selection_prob_arr[best_idx] = (epsilon / N_actions) + (1 - epsilon)
  assert(np.isclose(selection_prob_arr.sum(), 1.0))
  idx = np.random.choice(N_actions,1,replace=False, p = selection_prob_arr)
  model.train()
  return possible_actions[idx[0]], idx[0]

def calculate_episode_return(episode, gamma):
  """Compute the discounted return G_t for every step of one episode.

  Uses the backward recursion G_t = r_t + gamma * G_{t+1}, with the return
  after the final step taken as 0.

  Args:
    episode: sequence of [state, action_idx, reward] rows, one per step.
    gamma: discount factor.

  Returns:
    1-D float array G with one entry per step; G[0] is the return of the
    whole episode.

  Notes:
    The episode length is no longer required to equal the notebook global
    `current_week`, so an episode that ended early can still be evaluated.
  """
  # Rows mix an array-valued state with scalars; dtype=object keeps the (T, 3)
  # layout on NumPy >= 1.24, which refuses to infer it for ragged input.
  episode = np.array(episode, dtype=object)
  assert(episode.ndim == 2)
  rewards_array = episode[:,2]
  n_steps = rewards_array.shape[0]

  # Builtin float replaces the np.float alias, which NumPy 1.24 removed.
  G = np.zeros(n_steps, dtype=float)
  for i in reversed(range(n_steps)):
    future_return = G[i+1] if i + 1 < n_steps else 0
    G[i] = rewards_array[i] + gamma * future_return
  return G


def update_Q_value(G, episode, model, optimizer):
  """Run one gradient step regressing the model's outputs onto the episode returns.

  Each step of the episode becomes one training row
  [state, one-hot(step index), action vector] whose target is the matching
  entry of `G`; the loss is the mean squared error over those rows.

  Reads the notebook-level globals `possible_actions` and `current_week`.

  Args:
    G: 1-D array of returns, one per episode step.
    episode: sequence of [state, action_idx, reward] rows, one per step.
    model: network producing one value per input row.
    optimizer: optimizer built over `model`'s parameters.

  Returns:
    (loss, output, target) as NumPy arrays: the scalar loss, the model
    outputs computed before the step, and the regression targets.

  Side effects:
    Moves `model` to the GPU when one is available, updates its parameters,
    and leaves it in eval mode.
  """
  # Rows mix an array-valued state with scalars; dtype=object keeps the (T, 3)
  # layout on NumPy >= 1.24, which refuses to infer it for ragged input.
  episode = np.array(episode, dtype=object)
  n_steps = episode.shape[0]

  # Same device as before on a CUDA machine; falls back to CPU instead of crashing otherwise.
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model = model.to(device)
  model.train()
  optimizer.zero_grad()
  loss_criterion = nn.MSELoss()

  # Builtin int/float replace the np.int/np.float aliases, which NumPy 1.24 removed.
  action_arr = possible_actions[episode[:,1].astype(int),:] # (n_steps, possible_actions.shape[1])
  state_arr = np.array([s.tolist() for s in episode[:,0]], dtype=float) # (n_steps, state length)
  assert(state_arr.shape[0] == action_arr.shape[0])

  # One-hot step index per row; sliced so an episode shorter than current_week still stacks.
  week_vector = np.eye(current_week)[:n_steps] # (n_steps, current_week)
  state_arr = np.hstack((state_arr, week_vector))
  X = torch.from_numpy(np.hstack((state_arr, action_arr))).float().to(device)
  Y = torch.from_numpy(G[:,np.newaxis]).float().to(device) # (n_steps, 1)

  output = model(X)
  assert(output.shape == Y.shape)
  loss = loss_criterion(output,Y)
  loss.backward()
  optimizer.step()
  assert(loss.grad_fn is not None)
  model.eval()
  return loss.detach().cpu().numpy(), output.detach().cpu().numpy(), Y.detach().cpu().numpy()

### Model Training


In [11]:
"""
Training the model for MC Control for FPL environment
"""
# % rm /content/video/*.*

# Monte Carlo control training loop: play one episode per iteration with an
# epsilon-greedy policy, compute the per-step returns, and take one gradient
# step that fits the network to those returns.
print('Training the model for MC Control for FPL environment')

save_path = './models/'
os.makedirs(save_path, exist_ok=True)
print('save path :{}'.format(save_path))

# Seed every RNG in use so that runs are repeatable.
np.random.seed(11)
# NOTE(review): `random` is never imported explicitly in this notebook; presumably it
# arrives through one of the star imports — confirm.
random.seed(11)
torch.manual_seed(11)
torch.backends.cudnn.enabled = False
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.cuda.manual_seed(11)
torch.cuda.manual_seed_all(11)
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

epsilon = 0.2
epsilon_decay = 0.99
epsilon_min = 0.1
gamma = 1.0

# -----------------------------------------------
# TODO : try adjusting these params
n_episode = 10000     # number of episodes
max_steps = 1000  # maximum steps for each episode (not referenced in this cell)
lr = 1e-3
epsilon_iter_delta = 200 # this needs to be high . need more exploration

fpl_manager_id = '2757'
current_week = 10
env = FPLSimulator(current_week, fpl_manager_id, req_cols=['stats.ict_index','element_type'])
min_balance = 10
current_balance = env.balance
K = 15

N_actions = len(profiles)
N_states = env.all_player_ids.shape[0] # (620,)
possible_actions = np.array([d['prob_dist'] for d in profiles])

network = RecruiterNetwork(N_states + current_week, possible_actions.shape[1])
# Use the DEVICE selected above rather than an unconditional .cuda().
network = network.to(DEVICE)
optimizer = optim.Adam(network.parameters(), lr=lr)
network.train()

for i_episode in range(n_episode):
  observation = env.reset() # get the state
  episode = []

  # Decay epsilon once every `epsilon_iter_delta` episodes; episode 0 matches the
  # condition, so the first decay is applied before any episode is played.
  if i_episode % epsilon_iter_delta == 0:
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

  # One decision per game week.
  for t in range(current_week):
    scout = Scout(env, t, min_balance, env.balance, K)

    s = observation
    a, action_idx = epsilon_greedy_action(env, s, network, epsilon, t)
    observation, r, done = env.step(action_idx, t, scout)
    episode.append([s,action_idx,r])
    # NOTE(review): an early `done` yields an episode shorter than current_week;
    # the helpers called below need to cope with that.
    if done:
      break

  G_all = calculate_episode_return(episode,gamma)
  loss,out,gt = update_Q_value(G_all, episode, network, optimizer)
  if i_episode % 100 == 0:
    print('{} # {}, epsilon {},return {}, out {}, Y {} loss : {}'.format(datetime.datetime.now(), i_episode , epsilon, G_all[0], out.mean(), gt.mean(), loss))


print('Done !')
# Announce the save only after the weights are actually written.
torch.save(network.state_dict(), os.path.join(save_path,'recruiter_mgr_id_{}.pth'.format(fpl_manager_id)))
print('saved model')

Training the model for MC Control for FPL environment
save path :./models/
(620,) (620, 10) (620, 10) (2, 620, 10)
cumsum of per_week_total_points:  [ 80. 166. 238. 297. 355. 401. 436. 484. 540. 588.]
(15, 10) (15, 10) (15, 10) (2, 15, 10)
2021-12-01 04:51:49.142388 # 0, epsilon 0.495,return -4.0, out -0.07043598592281342, Y -2.200000047683716 loss : 6.89596700668335
2021-12-01 04:51:56.138292 # 100, epsilon 0.495,return -2.0, out -0.9195262789726257, Y -1.600000023841858 loss : 0.7216588258743286
2021-12-01 04:52:03.068090 # 200, epsilon 0.49005,return -2.0, out -0.8611918687820435, Y -1.2000000476837158 loss : 0.2681889533996582
2021-12-01 04:52:09.938022 # 300, epsilon 0.49005,return 0.0, out -0.7002339363098145, Y 0.0 loss : 0.7159218788146973
2021-12-01 04:52:16.859820 # 400, epsilon 0.48514949999999996,return -3.0, out -1.3210338354110718, Y -1.7000000476837158 loss : 0.38828524947166443
2021-12-01 04:52:23.952096 # 500, epsilon 0.48514949999999996,return -2.0, out -0.76113027334

KeyboardInterrupt: ignored

In [None]:
# Scratch check.
# NOTE(review): np.eye(5) holds only 0s and 1s, so no entry is > 1 and this
# assertion raises AssertionError every time the cell runs.
test = np.eye(5)
assert(np.all(test > 1))

In [None]:
# Third column of the most recent episode, i.e. the `r` stored in each [s, action_idx, r] row.
# dtype=object is required on NumPy >= 1.24, which rejects ragged rows without it.
np.array(episode, dtype=object)[:,2]