<a href="https://colab.research.google.com/github/icehube/Kovy/blob/main/Kovy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import os
import copy
import time
from datetime import datetime
import json
import pandas as pd

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

from collections import deque

# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the `lib` package imported below is not visible in this
# notebook; if it lives on Drive, the working directory or sys.path presumably
# has to point at it before those imports can succeed — confirm.
drive.mount('/content/drive')

from lib.env import *
from lib.dqn_utils import *


In [None]:

# Single source of truth for the experiment. The whole dictionary is dumped to
# config.json next to the logs and checkpoints, so it must stay JSON-serialisable.
config = dict(
    # Read by the training cells: replay buffer size, target-network sync
    # period, epsilon schedule, optimiser learning rate, discount factor and
    # mini-batch size.
    train_config=dict(
        replay_buffer_size=10000,
        target_model_sync_period=1000,
        epsilon_frames=10000,
        epsilon_start=1.0,
        epsilon_final=0.1,
        learning_rate=0.0001,
        gamma=0.99,
        batch_size=64,
    ),
    # Passed unchanged to Env(...).
    # NOTE(review): the numeric entries look like auction budget / bid limits
    # and per-position roster sizes — confirm against lib.env.
    env_config=dict(
        budget=56.8,
        min_bid=0.5,
        max_bid=11.4,
        bid_unit=0.1,
        forward=12,
        defense=6,
        goalie=2,
        bench=4,
        teams=[
            'GVR', 'MAC', 'BOT', 'SHF', 'ZSK', 'LGN',
            'SRL', 'LPT', 'HSM', 'JHN', 'VPP',
        ],
        agent_team='BOT',
    ),
)


In [None]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

train_config = config['train_config']

data = load_data('players.csv')

# Every run gets its own timestamped name, used for both output directories.
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
run_name = 'DQN_%s' % timestamp

# One directory for TensorBoard logs, one for model checkpoints.
for out_dir in ('runs/%s' % run_name, 'models/%s' % run_name):
    os.makedirs(out_dir, exist_ok=True)

writer = SummaryWriter('runs/%s' % run_name)

# Keep a copy of the exact configuration beside the logs and the checkpoints
# so a run can be identified later from either location.
config_paths = (
    'runs/%s/config.json' % run_name,
    'models/%s/config.json' % run_name,
)
for config_path in config_paths:
    with open(config_path, 'w') as f:
        json.dump(config, f, indent=4)

env = Env(data, config['env_config'])

# Online network plus a structurally identical copy used as the target network.
main_model = Model_v1(env.n_states, env.n_actions).to(device)
target_model = copy.deepcopy(main_model)

buffer = ExperienceReplayBuffer(buffer_size=train_config['replay_buffer_size'])
optimizer = optim.Adam(main_model.parameters(), lr=train_config['learning_rate'])

NameError: ignored

In [None]:
# Double-DQN training loop.
#
# Each iteration plays one environment step with an epsilon-greedy policy,
# stores the transition in the replay buffer and, once the buffer is full,
# performs one gradient step on a sampled mini-batch. The loop has no exit
# condition; it runs until the cell is interrupted.
frame_idx = 0

sync_period = train_config['target_model_sync_period']
loss_fn = nn.MSELoss()  # stateless, so it is built once outside the loop

while True:
    frame_idx += 1

    # Linear decay from epsilon_start, clamped at epsilon_final.
    # NOTE(review): with the current config the decay finishes while the
    # buffer is still warming up, so training always runs at epsilon_final
    # — confirm this is intended.
    epsilon = max(
        train_config['epsilon_final'],
        train_config['epsilon_start'] - frame_idx / train_config['epsilon_frames'],
    )

    if env.done:
        env.reset()
    state = env.state

    if np.random.rand() < epsilon:
        action = np.random.choice(env.n_actions)
    else:
        # The state tensor is only needed for the greedy branch, and action
        # selection never needs gradients.
        state_v = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device)
        with torch.no_grad():
            action = main_model(state_v).cpu().numpy().argmax()

    next_state, reward, done, _ = env.step(action)

    buffer.add(state, action, reward, done, next_state)

    # Warm-up: no optimisation (and no logging / checkpointing below) until
    # the replay buffer has been filled completely.
    if len(buffer.state_buffer) < train_config['replay_buffer_size']:
        continue

    states, actions, rewards, dones, next_states = buffer.sample(train_config['batch_size'])

    # Explicit dtypes: gather() requires int64 indices, the loss needs both
    # operands in float32, and mask indexing needs a bool tensor (uint8 masks
    # are deprecated).
    states_v = torch.tensor(states, dtype=torch.float32).to(device)
    next_states_v = torch.tensor(next_states, dtype=torch.float32).to(device)
    actions_v = torch.tensor(actions, dtype=torch.int64).to(device)
    rewards_v = torch.tensor(rewards, dtype=torch.float32).to(device)
    done_mask = torch.tensor(dones, dtype=torch.bool).to(device)

    # Q(s, a) of the action actually taken. The model output has one row per
    # sample and one column per action, so the lookup must run along dim 1;
    # gathering along dim 0 would index into other samples of the batch.
    state_action_values = main_model(states_v).gather(1, actions_v.unsqueeze(-1)).squeeze(-1)

    # Double-DQN target: the online network picks the next action, the target
    # network evaluates it. Nothing here should receive gradients.
    with torch.no_grad():
        next_state_actions = main_model(next_states_v).max(1)[1]
        next_state_values = target_model(next_states_v).gather(
            1, next_state_actions.unsqueeze(-1)).squeeze(-1)
        next_state_values[done_mask] = 0.0  # terminal states have no future return
        expected_state_action_values = next_state_values * train_config['gamma'] + rewards_v

    optimizer.zero_grad()
    loss_v = loss_fn(state_action_values, expected_state_action_values)
    loss_v.backward()
    optimizer.step()

    if frame_idx % sync_period == 0:
        target_model.load_state_dict(main_model.state_dict())
        # NOTE(review): this pickles the whole module; saving state_dict() is
        # the more portable format, but the file format is kept as-is so that
        # existing loading code keeps working.
        torch.save(main_model, 'models/%s/model_%d.pth' % (run_name, frame_idx // sync_period))

    if frame_idx % 100 == 0:
        writer.add_scalar('Epsilon', epsilon, frame_idx)
