In [1]:
import pandas as pd
import numpy as np
import torch as T
import json 
import os
from Network.deepnetwork2 import ImitationNetwork
from Memory.dataset import *
from torch.utils.data import DataLoader
from Utils.utils import *
from sklearn.metrics import precision_score, recall_score
import torch.nn as nn
from DQNAgent import DQNAgent
from ImitationAgent import ImitationAgent
from DDQNAgent import DDQNAgent

In [2]:
from torch.utils.tensorboard import SummaryWriter

# One TensorBoard writer per evaluated policy. Every run directory follows the
# pattern 'TensorBoard/Evaluate_Train_<tag>_TotalReward', and the writers are
# created in the same order as the tags listed below.
(
    writer_Imitation,
    writer_DQN,
    writer_DDQN,
    writer_NFSP,
    writer_NFSP_DDQN,
) = (
    SummaryWriter(f'TensorBoard/Evaluate_Train_{tag}_TotalReward')
    for tag in ('Imitation', 'DQN', 'DDQN', 'NFSP', 'NFSP_DDQN')
)

In [3]:
# Load the recorded training samples from the imitation log directory.
# The evaluation loop later in this notebook iterates over `dataset.data`.
# NOTE(review): `Feeder3` is not imported by name here; presumably it comes from
# the wildcard import of `Memory.dataset` -- confirm.
dataset = Feeder3('Log/ImitationLog/Train/')

In [None]:
# Shuffled mini-batch loader over the same dataset (256 samples per batch,
# 20 worker processes). No cell visible in this notebook consumes it; the
# evaluation loop reads `dataset.data` directly.
dataloader = DataLoader(
    dataset,
    batch_size=256,
    shuffle=True,
    num_workers=20,
)

In [4]:
##############################################################
#### Build every agent and restore its saved checkpoint   ####
##############################################################
# The three loaders below hold the hyper-parameters that were previously
# repeated for each agent; only the checkpoint location differs per agent.


def _load_imitation_agent(checkpoint_path):
    """Create an ImitationAgent, restore it from ``checkpoint_path`` and return it."""
    # NOTE(review): input_dims is hard-coded to 516 here while the DQN/DDQN
    # agents use INPUT_DIM -- confirm whether the two are meant to be equal.
    agent = ImitationAgent(
        alpha=0.001,
        input_dims=516,
        n_actions=NUM_ACTIONS,
        mem_size=50000,
        batch_size=32,
        replace=1000,
        checkpoint_dir=checkpoint_path,
        algo='Imitation',
        env_name='Quest_of_Divinity',
    )
    agent.load_models()
    return agent


def _load_dqn_agent(checkpoint_dir):
    """Create a DQNAgent, restore it from ``checkpoint_dir`` and return it."""
    agent = DQNAgent(
        gamma=0.1,
        epsilon=0.7,
        alpha=0.0001,
        replace_target_cnt=50,
        input_dims=INPUT_DIM,
        n_actions=NUM_ACTIONS,
        mem_size=50000,
        eps_min=0.1,
        batch_size=32,
        replace=1000,
        eps_dec=1e-5,
        checkpoint_dir=checkpoint_dir,
        algo='DQNAgent',
        env_name='Quest_of_Divinity',
    )
    agent.load_models()
    return agent


def _load_ddqn_agent(checkpoint_dir):
    """Create a DDQNAgent, restore it from ``checkpoint_dir`` and return it."""
    # DDQNAgent spells two keywords differently from DQNAgent
    # (`input_dim` / `chkpt_dir` instead of `input_dims` / `checkpoint_dir`).
    agent = DDQNAgent(
        gamma=0.1,
        epsilon=0.7,
        alpha=0.0001,
        input_dim=INPUT_DIM,
        n_actions=NUM_ACTIONS,
        mem_size=50000,
        eps_min=0.1,
        batch_size=32,
        replace=1000,
        eps_dec=1e-5,
        chkpt_dir=checkpoint_dir,
        algo='DDQNAgent',
        env_name='Quest_of_Divinity',
    )
    agent.load_models()
    return agent


# ---- Stand-alone agents -------------------------------------------------
agentImitation = _load_imitation_agent(
    'Models/Evaluate_Train/1stImitationAgent_OldData.ckpt')
agentDQN = _load_dqn_agent('Models/Evaluate_Train')
agentDDQN = _load_ddqn_agent('Models/Evaluate_Train')

# ---- DDQN NFSP pair (imitation policy + DDQN policy) --------------------
agentImitation_DDQN_NFSP = _load_imitation_agent(
    'Models/Evaluate_Train/NFSP/1stImitationAgent_OldData.ckpt')
agentDDQN_DDQN_NFSP = _load_ddqn_agent('Models/Evaluate_Train/NFSP/')

# ---- DQN NFSP pair (imitation policy + DQN policy) ----------------------
# NOTE(review): the checkpoint with the `_DDQN` suffix is loaded for the DQN
# pair, while the DDQN pair above loads the un-suffixed file. This looks
# swapped -- confirm against how the checkpoints were saved.
agentImitation_DQN_NFSP = _load_imitation_agent(
    'Models/Evaluate_Train/NFSP/1stImitationAgent_OldData_DDQN.ckpt')
agentDQN_DQN_NFSP = _load_dqn_agent('Models/Evaluate_Train/NFSP/')

########## Loading checkpoint ##########
########## Loading checkpoint ##########
########## Loading checkpoint ##########
########## Loading checkpoint ##########
########## Loading checkpoint ##########
########## Loading checkpoint ##########
########## Loading checkpoint ##########
########## Loading checkpoint ##########


In [5]:
# Running reward totals, one per evaluated policy. The evaluation loop adds the
# reward of each chosen action and logs the running total to TensorBoard.
total_score_imi = 0.0
total_score_dqn = 0.0
total_score_ddqn = 0.0
total_score_nfsp = 0.0
total_score_ddqn_nfsp = 0.0

# Device the imitation / DDQN input states are moved to.
# This notebook imports torch only under the alias `T`; the bare name `torch`
# is available solely if a wildcard import happens to leak it, so use the alias.
DEVICE = T.device('cuda:0')

# --- DQN NFSP mixing state ---------------------------------------------------
# score_* / num_*: reward sum and number of picks per sub-policy inside the
#                  current rebalancing window.
# nuy_DQN_NFSP:    probability of letting the DQN (rather than the imitation
#                  policy) act on a sample; starts at an even split.
# count_NFSP:      number of rebalancing checks that could not be computed
#                  because one sub-policy had no picks; used by the loop to
#                  reset the mixing probability to 0.5.
score_DQN_NFSP = 0.0
score_Imi_NFSP = 0.0
num_DQN_NFSP = 0
num_Imi_NFSP = 0
nuy_DQN_NFSP = 0.5
count_NFSP = 0

# --- DDQN NFSP mixing state (same roles as the DQN NFSP block) -----------------
score_DDQN_NFSP = 0.0
score_Imi_DDQN_NFSP = 0.0
num_DDQN_NFSP = 0
num_Imi_DDQN_NFSP = 0
nuy_DDQN_NFSP = 0.5
count_DDQN_NFSP = 0

In [None]:
# Evaluation loop: replay every recorded sample through the five policies
# (Imitation, DQN, DQN-NFSP, DDQN-NFSP, DDQN), accumulate the reward of the
# action each one picks, log the running totals to TensorBoard, and keep
# training the RL / NFSP agents online.
#
# Every policy processes every sample. A failed rebalancing check or a missing
# next state (last sample) only skips the step that needs it; it never skips
# the remaining agents, so the five curves stay comparable.

# Slice boundaries used to collapse the per-action reward list into groups;
# index 289 is the final single action.
NFSP_ACTION_GROUP_BOUNDS = (0, 56, 105, 114, 177, 289)

# NOTE(review): the stand-alone DQN input is moved to cuda:1 while the other
# agents use DEVICE -- presumably this matches where agentDQN's network lives;
# confirm.
DQN_DEVICE = T.device('cuda:1')


def _decode_next_state(index):
    """Return the decoded state of sample ``index + 1``, or None when ``index``
    is the last sample (no successor transition exists)."""
    try:
        return decode_state_old_test(dataset.data[index + 1])
    except IndexError:
        return None


def _group_max_rewards(reward):
    """Build the 6-element list handed to the imitation agents' ``store``.

    Takes the maximum reward over each slice [0:56], [56:105], [105:114],
    [114:177], [177:289], followed by the single value ``reward[289]``.
    """
    bounds = NFSP_ACTION_GROUP_BOUNDS
    grouped = [max(reward[lo:hi]) for lo, hi in zip(bounds, bounds[1:])]
    grouped.append(reward[bounds[-1]])
    return grouped


for i, data in enumerate(dataset.data):
    # Decoded state and per-action rewards are shared by the three stand-alone
    # agents. Each agent builds its own tensor copy from `state_raw`, and the
    # rewards are only indexed.
    # Assumes decode_state_old_test / Reward are pure functions of the sample.
    state_raw = decode_state_old_test(data)
    reward_all = Reward(data['player_board_card_info'],
                        data['opponent_board_card_info'],
                        data['player_hand_card_id'],
                        data['opponent_life'],
                        data['player_life'],
                        data['player_gold'])

    # ------------------------------------------------------------------
    # Imitation (inference only)
    # ------------------------------------------------------------------
    state_imi = T.from_numpy(np.array(state_raw)).float().to(DEVICE)
    action_imi = T.argmax(agentImitation.network.forward(state_imi).detach().cpu()).item()
    total_score_imi += reward_all[action_imi]
    writer_Imitation.add_scalar('Total Score Train', total_score_imi, i)

    # ------------------------------------------------------------------
    # DQN (greedy action, then online update)
    # ------------------------------------------------------------------
    state_dqn = T.tensor(state_raw, dtype=T.float).to(DQN_DEVICE)
    action_dqn = T.argmax(agentDQN.q_eval.forward(state_dqn).detach().cpu()).item()
    total_score_dqn += reward_all[action_dqn]
    writer_DQN.add_scalar('Total Score Train', total_score_dqn, i)

    next_state = _decode_next_state(i)
    if next_state is not None:
        # NOTE(review): the stored state is a device tensor while next_state is
        # the raw decoded state -- confirm this is what the replay memory expects.
        agentDQN.store(state_dqn, action_dqn, reward_all[action_dqn], next_state, False)
    agentDQN.learn()

    # ------------------------------------------------------------------
    # DQN NFSP (random mix of the DQN policy and the imitation policy)
    # ------------------------------------------------------------------
    if count_NFSP > 5:
        # Too many windows in which one sub-policy was never picked:
        # fall back to an even split.
        count_NFSP = 0
        nuy_DQN_NFSP = 0.5

    if np.random.random() < nuy_DQN_NFSP:
        action, reward, state = agentDQN_DQN_NFSP.choose_action(data)
        score = reward[action]
        score_DQN_NFSP += score
        num_DQN_NFSP += 1

        next_state = _decode_next_state(i)
        if next_state is not None:
            agentDQN_DQN_NFSP.store(state, action, score, next_state, False)
        agentImitation_DQN_NFSP.store(state, action, _group_max_rewards(reward))
        agentImitation_DQN_NFSP.learn()
        agentDQN_DQN_NFSP.learn()
    else:
        action, score, reward, state = agentImitation_DQN_NFSP.choose_action(data)
        score_Imi_NFSP += score
        num_Imi_NFSP += 1

        # Only the DQN learns from this transition; the imitation policy is
        # not updated from its own actions.
        next_state = _decode_next_state(i)
        if next_state is not None:
            agentDQN_DQN_NFSP.store(state, action, score, next_state, False)
        agentDQN_DQN_NFSP.learn()

    total_score_nfsp += score
    writer_NFSP.add_scalar('Total Score Train', total_score_nfsp, i)

    if i % 100 == 0:
        if num_DQN_NFSP == 0 or num_Imi_NFSP == 0:
            # No mean reward for one sub-policy in this window; keep the
            # current mix and carry the counters over to the next window.
            count_NFSP += 1
        else:
            smax = T.nn.functional.softmax(
                T.tensor([score_DQN_NFSP / num_DQN_NFSP, score_Imi_NFSP / num_Imi_NFSP]),
                dim=0)
            nuy_DQN_NFSP = smax[0].item()
            score_DQN_NFSP = 0.0
            score_Imi_NFSP = 0.0
            num_DQN_NFSP = 0
            num_Imi_NFSP = 0

    # ------------------------------------------------------------------
    # DDQN NFSP (random mix of the DDQN policy and the imitation policy)
    # ------------------------------------------------------------------
    if count_DDQN_NFSP > 5:
        count_DDQN_NFSP = 0
        nuy_DDQN_NFSP = 0.5

    if np.random.random() < nuy_DDQN_NFSP:
        action, reward, state = agentDDQN_DDQN_NFSP.choose_action(data)
        score = reward[action]
        score_DDQN_NFSP += score
        num_DDQN_NFSP += 1

        next_state = _decode_next_state(i)
        if next_state is not None:
            agentDDQN_DDQN_NFSP.store(state, action, score, next_state, False)
        agentImitation_DDQN_NFSP.store(state, action, _group_max_rewards(reward))
        agentDDQN_DDQN_NFSP.learn()
        agentImitation_DDQN_NFSP.learn()
    else:
        action, score, reward, state = agentImitation_DDQN_NFSP.choose_action(data)
        score_Imi_DDQN_NFSP += score
        num_Imi_DDQN_NFSP += 1

        # Only the DDQN learns from this transition (see the DQN NFSP section).
        next_state = _decode_next_state(i)
        if next_state is not None:
            agentDDQN_DDQN_NFSP.store(state, action, score, next_state, False)
        agentDDQN_DDQN_NFSP.learn()

    total_score_ddqn_nfsp += score
    writer_NFSP_DDQN.add_scalar('Total Score Train', total_score_ddqn_nfsp, i)

    if i % 100 == 0:
        if num_DDQN_NFSP == 0 or num_Imi_DDQN_NFSP == 0:
            count_DDQN_NFSP += 1
        else:
            smax = T.nn.functional.softmax(
                T.tensor([score_DDQN_NFSP / num_DDQN_NFSP,
                          score_Imi_DDQN_NFSP / num_Imi_DDQN_NFSP]),
                dim=0)
            nuy_DDQN_NFSP = smax[0].item()
            score_DDQN_NFSP = 0.0
            score_Imi_DDQN_NFSP = 0.0
            num_DDQN_NFSP = 0
            num_Imi_DDQN_NFSP = 0

    # ------------------------------------------------------------------
    # DDQN (greedy action, then online update)
    # ------------------------------------------------------------------
    state_ddqn = T.tensor(state_raw, dtype=T.float).to(DEVICE)
    action_ddqn = T.argmax(agentDDQN.q_eval.forward(state_ddqn).detach().cpu()).item()
    total_score_ddqn += reward_all[action_ddqn]
    writer_DDQN.add_scalar('Total Score Train', total_score_ddqn, i)

    next_state = _decode_next_state(i)
    if next_state is not None:
        agentDDQN.store(state_ddqn, action_ddqn, reward_all[action_ddqn], next_state, False)
    agentDDQN.learn()

########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########


  loss = crit(pred,torch.nn.functional.softmax(reward_))


########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########




########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
tensor(5.6843e-14, device='cuda:1', grad

########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########
########## Saving checkpoint ##########


In [None]:
# Display the loop variable left over from the evaluation loop, i.e. the index
# of the last sample it reached.
i