In [91]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
from collections import namedtuple, Counter
from ipywidgets import *
from IPython.display import display, HTML

from santorinigo.environment import Santorini
from santorinigo.qnetwork import *
from santorinigo.replay_memory import *
from santorinigo.agent import Agent

# Root folder for the notebook's data; model checkpoints are kept in its
# `models/` subfolder.
DATA_PATH = 'data/'
MODEL_PATH = DATA_PATH + 'models/'

In [92]:
# Build the agent and restore its trained weights.
#
# The agent's input/output sizes are read from a Santorini environment. The
# game's own `env` is only created in a later cell, so a local instance is used
# for the lookup here; this keeps the cell runnable on a fresh kernel
# (Restart & Run All) instead of depending on a leftover `env` variable.
dims_env = Santorini()
mem1 = PrioritizedMemory(capacity = 1000)
a1 = Agent(state_size = dims_env.state_dim_flat, action_size = dims_env.action_dim, replay_memory = mem1, seed = 1412,
          lr = 1e-3 / 4, bs = 64, nb_hidden = 128,
          gamma=0.99, tau= 1/100, update_interval = 5)
# Read the checkpoint from disk once and load the same weights into both the
# local and the target network. `load_state_dict` copies the values into each
# module's own parameters, so the two networks do not end up sharing storage.
fname = f'{MODEL_PATH}half_rainbow_10k.m'
state_dict = torch.load(fname)
a1.qnetwork_local.load_state_dict(state_dict)
a1.qnetwork_target.load_state_dict(state_dict)

In [93]:
# Create the game environment used for the rest of the notebook and print the
# board as it is right after construction (buildings, workers, parts).
env = Santorini()
env.print_board()

Buildings:
 [[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Workers:
 [[ 0  0 -1  0  0]
 [ 0  0  0  0  0]
 [ 1  0  0  0  2]
 [ 0  0  0  0  0]
 [ 0  0 -2  0  0]]
Parts:
 [[ 0  0  0  0  0]
 [ 0 22  0  0  0]
 [ 0  0 18  0  0]
 [ 0  0  0 14  0]
 [ 0  0  0  0 18]]


## Human's Turn

In [112]:
# Human move: edit `human_key`, then run this cell and the next one.
# NOTE(review): the key looks like (worker, move direction, build direction)
# with directions given as keyboard letters -- confirm against Santorini.atoi.
human_key = (-2,'q','a')
# Translate the key into the integer action index the environment expects.
human_action = env.atoi[human_key]
# Reject illegal moves before the next cell applies them, mirroring the
# legality check that is done for the agent's move.
if human_action not in env.legal_moves():
    raise ValueError(f'illegal move for player {env.current_player}: {human_key}')
# Show whose turn it is together with the chosen key and its action index.
env.current_player, human_key, human_action

(1, (-2, 'q', 'a'), 67)

In [113]:
# Apply the human's chosen action to the environment, then print the
# resulting board.
env.step(human_action)
env.print_board()

Buildings:
 [[3 2 1 0 0]
 [1 0 1 0 0]
 [0 0 1 1 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Workers:
 [[ 0  2  0  0  0]
 [ 0  0  0  0  0]
 [-1  1  0  0  0]
 [ 0  0  0  0  0]
 [ 0  0 -2  0  0]]
Parts:
 [[ 0  0  0  0  0]
 [ 0 15  0  0  0]
 [ 0  0 16  0  0]
 [ 0  0  0 13  0]
 [ 0  0  0  0 18]]


## Agent's Turn

In [114]:
# Agent move: ask the agent for a list of candidate actions for the current
# state and take the first one that the environment reports as legal.
# NOTE(review): the meaning of the positional argument 1000 and the ordering of
# the returned list are defined by Agent.act -- presumably best-first; confirm.
state = env.get_state()
actions = a1.act(state,1000,return_list=True)
#check legality
legal_moves = env.legal_moves()
for a in actions:
    if a in legal_moves:
        agent_action = a
        break
else:
    # Without this guard a turn with no legal candidate would either fail later
    # with a NameError or silently replay `agent_action` left over from the
    # previous turn when the next cell calls env.step().
    raise RuntimeError('agent proposed no legal action for the current position')
# Show the player to move, the human-readable key of the action, and its index.
env.current_player, env.itoa[agent_action], agent_action

(-1, (-1, 'x', 'w'), 49)

In [115]:
# Apply the agent's selected action to the environment, then print the
# resulting board.
env.step(agent_action)
env.print_board()

Buildings:
 [[3 2 1 0 0]
 [1 0 1 0 0]
 [1 0 1 1 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Workers:
 [[ 0  2  0  0  0]
 [ 0  0  0  0  0]
 [ 0  1  0  0  0]
 [-1  0  0  0  0]
 [ 0  0 -2  0  0]]
Parts:
 [[ 0  0  0  0  0]
 [ 0 14  0  0  0]
 [ 0  0 16  0  0]
 [ 0  0  0 13  0]
 [ 0  0  0  0 18]]
