In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
from collections import namedtuple, Counter
from ipywidgets import *
from IPython.display import display, HTML

from santorinigo.environment import Santorini
from santorinigo.qnetwork import *
from santorinigo.replay_memory import *
from santorinigo.agent import Agent

# Root folder for notebook data; kept as a plain string with a trailing slash
# so sub-paths can be built by simple concatenation.
DATA_PATH = 'data/'
# Folder holding saved model checkpoints, located under DATA_PATH.
MODEL_PATH = DATA_PATH + 'models/'

In [5]:
# Create the Santorini environment used by every later cell, then print its
# board (the Buildings / Workers / Parts grids shown in the output below).
env = Santorini()
env.print_board()

Buildings:
 [[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Workers:
 [[ 0  0 -1  0  0]
 [ 0  0  0  0  0]
 [ 1  0  0  0  2]
 [ 0  0  0  0  0]
 [ 0  0 -2  0  0]]
Parts:
 [[ 0  0  0  0  0]
 [ 0 22  0  0  0]
 [ 0  0 18  0  0]
 [ 0  0  0 14  0]
 [ 0  0  0  0 18]]


In [22]:
# Play one move given by its action key. The key looks like
# (worker, move direction, build direction) -- TODO confirm against Santorini.atoi.
manual_key = (-2, 'w', 'x')
# atoi maps the key to the integer action index that step() takes; the
# returned tuple is displayed as the cell output.
env.step(env.atoi[manual_key])

(array([[[ 1,  0,  1,  0,  0],
         [ 1,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0],
         [ 0,  0,  0,  0,  0],
         [ 0,  0,  1,  0,  0]],
 
        [[ 0,  0,  0,  0,  0],
         [ 0,  0, -1,  0,  0],
         [ 1,  0,  0,  0,  2],
         [ 0,  0, -2,  0,  0],
         [ 0,  0,  0,  0,  0]]]), -0.001, False, 1)

In [25]:
# Count how many actions the environment currently reports as legal.
legal_now = env.legal_moves()
len(legal_now)

96

In [26]:
# Display the raw state array. In the recorded output it is two stacked 5x5
# integer grids; the second appears to hold the worker ids -- TODO confirm
# against Santorini.get_state.
env.get_state()

array([[[ 1,  0,  1,  0,  0],
        [ 1,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0],
        [ 0,  0,  0,  0,  0],
        [ 0,  0,  1,  0,  0]],

       [[ 0,  0,  0,  0,  0],
        [ 0,  0, -1,  0,  0],
        [ 1,  0,  0,  0,  2],
        [ 0,  0, -2,  0,  0],
        [ 0,  0,  0,  0,  0]]])

In [6]:
# Build the replay memory and the agent, then restore the trained weights
# into both the local and the target Q-network.
mem = PrioritizedMemory(capacity=1000)
a = Agent(
    state_size=env.state_dim_flat,
    action_size=env.action_dim,
    env=env,
    replay_memory=mem,
    seed=1412,
    lr=1e-3 / 4,
    bs=64,
    nb_hidden=128,
    gamma=0.99,
    tau=1 / 100,
    update_interval=5,
)
fname = f'{MODEL_PATH}half_rainbow_9999.m'
# NOTE(review): `torch` is not imported explicitly in this notebook; it is
# presumably re-exported by one of the `from santorinigo... import *` lines.
# Read the checkpoint from disk once and reuse it for both networks
# (load_state_dict copies values into each module's own parameters).
# map_location='cpu' lets a checkpoint saved during a GPU run load on a
# CPU-only machine; load_state_dict then copies onto the module's device.
checkpoint_state = torch.load(fname, map_location='cpu')
a.qnetwork_local.load_state_dict(checkpoint_state)
a.qnetwork_target.load_state_dict(checkpoint_state)

## Agent's Turn

In [93]:
# Snapshot the current position and collect the actions that are legal in it.
state = env.get_state()
legal_for_agent = env.legal_moves()
# Ask the agent for its move, restricted to the legal actions.
# NOTE(review): the meaning of the positional 1000 is not visible here --
# confirm against Agent.act.
agent_action = a.act(state, 1000, legal_actions=legal_for_agent)
# Show the side to move, the decoded action key and the raw action index.
(env.current_player, env.itoa[agent_action], agent_action)

(-1, (-2, 'd', 'z'), 101)

In [94]:
# Apply the agent's move. step() returns a tuple; element 1 is the value
# printed below (-0.001 in the recorded run, presumably the step reward).
agent_step_result = env.step(agent_action)
print(agent_step_result[1])
# Show the board after the move.
env.print_board()

-0.001
Buildings:
 [[1 0 0 0 0]
 [0 1 1 2 0]
 [0 0 3 0 0]
 [0 3 0 2 0]
 [0 0 0 0 0]]
Workers:
 [[ 0  0 -1  0  0]
 [ 0  0  0 -2  0]
 [ 0  1  0  0  0]
 [ 0  0  0  2  0]
 [ 0  0  0  0  0]]
Parts:
 [[ 0  0  0  0  0]
 [ 0 15  0  0  0]
 [ 0  0 14  0  0]
 [ 0  0  0 12  0]
 [ 0  0  0  0 18]]


## Human's Turn

In [91]:
# Pick the human move by its action key and translate it, via env.atoi, into
# the integer action index that is later passed to env.step.
# NOTE(review): the key looks like (worker, move direction, build direction)
# -- confirm against Santorini.atoi.
human_key = (-2,'c','q')
human_action = env.atoi[human_key]
# Show the side to move, the chosen key and its action index as a sanity check.
env.current_player, human_key, human_action

(1, (-2, 'c', 'q'), 120)

In [92]:
# Apply the human's move. step() returns a tuple; element 1 is the value
# printed below (-0.001 in the recorded run, presumably the step reward).
human_step_result = env.step(human_action)
print(human_step_result[1])
# Show the board after the move.
env.print_board()

-0.001
Buildings:
 [[1 0 0 0 0]
 [0 1 1 2 0]
 [0 0 2 0 0]
 [0 3 0 2 0]
 [0 0 0 0 0]]
Workers:
 [[ 0  0 -1  0  0]
 [ 0  0 -2  0  0]
 [ 0  1  0  0  0]
 [ 0  0  0  2  0]
 [ 0  0  0  0  0]]
Parts:
 [[ 0  0  0  0  0]
 [ 0 15  0  0  0]
 [ 0  0 14  0  0]
 [ 0  0  0 13  0]
 [ 0  0  0  0 18]]
