In [30]:
# -*- coding: utf-8 -*-
# %matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
# Make `reload` available under a single name on every supported Python
# version; the training loop later calls reload(lg) and reload(config).
try:
    reload  # Python 2.7: `reload` is a builtin, so referencing it succeeds
except NameError:
    # Python 3 has no builtin `reload`; import it from wherever it lives.
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3
#from importlib import reload


from keras.utils import plot_model

from test import test_class, Game, GameState, Node, Edge, MCTS, Gen_Model, Residual_CNN, User, Agent
from funcs import playMatches, playMatchesBetweenVersions
from memory import Memory
import loggers as lg

from settings import run_folder, run_archive_folder
import initialise
import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
# Game environment shared by every later cell: they read env.name,
# env.grid_shape, env.action_size and env.state_size, and call env.reset().
env = Game()

In [32]:

# If resuming from an archived run, copy that run's config file into the
# working directory so the `import config` below picks up the archived settings.
# `is not None` is used instead of `!= None` so the check is an identity test.
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(
        run_archive_folder + env.name + '/run'
        + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py',
        './config.py',
    )

# Imported here (not in the top import cell) because the file may just have
# been replaced by the copy above.
# NOTE(review): if `config` was already imported in this kernel, this statement
# will not re-read the file; the training loop calls reload(config) explicitly.
import config

In [33]:
######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION is None:
    # No archived memory requested: start with an empty buffer sized from config.
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    # The path is built from the archived *run* number and the memory version.
    # NOTE(review): this assumes INITIAL_RUN_NUMBER is also set whenever
    # INITIAL_MEMORY_VERSION is; otherwise the path contains 'None' - confirm.
    memory_path = (
        run_archive_folder + env.name + '/run'
        + str(initialise.INITIAL_RUN_NUMBER).zfill(4)
        + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p"
    )
    # Use a context manager so the file handle is closed even if unpickling fails.
    # NOTE(review): pickle.load can execute arbitrary code; only load memory
    # files produced by trusted runs.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)


In [34]:
######## LOAD MODEL IF NECESSARY ########

# Build two untrained networks from identical config-derived arguments:
# the first becomes `current_NN`, the second `best_NN`.
current_NN, best_NN = (
    Residual_CNN(
        config.REG_CONST,
        config.LEARNING_RATE,
        (2,) + env.grid_shape,
        env.action_size,
        config.HIDDEN_CNN_LAYERS,
    )
    for _ in range(2)
)

In [35]:
# If resuming from an existing neural network, load that model and copy its
# weights into both players' networks.
# `is not None` is used instead of `!= None` so the check is an identity test.
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    # The archived model is located by game name, run number and model version.
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# Otherwise start from version 0 and make best_NN an exact copy of current_NN,
# so both players begin with the same weights.
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

In [36]:
# Keep a copy of the active configuration alongside this run's artefacts.
config_snapshot_path = run_folder + 'config.py'
copyfile('./config.py', config_snapshot_path)

# Write a diagram of the current network (with tensor shapes) into the run folder.
model_diagram_path = run_folder + 'models/model.png'
plot_model(current_NN.model, to_file=model_diagram_path, show_shapes=True)

print('\n')






In [37]:
######## CREATE THE PLAYERS ########

# One Agent per network; both share the same environment sizes and MCTS settings
# and differ only in their name and the network they wrap.
current_player, best_player = (
    Agent(label, env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, network)
    for label, network in (
        ('current_player', current_NN),
        ('best_player', best_NN),
    )
)
#user_player = User('player1', env.state_size, env.action_size)

# Iteration counter, incremented at the top of the training loop.
iteration = 0

In [38]:
# Coin flip mapped to +1 / -1: +1 means current_player takes side 1 ("X"),
# -1 means best_player does.
player1Starts = random.randint(0,1) * 2 - 1
if player1Starts == 1:
    first_mover, second_mover = current_player, best_player
else:
    first_mover, second_mover = best_player, current_player

# `players` is keyed by the side identifier (1 or -1); later cells index it
# with state.playerTurn to find the agent that should act.
players = {
    1: {"agent": first_mover, "name": first_mover.name},
    -1: {"agent": second_mover, "name": second_mover.name},
}
print(first_mover.name + ' plays as X')
# Previously the separator was printed only when best_player was X; print it
# in both cases so the output is consistent.
print('--------------')

current_player plays as X


In [52]:
# Start a fresh game and show which moves are legal from the opening position.
state = env.reset()
print("Allowed Actions=")
print(state.allowedActions)

# Ask the agent whose turn it is for one move.
# NOTE(review): the second argument to act() is presumably the temperature
# (tau) flag - confirm against Agent.act.
acting_agent = players[state.playerTurn]['agent']
action, pi, MCTS_value, NN_value = acting_agent.act(state, 1)

# Print each returned value under its caption, one per line.
for caption, result in (
    ("Action taken=", action),
    ("PI of which actin needs move=", pi),
    ("MCTS_value=", MCTS_value),
    ("NN_prediction_value=", NN_value),
):
    print(caption)
    print(result)

Allowed Actions=
[35, 36, 37, 38, 39, 40, 41]
in here simulate=
currentNode.edges =
{'W': array([-0.804519], dtype=float32), 'P': 0.14285715, 'Q': array([-0.01676081], dtype=float32), 'N': 48}
action is =
35
{'W': array([-0.42191964], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00796075], dtype=float32), 'N': 53}
action is =
36
{'W': array([-0.34614706], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00629358], dtype=float32), 'N': 55}
action is =
37
{'W': array([-0.19156629], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00319277], dtype=float32), 'N': 60}
action is =
38
{'W': array([0.5778578], dtype=float32), 'P': 0.14285715, 'Q': array([0.00849791], dtype=float32), 'N': 68}
action is =
39
{'W': array([-0.7072707], dtype=float32), 'P': 0.14285715, 'Q': array([-0.01414541], dtype=float32), 'N': 50}
action is =
40
{'W': array([4.4936433], dtype=float32), 'P': 0.14285715, 'Q': array([0.03907516], dtype=float32), 'N': 115}
action is =
41
currentNode.edges =
{'W': array([-0.816555

in here simulate=
currentNode.edges =
{'W': array([-0.7811871], dtype=float32), 'P': 0.14285715, 'Q': array([-0.01594259], dtype=float32), 'N': 49}
action is =
35
{'W': array([-0.28809378], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00523807], dtype=float32), 'N': 55}
action is =
36
{'W': array([-0.34614706], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00629358], dtype=float32), 'N': 55}
action is =
37
{'W': array([-0.17819275], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00292119], dtype=float32), 'N': 61}
action is =
38
{'W': array([0.5778578], dtype=float32), 'P': 0.14285715, 'Q': array([0.00849791], dtype=float32), 'N': 68}
action is =
39
{'W': array([-0.6438321], dtype=float32), 'P': 0.14285715, 'Q': array([-0.01262416], dtype=float32), 'N': 51}
action is =
40
{'W': array([4.5965886], dtype=float32), 'P': 0.14285715, 'Q': array([0.03862679], dtype=float32), 'N': 119}
action is =
41
currentNode.edges =
{'W': array([0.09182236], dtype=float32), 'P': 0.14878541, 'Q': arr

leaf node = 
[]
leaf node allowed Actions=
[24, 30, 34, 35, 36, 39, 40]
value=
0
in here simulate=
currentNode.edges =
{'W': array([-0.7021239], dtype=float32), 'P': 0.14285715, 'Q': array([-0.01404248], dtype=float32), 'N': 50}
action is =
35
{'W': array([-0.28809378], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00523807], dtype=float32), 'N': 55}
action is =
36
{'W': array([-0.34403503], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00614348], dtype=float32), 'N': 56}
action is =
37
{'W': array([-0.08289749], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00131583], dtype=float32), 'N': 63}
action is =
38
{'W': array([0.5778578], dtype=float32), 'P': 0.14285715, 'Q': array([0.00849791], dtype=float32), 'N': 68}
action is =
39
{'W': array([-0.6438321], dtype=float32), 'P': 0.14285715, 'Q': array([-0.01262416], dtype=float32), 'N': 51}
action is =
40
{'W': array([4.5430317], dtype=float32), 'P': 0.14285715, 'Q': array([0.03663735], dtype=float32), 'N': 124}
action is =
41
curren

currentNode.edges =
{'W': array([0.93188566], dtype=float32), 'P': 0.1423719, 'Q': array([0.04437551], dtype=float32), 'N': 21}
action is =
32
{'W': array([-0.31588566], dtype=float32), 'P': 0.13864371, 'Q': array([-0.04512652], dtype=float32), 'N': 7}
action is =
35
{'W': array([-0.23022664], dtype=float32), 'P': 0.13933437, 'Q': array([-0.02558074], dtype=float32), 'N': 9}
action is =
36
{'W': array([-0.38379934], dtype=float32), 'P': 0.14240997, 'Q': array([-0.05482848], dtype=float32), 'N': 7}
action is =
37
{'W': array([-0.1360817], dtype=float32), 'P': 0.14692444, 'Q': array([-0.01360817], dtype=float32), 'N': 10}
action is =
38
{'W': array([-0.34129804], dtype=float32), 'P': 0.14268214, 'Q': array([-0.03792201], dtype=float32), 'N': 9}
action is =
40
{'W': array([-0.27888128], dtype=float32), 'P': 0.14763346, 'Q': array([-0.03486016], dtype=float32), 'N': 8}
action is =
41
currentNode.edges =
{'W': array([-0.00439909], dtype=float32), 'P': 0.13779685, 'Q': array([-0.00439909], d

currentNode.edges =
{'W': 0, 'P': 0.14594811, 'Q': 0, 'N': 0}
action is =
28
{'W': 0, 'P': 0.13599221, 'Q': 0, 'N': 0}
action is =
29
{'W': 0, 'P': 0.15761034, 'Q': 0, 'N': 0}
action is =
33
{'W': 0, 'P': 0.13745439, 'Q': 0, 'N': 0}
action is =
37
{'W': 0, 'P': 0.1293798, 'Q': 0, 'N': 0}
action is =
38
{'W': 0, 'P': 0.14797384, 'Q': 0, 'N': 0}
action is =
39
{'W': 0, 'P': 0.14564134, 'Q': 0, 'N': 0}
action is =
41
leaf node = 
[]
leaf node allowed Actions=
[21, 29, 33, 37, 38, 39, 41]
value=
0
in here simulate=
currentNode.edges =
{'W': array([-0.6444052], dtype=float32), 'P': 0.14285715, 'Q': array([-0.01215859], dtype=float32), 'N': 53}
action is =
35
{'W': array([-0.17506479], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00301836], dtype=float32), 'N': 58}
action is =
36
{'W': array([-0.40073678], dtype=float32), 'P': 0.14285715, 'Q': array([-0.00703047], dtype=float32), 'N': 57}
action is =
37
{'W': array([-0.15997466], dtype=float32), 'P': 0.14285715, 'Q': array([-0.0024996], d

currentNode.edges =
{'W': array([0.04772735], dtype=float32), 'P': 0.14967023, 'Q': array([0.02386368], dtype=float32), 'N': 2}
action is =
33
{'W': array([0.04527438], dtype=float32), 'P': 0.14133511, 'Q': array([0.02263719], dtype=float32), 'N': 2}
action is =
34
{'W': array([0.03181328], dtype=float32), 'P': 0.1383487, 'Q': array([0.01590664], dtype=float32), 'N': 2}
action is =
35
{'W': array([0.14987205], dtype=float32), 'P': 0.14863302, 'Q': array([0.04995735], dtype=float32), 'N': 3}
action is =
36
{'W': array([0.02968623], dtype=float32), 'P': 0.13958405, 'Q': array([0.01484311], dtype=float32), 'N': 2}
action is =
37
{'W': array([0.21372057], dtype=float32), 'P': 0.13243201, 'Q': array([0.07124019], dtype=float32), 'N': 3}
action is =
38
{'W': array([0.06110551], dtype=float32), 'P': 0.14999682, 'Q': array([0.03055276], dtype=float32), 'N': 2}
action is =
39
currentNode.edges =
{'W': array([0.0045211], dtype=float32), 'P': 0.12639996, 'Q': array([0.0045211], dtype=float32), 'N

In [47]:

# Self-play loop. It has no exit condition of its own and runs until the
# kernel is interrupted.
while True:

    iteration = iteration + 1
    # Re-import the logger and config modules so on-disk edits take effect
    # without restarting the kernel.
    reload(lg)
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    # The best player plays against itself; only the updated memory (second
    # element of the returned 4-tuple) is kept.
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(
        best_player,
        best_player,
        config.EPISODES,
        lg.logger_main,
        turns_until_tau0 = config.TURNS_UNTIL_TAU0,
        memory = memory,
    )
    print('\n')

    # NOTE(review): presumably resets the short-term buffer once the episodes
    # have been committed - confirm against Memory.clear_stmemory.
    memory.clear_stmemory()

ITERATION NUMBER 1
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 in here simulate
leaf node = 
[]
leaf node allowed Actions
[35, 36, 37, 38, 39, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[28, 36, 37, 38, 39, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[30, 35, 36, 38, 39, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[34, 35, 36, 37, 38, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[31, 35, 36, 37, 39, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[29, 35, 37, 38, 39, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[33, 35, 36, 37, 38, 39, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[32, 35, 36, 37, 38, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[26, 35, 36, 37, 38, 39, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actio

in here simulate
leaf node = 
[]
leaf node allowed Actions
[29, 31, 33, 35, 37, 39, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[26, 31, 35, 36, 37, 39, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[29, 31, 33, 35, 37, 39, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[28, 29, 31, 37, 39, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[29, 31, 32, 35, 37, 40, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 31, 34, 35, 37, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[28, 31, 34, 36, 37, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 33, 35, 36, 37, 39, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[28, 31, 33, 36, 37, 39, 41]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 28, 36, 37, 39, 40, 41]
value=
0
in here simulate
leaf node = 


in here simulate
leaf node = 
[]
leaf node allowed Actions
[28, 29, 31, 32, 34, 37, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 24, 34, 35, 37, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[29, 30, 31, 32, 34, 35, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 27, 29, 35, 37, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 29, 30, 34, 35, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 30, 31, 34, 35, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[28, 29, 30, 31, 34, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[27, 29, 31, 32, 35, 37, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 29, 30, 34, 35, 39, 40]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 24, 34, 35, 37, 39, 40]
value=
0
in here simulate
leaf node = 


in here simulate
leaf node = 
[]
leaf node allowed Actions
[17, 26, 29, 34, 35, 37, 39]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[17, 27, 29, 33, 35, 37, 39]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 24, 33, 34, 35, 37, 39]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[17, 29, 32, 33, 34, 35, 37]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[28, 29, 30, 31, 33, 34, 39]
value=
0
vaules of Q of every edge=
[ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.         -0.03243699
  0.          0.06444561  0.         -0.01108892 -0.01942809 -0.00967597
  0.         -0.00510335  0.          0.04616505  0.          0.        ]
in here 

0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[10, 25, 29, 33, 34, 35, 37]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 26, 29, 30, 32, 34, 35]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 24, 27, 32, 33, 35, 37]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[21, 24, 29, 32, 33, 34, 37]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 28, 29, 30, 32, 33, 34]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 28, 29, 30, 32, 33, 34]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[20, 24, 29, 32, 33, 35, 37]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 24, 28, 32, 33, 34, 37]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[22, 24, 30, 32, 33, 34, 35]
value=
0
in here simulate
leaf node = 
[]
leaf node allowed Actions
[24, 27, 28, 29, 32, 33, 37]
value=
0
in here simulate
leaf node =

KeyboardInterrupt: 