In [1]:
# -*- coding: utf-8 -*-
# %matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
try:
    reload  # Python 2.7
except NameError:
    try:
        from importlib import reload  # Python 3.4+
    except ImportError:
        from imp import reload  # Python 3.0 - 3.3
#from importlib import reload


from keras.utils import plot_model

from test import test_class, Game, GameState, Node, Edge, MCTS, Gen_Model, Residual_CNN, User, Agent
from funcs import playMatches, playMatchesBetweenVersions
from memory import Memory
import loggers as lg

from settings import run_folder, run_archive_folder
import initialise
import pickle

Using TensorFlow backend.


In [2]:
# Instantiate the game environment. Later cells read env.name, env.grid_shape,
# env.action_size and env.state_size from it and call env.reset() to obtain a
# starting state.
env = Game()

In [3]:

# If resuming from an archived run, copy that run's config file to the working
# directory so the `import config` below loads the archived settings.
# `is not None` is the identity test recommended for the None singleton; a
# run number of 0 still counts as "set".
if initialise.INITIAL_RUN_NUMBER is not None:
    archived_run_dir = run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4)
    copyfile(archived_run_dir + '/config.py', './config.py')

# Imported here, not with the other imports, on purpose: it must run after the
# optional copy above so the restored ./config.py is the one that gets loaded.
import config

In [4]:
######## LOAD MEMORIES IF NECESSARY ########

# Either start with an empty Memory sized from the config, or restore a
# pickled one from the archived run selected in `initialise`.
if initialise.INITIAL_MEMORY_VERSION is None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory_path = (
        run_archive_folder + env.name
        + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4)
        + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p"
    )
    # SECURITY NOTE: pickle.load can execute arbitrary code embedded in the
    # file. Only load memory archives produced by your own runs.
    # The `with` block guarantees the file handle is closed even if
    # unpickling raises (the previous bare open() was never closed).
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)


In [5]:
######## LOAD MODEL IF NECESSARY ########

# Build two untrained networks with identical hyper-parameters taken from the
# config file: one to be trained (current) and one held as the reference (best).
# The input shape is the game's grid shape with a leading dimension of 2
# (presumably one plane per player -- TODO confirm against Residual_CNN).
nn_input_shape = (2,) + env.grid_shape

current_NN = Residual_CNN(
    config.REG_CONST,
    config.LEARNING_RATE,
    nn_input_shape,
    env.action_size,
    config.HIDDEN_CNN_LAYERS,
)
best_NN = Residual_CNN(
    config.REG_CONST,
    config.LEARNING_RATE,
    nn_input_shape,
    env.action_size,
    config.HIDDEN_CNN_LAYERS,
)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [6]:
# If loading an existing neural network, set the weights of both networks from
# that archived model. `is not None` is used so that version 0 is still
# treated as a valid version to load.
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    # Fetch the archived weights once and apply them to both networks.
    archived_weights = m_tmp.get_weights()
    current_NN.model.set_weights(archived_weights)
    best_NN.model.set_weights(archived_weights)
# Otherwise just ensure the weights on the two players are the same.
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

In [7]:
# Keep a copy of the active config alongside this run's outputs.
config_snapshot_path = run_folder + 'config.py'
copyfile('./config.py', config_snapshot_path)

# Write a diagram of the network architecture (with tensor shapes) to the
# run folder.
model_diagram_path = run_folder + 'models/model.png'
plot_model(current_NN.model, to_file=model_diagram_path, show_shapes=True)

print('\n')






In [8]:
######## CREATE THE PLAYERS ########

def _make_agent(agent_name, network):
    """Build an Agent for this game using the MCTS settings from the config."""
    return Agent(agent_name, env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, network)

# One agent per network; both share the same search settings.
current_player = _make_agent('current_player', current_NN)
best_player = _make_agent('best_player', best_NN)
#user_player = User('player1', env.state_size, env.action_size)

# Counter incremented by the self-play loop further down.
iteration = 0

In [13]:
# Randomly decide the seating: player1Starts is +1 or -1 with equal probability.
# +1 puts current_player in seat 1 (reported as "X"); -1 puts best_player there.
player1Starts = random.randint(0,1) * 2 - 1
if player1Starts == 1:
    first_player, second_player = current_player, best_player
else:
    first_player, second_player = best_player, current_player

# Map each seat id (1 / -1) to its agent; the cell below looks an agent up
# via players[state.playerTurn].
players = {
    1: {"agent": first_player, "name": first_player.name},
    -1: {"agent": second_player, "name": second_player.name},
}

print(first_player.name + ' plays as X')
# Print the separator regardless of who starts (it used to be emitted only
# when best_player was in seat 1).
print('--------------')

best_player plays as X
--------------


In [24]:
# Smoke test: reset the game and ask the agent whose turn it is for one move,
# then print everything it returned.
state = env.reset()
print("Allowed Actions=")
print(state.allowedActions)

# act() returns four values, unpacked below. The second argument (1) is passed
# through as-is; presumably the temperature "tau" -- TODO confirm against Agent.act.
action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 1)

print("Action taken=")
print(action)
print("PI of which action needs move=")
print(pi)
print("MCTS_value=")
print(MCTS_value)
print("NN_prediction_value=")
print(NN_value)

Allowed Actions=
[35, 36, 37, 38, 39, 40, 41]
Action=
39
PI=
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.12917595
 0.14253898 0.15144766 0.14699332 0.1091314  0.18708241 0.13363029]
MCTS_value=
-0.022930259
NN_value=
[-0.02694018]


In [None]:

# Endless self-play loop: it has no exit condition, so interrupt the kernel to
# stop it. `iteration` and `memory` are notebook-level variables that carry
# over between runs of this cell.
while True:

    iteration += 1

    # Reload the logger and config modules at the start of every iteration.
    for reloaded_module in (lg, config):
        reload(reloaded_module)

    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    # best_player plays against itself; only the memory object returned by
    # playMatches (second element of the 4-tuple) is kept.
    selfplay_result = playMatches(
        best_player,
        best_player,
        config.EPISODES,
        lg.logger_main,
        turns_until_tau0=config.TURNS_UNTIL_TAU0,
        memory=memory,
    )
    _, memory, _, _ = selfplay_result
    print('\n')

    # Presumably empties the short-term buffer filled during self play --
    # TODO confirm against Memory.clear_stmemory.
    memory.clear_stmemory()

ITERATION NUMBER 4
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 5
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 6
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 7
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 8
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 9
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 10
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 111
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 112
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 113
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 114
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 115
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 116
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 2

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 217
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 218
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 219
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 220
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 221
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

ITERATION NUMBER 222
BEST PLAYER VERSION 0
SELF PLAYING 30 EPISODES...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 2