In [1]:
# -*- coding: utf-8 -*-
# %matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
from importlib import reload


from keras.utils import plot_model

from game import Game, GameState
from agent import Agent
from memory import Memory
from model import Residual_CNN
from funcs import playMatches, playMatchesBetweenVersions

import loggers as lg

from settings import run_folder, run_archive_folder
import initialise
import pickle
print ("Ready OK.")

Using TensorFlow backend.


Ready OK.


In [5]:
env = Game()

# Folder of the archived run named in `initialise`. It is built unconditionally because the
# memory-loading branch below uses it regardless of whether a run number was given.
archived_run_folder = run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4)

# If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(archived_run_folder + '/config.py', './config.py')

# Imported only now so that a config copied from the archive (above) is the one picked up.
import config

######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION is None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory_path = archived_run_folder + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p"
    # NOTE(security): pickle.load can execute arbitrary code; only load archives you trust.
    # The context manager closes the file handle, which the bare open() call used to leak.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)

######## LOAD MODEL IF NECESSARY ########

# Create two untrained neural network objects from the config file
network_input_shape = (2,) + env.grid_shape
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, network_input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, network_input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# Otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# Copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)

print('\n')

######## CREATE THE PLAYERS ########

current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)

LOADING MODEL VERSION 9...




In [14]:


# Probe the network on an empty board (25 zero cells) with player 1 to move.
gs = GameState(np.zeros(25, dtype=int), 1)

# get_preds yields a 3-tuple here: value head, per-move scores, list of moves.
preds = current_player.get_preds(gs)
headValue, qValue, moves = preds

print(preds, np.argmax(qValue))

# NOTE(review): this maps the argmax position through `moves`, whereas getBestMove uses the
# argmax directly as the board index. Both agree while every cell is playable — confirm
# which one is intended once some cells are occupied.
bestMove = moves[np.argmax(qValue)]
print ('bestMove is: ', bestMove)

(array([-0.41520002], dtype=float32), array([ 0.05439956,  0.04435591,  0.03287387,  0.03674513,  0.02975684,
        0.04134964,  0.03579015,  0.04859267,  0.03557694,  0.02906657,
        0.03348422,  0.0382243 ,  0.08384039,  0.03856984,  0.03617097,
        0.03059375,  0.04515274,  0.04630377,  0.0427826 ,  0.03405229,
        0.03678911,  0.04139983,  0.03424213,  0.03180692,  0.03807981], dtype=float32), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]) 12
bestMove is:  12


In [55]:
# Model version 9: it has already learned to open on tengen (the centre point).

def getBestMove(board, playerTurn):
    """Ask ``current_player`` for its top-scoring move and play it on a copy of ``board``.

    Args:
        board: flat sequence of cell values; it is copied via ``np.array`` and never mutated.
        playerTurn: 1 for X, -1 for O. This value is written into the chosen cell.

    Returns:
        tuple ``(bestMove, newBoard)``: the argmax index of the score vector returned by
        ``current_player.get_preds`` and a new array with that cell set to ``playerTurn``.

    Raises:
        AssertionError: if ``bestMove`` does not appear exactly once in the move list
            returned by ``get_preds``.
    """
    gs = GameState(np.array(board), playerTurn)

    preds = current_player.get_preds(gs)
    qValue = preds[1]
    moves = preds[2]

    bestMove = np.argmax(qValue)
    # The closing parenthesis used to sit after `== 1`, so the check was
    # len(<bool array>) and passed for any number of matches >= 1.
    # Compare the match count itself, as originally intended.
    assert len(np.where(np.asarray(moves) == bestMove)[0]) == 1
    newBoard = np.array(board)
    newBoard[bestMove] = playerTurn
    return bestMove, newBoard

# Empty board: 25 cells, flattened row by row.
initial = [0] * 25

# The loaded model is expected to open on the centre cell (index 12).
assert getBestMove(board=initial, playerTurn=1)[0] == 12
print(repr(getBestMove(board=initial, playerTurn=1)[1]))

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0])


In [62]:
from IPython.display import HTML, display
import tabulate
import numpy as np 

def showBoard(board, shape=(5, 5)):
    """Render a flat board as an HTML table in the notebook output.

    Args:
        board: flat sequence of cell values; converted with ``np.array``.
        shape: ``(rows, cols)`` the board is reshaped to. Defaults to ``(5, 5)``,
            the size that used to be hard-coded, so existing calls are unaffected.

    Raises:
        ValueError: propagated from ``np.reshape`` when ``board`` cannot be
            reshaped to ``shape``.
    """
    table = np.reshape(np.array(board), shape)
    display(HTML(tabulate.tabulate(table, tablefmt='html')))

In [66]:
# X (1) on the centre cell, index 12; O (-1) beside it on index 13.
nextBoard = [
     0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,
     0,  0,  1, -1,  0,
     0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,
]
# Sanity-check the hand-typed grid against its flat form.
assert nextBoard == [0] * 12 + [1, -1] + [0] * 11
# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,0,0,0,0
0,0,0,0,0
0,0,1,-1,0
0,0,0,0,0
0,0,0,0,0


In [68]:
# O (-1) newly placed on index 7; X (playerTurn=1) to move.
nextBoard = [
     1,  0,  0,  0,  0,
     0,  0, -1,  0,  0,
     0,  0,  1, -1,  0,
     0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,0,0,0
0,0,-1,0,0
0,0,1,-1,0
0,0,0,0,0
0,0,0,0,0


In [70]:
# O (-1) newly placed on index 2; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  0,  0,
     0,  0, -1,  0,  0,
     0,  0,  1, -1,  0,
     0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,0,0
1,0,-1,0,0
0,0,1,-1,0
0,0,0,0,0
0,0,0,0,0


In [71]:
# O (-1) newly placed on index 8; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  0,  0,
     1,  0, -1, -1,  0,
     0,  0,  1, -1,  0,
     0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,0
1,0,-1,-1,0
0,0,1,-1,0
0,0,0,0,0
0,0,0,0,0


In [72]:
# O (-1) newly placed on index 14; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1,  0,
     1,  0, -1, -1,  0,
     0,  0,  1, -1, -1,
     0,  0,  0,  0,  0,
     0,  0,  0,  0,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,0
1,0,-1,-1,0
0,0,1,-1,-1
0,1,0,0,0
0,0,0,0,0


In [73]:
# O (-1) newly placed on index 9; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1,  0,
     1,  0, -1, -1, -1,
     0,  0,  1, -1, -1,
     0,  1,  0,  0,  0,
     0,  0,  0,  0,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,0
1,0,-1,-1,-1
0,0,1,-1,-1
0,1,0,0,0
0,1,0,0,0


In [74]:
# O (-1) newly placed on index 22; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1,  0,
     1,  0, -1, -1, -1,
     0,  0,  1, -1, -1,
     0,  1,  0,  0,  0,
     0,  1, -1,  0,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,0
1,0,-1,-1,-1
0,0,1,-1,-1
0,1,0,0,0
0,1,-1,1,0


In [75]:
# O (-1) newly placed on index 18; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1,  0,
     1,  0, -1, -1, -1,
     0,  0,  1, -1, -1,
     0,  1,  0, -1,  0,
     0,  1, -1,  1,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,0
1,0,-1,-1,-1
0,0,1,-1,-1
0,1,0,-1,1
0,1,-1,1,0


In [76]:
# O (-1) newly placed on index 10; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1,  0,
     1,  0, -1, -1, -1,
    -1,  0,  1, -1, -1,
     0,  1,  0, -1,  1,
     0,  1, -1,  1,  0,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,0
1,0,-1,-1,-1
-1,0,1,-1,-1
0,1,0,-1,1
0,1,-1,1,1


In [77]:
# O (-1) newly placed on index 4; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1, -1,
     1,  0, -1, -1, -1,
    -1,  0,  1, -1, -1,
     0,  1,  0, -1,  1,
     0,  1, -1,  1,  1,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,-1
1,0,-1,-1,-1
-1,0,1,-1,-1
1,1,0,-1,1
0,1,-1,1,1


In [79]:
# O (-1) newly placed on index 11; X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1, -1,
     1,  0, -1, -1, -1,
    -1, -1,  1, -1, -1,
     1,  1,  0, -1,  1,
     0,  1, -1,  1,  1,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,-1
1,0,-1,-1,-1
-1,-1,1,-1,-1
1,1,0,-1,1
1,1,-1,1,1


In [81]:
# O (-1) newly placed on index 6; only index 17 is still empty. X (playerTurn=1) to move.
nextBoard = [
     1,  1, -1,  1, -1,
     1, -1, -1, -1, -1,
    -1, -1,  1, -1, -1,
     1,  1,  0, -1,  1,
     1,  1, -1,  1,  1,
]

# Ask the model for X's move and render the resulting board.
showBoard(getBestMove(board=nextBoard, playerTurn=1)[1])

0,1,2,3,4
1,1,-1,1,-1
1,-1,-1,-1,-1
-1,-1,1,-1,-1
1,1,1,-1,1
1,1,-1,1,1


In [83]:
def checkBoardScore(board):
    """Render a finished position, print its value and return it.

    Builds a ``GameState`` for ``board`` with player -1 to move, renders it, and
    checks that the game has ended before reading the value.

    Args:
        board: board passed straight through to ``GameState``.

    Returns:
        Whatever ``GameState._getValue()`` returns for this position. Previously the
        value was only printed, so ``score = checkBoardScore(...)`` always bound ``None``.

    Raises:
        AssertionError: if ``GameState._checkForEndGame()`` does not return 1.
    """
    gs = GameState(board, -1)
    gs.render()
    assert gs._checkForEndGame() == 1
    value = gs._getValue()
    print (value)
    return value
    
# Let X play its move on the current nextBoard, then score the resulting position.
score = checkBoardScore(getBestMove(board=nextBoard, playerTurn=1)[1])


['X', 'X', 'O', 'X', 'O']
['X', 'O', 'O', 'O', 'O']
['O', 'O', 'X', 'O', 'O']
['X', 'X', 'X', 'O', 'X']
['X', 'X', 'O', 'X', 'X']
(1, 10, 1)


In [34]:
# Scratch check: counting the matching indices gives 1 for a value that is
# present once and 0 for a value that is absent.
a = np.array([10, 12, 9])
print(len(np.flatnonzero(a == 12)))
print(len(np.flatnonzero(a == 13)))


1
0


In [52]:
# Hand-entered mid-game position (1 = X, -1 = O); X (playerTurn=1) to move.
nextBoard = [
     1, -1,  1,  0,  0,
     0,  0, -1,  0,  1,
    -1,  0,  1, -1, -1,
     0,  1,  0, -1, -1,
     0,  0,  1,  0,  1,
]
# Print whatever getBestMove returns for this position.
print(getBestMove(board=nextBoard, playerTurn=1))

15


In [59]:
! pip install tabulate

Collecting tabulate
[?25l  Downloading https://files.pythonhosted.org/packages/12/c2/11d6845db5edf1295bc08b2f488cf5937806586afe42936c3f34c097ebdc/tabulate-0.8.2.tar.gz (45kB)
[K    100% |████████████████████████████████| 51kB 19kB/s ta 0:00:015
[?25hBuilding wheels for collected packages: tabulate
  Running setup.py bdist_wheel for tabulate ... [?25ldone
[?25h  Stored in directory: /home/jovyan/.cache/pip/wheels/2a/85/33/2f6da85d5f10614cbe5a625eab3b3aebfdf43e7b857f25f829
Successfully built tabulate
[31mtensorflow 1.3.0 has requirement tensorflow-tensorboard<0.2.0,>=0.1.0, but you'll have tensorflow-tensorboard 1.5.1 which is incompatible.[0m
Installing collected packages: tabulate
Successfully installed tabulate-0.8.2


0,1,2,3,4
0,0,0,0,0
0,0,0,0,0
0,0,1,0,0
0,0,0,0,0
0,0,0,0,0
