## Experimental Test of Value Prediction

The core idea here is to use estimates of state values produced by MCTS (i.e. estimates of value under the simulation policy used in the MCTS simulations) as the targets for a value prediction network. This would ideally make for better intermediate evaluation for Minimax and better simulations for MCTS.

In [None]:
import os, datetime
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, TensorBoard
from tensorflow.keras.layers import Conv2D, Concatenate, Dense, Flatten, MaxPooling2D
from tensorflow.keras.optimizers import Adam
import numpy as np
from Board import *
from LocalSearch import *
import matplotlib.pyplot as plt

### Preprocessing

In [None]:
# Channel index (0-based) in the board image for each piece type.
# Channels 0-2 are reserved for terrain, castle greens and castle interiors.
piece_to_channel = {
    "king" : 3,
    "prince" : 4,
    "duke" : 5,
    "knight": 6,
    "sergeant" : 7,
    "pikemen" : 8,
    "squire" : 9,
    "archer" : 10}

# The image has 11 channels (0-based indices):
#   0     - Terrain (0.5 = rough, 1 = mountain)
#   1     - Castle greens (0.5 for brown, 1 for blue)
#   2     - Castle interiors (0.5 for brown, 1 for blue)
#   3-10  - King, Prince, Duke, Knight, Sergeant, Pikemen, Squire, Archer
#           (0.5 = brown piece of that type, 1 = blue piece of that type)
def board_to_image(board):
    """Encode a board as a (24, 24, 11) float array for the value network.

    Parameters
    ----------
    board : object exposing ``rough``, ``mountains``, ``blue_castle``,
        ``brown_castle``, ``blue_pieces_locations`` and
        ``brown_pieces_locations``. Locations are indexed as
        ``loc[0], loc[1]``; ``str(piece)`` must be a key of
        ``piece_to_channel``.

    Returns
    -------
    numpy.ndarray
        Array of shape (24, 24, 11); cells not written below stay 0.
    """
    board_img = np.zeros((24,24,11))

    # Terrain: mountains are written after rough, so a square listed in both
    # ends up marked as a mountain.
    for loc in board.rough:
        board_img[loc[0], loc[1], 0] = 0.5

    for loc in board.mountains:
        board_img[loc[0], loc[1], 0] = 1.0

    # Castle greens (element 0 of each castle)
    board_img[board.blue_castle[0][0], board.blue_castle[0][1], 1] = 1
    board_img[board.brown_castle[0][0], board.brown_castle[0][1], 1] = 0.5

    # Castle interiors (element 1 of each castle)
    board_img[board.blue_castle[1][0], board.blue_castle[1][1], 2] = 1
    board_img[board.brown_castle[1][0], board.brown_castle[1][1], 2] = 0.5

    # Piece locations: blue pieces are encoded as 1 ...
    for piece_loc, piece in board.blue_pieces_locations.items():
        board_img[piece_loc[0], piece_loc[1], piece_to_channel[str(piece)]] = 1
    # ... and brown pieces as 0.5, matching the castle channels, so the two
    # sides remain distinguishable within a shared piece-type channel.
    for piece_loc, piece in board.brown_pieces_locations.items():
        board_img[piece_loc[0], piece_loc[1], piece_to_channel[str(piece)]] = 0.5

    return board_img
    
def view_board_image(board_img, channel):
    """Plot one channel of a board image in a new grayscale figure.

    The selected channel is multiplied by 255 and drawn with a fixed
    0-255 colour range, so 0.5 appears mid-gray and 1 appears white.
    """
    plt.figure()
    scaled_plane = board_img[:, :, channel] * 255
    plt.imshow(scaled_plane, cmap="gray", vmin=0, vmax=255)

### Test the preprocessing function

In [None]:
# Smoke test: build a board, place a random setup for each side, encode it as
# an image and display the board for visual comparison.
b3 = Board()
b3.parse_terrain("terrain_3M_official_1.txt")
# One searcher per side; only get_random_start() is used from them here.
# NOTE(review): presumably this returns a random initial piece placement for
# that side — confirm against LocalSearch.
blue_searcher = HillClimbingFirstChoice(b3, "blue")
brown_searcher = HillClimbingFirstChoice(b3, "brown")
blue_random = blue_searcher.get_random_start()
brown_random = brown_searcher.get_random_start()
b3.place_pieces("blue", blue_random)
b3.place_pieces("brown", brown_random)
# board_img and b3 are reused by the cells that follow.
board_img = board_to_image(b3)
b3.display()

In [None]:
# Show channel 0 of the encoded board (the terrain channel written by
# board_to_image) as a grayscale image.
view_board_image(board_img, 0)

In [None]:
# Compare the reported size in bytes of the image encoding with that of the
# board's blue_pieces attribute.
# NOTE(review): sys.getsizeof is shallow — it does not follow references into
# contained objects — so if blue_pieces is a container the second figure
# likely undercounts. Confirm before drawing conclusions from the comparison.
import sys
print(sys.getsizeof(board_img))
print(sys.getsizeof(b3.blue_pieces))

In [None]:
# Sample data
np.save("StateData/Images/2", board_img)
np.save("StateData/Turns/2", np.array([1]))
np.save("StateData/Values/2", np.array([.45]))

### Load data

In [None]:
def load_np_array(fp):
    """Load the NumPy array stored at the path carried by ``fp``.

    ``fp`` is expected to expose ``.numpy()`` (as the tensors handed to
    ``tf.py_function`` do); its result is passed straight to ``np.load``.
    """
    path = fp.numpy()
    loaded = np.load(path)
    return loaded

# Build three file-backed datasets (board images, turn indicators, value
# labels) and zip them into ((image, turn), label) examples.
# The same seed is passed to every list_files call.
# NOTE(review): pairing the right image with the right turn/label relies on
# the three listings coming back in the same order, i.e. on each directory
# holding the same file names and the seeded shuffle permuting them
# identically — confirm, or list with shuffle=False and shuffle after zip.
seed = 12345
imgs = tf.data.Dataset.list_files("StateData/Images/*", seed=seed)
# np.load runs in Python, so it is wrapped in tf.py_function; Tout requests
# float32 tensors for every stream.
imgs = imgs.map(lambda x: tf.py_function(func=load_np_array, inp=[x], Tout=tf.float32))
turns = tf.data.Dataset.list_files("StateData/Turns/*", seed=seed)
turns = turns.map(lambda x: tf.py_function(func=load_np_array, inp=[x], Tout=tf.float32))
labels = tf.data.Dataset.list_files("StateData/Values/*", seed=seed)
labels = labels.map(lambda x: tf.py_function(func=load_np_array, inp=[x], Tout=tf.float32))
# Inputs are grouped as a tuple to match the model's two inputs; batch size 2.
final_data = tf.data.Dataset.zip(((imgs, turns), labels)).batch(2)
# Print every batch as a sanity check of the pipeline.
for i in final_data:
    print(i)

### Define the model

In [None]:
# Two inputs: the 24x24x11 board encoding and a single turn value.
img = Input(shape=(24,24,11), name="board_img")
turn = Input(shape=(1,), name="turn")

# Per the below project, not using maxpooling
# http://cs231n.stanford.edu/reports/2015/pdfs/ConvChess.pdf
# Three 5x5 convolutions with Keras' default padding and stride.
conv = Conv2D(32, kernel_size=(5,5), activation="relu", name="conv1")(img)
conv = Conv2D(64, kernel_size=(5,5), activation="relu", name="conv2")(conv)
conv = Conv2D(64, kernel_size=(5,5), activation="relu", name="conv3")(conv)
flat = Flatten()(conv)
# The turn value is appended to the flattened conv features before the dense
# layers.
concat = Concatenate()([flat, turn])
dense = Dense(64, activation="relu", name="dense1")(concat)
# sigmoid in last layer constricts value predictions to between 0 and 1
out = Dense(1, activation="sigmoid", name="dense")(dense)

model = Model(inputs=[img, turn], outputs=out)
model.summary()

optimizer = Adam(learning_rate=0.00001)
# metrics is passed as a list: that is the documented form and is accepted by
# every Keras version, whereas a bare string is not accepted everywhere.
model.compile(optimizer=optimizer, loss="mse", metrics=["mae"])

### Model fit

In [None]:
# Using logdir code from tensorflow page:
# https://www.tensorflow.org/tensorboard/tensorboard_in_notebooks
logdir = os.path.join("logs", datetime.datetime.now().strftime("%m%d-%H%M%S"))
model.fit(x=final_data, epochs=100, verbose=1,
          callbacks=[TensorBoard(logdir), ReduceLROnPlateau(factor=0.5, verbose=1)])

### Tensorboard

In [None]:
# Open TensorBoard inline, pointed at the "logs" directory that the training
# cell writes its run sub-directories into.
# NOTE(review): this magic normally requires `%load_ext tensorboard` to have
# been run first; no such call is visible in this notebook — confirm.
%tensorboard --logdir logs

### Prediction

In [None]:
# Rough wall-clock timing (in seconds) of one prediction pass over the whole
# dataset, including reading the .npy files through the tf.data pipeline.
from time import time
start=time()
model.predict(final_data)
print(time()-start)