### Train a Policy Network

This notebook trains a policy network, which can predict next move from current features.

In [1]:
import glob
import os
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Activation, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import load_model

from visualization import SvgGoBoard
from train_utils import ParseForPolicy, ParseForPolicy, CreateDataset

tf.enable_eager_execution()

In [5]:
BOARD_SIZE = 19
NUM_CHANNELS = 7
BATCH_SIZE = 64
SHUFFLE_BUFFER = 12800


def BuildModel():
    input = Input(shape=(BOARD_SIZE, BOARD_SIZE, NUM_CHANNELS), name="go_input")
    x = Conv2D(filters=64, kernel_size=(7,7), activation="relu", padding="same",
               data_format='channels_last')(input)
    x = Conv2D(filters=64, kernel_size=(7,7), activation="relu", padding="same",
               data_format='channels_last')(x)
    x = Conv2D(filters=32, kernel_size=(5,5), activation="relu", padding="same",
               data_format='channels_last')(x)
    x = Conv2D(filters=32, kernel_size=(5,5), activation="relu", padding="same",
               data_format='channels_last')(x)
    x = Dropout(0.5)(x)
    x = Conv2D(filters=32, kernel_size=(5,5), activation="relu", padding="same",
               data_format='channels_last')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(512)(x)
    x = Dense(BOARD_SIZE*BOARD_SIZE)(x)
    policy_output = Activation("softmax", name="policy_output")(x)

    model = keras.models.Model(inputs=input, outputs=[policy_output])
    return model

In [6]:
def CompileModel(model):
    model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    print(model.summary())
    return model

In [7]:
model = BuildModel()
model = CompileModel(model)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
go_input (InputLayer)        (None, 19, 19, 7)         0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 19, 19, 64)        22016     
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 19, 19, 64)        200768    
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 19, 19, 32)        51232     
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 19, 19, 32)        25632     
_________________________________________________________________
dropout_2 (Dropout)          (None, 19, 19, 32)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 19, 19, 32)        25632     
__________

In [8]:
train_set = CreateDataset(glob.glob("/home/tc/SGF/rio/training/data-0000.rio"), BATCH_SIZE)
train_set = train_set.shuffle(buffer_size=SHUFFLE_BUFFER)

In [9]:
model.fit(train_set, epochs=10, steps_per_epoch=1000)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f72502fe128>

In [10]:
test_set = CreateDataset(["/home/tc/SGF/rio/test/data-0000.rio"], BATCH_SIZE)
loss, acc = model.evaluate(test_set, steps=1000)
print("test loss: %f" % loss)
print("test accuracy: %f" % acc)

test loss: 5.665090
test accuracy: 0.008234
