In [None]:
import keras
from keras.layers import Input, Dense, Conv2D, Flatten, BatchNormalization, Activation
from keras.layers.merge import concatenate
from keras.models import Model
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from datetime import datetime
import sqlite3
from blosc import decompress
from msgpack import unpackb
import pandas as pd
import numpy as np
from scipy.stats import entropy

import matplotlib.pyplot as plt
%matplotlib inline

# Load data

In [None]:
# Open the SQLite file holding the recorded games; `con` is the connection
# handed to the positions/games query further down.
con = sqlite3.connect("games.sqlite")

In [None]:
def unpack_state(row):
    """Decode the packed ``board_state`` blob of one row into a float32 tensor.

    The blob is decompressed, unpacked, read as unsigned bytes, shaped to
    (8, 4, 8) and transposed (axes reversed). The slice at index 7 of the last
    axis is then divided by 100 (presumably a counter-like plane being scaled
    down -- TODO confirm against the code that writes the blobs).

    Returns a one-element list holding the resulting array.
    """
    raw = unpackb(decompress(row.board_state))
    planes = np.frombuffer(raw, np.uint8).reshape(8, 4, 8)
    arr = planes.T.astype(np.float32)
    arr[..., 7] /= 100
    return [arr]

In [None]:
def unpack_moves(row):
    """Decode the packed ``mcts_moves`` blob of one row into a float32 tensor.

    The decompressed, unpacked values are converted to float32, shaped to
    (4, 4, 8) and transposed (axes reversed), giving an (8, 4, 4) array.

    Returns a one-element list holding that array.
    """
    flat = np.array(unpackb(decompress(row.mcts_moves)), np.float32)
    return [flat.reshape(4, 4, 8).T]

In [None]:
# Load every stored position together with the outcome of the game it belongs
# to (implicit join of `positions` and `games` on the game id).
positions = pd.read_sql_query("select positions.*, games.outcome from positions, games where positions.game_id=games.id", con)
# Decode the packed blobs row by row. unpack_state / unpack_moves each return a
# one-element list; presumably the wrapping keeps pandas from spreading the
# array over several columns -- TODO confirm.
positions['state_tensor'] = positions.apply(unpack_state, axis=1)
positions['moves_tensor'] = positions.apply(unpack_moves, axis=1)
# Number of loaded positions, shown as the cell output.
len(positions)

In [None]:
# Show the first few rows to eyeball the joined columns and decoded tensors.
positions.head()

## Plot move probabilities

In [None]:
def get_board_coords(layer, vals=None):
    """Return plotting coordinates and values for the nonzero cells of ``layer``.

    ``layer`` is a 2-D array; ``vals`` (defaulting to ``layer`` itself) supplies
    the value reported for each nonzero cell.

    Returns ``(x, y, v)`` where ``x`` is the row index plus one, ``y`` is
    ``2 * column + 2`` when ``x`` is odd and ``2 * column + 1`` when ``x`` is
    even, and ``v`` holds ``vals`` at the nonzero positions. (Presumably this
    spreads a packed 4-column layout over an 8-column board -- TODO confirm.)
    """
    source = layer if vals is None else vals
    rows, cols = np.nonzero(layer)
    values = source[rows, cols]
    board_x = rows + 1
    # Odd rows get an extra offset of one: 2*c + 2 versus 2*c + 1 on even rows.
    board_y = 2 * cols + 1 + board_x % 2
    return board_x, board_y, values

In [None]:
def plot_moves(state_tensor, moves_tensor):
    """Draw the pieces of a position and its move probabilities on the current axes.

    Channels 0-3 of ``state_tensor`` are plotted as red circles, black circles,
    red stars and black stars respectively (presumably the two sides' ordinary
    and promoted pieces -- TODO confirm). Channels 0-3 of ``moves_tensor`` are
    drawn as arrows in the four diagonal directions for every cell whose value
    exceeds 0.001; the value sets the arrow width and colour and is printed
    next to the arrow.
    """
    piece_markers = ['ro', 'ko', 'r*', 'k*']
    for channel, marker in enumerate(piece_markers):
        rows, cols, _ = get_board_coords(state_tensor[:, :, channel])
        plt.plot(cols, rows, marker, markersize=20)

    step = 0.7
    colormap = plt.get_cmap('cool')
    # (row delta, column delta) for move channels 0..3.
    directions = [(-step, -step), (step, -step), (-step, step), (step, step)]
    for channel, (d_row, d_col) in enumerate(directions):
        channel_probs = moves_tensor[:, :, channel]
        rows, cols, weights = get_board_coords(channel_probs > 0.001, channel_probs)
        for row, col, weight in zip(rows, cols, weights):
            plt.arrow(col, row, d_col, d_row, head_width=weight, head_length=0.3,
                      width=weight, color=colormap(weight))
            plt.text(col + d_col/1.1, row + d_row/1.1, '{:.2f}'.format(weight), color='g')

    # Board grid: row 1 at the top, grid lines on the half-integers.
    plt.grid()
    plt.gca().invert_yaxis()
    tick_positions = np.arange(0.5, 9, 1)
    plt.xticks(tick_positions)
    plt.yticks(tick_positions)

In [None]:
# Draw three randomly chosen positions together with their MCTS move probabilities.
picked = positions.sample(3)
for state_cell, moves_cell in zip(picked.state_tensor, picked.moves_tensor):
    plot_moves(state_cell[0], moves_cell[0])
    plt.show()

## Train/validate split

In [None]:
# Hold out every game whose id is divisible by 5 for validation, so that all
# positions of one game end up on the same side of the split.
# The key is computed once for the whole column; the previous key function did
# a `positions.loc[label]` row lookup per index label, which is far slower and
# only works when the index labels are unique.
is_validation = (positions.game_id % 5 == 0).values
split = positions.groupby(is_validation)
val = split.get_group(True)
train = split.get_group(False)

In [None]:
def extract_tensors(dataframe, random_state=None):
    """Shuffle ``dataframe`` and stack its rows into arrays for training.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Needs ``state_tensor`` and ``moves_tensor`` columns whose cells are
        one-element lists wrapping an array, plus an ``outcome`` column.
    random_state : int, numpy.random.RandomState or None, optional
        Forwarded to ``DataFrame.sample``. ``None`` (the default) keeps the
        unseeded shuffle; pass a value to get a reproducible row order.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, outcomes, mcts_probs)``, all in the same shuffled row order.
        ``inputs`` stacks the state tensors, ``outcomes`` holds the ``outcome``
        column and ``mcts_probs`` stacks the flattened move tensors.
    """
    shuffled = dataframe.sample(frac=1, random_state=random_state)
    # Read whole columns instead of materialising a Series per row via iterrows().
    inputs = np.array([cell[0] for cell in shuffled['state_tensor']])
    outcomes = np.array(shuffled['outcome'])
    mcts_probs = np.array([cell[0].ravel() for cell in shuffled['moves_tensor']])
    return inputs, outcomes, mcts_probs

In [None]:
# Turn each split into shuffled (inputs, outcomes, move-probability) arrays.
val_in, val_outcomes, val_probs = extract_tensors(val)
train_in, train_outcomes, train_probs = extract_tensors(train)

# Outcome model

In [None]:
# Input shaped like a single decoded board-state tensor (taken from the row labelled 0).
start = Input(shape=positions.loc[0].state_tensor[0].shape)

# Trunk: 100 blocks. Each block runs a 4-filter 3x3 ReLU convolution followed by
# batch normalisation on the block input, then concatenates the result with
# that input (default axis), so the tensor keeps growing from block to block.
x = start
for i in range(100):
    prev_layer = x
    x = Conv2D(4, (3, 3), padding='same', activation='relu')(prev_layer)
    x = BatchNormalization()(x)
    x = concatenate([prev_layer, x])
    
# Head: 1x1 convolution to 64 filters, flatten, three 64-unit ReLU layers and
# a single sigmoid unit as the predicted outcome.
x = Conv2D(64, (1, 1), padding='same', activation='relu')(x)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)

outcome_model = Model(start, x)

In [None]:
# TensorBoard logs and checkpoints of this run share one timestamped directory.
savedir = 'logs-outcome/' + str(datetime.now())
tbcb = TensorBoard(
    log_dir=savedir,
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)
# Checkpoint every 5 epochs, regardless of whether the epoch was the best so far.
mccb = ModelCheckpoint(
    savedir + '/model.{epoch:04d}-{loss:.4f}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    save_best_only=False,
    period=5,
)
# Multiply the learning rate by 0.1 once the training loss has plateaued for 4 epochs.
redlr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=4, cooldown=1, verbose=1)
callbacks = [tbcb, mccb, redlr]

outcome_model.compile(
    loss='mean_squared_error',
    optimizer=keras.optimizers.RMSprop(2e-4),
    metrics=['mean_absolute_error'],
)
outcome_model.fit(
    train_in,
    train_outcomes,
    batch_size=1024,
    epochs=100,
    verbose=1,
    callbacks=callbacks,
    validation_data=(val_in, val_outcomes),
)

# MCTS probability model

In [None]:
# Input shaped like a single decoded board-state tensor (taken from the row labelled 0).
start = Input(shape=positions.loc[0].state_tensor[0].shape)

# Trunk: 100 blocks. Each block runs a 4-filter 3x3 ReLU convolution followed by
# batch normalisation on the block input, then concatenates the result with
# that input (default axis), so the tensor keeps growing from block to block.
x = start
for i in range(100):
    prev_layer = x
    x = Conv2D(4, (3, 3), padding='same', activation='relu')(prev_layer)
    x = BatchNormalization()(x)
    x = concatenate([prev_layer, x])
    
# Head: 1x1 convolutions down to 4 channels, flattened and normalised with a
# softmax over all flattened entries, giving one probability per entry.
# NOTE(review): the 4-filter convolution feeding the softmax uses ReLU, so the
# softmax inputs can never be negative -- confirm this is intended rather than
# a linear activation.
x = Conv2D(64, (1, 1), padding='same', activation='relu')(x)
x = Conv2D(4, (1, 1), padding='same', activation='relu')(x)
x = Flatten()(x)
x = Activation('softmax')(x)

prob_model = Model(start, x)

In [None]:
# TensorBoard logs and checkpoints of this run share one timestamped directory.
savedir = 'logs-probs/' + str(datetime.now())
tbcb = TensorBoard(
    log_dir=savedir,
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)
# Checkpoint every 5 epochs, regardless of whether the epoch was the best so far.
mccb = ModelCheckpoint(
    savedir + '/model.{epoch:04d}-{loss:.4f}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    save_best_only=False,
    period=5,
)
# Multiply the learning rate by 0.1 once the training loss has plateaued for 4 epochs.
redlr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=4, cooldown=1, verbose=1)
callbacks = [tbcb, mccb, redlr]

prob_model.compile(
    loss='kullback_leibler_divergence',
    optimizer=keras.optimizers.RMSprop(2e-4),
)
prob_model.fit(
    train_in,
    train_probs,
    batch_size=1024,
    epochs=100,
    verbose=1,
    callbacks=callbacks,
    validation_data=(val_in, val_probs),
)

## Plot expected vs predicted probabilities on validation set

In [None]:
# For up to 100 validation positions, show the MCTS target distribution (left)
# next to the distribution predicted by prob_model (right).
# The range is capped so that a validation set with fewer than 100 rows does
# not raise IndexError.
for i in range(min(100, len(val_in))):
    vi = val_in[i]
    vp = np.reshape(val_probs[i], (8, 4, 4))
    # Use the probability network trained above; the bare name `model` is not
    # defined anywhere in the visible notebook and fails on a fresh kernel.
    vpp = np.reshape(prob_model.predict(vi[np.newaxis]), (8, 4, 4))

    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plot_moves(vi, vp)
    plt.subplot(1, 2, 2)
    plot_moves(vi, vpp)
    # scipy's entropy(p, q) computes the KL divergence between p and q.
    plt.title("KL divergence {:.3f}".format(entropy(vp.ravel(), vpp.ravel())))
    plt.show()

# Combined outcome/probability model

In [None]:
# Input shaped like a single decoded board-state tensor (taken from the row labelled 0).
start = Input(shape=positions.loc[0].state_tensor[0].shape)

# Shared trunk: 100 blocks. Each block runs a 4-filter 3x3 ReLU convolution
# followed by batch normalisation on the block input, then concatenates the
# result with that input (default axis).
x = start
for i in range(100):
    prev_layer = x
    x = Conv2D(4, (3, 3), padding='same', activation='relu')(prev_layer)
    x = BatchNormalization()(x)
    x = concatenate([prev_layer, x])

# Both heads below branch off this tensor.
common_output = x

# Move head
# 1x1 convolutions down to 4 channels, flattened and passed through a softmax.
# NOTE(review): the convolution feeding the softmax uses ReLU, so the softmax
# inputs can never be negative -- confirm this is intended.
x = Conv2D(64, (1, 1), padding='same', activation='relu')(common_output)
x = Conv2D(4, (1, 1), padding='same', activation='relu')(x)
x = Flatten()(x)
move_probs = Activation('softmax', name='move_probs')(x)

# Outcome head
# 1x1 convolution, flatten, three 64-unit ReLU layers and a single sigmoid unit.
x = Conv2D(64, (1, 1), padding='same', activation='relu')(common_output)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
outcome = Dense(1, activation='sigmoid', name='outcome')(x)

# Two outputs, in this order: move probabilities, then outcome. The layer names
# 'move_probs' and 'outcome' are the keys used when compiling the model.
combined_model = Model(start, [move_probs, outcome])

In [None]:
# Render the combined model's layer graph, including tensor shapes, inline as SVG.
graph = model_to_dot(combined_model, show_shapes=True)
SVG(graph.create(prog='dot', format='svg'))

In [None]:
# TensorBoard logs and checkpoints of this run share one timestamped directory.
savedir = 'logs-combined/' + str(datetime.now())
tbcb = TensorBoard(
    log_dir=savedir,
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)
# Checkpoint every 5 epochs, regardless of whether the epoch was the best so far.
mccb = ModelCheckpoint(
    savedir + '/model.{epoch:04d}-{loss:.4f}-{val_loss:.4f}.hdf5',
    monitor='val_loss',
    save_best_only=False,
    period=5,
)
# Multiply the learning rate by 0.1 once the training loss has plateaued for 4 epochs.
redlr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=4, cooldown=1, verbose=1)
callbacks = [tbcb, mccb, redlr]

# Each head has its own loss; the two are weighted equally in the total.
combined_model.compile(
    loss={'move_probs': 'kullback_leibler_divergence', 'outcome': 'mean_squared_error'},
    loss_weights={'move_probs': 0.5, 'outcome': 0.5},
    optimizer=keras.optimizers.RMSprop(2e-4),
)
combined_model.fit(
    train_in,
    [train_probs, train_outcomes],
    batch_size=1024,
    epochs=100,
    verbose=1,
    callbacks=callbacks,
    validation_data=(val_in, [val_probs, val_outcomes]),
)