In [124]:
import math
import numpy as np

from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, Reshape, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint

from gpaulo.game import suggest
from gpaulo.train import mutate_data

In [132]:
# load data
# np.load on an .npz archive returns an NpzFile that keeps the underlying
# file open; the context manager closes it as soon as the 'data' array has
# been read into memory.
with np.load('data/tictactoe.npz') as archive:
    data = archive['data']

# strategy data
# Hand-written (board, target) pairs; both halves list the 9 cells row by row.
# Every target marks empty cells where a `1` piece should be placed next, so
# `1` appears to be the side to move and `-1` the opponent
# (NOTE(review): confirm against gpaulo.game.suggest).
FAVS_REPEATS = 150  # each row is repeated this many times in the training set

favs = np.array([
    # center start
    [[0,0,0, 0,-1,0, 0,0,0], [0,0,0, 0,0,0, 0,0,1]],
    [[0,0,-1, 0,1,0, 0,0,0], [1,0,0, 0,0,0, 0,0,1]],
    [[1,0,-1, 0,1,0, 0,0,-1], [0,0,0, 0,0,1, 0,0,0]],
    [[1,0,-1, -1,1,1, 0,0,-1], [0,0,0, 0,0,0, 0,1,0]],

    # my strategy (classic)
    [[0,0,0, 0,-1,0, 1,0,0], [0,0,1, 0,0,0, 0,0,0]],
    [[0,0,1, 0,-1,0, 1,0,-1], [1,0,0, 0,0,0, 0,0,0]],
    [[-1,0,1, 0,-1,0, 1,0,0], [0,0,0, 0,0,0, 0,0,1]],

    [[1,0,1, -1,-1,0, 1,0,-1], [0,1,0, 0,0,0, 0,0,0]],
    [[1,-1,1, 0,-1,0, 1,0,-1], [0,0,0, 1,0,0, 0,0,0]],
    [[-1,0,1, 0,-1,-1, 1,0,1], [0,0,0, 0,0,0, 0,1,0]],
    [[-1,0,1, 0,-1,0, 1,-1,1], [0,0,0, 0,0,1, 0,0,0]],

    # my strategy (reverse corner)
    [[0,0,-1, 0,0,0, 1,0,0], [1,0,0, 0,0,0, 0,0,1]],
    [[1,0,-1, -1,0,0, 1,0,0], [0,0,0, 0,0,0, 0,0,1]],
    [[0,0,-1, 0,0,0, 1,-1,1], [1,0,0, 0,0,0, 0,0,0]],

    # fork follow-ups: the opponent has already taken one of the two winning
    # cells, so the target is only the winning cell that is still empty
    [[1,0,-1, -1,0,0, 1,-1,1], [0,0,0, 0,1,0, 0,0,0]],
    [[1,0,-1, 0,-1,0, 1,-1,1], [0,0,0, 1,0,0, 0,0,0]],
    [[1,0,-1, -1,0,0, 1,-1,1], [0,0,0, 0,1,0, 0,0,0]],
    [[1,0,-1, -1,-1,0, 1,0,1], [0,0,0, 0,0,0, 0,1,0]],

    # my strategy (another corners)
    [[0,0,0, 0,0,0, 1,0,-1], [0,0,1, 0,0,0, 0,0,0]],
    [[-1,0,0, 0,0,0, 1,0,0], [0,0,1, 0,0,0, 0,0,0]],
    [[-1,0,1, 0,-1,0, 1,0,0], [0,0,0, 0,0,0, 0,0,1]],
    [[0,0,1, 0,-1,0, 1,0,-1], [1,0,0, 0,0,0, 0,0,0]],

    [[1,-1,1, 0,-1,0, 1,0,-1], [0,0,0, 1,0,0, 0,0,0]],
    [[1,0,1, -1,-1,0, 1,0,-1], [0,1,0, 0,0,0, 0,0,0]],
    [[-1,0,1, 0,-1,-1, 1,0,1], [0,0,0, 0,0,0, 0,1,0]],
    [[-1,0,1, 0,-1,0, 1,-1,1], [0,0,0, 0,0,1, 0,0,0]],

    [[-1,-1,1, 0,0,0, 1,0,0], [0,0,0, 0,1,0, 0,0,0]],
    [[-1,0,1, 0,0,0, 1,0,-1], [0,0,0, 0,1,0, 0,0,0]],

    # my strategy (another cells)
    [[0,0,0, 0,0,0, 1,-1,0], [0,0,0, 0,1,0, 0,0,0]],
    [[0,0,0, 0,0,-1, 1,0,0], [0,0,0, 0,1,0, 0,0,0]],
    [[0,0,0, -1,0,0, 1,0,0], [0,0,0, 0,1,0, 0,0,0]],
    [[0,-1,0, 0,0,0, 1,0,0], [0,0,0, 0,1,0, 0,0,0]],

    [[0,0,-1, 0,1,0, 1,-1,0], [1,0,0, 0,0,0, 0,0,0]],
    [[0,0,-1, 0,1,-1, 1,0,0], [0,0,0, 0,0,0, 0,0,1]],
    [[0,0,-1, -1,1,0, 1,0,0], [0,0,0, 0,0,0, 0,0,1]],
    [[0,-1,-1, 0,1,0, 1,0,0], [1,0,0, 0,0,0, 0,0,0]],

    [[1,0,-1, -1,1,0, 1,-1,0], [0,0,0, 0,0,0, 0,0,1]],
    [[1,-1,-1, -1,1,0, 1,0,0], [0,0,0, 0,0,0, 0,0,1]],
    [[1,0,-1, 0,1,0, 1,-1,-1], [0,0,0, 1,0,0, 0,0,0]],
    [[1,-1,-1, 0,1,0, 1,0,-1], [0,0,0, 1,0,0, 0,0,0]],
    [[-1,0,-1, 0,1,-1, 1,0,1], [0,0,0, 0,0,0, 0,1,0]],
    [[-1,0,-1, -1,1,0, 1,0,1], [0,0,0, 0,0,0, 0,1,0]],
    [[0,0,-1, 0,1,-1, 1,-1,1], [1,0,0, 0,0,0, 0,0,0]],
    [[0,0,-1, -1,1,0, 1,-1,1], [1,0,0, 0,0,0, 0,0,0]],
])

# Sanity check: a target must never point at a cell that is already taken.
assert not ((favs[:, 0] != 0) & (favs[:, 1] != 0)).any(), \
    "favs contains a target on an occupied cell"

# The optional mutate_data pass is left disabled, as before.
# favs = mutate_data(favs)
favs = np.concatenate([favs] * FAVS_REPEATS)

# fixes
# Extra (board, target) pairs in the same 9-cell layout as the strategy rows.
# The 22 rows below are stacked 150 times, one full copy after another.
# The mutate_data(fixes) pass stays disabled; the original note says leaving
# it commented out is what yields 98.51%.
fixes = np.tile(
    np.array([
        ([1,0,0, -1,1,0, -1,0,-1], [0,0,0, 0,0,0, 0,1,0]),
        ([0,0,0, -1,-1,0, 0,0,1], [0,0,0, 0,0,1, 0,0,0]),
        ([1,-1,-1, 0,1,0, 0,0,-1], [0,0,0, 0,0,1, 0,0,0]),
        ([0,0,0, -1,1,0, -1,1,0], [1,0,0, 0,0,0, 0,0,0]),
        ([0,-1,-1, 0,1,0, 1,0,0], [1,0,0, 0,0,0, 0,0,0]),
        ([0,0,1, 0,1,-1, -1,0,-1], [0,0,0, 0,0,0, 0,1,0]),
        ([0,0,-1, 0,1,-1, 1,-1,0], [0,0,0, 0,0,0, 0,0,1]),
        ([0,0,0, 0,-1,-1, 0,0,1], [0,0,0, 1,0,0, 0,0,0]),

        # counter strategy
        ([0,0,0, 0,0,0, -1,0,0], [0,0,0, 0,1,0, 0,0,0]),
        ([0,0,-1, 0,0,0, 0,0,0], [0,0,0, 0,1,0, 0,0,0]),
        ([-1,0,0, 0,0,0, 0,0,0], [0,0,0, 0,1,0, 0,0,0]),
        ([0,0,0, 0,0,0, 0,0,-1], [0,0,0, 0,1,0, 0,0,0]),

        ([-1,0,0, 0,1,0, 0,0,-1], [0,1,0, 1,0,1, 0,1,0]),
        ([0,0,-1, 0,1,0, -1,0,0], [0,1,0, 1,0,1, 0,1,0]),

        ([-1,1,0, 0,1,0, 0,-1,-1], [0,0,0, 0,0,0, 1,0,0]),
        ([-1,0,0, 1,1,-1, 0,0,-1], [0,0,1, 0,0,0, 0,0,0]),
        ([-1,0,0, -1,1,1, 0,0,-1], [0,0,0, 0,0,0, 1,0,0]),
        ([-1,-1,0, 0,1,0, 0,1,-1], [0,0,1, 0,0,0, 0,0,0]),

        ([-1,1,-1, 0,1,0, 1,-1,-1], [0,0,0, 0,0,1, 0,0,0]),
        ([-1,0,1, 1,1,-1, -1,0,-1], [0,0,0, 0,0,0, 0,1,0]),
        ([-1,0,-1, -1,1,1, 1,0,-1], [0,1,0, 0,0,0, 0,0,0]),
        ([-1,-1,1, 0,1,0, -1,1,-1], [0,0,0, 1,0,0, 0,0,0]),
    ]),
    (150, 1, 1),
)

# merge data
# Stack the loaded dataset with the repeated strategy and fix rows.
data = np.concatenate([data, favs, fixes])

# Shuffle with a dedicated, seeded generator so the row order and the
# validation sample are the same on every run. Indexing with a permutation
# builds a new array instead of reordering `data` in place.
SPLIT_SEED = 42
split_rng = np.random.RandomState(SPLIT_SEED)
data = data[split_rng.permutation(data.shape[0])]

# get train data
# Column 0 of each row is the board, column 1 is the target.
train_x, train_y = data[:, 0], data[:, 1]

# validation data
# A random 10% of the rows (rounded up), selected by index so the training
# arrays above keep their order.
# NOTE(review): these rows are also part of train_x / train_y, so validation
# metrics measure fit on seen data, not generalization. Switch to a disjoint
# hold-out if that is what val_acc is meant to report.
nval = math.ceil(data.shape[0] * .1)
val_rows = split_rng.choice(data.shape[0], size=nval, replace=False)
test_x, test_y = data[val_rows, 0], data[val_rows, 1]

In [133]:
# build neural network
# Fully connected stack: 9 inputs -> 9 (no activation) -> 27 -> 81 -> 27 -> 9,
# with 10% dropout after the widest layer. The sigmoid output paired with
# binary cross-entropy scores each of the 9 output cells independently.
nn = Sequential([
    Dense(9, input_dim=9),
    Dense(27, activation="tanh"),
    Dense(81, activation="tanh"),
    Dropout(.1),
    Dense(27, activation="tanh"),
    Dense(9, activation="sigmoid"),
])

nn.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=['accuracy'],
)
nn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_139 (Dense)            (None, 9)                 90        
_________________________________________________________________
dense_140 (Dense)            (None, 27)                270       
_________________________________________________________________
dense_141 (Dense)            (None, 81)                2268      
_________________________________________________________________
dropout_19 (Dropout)         (None, 81)                0         
_________________________________________________________________
dense_142 (Dense)            (None, 27)                2214      
_________________________________________________________________
dense_143 (Dense)            (None, 9)                 252       
Total params: 5,094
Trainable params: 5,094
Non-trainable params: 0
_________________________________________________________________


In [None]:
# checkpoint and early stop
# Both callbacks watch validation accuracy (higher is better):
# the checkpoint rewrites weights.best.hdf5 whenever it improves, and
# training stops after 50 epochs without a new best.
best_checkpoint = ModelCheckpoint(
    "weights.best.hdf5",
    monitor="val_acc",
    verbose=1,
    save_best_only=True,
    mode="max",
)
early_stop = EarlyStopping(monitor="val_acc", patience=50, mode="max")
callbacks = [best_checkpoint, early_stop]

# train network
nn.fit(
    train_x,
    train_y,
    epochs=500,
    batch_size=32,
    validation_data=(test_x, test_y),
    callbacks=callbacks,
)

# evaluate the network
# Reports the first compiled metric on the training set, as a percentage.
scores = nn.evaluate(train_x, train_y)
metric_name, metric_value = nn.metrics_names[1], scores[1]
print("\n%s: %.2f%%" % (metric_name, metric_value*100))

Train on 29583 samples, validate on 2959 samples
Epoch 1/500

Epoch 00001: val_acc improved from -inf to 0.87158, saving model to weights.best.hdf5
Epoch 2/500

Epoch 00002: val_acc improved from 0.87158 to 0.88791, saving model to weights.best.hdf5
Epoch 3/500

Epoch 00003: val_acc improved from 0.88791 to 0.90173, saving model to weights.best.hdf5
Epoch 4/500

Epoch 00004: val_acc improved from 0.90173 to 0.90811, saving model to weights.best.hdf5
Epoch 5/500

Epoch 00005: val_acc improved from 0.90811 to 0.91743, saving model to weights.best.hdf5
Epoch 6/500

Epoch 00006: val_acc improved from 0.91743 to 0.92208, saving model to weights.best.hdf5
Epoch 7/500

Epoch 00007: val_acc improved from 0.92208 to 0.92584, saving model to weights.best.hdf5
Epoch 8/500

Epoch 00008: val_acc improved from 0.92584 to 0.92820, saving model to weights.best.hdf5
Epoch 9/500

Epoch 00009: val_acc improved from 0.92820 to 0.93380, saving model to weights.best.hdf5
Epoch 10/500

Epoch 00010: val_acc i


Epoch 00037: val_acc did not improve from 0.96346
Epoch 38/500

Epoch 00038: val_acc improved from 0.96346 to 0.96403, saving model to weights.best.hdf5
Epoch 39/500

Epoch 00039: val_acc improved from 0.96403 to 0.96489, saving model to weights.best.hdf5
Epoch 40/500

Epoch 00040: val_acc improved from 0.96489 to 0.96493, saving model to weights.best.hdf5
Epoch 41/500

Epoch 00041: val_acc improved from 0.96493 to 0.96594, saving model to weights.best.hdf5
Epoch 42/500

Epoch 00042: val_acc did not improve from 0.96594
Epoch 43/500

Epoch 00043: val_acc improved from 0.96594 to 0.96696, saving model to weights.best.hdf5
Epoch 44/500

Epoch 00044: val_acc improved from 0.96696 to 0.96729, saving model to weights.best.hdf5
Epoch 45/500

Epoch 00045: val_acc did not improve from 0.96729
Epoch 46/500

Epoch 00046: val_acc improved from 0.96729 to 0.96744, saving model to weights.best.hdf5
Epoch 47/500

Epoch 00047: val_acc did not improve from 0.96744
Epoch 48/500

Epoch 00048: val_acc i

In [131]:
# Probe position written as a 3x3 grid and flattened to the 9-cell layout
# used by the training rows: -1 in the center, 1 in the bottom-left corner.
board = np.array([
    [0,  0, 0],
    [0, -1, 0],
    [1,  0, 0],
]).flatten()
suggestion = suggest(nn, board)
print(suggestion)

(0, 2)


In [None]:
# Persist the network in its current state to an HDF5 file.
nn.save(filepath="model2.h5")