In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from data_generation import DenseGenerator, ChessPositionGen
import keras_tuner as kt

import datetime
%load_ext tensorboard

We'll start with the same setup as in `full-puzzle-model.ipynb`

In [2]:
# Setting paramaters on early stopping
earlystop = EarlyStopping(monitor='val_loss',
                          min_delta=0,
                          patience=5,
                          verbose=1,
                          mode='min',
                          restore_best_weights=True)

log_dir = "logs/fit/tuned" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)


In [3]:
# Memory management, likely not necessary, but used as a safety as per the documentation recommendations on using GPUS

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)


1 Physical GPUs, 1 Logical GPUs


In [4]:
train = pd.read_csv('fens/train.csv')
val = pd.read_csv('fens/val.csv')

In [5]:
train_dense_gen = DenseGenerator(train, batch_size=1024)
val_dense_gen = DenseGenerator(val, batch_size=1024)

In [5]:
train_tune_gen = ChessPositionGen(train, batch_size=512)
val_tune_gen = ChessPositionGen(val, batch_size=512)

## Multi Layer Perceptron model for comparison with the CNN versions

This model uses basic densely connected layers and uses a slight variation of the previously used data generator with no reshaping of the array.

In [6]:
dense_model = models.Sequential()
dense_model.add(layers.Dense(832, input_shape=(832,), activation='relu'))
dense_model.add(layers.Dense(64, activation='relu'))
dense_model.add(layers.Dense(64, activation='relu'))
dense_model.add(layers.Dense(1, activation='sigmoid'))
dense_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['acc'])
dense_model.summary()

# Fitting the model
dense_history = dense_model.fit(x=train_dense_gen,
                    validation_data=val_dense_gen,
                    # steps_per_epoch=100,
                    epochs=15,
                    callbacks=[earlystop, tensorboard_callback]
                    )

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 832)               693056    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                53312     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 750,593
Trainable params: 750,593
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [9]:
# dense_model.save('MLPmodel-Long-PB')

INFO:tensorflow:Assets written to: DenseModel-PB/assets


Initially this model was only trained for 15 epochs, then when it appeared that it might be roughly comparable with the CNN model, it was trained for another 15 (early stopped after 11, 26 epochs in total) for a more direct comparison.

In [10]:
dense_history = dense_model.fit(x=train_dense_gen,
                    validation_data=val_dense_gen,
                    epochs=15,
                    callbacks=[earlystop, tensorboard_callback])

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Restoring model weights from the end of the best epoch.
Epoch 00011: early stopping


In [12]:
# dense_model.save('MLPmodel-Long.h5')

## Autotuning

For further information on the Keras autotuner, consult the documentation.

In [6]:
def model_builder(hp):
    """
    Autotuner modeling function, based off the CNN model from the full-puzzle-model notebook.
    """
    model = models.Sequential()

    # Set Convolutional layer parameters
    hp_filters = hp.Int('filters', min_value=16, max_value=128, step=8)
    hp_ksize = hp.Int('kernel_size', min_value=2, max_value=8, step=2)

    model.add(layers.Conv2D(filters=hp_filters, kernel_size=hp_ksize, padding='same', input_shape=(8,8,13), activation='relu'))
    model.add(layers.MaxPooling2D(2))
    model.add(layers.Conv2D(filters=hp_filters, kernel_size=hp_ksize, padding='same', activation='relu'))
    model.add(layers.Flatten())

    # Add dense tuning parameters
    hp_units = hp.Int('units', min_value=16, max_value=128, step=8)
    model.add(layers.Dense(units=hp_units, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['acc'])

    return model

tuner = kt.Hyperband(model_builder, objective='val_acc', max_epochs=10, directory='tuner', project_name='CNN_tuning')


With the `model_builder` function and tuner created, let's start the search. The steps per epoch have been reduced in order to finish searching in a reasonable amount of time.

In [7]:
tuner.search(x=train_tune_gen, validation_data=val_tune_gen, steps_per_epoch=1000, callbacks=[earlystop])

best_hps=tuner.get_best_hyperparameters()[0]
print(best_hps)

Trial 30 Complete [02h 09m 36s]
val_acc: 0.8775654435157776

Best val_acc So Far: 0.8832182884216309
Total elapsed time: 02h 04m 53s
INFO:tensorflow:Oracle triggered exit
<keras_tuner.engine.hyperparameters.HyperParameters object at 0x7f4850260a90>


With parameters found, we can now train our best model.

In [16]:
tuned_model = tuner.get_best_models(num_models=1)[0]
tuned_model.build()
tuned_model.summary()

tuned_history = tuned_model.fit(x=train_tune_gen,
                    validation_data=val_tune_gen,
                    epochs=30,
                    callbacks=[earlystop, tensorboard_callback])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 8, 8, 120)         99960     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 4, 4, 120)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 4, 4, 120)         921720    
_________________________________________________________________
flatten (Flatten)            (None, 1920)              0         
_________________________________________________________________
dense (Dense)                (None, 96)                184416    
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 97        
Total params: 1,206,193
Trainable params: 1,206,193
Non-trainable params: 0
______________________________________________

In [18]:
# tuned_model.save("tuned_model-PB")

INFO:tensorflow:Assets written to: tuned_model-PB/assets
