In [1]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import tensorflow.keras.regularizers as reg
import keras_tuner as kt
import numpy as np

import kaggle_functions as kaggle
import importlib
importlib.reload(kaggle);

In [2]:
train_dataset, valid_dataset, valid_labels = kaggle.load_train_as_dataset()
x_test_real = kaggle.load_test_set()

batch_size = 128
epoch_length = int(len(train_dataset) / batch_size)
train_dataset_augmented = kaggle.augment_dataset(train_dataset, batch_size)

model_number = 'model8'

### VGG-style with residual blocks

In [7]:
class VGGRes(kt.HyperModel):
    def residual_module(self, input, filters, stride=1, bottleneck=0, l2_reg=0.00001, batch_norm=0.99):
        # Applies bottleneck if necessary, to reduce dimensions
        if bottleneck > 0:
            conv_0 = layers.Conv2D(
                bottleneck, kernel_size=(1,1),
                padding='same', activation='relu',
                kernel_regularizer=reg.l2(l2_reg), bias_regularizer=reg.l2(l2_reg),
                kernel_initializer='he_normal')(input)
        else:
            bottleneck = filters
            conv_0 = input

        # Applies relu convolution, then linear convolution before shortcut
        conv_1 = layers.Conv2D(
            bottleneck, kernel_size=(3,3), strides=(stride, stride),
            padding='same', activation='relu',
            kernel_regularizer=reg.l2(l2_reg), bias_regularizer=reg.l2(l2_reg),
            kernel_initializer='he_normal')(conv_0)
        conv_2 = layers.Conv2D(
            filters, kernel_size=(3,3), 
            padding='same', activation='linear',
            kernel_regularizer=reg.l2(l2_reg), bias_regularizer=reg.l2(l2_reg),
            kernel_initializer='he_normal')(conv_1)
        
        # Ensures shortcut is correct depth by adding a 1x1 convolution
        if input.shape[-1] != filters:
            shortcut = layers.Conv2D(
                filters, kernel_size=(1,1), strides=(stride,stride),
                padding='same', activation='relu',
                kernel_regularizer=reg.l2(l2_reg), bias_regularizer=reg.l2(l2_reg),
                kernel_initializer='he_normal')(input)
        else:
            shortcut = input

        # Adds shortcut
        addition = layers.add([conv_2, shortcut])

        # Batch Norm is performed in the original paper
        addition = layers.BatchNormalization(momentum=batch_norm)(addition)

        activation = layers.Activation('relu')(addition)
        return activation

    def conv_layer(self, input, filters, stride=1, kernel=3, l2_reg=0, padding='same'):
        return layers.Conv2D(
            filters, kernel_size=(kernel,kernel), strides=(stride,stride), 
            padding=padding, activation='relu',
            kernel_regularizer=reg.l2(l2_reg), bias_regularizer=reg.l2(l2_reg),
            kernel_initializer='he_uniform')(input)

    def build(self, hyperparameters):
        # Tunable hyperparameters
        if hyperparameters is not None: 
            dense_l2_reg = hyperparameters.Float('dense_l2_reg', 0.00001, 0.001, sampling='log')
            dense_dropout = hyperparameters.Float('dense_dropout', 0.3, 0.5, step=0.1)
        else:
            dense_l2_reg = 0.001
            dense_dropout = 0.3

        # Fixed hyperparameters
        learning_rate = 0.001

        input_layer = layers.Input(shape=(96, 96, 1))

        output = self.conv_layer(input_layer, 32, stride=2)
        output = self.conv_layer(output, 32)
        output = layers.BatchNormalization()(output)
        
        output = self.conv_layer(output, 64, stride=2)
        output = self.conv_layer(output, 64)
        output = layers.BatchNormalization()(output)

        output = self.residual_module(output, 128, stride=2)
        output = self.residual_module(output, 128)

        output = self.residual_module(output, 256, stride=2, bottleneck=128)
        output = self.residual_module(output, 256, bottleneck=128)

        # Final output
        output = layers.Flatten()(output)
        output = layers.Dropout(dense_dropout / 2)(output)
        output = layers.Dense(
            128, activation='relu', kernel_initializer='he_uniform',
            kernel_regularizer=keras.regularizers.l2(dense_l2_reg),
            bias_regularizer=keras.regularizers.l2(dense_l2_reg))(output)
        output = layers.Dropout(dense_dropout)(output) 
        output = layers.Dense(
            11, kernel_regularizer=keras.regularizers.l2(dense_l2_reg),
            bias_regularizer=keras.regularizers.l2(dense_l2_reg))(output)

        model = keras.models.Model(inputs=input_layer, outputs=output)

        # Create model
        model.compile(
            optimizer=keras.optimizers.Nadam(learning_rate),
            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'])

        return model

# VGGRes().build(None).summary()

In [8]:
# Hyperparameter tuning
# Took 5hrs for 8 trials with 250 epochs and LR decrease

reload_tuner = False
tuner_filepath = 'hypertuner_2021-11-28'

tuner_callbacks = [
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=20),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=20, min_lr=0.00001)
]

tuner = kt.RandomSearch(VGGRes(),
    objective='val_accuracy',
    max_trials=8,
    seed=10,
    directory=f'models/{model_number}',
    project_name=tuner_filepath,
    overwrite=(not reload_tuner))

tuner.search_space_summary()

if reload_tuner:
    tuner.reload()
else:
    tuner.search(
        train_dataset_augmented, 
        validation_data=valid_dataset.batch(128).cache(),
        epochs=250, steps_per_epoch=epoch_length,
        callbacks=tuner_callbacks, verbose=1)

tuner.results_summary()

# model = tuner.get_best_models(2)[1]

Trial 8 Complete [00h 27m 14s]
val_accuracy: 0.6181665062904358

Best val_accuracy So Far: 0.6518082618713379
Total elapsed time: 05h 02m 44s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in models/model8\hypertuner_2021-11-28
Showing 10 best trials
Objective(name='val_accuracy', direction='max')
Trial summary
Hyperparameters:
dense_l2_reg: 1e-05
dense_dropout: 0.4000000000000001
learning_rate: 0.001
Score: 0.6518082618713379
Trial summary
Hyperparameters:
dense_l2_reg: 0.001
dense_dropout: 0.4000000000000001
learning_rate: 0.001
Score: 0.6181665062904358
Trial summary
Hyperparameters:
dense_l2_reg: 0.01
dense_dropout: 0.5000000000000001
learning_rate: 0.001
Score: 0.6009251475334167
Trial summary
Hyperparameters:
dense_l2_reg: 0.0001
dense_dropout: 0.6000000000000001
learning_rate: 0.001
Score: 0.5588729977607727
Trial summary
Hyperparameters:
dense_l2_reg: 1e-05
dense_dropout: 0.6000000000000001
learning_rate: 0.0001
Score: 0.5285954475402832
Trial summary
Hyperparame

In [None]:
# Fit model 
print('Building new model')
model, history = kaggle.train_model(
    VGGRes().build(None), train_dataset_augmented, valid_dataset, 
    epochs=200, valid_patience=30, epoch_length=epoch_length)

In [None]:
# Save model
model_name = 'VGGRes_1'
model.save(f'models/{model_number}/VGGRes_1')

# Plot model statistics during training
kaggle.plot_model_history(history, [['accuracy', 'val_accuracy'], ['loss', 'val_loss']])

In [None]:
# Fine-tune model
print('Fine-tuning model')
fine_model, history = kaggle.fine_tune_model_filepath(
    f'models/{model_number}/{model_name}',
    train_dataset.batch(128).cache(), valid_dataset, 
    epochs=1, learning_rate=0.0001)

In [None]:
# Save fine-tuned model
fine_model.save(f'models/{model_number}/VGGRes_2')

### Evaluate model

In [None]:
model_to_evaluate = model_name # Can be changed to evaluate older models
try:
    loaded_model = keras.models.load_model(f'models/{model_number}/{model_to_evaluate}')
except:
    model = model

test_pred_raw = model.predict(valid_dataset.batch(128))
test_pred = np.argmax(test_pred_raw, axis=1)

kaggle.print_accuracy(valid_labels, test_pred)
kaggle.plot_confusion_matrix(valid_labels, test_pred)

### Get labels for Kaggle

In [None]:
true_test_pred = np.argmax(model.predict(x_test_real), axis=1)

kaggle.save_test_pred(f'models/{model_number}/{model_name}_test_pred.csv', true_test_pred)