# Justin Zarkovacki 2/15/2023
# Transfer Learning MNIST -> KMNIST

# Prepare imports

In [None]:
# Mount Google Drive (Colab only) so the dataset and saved-model paths under
# /content/drive used later in this notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Pinned Optuna version. %pip (rather than !pip) installs into the environment of the running kernel.
%pip install optuna==3.0.3

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting optuna==3.0.3
  Downloading optuna-3.0.3-py3-none-any.whl (348 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m348.5/348.5 KB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog
  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloading cmaes-0.9.1-py3-none-any.whl (21 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.9.4-py3-none-any.whl (210 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.5/210.5 KB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cliff
  Downloading cliff-4.2.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.0/81.0 KB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting importlib-metadata<5.0.0
  Downloading importlib_metadata-4.13.0-py3-none-any.whl (23 kB)
Collecting Mako
  Downloading Mako-1.2.4-py3-no

# Transfer Learning Notebook

In [None]:
import numpy as np
import os
import optuna
import random

import matplotlib
from matplotlib import pyplot as plt

import tensorflow as tf
from keras.models import Sequential
from tensorflow.keras.datasets import mnist
from keras.layers import Conv2D, Dropout, AveragePooling2D, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D, Rescaling
from keras import Input, models, backend as K
from tensorflow.keras import layers, models

print("Done!")

Done!


# Function Definitions and Variables

In [None]:
# Generic epoch count.
# NOTE(review): the cells visible here use mnist_epochs / kmnist_epochs instead — confirm this is still needed.
epochs = 12

# Image dimensions (rows and columns are both 28)
img_rows = img_cols = 28

def load(f):
    """Return the array stored under the key 'arr_0' in a NumPy .npz archive.

    Args:
        f: Path (or file-like object) accepted by ``np.load``.

    Returns:
        The ``numpy.ndarray`` saved as the first positional array of the archive.
    """
    # np.load on an .npz returns a lazy NpzFile that holds the file open;
    # the context manager closes it once the array has been read into memory.
    with np.load(f) as archive:
        return archive['arr_0']
    
def initialize_data(train_im_file, test_im_file, train_lb_file, test_lb_file):
    """Load the four dataset archives and prepare the images for a Keras CNN.

    Images get a single channel axis (positioned according to the active Keras
    image data format), are cast to float32 and divided by 255. Labels are
    returned exactly as loaded.

    Args:
        train_im_file: Path to the training-image archive.
        test_im_file: Path to the test-image archive.
        train_lb_file: Path to the training-label archive.
        test_lb_file: Path to the test-label archive.

    Returns:
        A 5-tuple ``(train_images, test_images, train_labels, test_labels,
        input_shape)``, where ``input_shape`` is the per-sample image shape.
    """
    images_train = load(train_im_file)
    images_test = load(test_im_file)
    labels_train = load(train_lb_file)
    labels_test = load(test_lb_file)

    # The channel axis goes first or last depending on the backend setting.
    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, img_rows, img_cols) if channels_first else (img_rows, img_cols, 1)

    images_train = images_train.reshape((images_train.shape[0],) + input_shape)
    images_test = images_test.reshape((images_test.shape[0],) + input_shape)

    # Convert to float32 and divide by 255 (presumably 8-bit pixel data, giving a 0-1 range).
    images_train = images_train.astype('float32')
    images_test = images_test.astype('float32')
    images_train /= 255
    images_test /= 255
    print('{} train samples, {} test samples'.format(len(images_train), len(images_test)))

    return (images_train, images_test, labels_train, labels_test, input_shape)

# Plot helper: training vs. validation accuracy for one fitted model
def create_visuals(graph_title, model_hist, test_images, test_labels):
    """Plot per-epoch training and validation accuracy and print the final values.

    Args:
        graph_title: Title drawn above the plot.
        model_hist: Object whose ``history`` dict holds the 'accuracy' and
            'val_accuracy' sequences (one entry per epoch).
        test_images: Not used by the current implementation.
        test_labels: Not used by the current implementation.
    """
    train_acc = model_hist.history['accuracy']
    val_acc = model_hist.history['val_accuracy']

    # Worst accuracy seen in either curve; the y-axis starts just below it.
    worst_acc = min(map(min, (train_acc, val_acc)))

    plt.plot(train_acc, label='Train Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')

    plt.title(graph_title)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([worst_acc - 0.01, 1])
    plt.legend(loc='lower right')

    # Final-epoch accuracies: training first, then validation.
    for series in (train_acc, val_acc):
        print(series[-1])

print("Done!")

Done!


## Load Data

In [None]:
# Folder on the mounted Drive that holds the KMNIST archives
prefix = '/content/drive/MyDrive/datasets//'
datasets = dict(
    km_tr_i=prefix + 'Kuzushiji-MNIST - train-imgs.npz',
    km_te_i=prefix + 'Kuzushiji-MNIST - test-imgs.npz',
    km_tr_l=prefix + 'Kuzushiji-MNIST - train-labels.npz',
    km_te_l=prefix + 'Kuzushiji-MNIST - test-labels.npz',
)

# Load MNIST Data
# NOTE(review): unlike the KMNIST images below, the MNIST arrays are kept exactly as
# returned by mnist.load_data() — no division by 255 happens in this cell. Confirm
# that downstream code accounts for the different scale.
(mnist_train_images, mnist_train_labels), (mnist_test_images, mnist_test_labels) = mnist.load_data()
mnist_input_shape, mnist_classes, mnist_epochs = (28, 28, 1), 10, 15

# Load KMNIST Data
dataset = initialize_data(
    train_im_file=datasets["km_tr_i"],
    test_im_file=datasets["km_te_i"],
    train_lb_file=datasets["km_tr_l"],
    test_lb_file=datasets["km_te_l"],
)
(kmnist_train_images, kmnist_test_images,
 kmnist_train_labels, kmnist_test_labels,
 kmnist_input_shape) = dataset
kmnist_classes, kmnist_epochs = 10, 15

# Shared training settings: mini-batch size and Optuna trial budget
batches, num_trials = 128, 25

print("Done!")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
60000 train samples, 10000 test samples
Done!


This notebook will create an ensemble model for KMNIST character recognition. It will be composed of two basic models and one transfer-learning model. Knowledge from MNIST will be transferred to KMNIST.

# Creating KMNIST Model 1

In [None]:
def kmnist_objective1(trial):
    """Optuna objective: build, train and score one candidate KMNIST CNN.

    Architecture: Conv2D -> Dropout -> Conv2D -> AveragePooling2D -> Flatten
    -> Dense -> Dense(kmnist_classes). The last layer has no activation, so
    the model emits logits.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The validation accuracy recorded for the last epoch that ran.
    """
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('first_layer_kernel', 32, 54)
    l2_filters = trial.suggest_int('second_layer_kernel', 20, 64)
    l1_activation = trial.suggest_categorical('first_layer_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('second_layer_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 80)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model. Only the first layer declares the input shape; later
    # layers infer theirs from the layer before them.
    kmnist1_design = Sequential([
        Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=kmnist_input_shape),
        Dropout(dropout),
        Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation),
        AveragePooling2D((average_pooling_size, average_pooling_size)),
        Flatten(),
        Dense(dense_layer_size, activation=dense_layer_activation),
        Dense(kmnist_classes),
    ])

    # The model outputs logits, so both the loss and the cross-entropy metric
    # must be told so; otherwise the logged metric is computed on raw logits
    # as if they were probabilities.
    kmnist1_design.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True), 'accuracy'])

    # NOTE(review): early stopping watches *training* accuracy while the study
    # optimises validation accuracy — confirm this is intentional.
    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    # NOTE(review): the test split doubles as validation data during tuning.
    kmnist1_history = kmnist1_design.fit(
        kmnist_train_images, kmnist_train_labels,
        epochs=kmnist_epochs, batch_size=batches,
        callbacks=[callback],
        validation_data=(kmnist_test_images, kmnist_test_labels))

    # Important metric for optuna to optimize over
    return kmnist1_history.history['val_accuracy'][-1]

In [None]:
# Run Study 1
# Creates an in-memory study that maximises the value returned by
# kmnist_objective1 and evaluates num_trials hyperparameter samples.
kmnist_study1 = optuna.create_study(direction='maximize', study_name="KMNIST-1")
kmnist_study1.optimize(kmnist_objective1, n_trials=num_trials)

In [None]:
# Print the info from the best trial
# First the full FrozenTrial record, then one line per tuned hyperparameter.
print(f'Best trial info:\n{kmnist_study1.best_trial}\n')
for param, value in kmnist_study1.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=11, values=[0.9496999979019165], datetime_start=datetime.datetime(2023, 2, 17, 19, 1, 50, 150095), datetime_complete=datetime.datetime(2023, 2, 17, 19, 3, 14, 414231), params={'kernel_size': 3, 'first_layer_kernel': 50, 'second_layer_kernel': 63, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.1646000975336841, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=54, log=False, low=32, step=1), 'second_layer_kernel': IntDistribution(high=64, log=False, low=20, step=1), 'first_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'second_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=0.15, step=None), 'average_pooling_size': IntDistribution(high=4, lo

In [None]:
# Optuna doesn't save the best model. You must rebuild it and save it.
# The values below are copied by hand from the best-trial printout of study
# KMNIST-1; they must be updated whenever the study is re-run.
kern_size = 3
l1_filters = 50
l2_filters = 63
l1_activation = "relu"
l2_activation = "relu"
dropout = 0.1646000975336841
average_pooling_size = 4
dense_layer_size = 80
dense_layer_activation = "relu"

# Same layer stack as kmnist_objective1. Only the first layer declares the
# input shape; later layers infer theirs from the layer before them.
kmnist1 = Sequential([
    Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=kmnist_input_shape),
    Dropout(dropout),
    Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation),
    AveragePooling2D((average_pooling_size, average_pooling_size)),
    Flatten(),
    Dense(dense_layer_size, activation=dense_layer_activation),
    Dense(kmnist_classes),
])

# The final Dense layer has no activation (logits), so the cross-entropy
# metric needs from_logits=True just like the loss.
kmnist1.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True), 'accuracy'])

# kmnist1.summary()

In [None]:
# Process-wide switch that makes tf.function-wrapped code run eagerly.
# NOTE(review): the reason is not evident from this notebook and eager
# execution is typically slower — confirm it is still required.
tf.config.run_functions_eagerly(True)

# NOTE(review): monitors training accuracy, not validation accuracy.
callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

kmnist1_optuna_history = kmnist1.fit(kmnist_train_images, kmnist_train_labels, epochs=kmnist_epochs, batch_size=batches,
                            callbacks=[callback], validation_data=(kmnist_test_images, kmnist_test_labels))

# Create the target folder first so a missing directory cannot make the save
# fail after the training run has already finished.
kmnist1_save_path = '/content/drive/MyDrive/saved_models/kmnist1.h5'
os.makedirs(os.path.dirname(kmnist1_save_path), exist_ok=True)
kmnist1.save(kmnist1_save_path, save_format='h5')

Epoch 1/15




Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# Creating KMNIST Model 2

In [None]:
def kmnist_objective2(trial):
    """Optuna objective: build, train and score the second KMNIST CNN variant.

    Architecture: Conv2D -> Conv2D -> AveragePooling2D -> Dropout -> Flatten
    -> Dense -> Dense(kmnist_classes). The last layer has no activation, so
    the model emits logits.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The validation accuracy recorded for the last epoch that ran.
    """
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('first_layer_kernel', 20, 40)
    l2_filters = trial.suggest_int('second_layer_kernel', 40, 64)
    l1_activation = trial.suggest_categorical('first_layer_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('second_layer_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 80)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model. Only the first layer declares the input shape; later
    # layers infer theirs from the layer before them.
    kmnist2_design = Sequential([
        Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=kmnist_input_shape),
        Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation),
        AveragePooling2D((average_pooling_size, average_pooling_size)),
        Dropout(dropout),
        Flatten(),
        Dense(dense_layer_size, activation=dense_layer_activation),
        Dense(kmnist_classes),
    ])

    # The model outputs logits, so both the loss and the cross-entropy metric
    # must be told so; otherwise the logged metric is computed on raw logits
    # as if they were probabilities.
    kmnist2_design.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True), 'accuracy'])

    # NOTE(review): early stopping watches *training* accuracy while the study
    # optimises validation accuracy — confirm this is intentional.
    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    # NOTE(review): the test split doubles as validation data during tuning.
    kmnist2_history = kmnist2_design.fit(
        kmnist_train_images, kmnist_train_labels,
        epochs=kmnist_epochs, batch_size=batches,
        callbacks=[callback],
        validation_data=(kmnist_test_images, kmnist_test_labels))

    # Important metric for optuna to optimize over
    return kmnist2_history.history['val_accuracy'][-1]

In [None]:
# Run Study 2
# Creates an in-memory study that maximises the value returned by kmnist_objective2.
# NOTE(review): the trial count is hard-coded to 30 here, whereas study 1 uses
# num_trials (25) — confirm the difference is intentional.
kmnist_study2 = optuna.create_study(direction='maximize', study_name="KMNIST-2")
kmnist_study2.optimize(kmnist_objective2, n_trials=30)

[32m[I 2023-02-17 20:10:38,762][0m A new study created in memory with name: KMNIST-2[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 20:17:01,307][0m Trial 0 finished with value: 0.9466999769210815 and parameters: {'kernel_size': 3, 'first_layer_kernel': 24, 'second_layer_kernel': 56, 'first_layer_activation': 'relu', 'second_layer_activation': 'tanh', 'dropout': 0.192922226646244, 'average_pooling_size': 3, 'dense_layer_size': 72, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.9466999769210815.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 20:23:23,790][0m Trial 1 finished with value: 0.9498000144958496 and parameters: {'kernel_size': 3, 'first_layer_kernel': 34, 'second_layer_kernel': 45, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.2623422535363248, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 20:29:46,114][0m Trial 2 finished with value: 0.8676999807357788 and parameters: {'kernel_size': 2, 'first_layer_kernel': 24, 'second_layer_kernel': 59, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'relu', 'dropout': 0.1854216362190773, 'average_pooling_size': 3, 'dense_layer_size': 77, 'dense_layer_activation': 'sigmoid'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 20:36:08,437][0m Trial 3 finished with value: 0.9315000176429749 and parameters: {'kernel_size': 2, 'first_layer_kernel': 26, 'second_layer_kernel': 50, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.28601545352996294, 'average_pooling_size': 2, 'dense_layer_size': 71, 'dense_layer_activation': 'tanh'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15


[32m[I 2023-02-17 20:38:23,257][0m Trial 4 finished with value: 0.10000000149011612 and parameters: {'kernel_size': 3, 'first_layer_kernel': 38, 'second_layer_kernel': 55, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.25002909822360053, 'average_pooling_size': 2, 'dense_layer_size': 69, 'dense_layer_activation': 'tanh'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 20:44:08,158][0m Trial 5 finished with value: 0.9449999928474426 and parameters: {'kernel_size': 3, 'first_layer_kernel': 25, 'second_layer_kernel': 53, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.1539573992708128, 'average_pooling_size': 3, 'dense_layer_size': 67, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 20:49:43,956][0m Trial 6 finished with value: 0.9498000144958496 and parameters: {'kernel_size': 3, 'first_layer_kernel': 23, 'second_layer_kernel': 56, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.1568321122763012, 'average_pooling_size': 3, 'dense_layer_size': 68, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 20:56:06,286][0m Trial 7 finished with value: 0.9435999989509583 and parameters: {'kernel_size': 3, 'first_layer_kernel': 36, 'second_layer_kernel': 49, 'first_layer_activation': 'relu', 'second_layer_activation': 'sigmoid', 'dropout': 0.2800935489371308, 'average_pooling_size': 3, 'dense_layer_size': 67, 'dense_layer_activation': 'tanh'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:02:28,691][0m Trial 8 finished with value: 0.9343000054359436 and parameters: {'kernel_size': 3, 'first_layer_kernel': 31, 'second_layer_kernel': 62, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.2043941584001044, 'average_pooling_size': 2, 'dense_layer_size': 66, 'dense_layer_activation': 'sigmoid'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:08:50,852][0m Trial 9 finished with value: 0.8420000076293945 and parameters: {'kernel_size': 2, 'first_layer_kernel': 27, 'second_layer_kernel': 63, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'tanh', 'dropout': 0.19425851862314067, 'average_pooling_size': 3, 'dense_layer_size': 65, 'dense_layer_activation': 'tanh'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:14:35,656][0m Trial 10 finished with value: 0.8743000030517578 and parameters: {'kernel_size': 2, 'first_layer_kernel': 32, 'second_layer_kernel': 44, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.2463269773639591, 'average_pooling_size': 4, 'dense_layer_size': 79, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:20:17,057][0m Trial 11 finished with value: 0.9492999911308289 and parameters: {'kernel_size': 3, 'first_layer_kernel': 20, 'second_layer_kernel': 40, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.15280133093936338, 'average_pooling_size': 4, 'dense_layer_size': 75, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.9498000144958496.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:25:58,171][0m Trial 12 finished with value: 0.9513999819755554 and parameters: {'kernel_size': 3, 'first_layer_kernel': 34, 'second_layer_kernel': 46, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.24987707047676055, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}. Best is trial 12 with value: 0.9513999819755554.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:32:20,570][0m Trial 13 finished with value: 0.9506000280380249 and parameters: {'kernel_size': 3, 'first_layer_kernel': 34, 'second_layer_kernel': 46, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.24894506874625777, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}. Best is trial 12 with value: 0.9513999819755554.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:38:06,919][0m Trial 14 finished with value: 0.9437999725341797 and parameters: {'kernel_size': 3, 'first_layer_kernel': 39, 'second_layer_kernel': 46, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.22497069755381577, 'average_pooling_size': 4, 'dense_layer_size': 75, 'dense_layer_activation': 'relu'}. Best is trial 12 with value: 0.9513999819755554.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:44:29,261][0m Trial 15 finished with value: 0.9545999765396118 and parameters: {'kernel_size': 3, 'first_layer_kernel': 29, 'second_layer_kernel': 40, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.23221735656122952, 'average_pooling_size': 4, 'dense_layer_size': 77, 'dense_layer_activation': 'relu'}. Best is trial 15 with value: 0.9545999765396118.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:50:51,601][0m Trial 16 finished with value: 0.8914999961853027 and parameters: {'kernel_size': 3, 'first_layer_kernel': 29, 'second_layer_kernel': 40, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'relu', 'dropout': 0.22509140181544762, 'average_pooling_size': 4, 'dense_layer_size': 77, 'dense_layer_activation': 'sigmoid'}. Best is trial 15 with value: 0.9545999765396118.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 21:56:37,282][0m Trial 17 finished with value: 0.9476000070571899 and parameters: {'kernel_size': 2, 'first_layer_kernel': 28, 'second_layer_kernel': 42, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.22500109535601595, 'average_pooling_size': 4, 'dense_layer_size': 77, 'dense_layer_activation': 'relu'}. Best is trial 15 with value: 0.9545999765396118.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:02:18,058][0m Trial 18 finished with value: 0.9437000155448914 and parameters: {'kernel_size': 3, 'first_layer_kernel': 34, 'second_layer_kernel': 48, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.2969700845518277, 'average_pooling_size': 4, 'dense_layer_size': 74, 'dense_layer_activation': 'relu'}. Best is trial 15 with value: 0.9545999765396118.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:08:00,031][0m Trial 19 finished with value: 0.9361000061035156 and parameters: {'kernel_size': 3, 'first_layer_kernel': 31, 'second_layer_kernel': 42, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.2679524897787683, 'average_pooling_size': 4, 'dense_layer_size': 78, 'dense_layer_activation': 'sigmoid'}. Best is trial 15 with value: 0.9545999765396118.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:14:22,395][0m Trial 20 finished with value: 0.8522999882698059 and parameters: {'kernel_size': 2, 'first_layer_kernel': 36, 'second_layer_kernel': 43, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'relu', 'dropout': 0.21043704910090225, 'average_pooling_size': 4, 'dense_layer_size': 75, 'dense_layer_activation': 'relu'}. Best is trial 15 with value: 0.9545999765396118.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:20:07,248][0m Trial 21 finished with value: 0.9496999979019165 and parameters: {'kernel_size': 3, 'first_layer_kernel': 33, 'second_layer_kernel': 47, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.24261602044836927, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}. Best is trial 15 with value: 0.9545999765396118.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:25:50,262][0m Trial 22 finished with value: 0.9574000239372253 and parameters: {'kernel_size': 3, 'first_layer_kernel': 36, 'second_layer_kernel': 51, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.23565992578719316, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9574000239372253.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:31:34,554][0m Trial 23 finished with value: 0.9519000053405762 and parameters: {'kernel_size': 3, 'first_layer_kernel': 37, 'second_layer_kernel': 50, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.23441033998687927, 'average_pooling_size': 4, 'dense_layer_size': 78, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9574000239372253.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:37:15,831][0m Trial 24 finished with value: 0.9467999935150146 and parameters: {'kernel_size': 3, 'first_layer_kernel': 40, 'second_layer_kernel': 51, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.23724236392793907, 'average_pooling_size': 3, 'dense_layer_size': 77, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9574000239372253.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:43:38,173][0m Trial 25 finished with value: 0.9527999758720398 and parameters: {'kernel_size': 3, 'first_layer_kernel': 37, 'second_layer_kernel': 53, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.2279518834740182, 'average_pooling_size': 4, 'dense_layer_size': 78, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9574000239372253.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 22:49:26,218][0m Trial 26 finished with value: 0.9480000138282776 and parameters: {'kernel_size': 3, 'first_layer_kernel': 30, 'second_layer_kernel': 53, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.21467607748669426, 'average_pooling_size': 4, 'dense_layer_size': 73, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9574000239372253.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15


[32m[I 2023-02-17 22:54:44,497][0m Trial 27 finished with value: 0.9506999850273132 and parameters: {'kernel_size': 3, 'first_layer_kernel': 38, 'second_layer_kernel': 59, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.1731469164202171, 'average_pooling_size': 3, 'dense_layer_size': 78, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9574000239372253.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 23:00:26,480][0m Trial 28 finished with value: 0.8895999789237976 and parameters: {'kernel_size': 3, 'first_layer_kernel': 36, 'second_layer_kernel': 59, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'sigmoid', 'dropout': 0.27004965129708264, 'average_pooling_size': 4, 'dense_layer_size': 76, 'dense_layer_activation': 'sigmoid'}. Best is trial 22 with value: 0.9574000239372253.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-17 23:06:48,827][0m Trial 29 finished with value: 0.9229000210762024 and parameters: {'kernel_size': 3, 'first_layer_kernel': 40, 'second_layer_kernel': 54, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.19940119817010848, 'average_pooling_size': 3, 'dense_layer_size': 72, 'dense_layer_activation': 'tanh'}. Best is trial 22 with value: 0.9574000239372253.[0m


In [None]:
# Print the info from the best trial
# First the full FrozenTrial record, then one line per tuned hyperparameter.
print(f'Best trial info:\n{kmnist_study2.best_trial}\n')
for param, value in kmnist_study2.best_params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=22, values=[0.9574000239372253], datetime_start=datetime.datetime(2023, 2, 17, 22, 20, 7, 249500), datetime_complete=datetime.datetime(2023, 2, 17, 22, 25, 50, 261204), params={'kernel_size': 3, 'first_layer_kernel': 36, 'second_layer_kernel': 51, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.23565992578719316, 'average_pooling_size': 4, 'dense_layer_size': 80, 'dense_layer_activation': 'relu'}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=40, log=False, low=20, step=1), 'second_layer_kernel': IntDistribution(high=64, log=False, low=40, step=1), 'first_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'second_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=0.15, step=None), 'average_pooling_size': IntDistribution(high=4, 

In [None]:
# Optuna doesn't save the best model. You must rebuild it and save it.
# The values below are copied by hand from the best-trial printout of study
# KMNIST-2; they must be updated whenever the study is re-run.
kern_size = 3
l1_filters = 36
l2_filters = 51
l1_activation = "tanh"
l2_activation = "relu"
dropout = 0.23565992578719316
average_pooling_size = 4
dense_layer_size = 80
dense_layer_activation = "relu"

# Same layer stack as kmnist_objective2. Only the first layer declares the
# input shape; later layers infer theirs from the layer before them.
kmnist2 = Sequential([
    Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=kmnist_input_shape),
    Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation),
    AveragePooling2D((average_pooling_size, average_pooling_size)),
    Dropout(dropout),
    Flatten(),
    Dense(dense_layer_size, activation=dense_layer_activation),
    Dense(kmnist_classes),
])

# The final Dense layer has no activation (logits), so the cross-entropy
# metric needs from_logits=True just like the loss.
kmnist2.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(from_logits=True), 'accuracy'])

# kmnist2.summary()

In [None]:
# Process-wide switch that makes tf.function-wrapped code run eagerly.
# NOTE(review): the reason is not evident from this notebook and eager
# execution is typically slower — confirm it is still required.
tf.config.run_functions_eagerly(True)

# NOTE(review): monitors training accuracy, not validation accuracy.
callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

kmnist2_optuna_history = kmnist2.fit(kmnist_train_images, kmnist_train_labels, epochs=kmnist_epochs, batch_size=batches,
                            callbacks=[callback], validation_data=(kmnist_test_images, kmnist_test_labels))

# Create the target folder first so a missing directory cannot make the save
# fail after the training run has already finished.
kmnist2_save_path = '/content/drive/MyDrive/saved_models/kmnist2.h5'
os.makedirs(os.path.dirname(kmnist2_save_path), exist_ok=True)
kmnist2.save(kmnist2_save_path, save_format='h5')

Epoch 1/15




Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


# MNIST to KMNIST Transfer Learning

## Create MNIST Base

In [None]:
def mnist_base_objective(trial):
    """Optuna objective: build, train and score one candidate MNIST base CNN.

    The candidate network is
    Conv2D -> Dropout -> Conv2D -> Flatten -> Dense -> Dense(mnist_classes).
    It contains no pooling layer, so no pooling size is sampled: a sampled but
    unused parameter would only add a dead dimension to the search space and a
    meaningless entry to the study's best parameters.

    Training data, `mnist_input_shape`, `mnist_classes`, `mnist_epochs` and
    `batches` are read from the notebook's global namespace.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The validation accuracy recorded for the last epoch that ran (training
        can end before `mnist_epochs` because of early stopping).
    """
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('first_layer_kernel', 20, 40)
    l2_filters = trial.suggest_int('second_layer_kernel', 40, 64)
    l1_activation = trial.suggest_categorical('first_layer_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('second_layer_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 80)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    # Design model. Only the first layer declares the input shape; Keras infers
    # it for every later layer and ignores `input_shape` there.
    mnist_base_design = Sequential()
    mnist_base_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=mnist_input_shape))
    mnist_base_design.add(Dropout(dropout))
    mnist_base_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation))
    mnist_base_design.add(Flatten())
    mnist_base_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    # Output layer emits raw logits; the loss is configured with from_logits=True.
    mnist_base_design.add(Dense(mnist_classes))

    mnist_base_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(mnist_base_design.summary())

    # NOTE(review): early stopping watches the *training* accuracy while the
    # value handed to Optuna is the validation accuracy -- confirm this is intended.
    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    # NOTE(review): presumably mnist_test_images/labels are the held-out test
    # split; if so, the score optimised here is not an unbiased test estimate.
    mnist_base_history = mnist_base_design.fit(mnist_train_images, mnist_train_labels,
                    epochs=mnist_epochs, batch_size=batches,
                    callbacks=[callback],  # Keras documents `callbacks` as a list
                    validation_data=(mnist_test_images, mnist_test_labels))

    # Important metric for optuna to optimize over
    return mnist_base_history.history['val_accuracy'][-1]

In [None]:
# Study 3: search the MNIST base-model hyperparameters.
# Ten trials; a higher objective value (validation accuracy) is better.
mnist_base_study = optuna.create_study(study_name="MNIST-Base", direction='maximize')
mnist_base_study.optimize(func=mnist_base_objective, n_trials=10)

[32m[I 2023-02-19 04:40:57,539][0m A new study created in memory with name: MNIST-Base[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15


[32m[I 2023-02-19 04:41:44,365][0m Trial 0 finished with value: 0.10279999673366547 and parameters: {'kernel_size': 3, 'first_layer_kernel': 40, 'second_layer_kernel': 48, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.1842557295414608, 'average_pooling_size': 2, 'dense_layer_size': 72, 'dense_layer_activation': 'sigmoid'}. Best is trial 0 with value: 0.10279999673366547.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 04:43:07,286][0m Trial 1 finished with value: 0.9772999882698059 and parameters: {'kernel_size': 3, 'first_layer_kernel': 32, 'second_layer_kernel': 47, 'first_layer_activation': 'relu', 'second_layer_activation': 'tanh', 'dropout': 0.2598515559909706, 'average_pooling_size': 2, 'dense_layer_size': 72, 'dense_layer_activation': 'relu'}. Best is trial 1 with value: 0.9772999882698059.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15


[32m[I 2023-02-19 04:43:45,161][0m Trial 2 finished with value: 0.9814000129699707 and parameters: {'kernel_size': 3, 'first_layer_kernel': 21, 'second_layer_kernel': 40, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.24748217599244543, 'average_pooling_size': 3, 'dense_layer_size': 73, 'dense_layer_activation': 'sigmoid'}. Best is trial 2 with value: 0.9814000129699707.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 04:44:50,702][0m Trial 3 finished with value: 0.9810000061988831 and parameters: {'kernel_size': 2, 'first_layer_kernel': 21, 'second_layer_kernel': 64, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.2262259351529961, 'average_pooling_size': 2, 'dense_layer_size': 79, 'dense_layer_activation': 'sigmoid'}. Best is trial 2 with value: 0.9814000129699707.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 04:46:13,722][0m Trial 4 finished with value: 0.9825000166893005 and parameters: {'kernel_size': 3, 'first_layer_kernel': 21, 'second_layer_kernel': 44, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.26931391080641653, 'average_pooling_size': 2, 'dense_layer_size': 79, 'dense_layer_activation': 'relu'}. Best is trial 4 with value: 0.9825000166893005.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15


[32m[I 2023-02-19 04:47:16,361][0m Trial 5 finished with value: 0.11349999904632568 and parameters: {'kernel_size': 2, 'first_layer_kernel': 32, 'second_layer_kernel': 61, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.15710108996381633, 'average_pooling_size': 2, 'dense_layer_size': 73, 'dense_layer_activation': 'tanh'}. Best is trial 4 with value: 0.9825000166893005.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 04:48:39,406][0m Trial 6 finished with value: 0.9793000221252441 and parameters: {'kernel_size': 3, 'first_layer_kernel': 23, 'second_layer_kernel': 46, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'sigmoid', 'dropout': 0.27542430813212704, 'average_pooling_size': 4, 'dense_layer_size': 72, 'dense_layer_activation': 'relu'}. Best is trial 4 with value: 0.9825000166893005.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 04:49:44,778][0m Trial 7 finished with value: 0.9825999736785889 and parameters: {'kernel_size': 2, 'first_layer_kernel': 29, 'second_layer_kernel': 52, 'first_layer_activation': 'relu', 'second_layer_activation': 'tanh', 'dropout': 0.17435848899058518, 'average_pooling_size': 4, 'dense_layer_size': 65, 'dense_layer_activation': 'sigmoid'}. Best is trial 7 with value: 0.9825999736785889.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 04:50:57,482][0m Trial 8 finished with value: 0.9818999767303467 and parameters: {'kernel_size': 2, 'first_layer_kernel': 32, 'second_layer_kernel': 61, 'first_layer_activation': 'relu', 'second_layer_activation': 'tanh', 'dropout': 0.23127498120973827, 'average_pooling_size': 2, 'dense_layer_size': 73, 'dense_layer_activation': 'relu'}. Best is trial 7 with value: 0.9825999736785889.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15


[32m[I 2023-02-19 04:51:18,812][0m Trial 9 finished with value: 0.11349999904632568 and parameters: {'kernel_size': 2, 'first_layer_kernel': 22, 'second_layer_kernel': 44, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.22928162915639017, 'average_pooling_size': 3, 'dense_layer_size': 77, 'dense_layer_activation': 'sigmoid'}. Best is trial 7 with value: 0.9825999736785889.[0m


In [None]:
# Report the winning MNIST-base trial: the full trial record first, then one
# line per hyperparameter.
mnist_base_best = mnist_base_study.best_trial
print(f'Best trial info:\n{mnist_base_best}\n')
for param, value in mnist_base_best.params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=7, values=[0.9825999736785889], datetime_start=datetime.datetime(2023, 2, 19, 4, 48, 39, 409443), datetime_complete=datetime.datetime(2023, 2, 19, 4, 49, 44, 777393), params={'kernel_size': 2, 'first_layer_kernel': 29, 'second_layer_kernel': 52, 'first_layer_activation': 'relu', 'second_layer_activation': 'tanh', 'dropout': 0.17435848899058518, 'average_pooling_size': 4, 'dense_layer_size': 65, 'dense_layer_activation': 'sigmoid'}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=40, log=False, low=20, step=1), 'second_layer_kernel': IntDistribution(high=64, log=False, low=40, step=1), 'first_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'second_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=0.15, step=None), 'average_pooling_size': IntDistribution(high=4,

In [None]:
# Optuna doesn't save the best model, so it has to be rebuilt and saved by hand.
# The values below were copied from the "Best trial info" output of the
# MNIST-Base study (trial 7 in the recorded run); update them if the study is re-run.
# The trial also reports an 'average_pooling_size', but this architecture has no
# pooling layer, so that value is not needed here.
kern_size = 2
l1_filters = 29
l2_filters = 52
l1_activation = "relu"
l2_activation = "tanh"
dropout = 0.17435848899058518
dense_layer_size = 65
dense_layer_activation = "sigmoid"

mnist_base = Sequential()
# Only the first layer declares the input shape; Keras infers the shape of every
# later layer from the previous layer's output and ignores `input_shape` there.
mnist_base.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation, input_shape=mnist_input_shape))
mnist_base.add(Dropout(dropout))
mnist_base.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation))
# The last three layers (Flatten + two Dense) form the classifier head.
mnist_base.add(Flatten())
mnist_base.add(Dense(dense_layer_size, activation=dense_layer_activation))
# No activation on the output layer: the loss below expects raw logits.
mnist_base.add(Dense(mnist_classes))

mnist_base.compile(optimizer='adam',
                   loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                   metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [None]:
# NOTE(review): this switch is process-wide and stays on for every later fit in
# the session; eager execution is usually slower than graph mode -- confirm it
# is still needed.
tf.config.run_functions_eagerly(True)

# Stop once the validation loss has failed to improve for 3 epochs in a row.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Train with the same epoch budget (`mnist_epochs`) that the hyperparameter
# search used, rather than the generic `epochs` constant, so the final model is
# trained under the conditions its hyperparameters were selected for.
mnist_optuna_history = mnist_base.fit(
    mnist_train_images, mnist_train_labels,
    batch_size=batches, epochs=mnist_epochs,
    callbacks=[callback],  # Keras documents `callbacks` as a list of Callback instances
    validation_data=(mnist_test_images, mnist_test_labels))

# Saving to HDF5 does not create missing parent folders; make sure the target
# directory exists so a finished training run is not lost at the save step.
os.makedirs('/content/drive/MyDrive/saved_models', exist_ok=True)
mnist_base.save('/content/drive/MyDrive/saved_models/mnist_base.h5', save_format='h5')

Epoch 1/12




Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


## Create KMNIST Top

In [None]:
import os
import random
from skimage.transform import rescale
from skimage import io

# Re-using a saved model as a base means removing its classifier head (the
# trailing Flatten and Dense layers) and freezing what is left.
def load_base_model(filename, num_top_layers=3):
    """Load a saved Keras model and turn it into a frozen feature extractor.

    Args:
        filename: Path of the saved model, passed to `models.load_model`.
        num_top_layers: Number of layers to remove from the end of the model.
            The default of 3 matches the head of `mnist_base` as built in this
            notebook (Flatten followed by two Dense layers).

    Returns:
        The loaded model with its last `num_top_layers` layers removed and
        `trainable` set to False.

    Raises:
        ValueError: If `num_top_layers` is negative.
    """
    if num_top_layers < 0:
        raise ValueError(f'num_top_layers must be >= 0, got {num_top_layers}')

    bm = models.load_model(filename)
    for _ in range(num_top_layers):
        bm.pop()
    # Freeze the remaining layers so only layers stacked on top get trained.
    bm.trainable = False
    return bm

In [None]:
def kmnist_top_objective(trial):
    """Optuna objective: train and score one KMNIST head on the frozen MNIST base.

    A fresh copy of the saved MNIST base (classifier head removed, weights
    frozen by `load_base_model`) is loaded for every trial, and the sampled
    layers are stacked on top of it:
    base -> Conv2D -> Dropout -> Conv2D -> AveragePooling2D -> Flatten
    -> Dense -> Dense(kmnist_classes).

    Training data, `kmnist_classes`, `kmnist_epochs` and `batches` are read
    from the notebook's global namespace.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The validation accuracy recorded for the last epoch that ran (training
        can end before `kmnist_epochs` because of early stopping).
    """
    # Define search space per trial (integer, categorical and floating point values)
    kern_size = trial.suggest_int('kernel_size', 2, 3)
    l1_filters = trial.suggest_int('first_layer_kernel', 32, 54)
    l2_filters = trial.suggest_int('second_layer_kernel', 64, 96)
    l1_activation = trial.suggest_categorical('first_layer_activation', ['relu', 'sigmoid', 'tanh'])
    l2_activation = trial.suggest_categorical('second_layer_activation', ['relu', 'sigmoid', 'tanh'])
    dropout = trial.suggest_float('dropout', 0.15, 0.3)
    average_pooling_size = trial.suggest_int('average_pooling_size', 2, 4)
    dense_layer_size = trial.suggest_int('dense_layer_size', 64, 128)
    dense_layer_activation = trial.suggest_categorical('dense_layer_activation', ['relu', 'sigmoid', 'tanh'])

    base_model = load_base_model('/content/drive/MyDrive/saved_models/mnist_base.h5')

    # Design model. The base model is the first layer and already carries the
    # input shape; the layers stacked on it receive the base's feature maps, so
    # they must not declare an image-shaped `input_shape` (Keras would ignore it).
    kmnist_top_design = Sequential()
    kmnist_top_design.add(base_model)
    kmnist_top_design.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation))
    kmnist_top_design.add(Dropout(dropout))
    kmnist_top_design.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation))
    kmnist_top_design.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
    kmnist_top_design.add(Flatten())
    kmnist_top_design.add(Dense(dense_layer_size, activation=dense_layer_activation))
    # Output layer emits raw logits; the loss is configured with from_logits=True.
    kmnist_top_design.add(Dense(kmnist_classes))

    kmnist_top_design.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

#     print(kmnist_top_design.summary())

    # NOTE(review): early stopping watches the *training* accuracy while the
    # value handed to Optuna is the validation accuracy -- confirm this is intended.
    callback = tf.keras.callbacks.EarlyStopping(monitor='accuracy', patience=3)

    # NOTE(review): presumably kmnist_test_images/labels are the held-out test
    # split; if so, the score optimised here is not an unbiased test estimate.
    kmnist_top_history = kmnist_top_design.fit(kmnist_train_images, kmnist_train_labels,
                    epochs=kmnist_epochs, batch_size=batches,
                    callbacks=[callback],  # Keras documents `callbacks` as a list
                    validation_data=(kmnist_test_images, kmnist_test_labels))

    # Important metric for optuna to optimize over
    return kmnist_top_history.history['val_accuracy'][-1]

In [None]:
# Study 4: search the hyperparameters of the KMNIST head that sits on the
# frozen MNIST base. A higher objective value (validation accuracy) is better.
kmnist_TL_study = optuna.create_study(study_name="KMNIST-TL-Results", direction='maximize')
kmnist_TL_study.optimize(func=kmnist_top_objective, n_trials=num_trials)

[32m[I 2023-02-19 05:21:04,071][0m A new study created in memory with name: KMNIST-TL-Results[0m


Epoch 1/15




Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 05:27:26,560][0m Trial 0 finished with value: 0.9308000206947327 and parameters: {'kernel_size': 2, 'first_layer_kernel': 42, 'second_layer_kernel': 81, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'relu', 'dropout': 0.23745963299235384, 'average_pooling_size': 3, 'dense_layer_size': 116, 'dense_layer_activation': 'tanh'}. Best is trial 0 with value: 0.9308000206947327.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 05:33:42,240][0m Trial 1 finished with value: 0.9287999868392944 and parameters: {'kernel_size': 2, 'first_layer_kernel': 47, 'second_layer_kernel': 66, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.2176152231761921, 'average_pooling_size': 3, 'dense_layer_size': 82, 'dense_layer_activation': 'sigmoid'}. Best is trial 0 with value: 0.9308000206947327.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 05:40:01,608][0m Trial 2 finished with value: 0.9221000075340271 and parameters: {'kernel_size': 3, 'first_layer_kernel': 40, 'second_layer_kernel': 85, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.18126446314843492, 'average_pooling_size': 4, 'dense_layer_size': 126, 'dense_layer_activation': 'relu'}. Best is trial 0 with value: 0.9308000206947327.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 05:46:24,100][0m Trial 3 finished with value: 0.951200008392334 and parameters: {'kernel_size': 2, 'first_layer_kernel': 48, 'second_layer_kernel': 93, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.2194630524559446, 'average_pooling_size': 4, 'dense_layer_size': 127, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 05:53:47,356][0m Trial 4 finished with value: 0.9477999806404114 and parameters: {'kernel_size': 2, 'first_layer_kernel': 33, 'second_layer_kernel': 71, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.2191427267987781, 'average_pooling_size': 4, 'dense_layer_size': 93, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:00:10,444][0m Trial 5 finished with value: 0.9309999942779541 and parameters: {'kernel_size': 3, 'first_layer_kernel': 52, 'second_layer_kernel': 65, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'tanh', 'dropout': 0.1806600212100754, 'average_pooling_size': 3, 'dense_layer_size': 108, 'dense_layer_activation': 'tanh'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15


[32m[I 2023-02-19 06:03:06,552][0m Trial 6 finished with value: 0.10000000149011612 and parameters: {'kernel_size': 3, 'first_layer_kernel': 54, 'second_layer_kernel': 72, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.2744467343753033, 'average_pooling_size': 3, 'dense_layer_size': 103, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:09:20,566][0m Trial 7 finished with value: 0.9473000168800354 and parameters: {'kernel_size': 2, 'first_layer_kernel': 54, 'second_layer_kernel': 91, 'first_layer_activation': 'tanh', 'second_layer_activation': 'relu', 'dropout': 0.16558372326969492, 'average_pooling_size': 4, 'dense_layer_size': 95, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15


[32m[I 2023-02-19 06:12:42,362][0m Trial 8 finished with value: 0.10000000149011612 and parameters: {'kernel_size': 3, 'first_layer_kernel': 36, 'second_layer_kernel': 71, 'first_layer_activation': 'tanh', 'second_layer_activation': 'sigmoid', 'dropout': 0.2534121672593879, 'average_pooling_size': 2, 'dense_layer_size': 71, 'dense_layer_activation': 'tanh'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:19:08,024][0m Trial 9 finished with value: 0.934499979019165 and parameters: {'kernel_size': 3, 'first_layer_kernel': 53, 'second_layer_kernel': 81, 'first_layer_activation': 'tanh', 'second_layer_activation': 'tanh', 'dropout': 0.19822422876518986, 'average_pooling_size': 2, 'dense_layer_size': 76, 'dense_layer_activation': 'sigmoid'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:25:17,787][0m Trial 10 finished with value: 0.9441999793052673 and parameters: {'kernel_size': 2, 'first_layer_kernel': 47, 'second_layer_kernel': 94, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.29226634850397304, 'average_pooling_size': 4, 'dense_layer_size': 126, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:32:30,649][0m Trial 11 finished with value: 0.9435999989509583 and parameters: {'kernel_size': 2, 'first_layer_kernel': 32, 'second_layer_kernel': 74, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.22038811652564125, 'average_pooling_size': 4, 'dense_layer_size': 89, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:38:53,092][0m Trial 12 finished with value: 0.9469000101089478 and parameters: {'kernel_size': 2, 'first_layer_kernel': 47, 'second_layer_kernel': 88, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.24706818598076677, 'average_pooling_size': 4, 'dense_layer_size': 110, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:46:15,603][0m Trial 13 finished with value: 0.9498999714851379 and parameters: {'kernel_size': 2, 'first_layer_kernel': 32, 'second_layer_kernel': 77, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.20651891133028316, 'average_pooling_size': 4, 'dense_layer_size': 95, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 06:53:38,109][0m Trial 14 finished with value: 0.9437000155448914 and parameters: {'kernel_size': 2, 'first_layer_kernel': 39, 'second_layer_kernel': 95, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.19989135469148592, 'average_pooling_size': 4, 'dense_layer_size': 117, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:01:00,615][0m Trial 15 finished with value: 0.9412999749183655 and parameters: {'kernel_size': 2, 'first_layer_kernel': 50, 'second_layer_kernel': 76, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.19822361217234966, 'average_pooling_size': 3, 'dense_layer_size': 101, 'dense_layer_activation': 'sigmoid'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:08:23,116][0m Trial 16 finished with value: 0.947700023651123 and parameters: {'kernel_size': 2, 'first_layer_kernel': 45, 'second_layer_kernel': 85, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.15137322200847586, 'average_pooling_size': 4, 'dense_layer_size': 85, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:15:45,612][0m Trial 17 finished with value: 0.9387000203132629 and parameters: {'kernel_size': 2, 'first_layer_kernel': 36, 'second_layer_kernel': 77, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.25894670662190317, 'average_pooling_size': 2, 'dense_layer_size': 64, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:22:08,107][0m Trial 18 finished with value: 0.9404000043869019 and parameters: {'kernel_size': 2, 'first_layer_kernel': 50, 'second_layer_kernel': 85, 'first_layer_activation': 'relu', 'second_layer_activation': 'tanh', 'dropout': 0.23373291433241583, 'average_pooling_size': 4, 'dense_layer_size': 119, 'dense_layer_activation': 'sigmoid'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15


[32m[I 2023-02-19 07:24:13,930][0m Trial 19 finished with value: 0.10000000149011612 and parameters: {'kernel_size': 2, 'first_layer_kernel': 43, 'second_layer_kernel': 79, 'first_layer_activation': 'sigmoid', 'second_layer_activation': 'sigmoid', 'dropout': 0.2067469472431805, 'average_pooling_size': 3, 'dense_layer_size': 101, 'dense_layer_activation': 'tanh'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:31:53,056][0m Trial 20 finished with value: 0.9509000182151794 and parameters: {'kernel_size': 3, 'first_layer_kernel': 36, 'second_layer_kernel': 90, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.1842237324968906, 'average_pooling_size': 3, 'dense_layer_size': 110, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:39:15,607][0m Trial 21 finished with value: 0.9488000273704529 and parameters: {'kernel_size': 3, 'first_layer_kernel': 35, 'second_layer_kernel': 91, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.18278912339474518, 'average_pooling_size': 3, 'dense_layer_size': 115, 'dense_layer_activation': 'relu'}. Best is trial 3 with value: 0.951200008392334.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:45:38,107][0m Trial 22 finished with value: 0.9524999856948853 and parameters: {'kernel_size': 3, 'first_layer_kernel': 34, 'second_layer_kernel': 91, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.16690854650458012, 'average_pooling_size': 3, 'dense_layer_size': 107, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9524999856948853.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:52:00,789][0m Trial 23 finished with value: 0.9519000053405762 and parameters: {'kernel_size': 3, 'first_layer_kernel': 38, 'second_layer_kernel': 92, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.1513735513263988, 'average_pooling_size': 3, 'dense_layer_size': 108, 'dense_layer_activation': 'relu'}. Best is trial 22 with value: 0.9524999856948853.[0m


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[32m[I 2023-02-19 07:58:23,289][0m Trial 24 finished with value: 0.9527999758720398 and parameters: {'kernel_size': 3, 'first_layer_kernel': 39, 'second_layer_kernel': 96, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.15487543805908655, 'average_pooling_size': 3, 'dense_layer_size': 122, 'dense_layer_activation': 'relu'}. Best is trial 24 with value: 0.9527999758720398.[0m


In [None]:
# Report the winning KMNIST transfer-learning trial: the full trial record
# first, then one line per hyperparameter.
kmnist_TL_best = kmnist_TL_study.best_trial
print(f'Best trial info:\n{kmnist_TL_best}\n')
for param, value in kmnist_TL_best.params.items():
    print(f'Param: {param}\tValue: {value}')

Best trial info:
FrozenTrial(number=24, values=[0.9527999758720398], datetime_start=datetime.datetime(2023, 2, 19, 7, 52, 0, 790831), datetime_complete=datetime.datetime(2023, 2, 19, 7, 58, 23, 288938), params={'kernel_size': 3, 'first_layer_kernel': 39, 'second_layer_kernel': 96, 'first_layer_activation': 'relu', 'second_layer_activation': 'relu', 'dropout': 0.15487543805908655, 'average_pooling_size': 3, 'dense_layer_size': 122, 'dense_layer_activation': 'relu'}, distributions={'kernel_size': IntDistribution(high=3, log=False, low=2, step=1), 'first_layer_kernel': IntDistribution(high=54, log=False, low=32, step=1), 'second_layer_kernel': IntDistribution(high=96, log=False, low=64, step=1), 'first_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'second_layer_activation': CategoricalDistribution(choices=('relu', 'sigmoid', 'tanh')), 'dropout': FloatDistribution(high=0.3, log=False, low=0.15, step=None), 'average_pooling_size': IntDistribution(high=4, l

In [None]:
# Optuna doesn't save the best model, so it has to be rebuilt and saved by hand.
# The values below were copied from the "Best trial info" output of the
# KMNIST-TL-Results study (trial 24 in the recorded run); update them if the
# study is re-run.
base_model = load_base_model('/content/drive/MyDrive/saved_models/mnist_base.h5')
kern_size = 3
l1_filters = 39
l2_filters = 96
l1_activation = "relu"
l2_activation = "relu"
dropout = 0.15487543805908655
average_pooling_size = 3
dense_layer_size = 122
dense_layer_activation = "relu"

kmnist_TL = Sequential()
# The frozen base is the first layer and already carries the input shape. The
# layers stacked on it receive the base's feature maps, so they must not declare
# an image-shaped `input_shape` (Keras would ignore it on non-first layers).
kmnist_TL.add(base_model)
kmnist_TL.add(Conv2D(l1_filters, kernel_size=kern_size, activation=l1_activation))
kmnist_TL.add(Dropout(dropout))
kmnist_TL.add(Conv2D(l2_filters, kernel_size=kern_size, activation=l2_activation))
kmnist_TL.add(AveragePooling2D((average_pooling_size, average_pooling_size)))
kmnist_TL.add(Flatten())
kmnist_TL.add(Dense(dense_layer_size, activation=dense_layer_activation))
# No activation on the output layer: the loss below expects raw logits.
kmnist_TL.add(Dense(kmnist_classes))

kmnist_TL.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=[tf.keras.metrics.SparseCategoricalCrossentropy(), 'accuracy'])

In [None]:
# NOTE(review): this switch is process-wide and stays on for every later fit in
# the session; eager execution is usually slower than graph mode -- confirm it
# is still needed.
tf.config.run_functions_eagerly(True)

# Stop once the validation loss has failed to improve for 3 epochs in a row.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

kmnist_TL_optuna_history = kmnist_TL.fit(
    kmnist_train_images, kmnist_train_labels,
    epochs=kmnist_epochs, batch_size=batches,
    callbacks=[callback],  # Keras documents `callbacks` as a list of Callback instances
    validation_data=(kmnist_test_images, kmnist_test_labels))

# Saving to HDF5 does not create missing parent folders; make sure the target
# directory exists so a finished training run is not lost at the save step.
os.makedirs('/content/drive/MyDrive/saved_models', exist_ok=True)
kmnist_TL.save('/content/drive/MyDrive/saved_models/kmnist_TL.h5', save_format='h5')



Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15


# Creating the Ensemble