In [1]:
import numpy as np
import keras
import imageio

def prepare_data(X: np.ndarray) -> np.ndarray:
    """Zero-pad a batch of 28x28 pictures into 32x32 pictures.

    X must be 4-dimensional. Axes 1 and 2 each receive two zero-valued
    entries before and after the data; axes 0 and 3 keep their size.
    """
    border = (2, 2)      # entries added before/after along a padded axis
    untouched = (0, 0)   # axes that are left as they are
    return np.pad(X, (untouched, border, border, untouched), mode='constant')

def parse_file(folder: str, path: str, labels: dict) -> list:
    """Read an image index file and record the label of every listed image.

    Every non-blank line of the file must look like ``<label>/<file name>``
    where ``<label>`` is an integer. The full image path is built as
    ``"data/" + folder + "/" + <line>``.

    Args:
        folder: sub-directory of ``data/`` that holds the images.
        path: path of the index file to read.
        labels: dict updated in place; maps each full image path to its
            integer label.

    Returns:
        The full image paths, in the order they appear in the file.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            "/"-separated parts, or if its label is not an integer.
    """
    ids = []

    # The context manager closes the file even when a line is malformed
    # (the handle used to be left open until garbage collection).
    with open(path, "r") as index_file:
        for line in index_file:
            entry = line.strip()
            if not entry:
                # Blank lines (e.g. a trailing newline) describe no image
                continue

            label, _ = entry.split("/")
            img_full_path = "data/" + folder + "/" + entry
            ids.append(img_full_path)
            labels[img_full_path] = int(label)

    return ids

def generate_generator_objects() -> tuple:
    """Build the ID partition and the shared label lookup.

    Parses the index files "train_images_paths", "val_images_paths" and
    "test_images_paths", in that order, with parse_file.

    Returns:
        A ``(partition, labels)`` tuple. ``partition`` maps "train",
        "validation" and "test" to the list of IDs of that split;
        ``labels`` is the single dict filled by all three parse_file calls.
    """
    labels = dict()

    # (partition key, data sub-folder, index file), parsed top to bottom
    splits = (
        ("train", "train", "train_images_paths"),
        ("validation", "val", "val_images_paths"),
        ("test", "test", "test_images_paths"),
    )

    partition = {}
    for key, folder, index_path in splits:
        partition[key] = parse_file(folder, index_path, labels)

    return (partition, labels)

class DataGenerator(keras.utils.Sequence):
    """Keras Sequence that reads image batches from disk on demand.

    Each ID is an image file path; ``labels`` maps every ID to its integer
    class. Images are read with imageio, padded by prepare_data and
    returned together with one-hot encoded labels.
    """
    def __init__(self, list_IDs, labels, batch_size=32, dim=(32,32,32), n_channels=1,
                 n_classes=10, shuffle=True):
        """Store the configuration and build the first index order.

        Args:
            list_IDs: sequence of sample IDs (image file paths).
            labels: mapping from ID to integer class.
            batch_size: number of samples per batch.
            dim: shape of one sample without the channel axis.
                NOTE(review): the default is 3-D while the loading code
                produces 2-D pictures; pass ``(32, 32)`` for those.
            n_channels: size of the trailing channel axis.
            n_classes: number of classes used for the one-hot encoding.
            shuffle: whether the sample order is reshuffled every epoch.
        """
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch.

        Samples that do not fill a whole batch are left out of the epoch.
        """
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as an ``(X, y)`` tuple."""
        # Positions (into list_IDs) that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # Load the samples and labels
        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Rebuild the sample order; shuffle it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given IDs.

        Returns:
            ``(X, y)`` where X has shape ``(n_samples, *dim, n_channels)``
            and y is the one-hot encoding of the labels with ``n_classes``
            columns.
        """
        # Size the buffers by the IDs actually requested so that a short
        # batch can never carry uninitialised rows.
        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, *self.dim, self.n_channels))
        y = np.empty(n_samples, dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            # Assumes imread returns a 2-D (H, W) grayscale array.
            img = np.asarray(imageio.imread(ID))
            # Add a batch axis in front and a channel axis at the end:
            # (H, W) -> (1, H, W, 1). The previous `np.array([img]).T`
            # reversed every axis and therefore transposed the picture.
            img = prepare_data(img[np.newaxis, ..., np.newaxis])
            X[i,] = img[0]

            # Store class
            y[i] = self.labels[ID]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)

Using TensorFlow backend.


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from time import time

from sklearn.model_selection import train_test_split
import tensorflow as tf
import keras
import keras.layers as layers
from keras.regularizers import l2
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.callbacks import TensorBoard
from keras.datasets import mnist
from datagenerator import DataGenerator, generate_generator_objects

# Load the MNIST digits as (images, labels) pairs for both splits
(X_train_raw, y_train_raw), (X_test, y_test) = mnist.load_data()

# Give every image an explicit channel axis: (N, 28, 28, 1).
# The test split is set aside here and only used for the final evaluation.
X_train_raw = X_train_raw.reshape((len(X_train_raw), 28, 28, 1))
X_test = X_test.reshape((len(X_test), 28, 28, 1))

sns.set()

# Keyword arguments handed to every DataGenerator
params = dict(
    dim=(32, 32),
    batch_size=256,
    n_classes=10,
    n_channels=1,
    shuffle=True,
)


def generate_lenet5_model(activation='relu', kernel_regularizer=None) -> keras.Sequential:
    """Build and compile a LeNet-5 style CNN for 32x32x1 inputs, 10 classes.

    Architecture from picture at: https://www.researchgate.net/figure/Architecture-of-LeNet-5_fig3_313808170
    NOTE(review): the classic LeNet-5 uses 5x5 convolution kernels; the 3x3
    kernels of the existing code are kept unchanged here.

    Args:
        activation: activation of every layer except the softmax output.
        kernel_regularizer: regularizer applied to the kernel of every
            convolutional and dense layer; None disables regularization.

    Returns:
        The model, compiled with categorical cross-entropy, Adam and the
        accuracy metric. The layer summary is printed as a side effect.
    """
    # Settings shared by all hidden (non-output) trainable layers
    hidden = dict(activation=activation, kernel_regularizer=kernel_regularizer)

    model = keras.Sequential()

    # C1: feature maps
    model.add(layers.Conv2D(filters=6, kernel_size=(3, 3), input_shape=(32, 32, 1), **hidden))

    # S2: feature maps
    model.add(layers.AveragePooling2D())

    # C3: feature maps
    model.add(layers.Conv2D(filters=16, kernel_size=(3, 3), **hidden))

    # S4: feature maps
    model.add(layers.AveragePooling2D())

    model.add(layers.Flatten())

    # C5: layer
    model.add(layers.Dense(units=120, **hidden))

    # F6: layer
    model.add(layers.Dense(units=84, **hidden))

    # 7: output layer
    model.add(layers.Dense(units=10, activation='softmax', kernel_regularizer=kernel_regularizer))

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line to the output.
    model.summary()

    model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

    return model

def prepare_data(X: np.ndarray) -> np.ndarray:
    """ Zero-pad every 28x28 picture of a 4-D batch into a 32x32 picture """
    # One (before, after) pair per axis: only the two middle axes grow
    pad_width = [(0, 0)] + [(2, 2)] * 2 + [(0, 0)]
    return np.pad(X, pad_width, mode='constant')

def train_keras_model(X_train: np.ndarray, y_train: np.ndarray, model: keras.Sequential, EPOCHS: int, BATCH_SIZE: int, generator: bool) -> tf.keras.callbacks.History:
    """Given a keras model, train the model with given parameters.

    Args:
        X_train: training images; only used when ``generator`` is False.
        y_train: integer labels of ``X_train``; only used when
            ``generator`` is False.
        model: compiled keras model to train.
        EPOCHS: number of epochs.
        BATCH_SIZE: batch size, used by both training modes.
        generator: when True, train from the on-disk DataGenerator
            partitions instead of the in-memory arrays.

    Returns:
        The History object returned by keras for the training run.
    """
    if generator:
        (partition, labels) = generate_generator_objects()
        # Honour the caller's batch size; it used to be ignored here in
        # favour of the value stored in the module-level `params`.
        generator_params = {**params, 'batch_size': BATCH_SIZE}
        train_generator = DataGenerator(partition['train'], labels, **generator_params)
        validation_generator = DataGenerator(partition['validation'], labels, **generator_params)

        return model.fit_generator(train_generator, validation_data=validation_generator, epochs=EPOCHS)

    # In-memory mode only: split, pad and one-hot encode the arrays. Doing
    # this before the mode check wasted the work whenever generator=True.
    # Randomly split into a train and validation set
    X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.2)
    X_train, X_validation = prepare_data(X_train), prepare_data(X_validation)

    y_train, y_validation = to_categorical(y_train), to_categorical(y_validation)

    return model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_data=(X_validation, y_validation), shuffle=True)

def plot_history_loss(history: tf.keras.callbacks.History, name: str):
    """ Plot loss from keras history object

    Draws the 'loss' and 'val_loss' series of ``history.history`` in one
    figure titled with ``name`` and shows it.
    """
    # Training curve first, validation curve second (matches the legend)
    for series_key in ('loss', 'val_loss'):
        plt.plot(history.history[series_key])

    plt.title('Loss per epoch for ' + name)
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

def plot_history_accuracy(history: tf.keras.callbacks.History, name: str):
    """ Plot accuracy from keras history object

    Draws the 'accuracy' and 'val_accuracy' series of ``history.history``
    in one figure titled with ``name`` and shows it.
    """
    legend_entries = ['train', 'val']
    # Same order as the legend entries: training curve, then validation
    for series_key in ('accuracy', 'val_accuracy'):
        plt.plot(history.history[series_key])

    plt.title('Accuracy per epoch for ' + name)
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(legend_entries, loc='upper left')
    plt.show()

if __name__ == "__main__":
    # Hyper parameters: (keras value, display name) pairs
    activation_functions = [("relu", "ReLU"), ("sigmoid", "Sigmoid")]
    kernel_regularizers = [(None, "No regularization"), (l2(0.001), "L2: 0.001"), (l2(0.01), "L2: 0.01"), (l2(0.1), "L2: 0.1"), (l2(1), "L2: 1")]

    # Training parameters
    EPOCHS = 5
    BATCH_SIZE = 256

    # State for selecting the best hyper parameters. Starting at -inf
    # guarantees that the first finite score is recorded, so the selected
    # parameters are not left as None when every accuracy is 0.
    best_accuracy = float("-inf")
    best_accuracy_params = None
    best_accuracy_name = ""

    # For each pair of hyper parameters, train with the training data and
    # check performance with the validation data. The pair with the
    # highest final validation accuracy is selected.
    for (activation_func, activation_func_displayname) in activation_functions:
        for (regularizer, regularizer_displayname) in kernel_regularizers:
            model = generate_lenet5_model(activation=activation_func, kernel_regularizer=regularizer)
            history = train_keras_model(X_train_raw, y_train_raw, model, EPOCHS, BATCH_SIZE, True)
            name = "%s - %s" % (activation_func_displayname, regularizer_displayname)
            plot_history_loss(history, name)
            plot_history_accuracy(history, name)

            # Compare on validation accuracy; the training accuracy used
            # before favours whichever model overfits the most.
            val_accuracy = history.history["val_accuracy"][-1]
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy
                best_accuracy_params = (activation_func, regularizer)
                best_accuracy_name = name

    print("Best hyper parameters: %s (validation accuracy: %r)" % (best_accuracy_name, best_accuracy))

    model = generate_lenet5_model(activation=best_accuracy_params[0], kernel_regularizer=best_accuracy_params[1])

    # Using the test set that we set aside earlier, evaluate performance
    # of a fresh model built with the selected hyper parameters.
    X_train, y_train = prepare_data(X_train_raw), to_categorical(y_train_raw)
    X_test, y_test = prepare_data(X_test), to_categorical(y_test)

    # The test set is passed as validation data, so the last
    # "val_accuracy" entry is the accuracy on the test set.
    history = model.fit(x=X_train, y=y_train, validation_data=(X_test, y_test),batch_size=BATCH_SIZE, epochs=EPOCHS)
    print("Test accuracy: %r" % (history.history["val_accuracy"][-1]))
    


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_1 (Average (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_2 (Average (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 120)               69240     
_________________________________________________________________
dense_2 (Dense)              (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_3 (Average (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_4 (Average (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 120)               69240     
_________________________________________________________________
dense_5 (Dense)              (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_5 (Average (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_6 (Average (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 120)               69240     
_________________________________________________________________
dense_8 (Dense)              (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_7 (Conv2D)            (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_7 (Average (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_8 (Average (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_11 (Dense)             (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_9 (Average (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_10 (Averag (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_5 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_14 (Dense)             (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_11 (Conv2D)           (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_11 (Averag (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_12 (Averag (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_17 (Dense)             (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_13 (Conv2D)           (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_13 (Averag (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_14 (Averag (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_20 (Dense)             (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_15 (Conv2D)           (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_15 (Averag (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_16 (Averag (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_8 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_23 (Dense)             (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_17 (Conv2D)           (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_17 (Averag (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_18 (Averag (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_26 (Dense)             (None, 84)               

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_19 (Averag (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_20 (Averag (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 576)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_29 (Dense)             (None, 84)              

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 30, 30, 6)         60        
_________________________________________________________________
average_pooling2d_21 (Averag (None, 15, 15, 6)         0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 13, 13, 16)        880       
_________________________________________________________________
average_pooling2d_22 (Averag (None, 6, 6, 16)          0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 576)               0         
_________________________________________________________________
dense_31 (Dense)             (None, 120)               69240     
_________________________________________________________________
dense_32 (Dense)             (None, 84)              