In [1]:
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import seaborn as sns
import tensorflow as tf
print(tf.__version__)
import gc
#from talos.model.layers import hidden_layers
from keras import optimizers
from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, LocallyConnected1D, Conv2D, Reshape, Dropout, MaxPool2D, Flatten
from keras.callbacks import ModelCheckpoint, History, EarlyStopping
from keras.activations import relu, elu, linear
from keras import backend as K
from keras import regularizers
from keras.datasets import mnist
from keras.utils import np_utils
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import r2_score, roc_auc_score, accuracy_score
from sklearn.model_selection import cross_val_score, KFold, train_test_split

from tqdm import trange

# Notebook-wide plotting defaults: dark-grid theme, wide figures, large font.
plt.style.use('seaborn-darkgrid')
mpl.rcParams.update({
    'figure.figsize': [10.0, 4.0],
    'figure.dpi': 80,
    'savefig.dpi': 100,
    'font.size': 18,
})

2.0.0


Using TensorFlow backend.


# Fashion MNIST - Convolutional Neural Network
## Loading Data

In [2]:
# Load the pre-split feature matrices and integer label vectors from disk.
X_train = np.load("../data/X_train.npy")
X_test = np.load("../data/X_test.npy")
X_val = np.load("../data/X_val.npy")
Y_train = np.load("../data/Y_train.npy")
Y_test = np.load("../data/Y_test.npy")
Y_val = np.load("../data/Y_val.npy")

# Pin the one-hot width explicitly. Without num_classes, to_categorical sizes
# the encoding from the largest label found in each split, so a split that
# happens to lack the highest class would get a narrower matrix than the
# 10-unit softmax output of the network expects.
nr_classes = 10
Y_onehot_train = np_utils.to_categorical(Y_train, num_classes=nr_classes)
Y_onehot_test = np_utils.to_categorical(Y_test, num_classes=nr_classes)
Y_onehot_val = np_utils.to_categorical(Y_val, num_classes=nr_classes)

# Conv2D needs (samples, height, width, channels); each sample is reshaped
# into a single-channel 28x28 image.
X_train_reshaped = X_train.reshape((X_train.shape[0], 28, 28, 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], 28, 28, 1))
X_val_reshaped = X_val.reshape((X_val.shape[0], 28, 28, 1))

# Size of the second axis of the un-reshaped training matrix.
nr_params = X_train.shape[1]

## Setting up general CNN model

In [3]:
def make_conv_model(params):
    """Return a zero-argument builder for a compiled CNN classifier.

    The returned function is meant to be passed as ``build_fn`` to
    ``KerasClassifier``. Nothing is constructed until the builder is called,
    and each call creates its own optimizer, so models built from the same
    builder never share optimizer state.

    Parameters
    ----------
    params : dict
        Hyper-parameters, read when the builder is called:
        ``"lr"`` (Adam learning rate), ``"nr_neurons"`` (filters in the 5x5
        convolutions; the 3x3 convolutions use twice as many),
        ``"nr_layers"`` (convolutions per stage) and ``"dropout"`` (rate of
        the Dropout layer placed after every convolution and hidden dense
        layer).

    Returns
    -------
    callable
        ``nn_clf``, which takes no arguments and returns a compiled
        ``Sequential`` model for 28x28x1 inputs and 10 output classes.
    """
    def nn_clf():
        """Build and compile one fresh model from ``params``."""
        # Created per build: a Keras optimizer is stateful and must not be
        # reused by several models.
        opti = optimizers.Adam(amsgrad=True, lr=params["lr"])

        model = Sequential()

        # Stage 1: nr_layers 5x5 convolutions; the first one fixes the input shape.
        model.add(Conv2D(filters = params["nr_neurons"], kernel_size = (5,5), padding = 'Same', activation ='relu', input_shape = (28,28,1)))
        model.add(Dropout(params["dropout"]))
        for _ in range(params["nr_layers"] - 1):
            model.add(Conv2D(filters = params["nr_neurons"], kernel_size = (5,5), padding = 'Same', activation ='relu'))
            model.add(Dropout(params["dropout"]))
        model.add(MaxPool2D(pool_size=(2,2)))

        # Stage 2: nr_layers 3x3 convolutions with twice as many filters.
        for _ in range(params["nr_layers"]):
            model.add(Conv2D(filters = 2*params["nr_neurons"], kernel_size = (3,3), padding = 'Same', activation ='relu'))
            model.add(Dropout(params["dropout"]))
        model.add(MaxPool2D(pool_size=(2,2)))

        # Classifier head: two 256-unit hidden layers, then a 10-way softmax.
        model.add(Flatten())
        model.add(Dense(256, activation = "relu"))
        model.add(Dropout(params["dropout"]))
        model.add(Dense(256, activation = "relu"))
        model.add(Dropout(params["dropout"]))
        model.add(Dense(10, activation = "softmax"))
        model.compile(optimizer=opti, loss="categorical_crossentropy",  metrics=["accuracy"])
        return model
    return nn_clf

## Performing a test run

In [None]:
# Hyper-parameters for a single quick test run of the CNN.
# NOTE(review): batch_size 4086 may be a typo for 4096 — confirm.
params = dict(
    lr=1e-3,
    batch_size=4086,
    nr_neurons=16,
    nr_layers=1,
    dropout=0.2,
)

In [None]:
# Build one throwaway model from the test-run parameters and print its layer summary.
build_test_model = make_conv_model(params)
build_test_model().summary()

In [None]:
# Stop when validation accuracy has not improved by at least 0.001 for
# 10 epochs, and roll back to the best weights seen.
callbacks = [
    EarlyStopping(
        monitor="val_accuracy",
        min_delta=0.001,
        patience=10,
        verbose=1,
        restore_best_weights=True,
    )
]

# The "test" split is monitored during training; the "val" split is kept
# aside and only used for the predictions below.
clf = KerasClassifier(
    build_fn=make_conv_model(params),
    batch_size=params["batch_size"],
    epochs=20,
    validation_data=(X_test_reshaped, Y_onehot_test),
    callbacks=callbacks,
    verbose=1,
)
history = clf.fit(X_train_reshaped, Y_onehot_train)
Y_pred = clf.predict(X_val_reshaped)

In [None]:
# Per-epoch accuracy of the test run. Both curves are labelled so the
# training and validation traces can be told apart in the figure.
fig, ax = plt.subplots(figsize=(16,8))
ax.plot(history.history["val_accuracy"], c="crimson", label="Validation")
ax.plot(history.history["accuracy"], c="navy", label="Training")
ax.set_xlabel("Epochs")
ax.set_ylabel("Accuracy")
ax.legend();

# Hyperparameter Optimization
## Learning Rate vs Batch Size

In [4]:
# Search grid: every batch size is combined with every learning rate.
all_params = dict(
    batch_size=[8, 64, 256, 1024],
    lr=[1e-2, 1e-3, 1e-4],
)
# Total number of training runs in the grid, used for progress reporting.
nr_batch_sizes = len(all_params["batch_size"])
nr_learning_rates = len(all_params["lr"])
nr_runs = nr_batch_sizes * nr_learning_rates

In [5]:
# One entry per completed (batch_size, lr) run. The three lists are only ever
# extended together, so they stay the same length even if the sweep is
# interrupted and can always be turned into a DataFrame.
hp_dict = {
    "acc_score" : [],
    "batch_size" : [],
    "lr" : []
}

k = 0
t0 = time.time()
for batch_size in all_params["batch_size"]:
    for lr in all_params["lr"]:
        # Network size and dropout are held fixed while lr and batch size vary.
        params = {
            "batch_size" : batch_size,
            "lr" : lr,
            "nr_layers" : 2,
            "nr_neurons" : 32,
            "dropout" : 0.2,
             }

        callbacks = [EarlyStopping(monitor="val_accuracy", min_delta=0.001, patience=10, verbose=1, restore_best_weights=True)]
        model = make_conv_model(params)
        clf = KerasClassifier(build_fn=model, batch_size=params["batch_size"], epochs=200, validation_data=(X_test_reshaped, Y_onehot_test), callbacks=callbacks, verbose=1)
        history = clf.fit(X_train_reshaped, Y_onehot_train)
        Y_pred = clf.predict(X_val_reshaped)
        acc_score = accuracy_score(Y_val, Y_pred)

        # Record the run only once it has fully succeeded.
        hp_dict["batch_size"].append(batch_size)
        hp_dict["lr"].append(lr)
        hp_dict["acc_score"].append(acc_score)

        # Free the model and reset the Keras session before the next run.
        del history, clf, model
        gc.collect()
        K.clear_session()

        k += 1
        print(f"{100*k/nr_runs:.1f} %, {(time.time()-t0)/60:.1f} m")

Train on 50000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Restoring model weights from the end of the best epoch
Epoch 00011: early stopping
8.3 %, 21.3 m
Train on 50000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Restoring model weights from the end of the best epoch
Epoch 00024: early stopping
16.7 %, 68.8 m
Train on 50000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 

In [6]:
# Tabulate the learning-rate / batch-size sweep and persist it next to the data.
hp_df = pd.DataFrame(hp_dict)
hp_df.to_pickle("../data/hp_df_CNN_lr_batch.pickle")

In [7]:
# Show the accuracy recorded for each batch size / learning rate combination.
hp_df

Unnamed: 0,acc_score,batch_size,lr
0,0.1028,8,0.01
1,0.9229,8,0.001
2,0.9309,8,0.0001
3,0.0965,64,0.01
4,0.9333,64,0.001
5,0.9309,64,0.0001
6,0.0946,256,0.01
7,0.935,256,0.001
8,0.9283,256,0.0001
9,0.9089,1024,0.01


## Network Size and Depth, and Dropout Rate

In [17]:
# Search grid: every combination of depth, width and dropout rate is trained.
all_params = {
    "nr_layers" : [1, 2, 3],
    "nr_neurons" : [16, 32, 64],
    "dropout" : [0, 0.2]
}
# Total number of runs is the product over ALL grid dimensions (including
# dropout); it is used to report progress as a percentage.
nr_runs = 1
for values in all_params.values():
    nr_runs *= len(values)

In [18]:
# One entry per completed (nr_layers, nr_neurons, dropout) run. The four lists
# are only ever extended together, so they stay the same length even if the
# sweep is interrupted and can always be turned into a DataFrame.
hp_dict = {
    "acc_score" : [],
    "nr_layers" : [],
    "nr_neurons" : [],
    "dropout" : []
}

k = 0
t0 = time.time()
for nr_layers in all_params["nr_layers"]:
    for nr_neurons in all_params["nr_neurons"]:
        for dropout in all_params["dropout"]:
            # Batch size and learning rate are held fixed while the
            # architecture and dropout rate vary.
            params = {
                "batch_size" : 256,
                "lr" : 1e-3,
                "nr_layers" : nr_layers,
                "nr_neurons" : nr_neurons,
                "dropout" : dropout
                 }

            callbacks = [EarlyStopping(monitor="val_accuracy", min_delta=0.001, patience=10, verbose=1, restore_best_weights=True)]
            model = make_conv_model(params)
            clf = KerasClassifier(build_fn=model, batch_size=params["batch_size"], epochs=200, validation_data=(X_test_reshaped, Y_onehot_test), callbacks=callbacks, verbose=1)
            history = clf.fit(X_train_reshaped, Y_onehot_train)
            Y_pred = clf.predict(X_val_reshaped)
            acc_score = accuracy_score(Y_val, Y_pred)

            # Record the run only once it has fully succeeded.
            hp_dict["nr_layers"].append(nr_layers)
            hp_dict["nr_neurons"].append(nr_neurons)
            hp_dict["dropout"].append(dropout)
            hp_dict["acc_score"].append(acc_score)

            # Free the model and reset the Keras session before the next run.
            del history, clf, model
            gc.collect()
            K.clear_session()

            k += 1
            print(f"{100*k/nr_runs:.1f} %, {(time.time()-t0)/60:.1f} m")

Train on 50000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Restoring model weights from the end of the best epoch
Epoch 00026: early stopping
11.1 %, 3.0 m
Train on 50000 samples, validate on 10000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Restoring model weights from the end of the best epoch
Epoch 00031: early stoppi

In [19]:
# Tabulate the network-size / dropout sweep and persist it next to the data.
hp_df = pd.DataFrame(hp_dict)
hp_df.to_pickle("../data/hp_df_CNN_network_size.pickle")

In [20]:
# Show the accuracy recorded for each layer count / filter count / dropout combination.
hp_df

Unnamed: 0,acc_score,nr_layers,nr_neurons,dropout
0,0.9196,1,16,0.0
1,0.9265,1,16,0.2
2,0.9265,1,32,0.0
3,0.9325,1,32,0.2
4,0.9257,1,64,0.0
5,0.9309,1,64,0.2
6,0.9187,2,16,0.0
7,0.9269,2,16,0.2
8,0.9234,2,32,0.0
9,0.9324,2,32,0.2
