### CNN

In [74]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, regularizers
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import KFold
from src.dataLoading import dataLoader
from sklearn.model_selection import train_test_split
import itertools


In [57]:
# tf.test.is_gpu_available() is deprecated in TF 2.x; listing the physical GPU
# devices is the supported replacement and yields the same True/False answer.
print("GPU Available: ", bool(tf.config.list_physical_devices('GPU')))

GPU Available:  True


#### Load Data

In [58]:
# dataLoader returns 784-long vectors, 28*28 images and mnist/chinese labels.
vectors, images, labels = dataLoader(mnist_only=False, chinese_mnist_only=False)

# Encode the class labels as binary indicator rows. fit() returns the encoder
# itself, so `transfomed_labels` is only an alias of `encoder`; the name is
# kept in case other cells refer to it.
encoder = LabelBinarizer()
transfomed_labels = encoder.fit(np.unique(labels))

# Hold out 20% of the flat vectors as the test set.
# "stratify" makes sure there's a balance of each class in the test/train sets;
# the fixed random_state makes the split identical after every kernel restart.
X_train, X_test, y_train, y_test = train_test_split(
    vectors, labels, train_size=0.8, stratify=labels, random_state=42)
y_train_t = encoder.transform(y_train)
y_test_t = encoder.transform(y_test)

In [59]:
# Give every sample an explicit 28x28 single-channel layout, which is the
# input shape the convolutional models below are built with.
X_train, X_test = (
    np.reshape(samples, (samples.shape[0], 28, 28, 1))
    for samples in (X_train, X_test)
)

In [75]:
BATCH_SIZE = 64
# Number of batches in one pass over X_train; sets the time scale of the
# learning-rate decay below.
STEPS_PER_EPOCH = X_train.shape[0] // BATCH_SIZE

callbacks = [
    # Stop once val_loss has not improved for 10 epochs and roll the model back
    # to its best epoch, instead of keeping the weights from 10 epochs later.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True)
]

# Inverse-time decay starting at 0.001. With decay_rate=1 the rate follows
# 0.001 / (1 + step / decay_steps), so it is halved after decay_steps steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.001,
    decay_steps=STEPS_PER_EPOCH * 1000,
    decay_rate=1,
    staircase=False)


def create_model(input_shape, output_shape):
    """Build the baseline CNN (returned uncompiled).

    Three 3x3 convolution stages with 32, 64 and 128 filters, each followed by
    batch normalisation and ReLU; only the first two stages are max-pooled.
    The flattened features then pass through Dense(64) and Dense(32) ReLU
    layers, with Dropout(0.3) in front of every dense layer, and end in a
    Dense(output_shape) layer without activation.

    Args:
        input_shape: shape of a single input sample, given to the first Conv2D.
        output_shape: number of units of the final Dense layer.

    Returns:
        A tf.keras Sequential model that has not been compiled.
    """
    def zero_penalty():
        # Both coefficients are zero, so this regulariser adds no penalty.
        return regularizers.L1L2(l1=0.00, l2=0.00)

    stack = [
        # Stage 1: 32 filters, pooled.
        layers.Conv2D(32, (3, 3), activation=None, input_shape=input_shape),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        # Stage 2: 64 filters, pooled.
        layers.Conv2D(64, (3, 3), activation=None),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        # Stage 3: 128 filters, no pooling.
        layers.Conv2D(128, (3, 3), activation=None),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        # Classifier head.
        layers.Flatten(),
        layers.Dropout(0.3),
        layers.Dense(64, kernel_regularizer=zero_penalty(), activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(32, kernel_regularizer=zero_penalty(), activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(output_shape),
    ]
    return tf.keras.models.Sequential(stack)

In [77]:
# Size the baseline network from the training data: one sample's shape on the
# input side, the width of the encoded labels on the output side.
model = create_model(
    input_shape=X_train.shape[1:],
    output_shape=y_train_t.shape[-1],
)
model.summary()

Model: "sequential_35"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_74 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_54 (Batc (None, 26, 26, 32)        128       
_________________________________________________________________
activation_74 (Activation)   (None, 26, 26, 32)        0         
_________________________________________________________________
max_pooling2d_70 (MaxPooling (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_75 (Conv2D)           (None, 11, 11, 64)        18496     
_________________________________________________________________
batch_normalization_55 (Batc (None, 11, 11, 64)        256       
_________________________________________________________________
activation_75 (Activation)   (None, 11, 11, 64)      

In [78]:
# The final Dense layer has no activation, so the loss is told it receives
# logits (from_logits=True). Adam follows the decaying lr_schedule.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(lr_schedule),
    metrics=['accuracy'],
)

# 10% of X_train is held out as validation data, which is what the
# val_loss-based callbacks monitor.
history = model.fit(
    x=X_train,
    y=y_train_t,
    batch_size=64,
    epochs=50,
    validation_split=0.1,
    callbacks=callbacks,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50


### Grid Search

In [51]:
# Upper bound on epochs per fit during the search.
EPOCHS = 100
# Candidate batch sizes. They get their own name so that the scalar BATCH_SIZE
# defined for the baseline model is not silently replaced by a list.
BATCH_SIZES = [32, 64]
# Candidate L1/L2 strengths. NOTE: these are not part of the product below,
# so they are not searched at the moment.
l1 = [0.1, 0.01, 0.001]
l2 = [0.1, 0.01, 0.001]
dropouts = [0.5, 0.2]
# Filters per convolution stage, one inner list per candidate architecture.
conv_models = [
    [32, 64],
    [32, 64, 128],
    [16, 32],
    [16, 32, 64],
    [64, 128],
    # [64, 128, 256]
]
add_batch_norm = [True, False]
# Units per hidden dense layer, one inner list per candidate head.
dense_models = [
    [32],
    # [128, 64],
    [64],
    [64, 32],
    # [128, 32],
    [128]
]
# Each entry is (conv_model, batch_norm, dense_model, dropout, batch_size).
parameters = list(itertools.product(conv_models, add_batch_norm, dense_models, dropouts, BATCH_SIZES))
print(f'total number of combinations: {len(parameters)}')

total number of combinations: 160


In [72]:
def create_model_2(input_shape, output_shape, conv_layers, batch_norm, dense_layers, dropout,
                   steps_per_epoch, l1=0.0, l2=0.0):
    """Build and compile a CNN from a grid-search configuration.

    Every entry of ``conv_layers`` adds a 3x3 Conv2D -> (optional
    BatchNormalization) -> ReLU -> 2x2 MaxPooling stage. The flattened output
    then goes through one Dense+ReLU layer per entry of ``dense_layers``, with
    Dropout before the first dense layer and after each of them, and ends in a
    Dense(output_shape) layer without activation.

    Because every stage is pooled, the number of stages is bounded by the
    input size (for 28x28 inputs three stages already reduce the feature map
    to 1x1).

    Args:
        input_shape: shape of a single input sample, given to the first Conv2D.
        output_shape: number of units of the final Dense layer.
        conv_layers: sequence of filter counts, one per convolution stage.
        batch_norm: if truthy, insert BatchNormalization after each Conv2D.
        dense_layers: sequence of unit counts, one per hidden dense layer.
        dropout: dropout rate used by every Dropout layer.
        steps_per_epoch: batches per epoch; the learning rate decay uses
            ``steps_per_epoch * 1000`` as its decay_steps.
        l1: L1 penalty on the hidden dense kernels (default 0.0, i.e. none).
        l2: L2 penalty on the hidden dense kernels (default 0.0, i.e. none).

    Returns:
        A compiled tf.keras Sequential model (Adam, categorical cross-entropy
        on logits, accuracy metric).
    """
    model = tf.keras.models.Sequential()
    for idx, filters in enumerate(conv_layers):
        # Only the very first layer is told the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if idx == 0 else {}
        model.add(layers.Conv2D(filters, (3, 3), activation=None, **first_layer_kwargs))
        if batch_norm:
            model.add(layers.BatchNormalization())
        model.add(layers.Activation('relu'))
        model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dropout(dropout))
    for units in dense_layers:
        model.add(layers.Dense(units, kernel_regularizer=regularizers.L1L2(l1=l1, l2=l2), activation='relu'))
        model.add(layers.Dropout(dropout))
    model.add(layers.Dense(output_shape))

    # Inverse-time decay starting at 0.001, scaled to this model's batch count.
    lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
        0.001,
        decay_steps=steps_per_epoch * 1000,
        decay_rate=1,
        staircase=False)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr_schedule),
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])
    return model


### Cross Validation

In [73]:
def cross_validate(conv_model, batch_norm, dense_model, dropout, batch_size, k_folds=10, random_state=42):
    """Score one model configuration with k-fold cross-validation.

    For every fold a fresh model is built with create_model_2, trained on the
    fold's training part of the module-level X_train / y_train_t (for at most
    EPOCHS epochs, with early stopping on val_loss) and evaluated on the
    fold's held-out part.

    Args:
        conv_model: filter counts of the convolution stages.
        batch_norm: whether to use batch normalisation.
        dense_model: unit counts of the hidden dense layers.
        dropout: dropout rate.
        batch_size: batch size for training; also used to derive the
            steps-per-epoch value handed to create_model_2.
        k_folds: number of folds.
        random_state: seed for the fold shuffling, so that every configuration
            is scored on the same folds and runs are repeatable.

    Returns:
        ``[mean accuracy in percent, mean loss]`` over all folds.
    """
    acc_per_fold = []
    loss_per_fold = []
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=random_state)
    for fold_n, (train_idx, held_out_idx) in enumerate(kfold.split(X_train, y_train_t), start=1):
        print(f'FOLD {fold_n}')
        # Many models are created in this loop; drop the Keras global state of
        # the previous ones so memory use does not keep growing.
        tf.keras.backend.clear_session()
        # NOTE: steps per epoch is derived from the whole of X_train, not from
        # this fold's (smaller) training part.
        model = create_model_2(X_train.shape[1:], y_train_t.shape[-1], conv_model, batch_norm, dense_model, dropout,  X_train.shape[0]//batch_size)
        callbacks = [
            # Evaluate the best epoch rather than the one 10 epochs after it.
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss', patience=10, restore_best_weights=True)
        ]
        model.fit(
            X_train[train_idx],
            y_train_t[train_idx],
            epochs=EPOCHS,
            batch_size=batch_size,
            validation_split=0.1,
            verbose=2,
            callbacks=callbacks
        )
        test_loss, test_acc = model.evaluate(X_train[held_out_idx], y_train_t[held_out_idx], verbose=2)
        print(f'Score for fold {fold_n}: {model.metrics_names[0]} of {test_loss}; {model.metrics_names[1]} of {test_acc*100}%')
        acc_per_fold.append(test_acc * 100)
        loss_per_fold.append(test_loss)
    return [np.mean(acc_per_fold), np.mean(loss_per_fold)]

In [55]:
# Cross-validate the grid. Each parameter tuple is spread over cross_validate's
# positional arguments; the [:1] slice restricts the run to the first combination.
means = list(itertools.starmap(cross_validate, parameters[:1]))

FOLD 1
Epoch 1/100
405/405 - 3s - loss: 2.4601 - accuracy: 0.2239 - val_loss: 1.1302 - val_accuracy: 0.7556
Epoch 2/100
405/405 - 2s - loss: 1.5570 - accuracy: 0.4818 - val_loss: 0.4666 - val_accuracy: 0.9146
Epoch 3/100
405/405 - 2s - loss: 1.2115 - accuracy: 0.5806 - val_loss: 0.3699 - val_accuracy: 0.9201
Epoch 4/100
405/405 - 2s - loss: 1.0208 - accuracy: 0.6426 - val_loss: 0.2466 - val_accuracy: 0.9486
Epoch 5/100
405/405 - 2s - loss: 0.9010 - accuracy: 0.6846 - val_loss: 0.1865 - val_accuracy: 0.9597
Epoch 6/100
405/405 - 2s - loss: 0.8294 - accuracy: 0.7066 - val_loss: 0.1674 - val_accuracy: 0.9590
Epoch 7/100
405/405 - 2s - loss: 0.7627 - accuracy: 0.7294 - val_loss: 0.1409 - val_accuracy: 0.9646
Epoch 8/100
405/405 - 2s - loss: 0.6761 - accuracy: 0.7609 - val_loss: 0.1419 - val_accuracy: 0.9639
Epoch 9/100
405/405 - 2s - loss: 0.6355 - accuracy: 0.7785 - val_loss: 0.1156 - val_accuracy: 0.9694
Epoch 10/100
405/405 - 2s - loss: 0.5417 - accuracy: 0.8142 - val_loss: 0.1085 - val

KeyboardInterrupt: 

In [37]:
# One row per evaluated configuration: [mean accuracy in %, mean loss].
np.asarray(means)[:, :]

array([[9.88874996e+01, 4.46845509e-02],
       [9.84562498e+01, 6.14750601e-02]])

In [41]:
# Rank the evaluated configurations by mean accuracy (column 0), best first,
# and keep the parameter tuples of the top three.
accuracy_ranking = np.argsort(np.array(means)[:, 0])[::-1]
top_params = [parameters[position] for position in accuracy_ranking[:3]]
print('Top model Configurations')
print(top_params)

Top model Configurations
[([32, 64], True, [64], 0.5, 64), ([32, 64], False, [64], 0.5, 64)]


In [43]:
# Same ranking as for the configurations: mean accuracy (column 0), best
# first, top three; here the [accuracy, loss] pairs themselves are collected.
best_first = np.argsort(np.array(means)[:, 0])[::-1]
top_means = [means[position] for position in best_first[:3]]
print('Top mean accuracies and losses')
print(top_means)

Top mean accuracies and losses
[[98.88749957084656, 0.04468455091118813], [98.456249833107, 0.06147506013512612]]


### Final Model

In [39]:
import os,sys


In [42]:
# Absolute path of the current working directory; the checkpoint path is
# built relative to it.
os.path.abspath(os.curdir)

'H:\\My Drive\\Master\\Machine_Learning\\MachineLearning-2022'

In [46]:
# Checkpoints are written to <working directory>/saved_models. The folder is
# created up front so the first save cannot fail on a missing directory.
checkpoint_dir = os.path.join(os.path.abspath(''), 'saved_models')
os.makedirs(checkpoint_dir, exist_ok=True)
# {epoch} and {val_loss} are placeholders filled in at save time.
fname = os.path.join(checkpoint_dir, "weights-{epoch:03d}-{val_loss:.4f}.hdf5")
final_callbacks = [
    # tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
    # Save the full model (not only the weights) whenever val_accuracy
    # reaches a new maximum.
    tf.keras.callbacks.ModelCheckpoint(
        filepath=fname,
        save_weights_only=False,
        monitor='val_accuracy',
        mode='max',
        save_best_only=True,
        verbose=1)
]

In [47]:
# Rebuild the best configuration from the grid search. create_model_2 requires
# the dropout rate and the steps per epoch as well, and returns a model that is
# already compiled (Adam with inverse-time decay, categorical cross-entropy on
# logits, accuracy), so no second compile() is needed.
best_conv, best_batch_norm, best_dense, best_dropout, best_batch_size = top_params[0]
model = create_model_2(
    X_train.shape[1:],
    y_train_t.shape[-1],
    best_conv,
    best_batch_norm,
    best_dense,
    best_dropout,
    X_train.shape[0] // best_batch_size)
# Train with the batch size that belongs to the selected configuration;
# final_callbacks checkpoints the model whenever val_accuracy improves.
history = model.fit(X_train, y_train_t, epochs=100, batch_size=best_batch_size,
                    validation_split=0.1, callbacks=final_callbacks)

Epoch 1/100

Epoch 00001: val_accuracy improved from -inf to 0.88500, saving model to H:\My Drive\Master\Machine_Learning\MachineLearning-2022\saved_models\weights-001-0.5531.hdf5
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.88500 to 0.94687, saving model to H:\My Drive\Master\Machine_Learning\MachineLearning-2022\saved_models\weights-002-0.2072.hdf5
Epoch 3/100

Epoch 00003: val_accuracy improved from 0.94687 to 0.97062, saving model to H:\My Drive\Master\Machine_Learning\MachineLearning-2022\saved_models\weights-003-0.1380.hdf5
Epoch 4/100

Epoch 00004: val_accuracy did not improve from 0.97062
Epoch 5/100

Epoch 00005: val_accuracy improved from 0.97062 to 0.97250, saving model to H:\My Drive\Master\Machine_Learning\MachineLearning-2022\saved_models\weights-005-0.1162.hdf5
Epoch 6/100

Epoch 00006: val_accuracy did not improve from 0.97250
Epoch 7/100

Epoch 00007: val_accuracy improved from 0.97250 to 0.97438, saving model to H:\My Drive\Master\Machine_Learning\MachineLea