# Task 3-2

Try various CNN implementations from the lecture materials using the MNIST or Fashion MNIST dataset (or any other dataset you wish). You may also try other examples you find on the web. Experiment with the training hyper-parameters and the network architecture, including dropout, batch normalization, and data generation. Build a table comparing the performance (loss, accuracy) of the combinations you tried.

In [32]:

from tensorflow.python.keras.models import Sequential, load_model
from tensorflow.python.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Activation
from tensorflow.python.keras import utils
from tensorflow.python.keras.optimizers import adam_v2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
import os

In [20]:
# Load the Fashion-MNIST train/test splits through tf.keras.
# X_* are the image arrays and Y_* the label arrays; later cells derive the
# reshaped x_* and one-hot y_* versions that are fed to the models.
(X_train, Y_train), (X_test, Y_test) = tf.keras.datasets.fashion_mnist.load_data()

In [21]:
# Convert the images to float32 and scale them by 1/255.
# The scaling is guarded on the dtype: the arrays are integer-typed only
# until this cell has run once, so re-running the cell (without reloading
# the data) no longer divides by 255 a second time.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype(np.float32) / 255.0
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype(np.float32) / 255.0

In [22]:
# Sanity check: show the dimensions of the training images before they are
# reshaped to include an explicit channel axis.
X_train.shape

(60000, 28, 28)

In [23]:
# Number of target classes and the (square) image dimensions in pixels.
num_classes = 10
img_rows = img_cols = 28

In [24]:
# Early-stopping callback shared by every training run: watches the
# validation loss and stops after `patience` epochs without improvement.
# NOTE(review): with patience=5 and epochs=5 in every entry of train_params,
# this callback cannot fire before training finishes on its own; lower the
# patience or raise the epoch budget if early stopping is actually wanted.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
)

In [25]:
# Add the single grayscale channel axis where the active Keras backend
# expects it, and remember the per-sample shape for the first Conv2D layer.
if tf.keras.backend.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)

# The leading dimension stays the number of samples; the rest is input_shape.
x_train = X_train.reshape((X_train.shape[0],) + input_shape)
x_test = X_test.reshape((X_test.shape[0],) + input_shape)


In [26]:
# One-hot encode the integer class labels so they match the softmax output
# and the categorical cross-entropy loss used by the models.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (Y_train, Y_test)
)

In [27]:
# Empty comparison table: one row per trained configuration, with the
# evaluation metrics first and the hyper-parameters that produced them after.
res = pd.DataFrame(
    columns=[
        'name',
        'accuracy',
        'loss',
        'layers',
        'dense_layers',
        'learning_rate',
        'batch_size',
        'epochs',
    ]
)

In [35]:
def create_model(train_params):
    """Build and compile a CNN classifier from a configuration dict.

    Args:
        train_params: dict with the keys
            'layers': list of convolutional block dicts. Required keys are
                'filters', 'kernel_size' and 'activation'. Optional keys are
                'batch_norm', 'pool_size' and 'dropout'; a missing or falsy
                value skips the corresponding layer.
            'dense_layers': list of fully connected block dicts. Required
                keys are 'units' and 'activation'; 'batch_norm' and
                'dropout' are optional as above.
            'learning_rate': learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model that ends in a `num_classes`-way softmax
        layer, trained with categorical cross-entropy and reporting accuracy.

    Reads the module-level `input_shape` and `num_classes`.

    NOTE(review): Sequential/Conv2D/etc. are imported from the private
    `tensorflow.python.keras` namespace while BatchNormalization comes from
    `tensorflow.keras`; consider importing everything from the public API.
    """
    model = Sequential()

    # Convolutional feature extractor: conv -> [batch norm] -> [pool] -> [dropout].
    for i, layer in enumerate(train_params['layers']):
        conv_kwargs = {'activation': layer['activation']}
        if i == 0:
            # Only the first layer needs to be told the input shape.
            conv_kwargs['input_shape'] = input_shape
        model.add(Conv2D(layer['filters'], layer['kernel_size'], **conv_kwargs))
        if layer.get('batch_norm'):
            model.add(BatchNormalization())
        if layer.get('pool_size'):
            model.add(MaxPooling2D(pool_size=layer['pool_size']))
        if layer.get('dropout'):
            model.add(Dropout(layer['dropout']))

    model.add(Flatten())

    # Fully connected head: dense -> [batch norm] -> [dropout].
    for dense_layer in train_params['dense_layers']:
        model.add(Dense(dense_layer['units'], activation=dense_layer['activation']))
        if dense_layer.get('batch_norm'):
            model.add(BatchNormalization())
        if dense_layer.get('dropout'):
            model.add(Dropout(dense_layer['dropout']))

    model.add(Dense(num_classes, activation='softmax'))
    optimizer = adam_v2.Adam(learning_rate=train_params['learning_rate'])
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [34]:
def _conv_block(filters, dropout, pool_size=(2, 2), batch_norm=True):
    """Describe one 3x3 ReLU convolutional block for create_model."""
    return {
        'filters': filters,
        'kernel_size': (3, 3),
        'activation': 'relu',
        'batch_norm': batch_norm,
        'pool_size': pool_size,
        'dropout': dropout,
    }


def _dense_block(units, dropout=0.2):
    """Describe one batch-normalised ReLU dense block for create_model."""
    return {
        'units': units,
        'activation': 'relu',
        'batch_norm': True,
        'dropout': dropout,
    }


# The configurations to compare, from shallowest to deepest. Each entry is
# consumed by create_model (architecture, learning rate) and by the training
# loop (name, batch size, epochs).
train_params = [
    {
        'name': 'simplest',
        'layers': [
            _conv_block(64, 0.25, batch_norm=False),
        ],
        'dense_layers': [
            _dense_block(128, dropout=0.5),
        ],
        'learning_rate': 0.01,
        'batch_size': 64,
        'epochs': 5,
    },
    {
        'name': 'deep',
        'layers': [
            _conv_block(32, 0.2),
            _conv_block(64, 0.2),
            _conv_block(128, 0.3, pool_size=None),
        ],
        'dense_layers': [
            _dense_block(64),
            _dense_block(32),
        ],
        'learning_rate': 0.01,
        'batch_size': 64,
        'epochs': 5,
    },
    {
        'name': 'deepest',
        'layers': [
            _conv_block(32, 0.2),
            _conv_block(64, 0.2),
            _conv_block(128, 0.5, pool_size=None),
            _conv_block(256, 0.4, pool_size=None),
        ],
        'dense_layers': [
            _dense_block(128),
            _dense_block(64),
            _dense_block(32),
        ],
        'learning_rate': 0.001,
        'batch_size': 64,
        'epochs': 5,
    },
]

In [36]:
def _dropout_tag(spec):
    """Return 'dr<rate>' when the block uses dropout, otherwise ''."""
    rate = spec.get('dropout')
    return f"dr{rate}" if rate else ''


def _conv_label(layer):
    """Compact label for one convolutional block, e.g. '643x3reludr0.25'."""
    bn_tag = 'bn' if layer.get('batch_norm') else ''
    return (
        f"{layer['filters']}{layer['kernel_size'][0]}x{layer['kernel_size'][1]}"
        f"{layer['activation']}{bn_tag}{_dropout_tag(layer)}"
    )


def _dense_label(layer):
    """Compact label for one dense block, e.g. '128bndr0.5'."""
    bn_tag = 'bn' if layer.get('batch_norm') else ''
    return f"{layer['units']}{bn_tag}{_dropout_tag(layer)}"


results = []

# Train (or load a previously saved copy of) every configuration and score it
# on the test set.
for params in train_params:
    # A saved model is reused purely by name: delete ./models/<name> after
    # changing a configuration, otherwise the stale model is evaluated.
    if not os.path.isdir(f"./models/{params['name']}"):
        model = create_model(params)
        # NOTE(review): the test split doubles as validation data here, so
        # val_loss (and anything tuned on it) is not an independent estimate.
        history = model.fit(x_train, y_train, batch_size=params['batch_size'], epochs=params['epochs'], validation_data=(x_test, y_test), callbacks=[early_stop])
        model.save(f"./models/{params['name']}")
    else:
        model = load_model(f"./models/{params['name']}")
    loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
    results.append({
        'params': params,
        'accuracy': accuracy,
        'loss': loss
    })

# Build the comparison table in one go. DataFrame.append is deprecated (and
# removed in pandas 2.0), and rebuilding `res` from scratch keeps this cell
# idempotent: re-running it no longer duplicates rows.
rows = []
for r in results:
    params = r['params']
    rows.append({
        'name': params['name'],
        'accuracy': r['accuracy'],
        'loss': r['loss'],
        'layers': ', '.join(_conv_label(layer) for layer in params['layers']),
        'dense_layers': ', '.join(_dense_label(layer) for layer in params['dense_layers']),
        'learning_rate': params['learning_rate'],
        'batch_size': params['batch_size'],
        'epochs': params['epochs'],
    })

res = pd.DataFrame(
    rows,
    columns=['name', 'accuracy', 'loss', 'layers', 'dense_layers', 'learning_rate', 'batch_size', 'epochs'],
)

print(res)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: ./models/simplest\assets


INFO:tensorflow:Assets written to: ./models/simplest\assets


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: ./models/deep\assets


INFO:tensorflow:Assets written to: ./models/deep\assets


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: ./models/deepest\assets


INFO:tensorflow:Assets written to: ./models/deepest\assets


       name  accuracy      loss  \
0  simplest    0.8958  0.289879   
1      deep    0.8361  0.470821   
2   deepest    0.8941  0.294753   

                                              layers  \
0                                    643x3reludr0.25   
1  323x3relubndr0.2, 643x3relubndr0.2, 1283x3relu...   
2  323x3relubndr0.2, 643x3relubndr0.2, 1283x3relu...   

                       dense_layers  learning_rate batch_size epochs  
0                        128bndr0.5          0.010         64      5  
1              64bndr0.2, 32bndr0.2          0.010         64      5  
2  128bndr0.2, 64bndr0.2, 32bndr0.2          0.001         64      5  


  res = res.append({
  res = res.append({
  res = res.append({


In [39]:
# Rank the configurations from best to worst test accuracy.
res.sort_values(by='accuracy', ascending=False)

Unnamed: 0,name,accuracy,loss,layers,dense_layers,learning_rate,batch_size,epochs
0,simplest,0.8958,0.289879,643x3reludr0.25,128bndr0.5,0.01,64,5
2,deepest,0.8941,0.294753,"323x3relubndr0.2, 643x3relubndr0.2, 1283x3relu...","128bndr0.2, 64bndr0.2, 32bndr0.2",0.001,64,5
1,deep,0.8361,0.470821,"323x3relubndr0.2, 643x3relubndr0.2, 1283x3relu...","64bndr0.2, 32bndr0.2",0.01,64,5
