### CNN Model Template (keras)

Use the `mnist` image dataset to build a CNN for image classification.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from abc import ABC, abstractmethod

from keras.datasets import mnist
from keras.models import Model, load_model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras import optimizers
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint
from keras.utils import to_categorical

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Load the MNIST train/test arrays and show their shapes as a quick sanity check.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)))

(60000, 28, 28) (10000, 28, 28) (60000,) (10000,)


In [3]:
# Dataset constants used by this cell and by the model configuration below.
num_classes = 10
img_rows, img_cols = 28, 28

# NOTE: this cell rebinds X_*/y_* in place, so it is not idempotent --
# re-running it without re-running the load cell divides by 255 and
# one-hot encodes a second time.

# Add a trailing channel axis of size 1, then divide every pixel by 255.
X_train = X_train.reshape(len(X_train), img_rows, img_cols, 1) / 255
X_test = X_test.reshape(len(X_test), img_rows, img_cols, 1) / 255

# One-hot encode the integer class labels.
y_train, y_test = (to_categorical(labels, num_classes)
                   for labels in (y_train, y_test))
print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)))

(60000, 28, 28, 1) (10000, 28, 28, 1) (60000, 10) (10000, 10)


In [4]:
def make_model(model_params):
    """
    Build and compile a small CNN classifier (keras functional API).

    Architecture: one [5x5 Conv2D (relu) -> 2x2 MaxPooling2D] pair per entry
    of ``units_per_hidden_layer``, followed by Flatten, Dense(256, relu) +
    Dropout, Dense(100, relu) + Dropout and a softmax output layer.

    :param model_params: dict, must include
                               - input_size: tuple for input shape
                               - output_size: number of output (softmax) units
                               - units_per_hidden_layer: list with the number
                                 of filters of each convolutional layer
                               - drop_rate: dropout rate used after both
                                 dense layers
                               - loss: loss function
                               - optimizer: optimization method
                               - metrics: list of metrics

    :return: compiled keras model
    :raises KeyError: if any of the keys above is missing
    """
    # Read every setting before building anything, so a missing key fails
    # fast and no layer is created for an incomplete configuration.
    required_keys = ('input_size', 'output_size', 'units_per_hidden_layer',
                     'drop_rate', 'loss', 'optimizer', 'metrics')
    (input_shape, n_outputs, conv_filters, dropout_rate,
     loss_fn, optim, metric_list) = (model_params[key] for key in required_keys)

    network_input = Input(shape=input_shape)
    features = network_input

    # Convolutional feature extractor: conv + pool for each requested width.
    for n_filters in conv_filters:
        conv = Conv2D(filters=n_filters,
                      kernel_size=(5, 5),
                      strides=(1, 1),
                      padding='valid',
                      activation='relu')
        features = conv(features)
        pool = MaxPooling2D(pool_size=(2, 2),
                            strides=None,
                            padding='valid')
        features = pool(features)

    # Classifier head: two dense layers, each followed by dropout.
    features = Flatten()(features)
    for width in (256, 100):
        features = Dense(units=width, activation='relu')(features)
        features = Dropout(rate=dropout_rate)(features)
    probabilities = Dense(units=n_outputs, activation='softmax')(features)

    cnn = Model(inputs=[network_input], outputs=probabilities)
    cnn.compile(loss=loss_fn, optimizer=optim, metrics=metric_list)
    return cnn

In [5]:
class BaseNN(ABC):
    """Abstract interface for neural-network wrappers.

    Concrete subclasses must implement all four methods below before they
    can be instantiated.
    """

    @abstractmethod
    def initialize(self):
        """Build the underlying model."""

    @abstractmethod
    def fit(self):
        """Train the underlying model."""

    @abstractmethod
    def predict(self):
        """Produce predictions with the underlying model."""

    @abstractmethod
    def save(self):
        """Persist the underlying model."""

    
class BaseCNN(BaseNN):
    """Basic CNN model (keras).

    Thin wrapper around the keras model produced by ``make_model``.
    Typical usage: ``initialize()`` -> ``fit(...)`` -> ``save(path)`` /
    ``predict(X)``.

    Attributes set by ``__init__``:
        model_params: the configuration dict passed to the constructor.
        _model: the in-memory keras model, ``None`` until ``initialize()``.
        saved_model_destination: path given to the last successful
            ``save()``, ``None`` until then.
        history: per-epoch history dict of the last ``fit()``, ``None``
            until then.
    """
    def __init__(self, model_params):
        self.model_params = model_params
        # Defined up front so the "not initialized" / "not saved" checks see
        # None instead of failing on a missing attribute.
        self._model = None
        self.saved_model_destination = None
        self.history = None

    def _require_model(self):
        """Return the in-memory model or raise if ``initialize()`` was skipped.

        AttributeError is kept as the exception type because that is what the
        methods raised before when the model attribute was missing.
        """
        model = getattr(self, '_model', None)
        if model is None:
            raise AttributeError("Model not initialized, try .initialize() first.")
        return model

    def initialize(self):
        """ Initialize model: build it from ``model_params`` and print its summary """
        model = make_model(self.model_params)
        print('Model Summary:')
        # summary() prints by itself and returns None, so wrapping it in
        # print() would emit a stray "None" line.
        model.summary()
        self._model = model

    def fit(self, X_train, y_train, X_val, y_val):
        """Train the model with early stopping and best-model checkpointing.

        Reads ``train_params`` (``patience``, ``nb_epochs``, ``batch_size``)
        and ``filepath`` from ``model_params`` and stores them on the
        instance. The per-epoch history is kept in ``self.history``.

        :param X_train: training inputs
        :param y_train: training targets
        :param X_val: validation inputs (used for the monitored ``val_loss``)
        :param y_val: validation targets
        :return: the trained keras model
        :raises AttributeError: if ``initialize()`` has not been called
        :raises KeyError: if a required entry is missing from ``model_params``
        """
        model = self._require_model()
        self.train_params = self.model_params['train_params']
        self.patience = self.train_params['patience']
        self.nb_epochs = self.train_params['nb_epochs']
        self.batch_size = self.train_params['batch_size']
        self.filepath = self.model_params['filepath']
        earlystopping = EarlyStopping(monitor='val_loss',
                                      patience=self.patience,
                                      mode='min')
        checkpointer = ModelCheckpoint(filepath=self.filepath, save_best_only=True)
        # Keep the history instead of discarding it so learning curves can be
        # inspected after training.
        self.history = model.fit(X_train, y_train,
                                 epochs=self.nb_epochs,
                                 batch_size=self.batch_size,
                                 validation_data=(X_val, y_val),
                                 callbacks=[earlystopping, checkpointer]).history
        return model

    def predict(self, X):
        """Predict with the saved model if there is one, else the in-memory model.

        Once ``save()`` has succeeded, the model is re-loaded from
        ``saved_model_destination`` on every call; training done after that
        save is therefore not reflected until ``save()`` is called again.

        :param X: inputs to predict on
        :return: whatever the underlying model's ``predict`` returns
        :raises AttributeError: if the model was neither saved nor initialized
        """
        destination = getattr(self, 'saved_model_destination', None)
        if destination is not None:
            return load_model(destination).predict(X)
        # No try/except around the call: errors raised inside the model's own
        # predict() must surface unchanged, not be relabelled as "not saved".
        return self._require_model().predict(X)

    def save(self, saved_model_destination):
        """Write the in-memory model to ``saved_model_destination``.

        :param saved_model_destination: target path handed to the model's ``save``
        :raises AttributeError: if ``initialize()`` has not been called
        """
        model = self._require_model()
        model.save(saved_model_destination)
        # Record the destination only after the write succeeded, so predict()
        # never tries to load a file that was not written.
        self.saved_model_destination = saved_model_destination

In [6]:
# Configuration consumed by make_model (architecture / compile settings) and
# by BaseCNN.fit ('train_params' and 'filepath').
model_params = {
    # --- architecture ---
    'input_size': (img_rows, img_cols, 1),
    'output_size': num_classes,
    'units_per_hidden_layer': [64, 128],  # filters of each conv layer
    'drop_rate': 0.5,
    # --- compilation ---
    'loss': 'categorical_crossentropy',
    'optimizer': optimizers.Adam(),
    'metrics': ['accuracy'],
    # --- training ---
    # NOTE(review): patience equals nb_epochs, so early stopping presumably
    # never triggers within a run -- confirm this is intended.
    'train_params': {
        'batch_size': 128,
        'patience': 10,
        'nb_epochs': 10,
    },
    'filepath': './model.h5',  # best-checkpoint path used during fit
}

In [7]:
# Build, train and persist the CNN.
# The class defined above is BaseCNN; the previous name (BasicCNN) is not
# defined anywhere in this notebook and raises NameError on a fresh kernel.
basic_cnn = BaseCNN(model_params)
basic_cnn.initialize()
# NOTE(review): the test split is passed as validation data, so early
# stopping and checkpoint selection are driven by test-set loss -- consider
# holding out part of the training data for validation instead.
basic_cnn.fit(X_train, y_train, X_test, y_test)
basic_cnn.save('./basic_cnn.h5')

Model Summary:
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        1664      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 8, 8, 128)         204928    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 4, 4, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               524544

#### Model Structure Overview

- Layer 1: 5x5 conv filters (64) on 1 input channel. Number of parameters = 25 x 1 x 64 + 64 = 1664
- Layer 2: 2D max pooling
- Layer 3: 5x5 conv filters (128). Number of parameters = 25 x 64 x 128 + 128 = 204928
- Layer 4: 2D max pooling
- Layer 5: fully connected layer (256) + dropout. Number of parameters = 2048 x 256 + 256 = 524544
- Layer 6: fully connected layer (100) + dropout. Number of parameters = 256 x 100 + 100 = 25700
- Layer 7: softmax layer (10). Number of parameters = 100 x 10 + 10 = 1010

In summary:

- Number of parameters of conv layer:

    `filter_size`^`dim` x `prev_layer_channels` x `num_filters` + `num_filters`.
   
   
- Number of parameters of fully connected layer:

    `prev_layer_units` x `curr_layer_units` + `curr_layer_units`.