# Libraries

In [1]:
import os

import cloudpickle as pickle

In [2]:
import numpy as np
import pandas as pd
import sklearn

In [3]:
import tensorflow as tf
import keras

Using TensorFlow backend.


In [4]:
from sklearn.preprocessing import OneHotEncoder

# Loading the data

In [5]:
# Location of the pickled MNIST splits. Set the MNIST_PKL_PATH environment
# variable to point at the file on another machine; the original local path
# is kept as the fallback so existing setups keep working unchanged.
data_path = os.environ.get('MNIST_PKL_PATH', r'D:\temps\mnist.pkl')

In [6]:
# NOTE(review): unpickling executes arbitrary code from the file, so only
# load a pickle that comes from a trusted source.
with open(data_path, 'rb') as pkl_file:
    splits = pickle.load(pkl_file, encoding='latin-1')

# The pickle holds three (inputs, labels) pairs: train, validation, test.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = splits

In [7]:
# Show the shape of every loaded array, in train / validation / test order.
for loaded_array in (x_train, y_train, x_val, y_val, x_test, y_test):
    print(loaded_array.shape)

(50000, 784)
(50000,)
(10000, 784)
(10000,)
(10000, 784)
(10000,)


In [8]:
# Turn each flat 784-value row into a 28x28 single-channel image, the
# channels_last layout the Conv2D layers below are configured for.
# -1 lets NumPy infer the number of samples, so the cell no longer depends
# on the splits having exactly 50000 / 10000 rows.
x_train = x_train.reshape((-1, 28, 28, 1))
x_val = x_val.reshape((-1, 28, 28, 1))
x_test = x_test.reshape((-1, 28, 28, 1))

In [9]:
# Encoder producing dense one-hot rows over the ten categories 0..9.
encoder = OneHotEncoder(sparse=False, categories=[range(10)])

In [10]:
# Fit on a single column holding each label 0..9 exactly once.
encoder.fit(np.arange(10).reshape(10, 1))

OneHotEncoder(categorical_features=None, categories=[range(0, 10)],
       dtype=<class 'numpy.float64'>, handle_unknown='error',
       n_values=None, sparse=False)

In [11]:
# Replace each 1-D label vector by its one-hot matrix. The encoder expects a
# 2-D column, so a trailing axis is added to the labels first.
y_train = encoder.transform(y_train[:, np.newaxis])
y_val = encoder.transform(y_val[:, np.newaxis])
y_test = encoder.transform(y_test[:, np.newaxis])

In [12]:
# Confirm the encoded label matrices have one column per class.
for encoded_labels in (y_train, y_val, y_test):
    print(encoded_labels.shape)

(50000, 10)
(10000, 10)
(10000, 10)


# Keras

In [13]:
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

## First model with 1 conv

In [16]:
class CNN1:
    """Small CNN with a single convolution block and a 10-way softmax output.

    Architecture: Conv2D(32, 3x3, 'same', relu) -> MaxPooling2D(2) -> Flatten
    -> Dense(10, softmax), compiled with SGD (lr=0.01) and categorical
    cross-entropy.

    Attributes:
        model: the compiled ``keras.models.Sequential`` network.
        history: value returned by the last ``fit`` call; ``None`` until
            ``train`` has been called.
    """

    def __init__(self):
        """Seed NumPy/TensorFlow, build and compile the network, print its summary."""
        # Seeds are set before any layer is created.
        np.random.seed(0)
        tf.set_random_seed(0)
        self.model = keras.models.Sequential()
        self.history = None

        # Convolution block: 28x28x1 input -> 32 feature maps -> 2x2 max pooling
        self.model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', use_bias=True,
                              activation='relu', input_shape=(28, 28, 1), data_format='channels_last'))
        self.model.add(MaxPooling2D(2, padding='valid', data_format='channels_last'))

        # Flatten the feature maps into one vector per sample
        self.model.add(Flatten())

        # Output layer: one softmax unit per class
        self.model.add(Dense(10, use_bias=True, activation='softmax'))

        sgd = keras.optimizers.SGD(lr=0.01)
        # The metric is requested under the explicit name 'acc' so that its
        # validation value is logged as 'val_acc', the key ReduceLROnPlateau
        # monitors in train(). Keras >= 2.3 logs 'accuracy' as 'val_accuracy',
        # which would leave that callback without the metric it watches.
        self.model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['acc'])

        self.model.summary()

    def train(self, x_tr, y_tr, x_v, y_v, ep, bs=32, early=10):
        """Fit the model and store the result of ``fit`` in ``self.history``.

        Args:
            x_tr: training inputs passed to ``fit``.
            y_tr: training targets passed to ``fit``.
            x_v: validation inputs.
            y_v: validation targets.
            ep: maximum number of epochs.
            bs: batch size.
            early: patience, in epochs, of the early-stopping callback.
        """
        # EarlyStopping keeps its default monitored quantity (val_loss per the
        # Keras documentation); only its patience is configurable here.
        stopper = EarlyStopping(patience=early)
        # Halve the learning rate after 2 epochs without validation-accuracy
        # improvement, never going below 1e-5.
        lr_schedule = ReduceLROnPlateau(monitor='val_acc',
                                        patience=2,
                                        verbose=1,
                                        factor=0.5,
                                        min_lr=0.00001)

        self.history = self.model.fit(x_tr, y_tr, validation_data=(x_v, y_v), epochs=ep, batch_size=bs,
                                      callbacks=[stopper, lr_schedule])

In [20]:
# Build and compile the single-convolution model; the constructor also prints the layer summary.
mod1 = CNN1()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 6272)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                62730     
Total params: 63,050
Trainable params: 63,050
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Train for at most 20 epochs with the default batch size and early-stopping patience.
mod1.train(x_tr=x_train, y_tr=y_train, x_v=x_val, y_v=y_val, ep=20)

Train on 50000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [22]:
# Score the trained model on the test split; with this compile configuration
# evaluate returns [loss, accuracy].
mod1.model.evaluate(x_test, y_test)



[0.07887579851225018, 0.9773]

***

## Second model with 2 conv but fewer parameters

In [23]:
class CNN2:
    """CNN with two convolution blocks feeding a 10-way softmax output.

    Architecture: two Conv2D(32, 3x3, 'same', relu) + MaxPooling2D(2) blocks
    -> Flatten -> Dense(10, softmax), compiled with SGD (lr=0.01) and
    categorical cross-entropy.

    Attributes:
        model: the compiled ``keras.models.Sequential`` network.
        history: value returned by the last ``fit`` call; ``None`` until
            ``train`` has been called.
    """

    def __init__(self):
        """Seed NumPy/TensorFlow, build and compile the network, print its summary."""
        # Seeds are set before any layer is created.
        np.random.seed(0)
        tf.set_random_seed(0)
        self.model = keras.models.Sequential()
        self.history = None

        # Convolution block 1: 28x28x1 input -> 32 feature maps -> 2x2 max pooling
        self.model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', use_bias=True,
                              activation='relu', input_shape=(28, 28, 1), data_format='channels_last'))
        self.model.add(MaxPooling2D(2, padding='valid', data_format='channels_last'))

        # Convolution block 2. No input_shape is given: only the first layer of
        # a Sequential model needs one, later layers take their input shape
        # from the previous layer's output.
        self.model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', use_bias=True,
                              activation='relu', data_format='channels_last'))
        self.model.add(MaxPooling2D(2, padding='valid', data_format='channels_last'))

        # Flatten the feature maps into one vector per sample
        self.model.add(Flatten())

        # Output layer: one softmax unit per class
        self.model.add(Dense(10, use_bias=True, activation='softmax'))

        sgd = keras.optimizers.SGD(lr=0.01)
        # The metric is requested under the explicit name 'acc' so that its
        # validation value is logged as 'val_acc', the key ReduceLROnPlateau
        # monitors in train(). Keras >= 2.3 logs 'accuracy' as 'val_accuracy',
        # which would leave that callback without the metric it watches.
        self.model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['acc'])

        self.model.summary()

    def train(self, x_tr, y_tr, x_v, y_v, ep, bs=32, early=10):
        """Fit the model and store the result of ``fit`` in ``self.history``.

        Args:
            x_tr: training inputs passed to ``fit``.
            y_tr: training targets passed to ``fit``.
            x_v: validation inputs.
            y_v: validation targets.
            ep: maximum number of epochs.
            bs: batch size.
            early: patience, in epochs, of the early-stopping callback.
        """
        # EarlyStopping keeps its default monitored quantity (val_loss per the
        # Keras documentation); only its patience is configurable here.
        stopper = EarlyStopping(patience=early)
        # Halve the learning rate after 2 epochs without validation-accuracy
        # improvement, never going below 1e-5.
        lr_schedule = ReduceLROnPlateau(monitor='val_acc',
                                        patience=2,
                                        verbose=1,
                                        factor=0.5,
                                        min_lr=0.00001)

        self.history = self.model.fit(x_tr, y_tr, validation_data=(x_v, y_v), epochs=ep, batch_size=bs,
                                      callbacks=[stopper, lr_schedule])

In [26]:
# Build and compile the two-convolution model; the constructor also prints the layer summary.
mod2 = CNN2()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 14, 14, 32)        9248      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 1568)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 10)                15690     
Total params: 25,258
Trainable params: 25,258
Non-trainable params: 0
_________________________________________________________________


In [27]:
# Train for at most 10 epochs with the default batch size and early-stopping patience.
mod2.train(x_tr=x_train, y_tr=y_train, x_v=x_val, y_v=y_val, ep=10)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [28]:
# Score the trained model on the test split; with this compile configuration
# evaluate returns [loss, accuracy].
mod2.model.evaluate(x_test, y_test)



[0.05285470188893378, 0.9832]

***

## Third model with 2 conv and a 512-unit dense layer, ~0.8M parameters (to compare with MLP)

In [35]:
class CNN3:
    """CNN with two convolution blocks and a 512-unit hidden dense layer.

    Architecture: two Conv2D(32, 3x3, 'same', relu) + MaxPooling2D(2) blocks
    -> Flatten -> Dense(512, relu) -> Dense(10, softmax), compiled with SGD
    (lr=0.01) and categorical cross-entropy.

    Attributes:
        model: the compiled ``keras.models.Sequential`` network.
        history: value returned by the last ``fit`` call; ``None`` until
            ``train`` has been called.
    """

    def __init__(self):
        """Seed NumPy/TensorFlow, build and compile the network, print its summary."""
        # Seeds are set before any layer is created.
        np.random.seed(0)
        tf.set_random_seed(0)
        self.model = keras.models.Sequential()
        self.history = None

        # Convolution block 1: 28x28x1 input -> 32 feature maps -> 2x2 max pooling
        self.model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', use_bias=True,
                              activation='relu', input_shape=(28, 28, 1), data_format='channels_last'))
        self.model.add(MaxPooling2D(2, padding='valid', data_format='channels_last'))

        # Convolution block 2. No input_shape is given: only the first layer of
        # a Sequential model needs one, later layers take their input shape
        # from the previous layer's output.
        self.model.add(Conv2D(32, kernel_size=3, strides=1, padding='same', use_bias=True,
                              activation='relu', data_format='channels_last'))
        self.model.add(MaxPooling2D(2, padding='valid', data_format='channels_last'))

        # Flatten the feature maps into one vector per sample
        self.model.add(Flatten())

        # Hidden fully connected layer
        self.model.add(Dense(512, use_bias=True, activation='relu'))

        # Output layer: one softmax unit per class
        self.model.add(Dense(10, use_bias=True, activation='softmax'))

        sgd = keras.optimizers.SGD(lr=0.01)
        # The metric is requested under the explicit name 'acc' so that its
        # validation value is logged as 'val_acc', the key ReduceLROnPlateau
        # monitors in train(). Keras >= 2.3 logs 'accuracy' as 'val_accuracy',
        # which would leave that callback without the metric it watches.
        self.model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['acc'])

        self.model.summary()

    def train(self, x_tr, y_tr, x_v, y_v, ep, bs=32, early=10):
        """Fit the model and store the result of ``fit`` in ``self.history``.

        Args:
            x_tr: training inputs passed to ``fit``.
            y_tr: training targets passed to ``fit``.
            x_v: validation inputs.
            y_v: validation targets.
            ep: maximum number of epochs.
            bs: batch size.
            early: patience, in epochs, of the early-stopping callback.
        """
        # EarlyStopping keeps its default monitored quantity (val_loss per the
        # Keras documentation); only its patience is configurable here.
        stopper = EarlyStopping(patience=early)
        # Halve the learning rate after 2 epochs without validation-accuracy
        # improvement, never going below 1e-5.
        lr_schedule = ReduceLROnPlateau(monitor='val_acc',
                                        patience=2,
                                        verbose=1,
                                        factor=0.5,
                                        min_lr=0.00001)

        self.history = self.model.fit(x_tr, y_tr, validation_data=(x_v, y_v), epochs=ep, batch_size=bs,
                                      callbacks=[stopper, lr_schedule])

In [39]:
# Build and compile the model with the 512-unit dense layer; the constructor also prints the layer summary.
mod3 = CNN3()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 14, 14, 32)        9248      
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 7, 7, 32)          0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 1568)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 512)               803328    
_________________________________________________________________
dense_14 (Dense)             (None, 10)                5130      
Total para

In [40]:
# Train for at most 30 epochs with the default batch size and early-stopping patience.
mod3.train(x_tr=x_train, y_tr=y_train, x_v=x_val, y_v=y_val, ep=30)

Train on 50000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0024999999441206455.
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30

Epoch 00018: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 19/30
Epoch 20/30

Epoch 00020: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30

Epoch 00024: ReduceLROnPlateau reducing learning rate to 0.0003124999930150807.
Epoch 25/30
Epoch 26/30

Epoch 00026: ReduceLROnPlateau reducing learning rate to 0.00015624999650754035.
Epoch 27/30


In [41]:
# Score the trained model on the test split; with this compile configuration
# evaluate returns [loss, accuracy].
mod3.model.evaluate(x_test, y_test)



[0.03355478632784216, 0.9886]

***

## Fourth model with 2 wider conv (64 and 128 filters) and a 128-unit dense layer, ~0.9M parameters (to compare with MLP)

In [19]:
class CNN4:
    """CNN with two wider convolution blocks and a 128-unit hidden dense layer.

    Architecture: Conv2D(64, 3x3, 'same', relu) + MaxPooling2D(2) ->
    Conv2D(128, 3x3, 'same', relu) + MaxPooling2D(2) -> Flatten ->
    Dense(128, relu) -> Dense(10, softmax), compiled with SGD (lr=0.01) and
    categorical cross-entropy.

    Attributes:
        model: the compiled ``keras.models.Sequential`` network.
        history: value returned by the last ``fit`` call; ``None`` until
            ``train`` has been called.
    """

    def __init__(self):
        """Seed NumPy/TensorFlow, build and compile the network, print its summary."""
        # Seeds are set before any layer is created.
        np.random.seed(0)
        tf.set_random_seed(0)
        self.model = keras.models.Sequential()
        self.history = None

        # Convolution block 1: 28x28x1 input -> 64 feature maps -> 2x2 max pooling
        self.model.add(Conv2D(64, kernel_size=3, strides=1, padding='same', use_bias=True,
                              activation='relu', input_shape=(28, 28, 1), data_format='channels_last'))
        self.model.add(MaxPooling2D(2, padding='valid', data_format='channels_last'))

        # Convolution block 2. No input_shape is given: this layer receives the
        # 64-channel output of block 1, and Sequential takes the input shape of
        # every layer after the first from the previous layer's output.
        self.model.add(Conv2D(128, kernel_size=3, strides=1, padding='same', use_bias=True,
                              activation='relu', data_format='channels_last'))
        self.model.add(MaxPooling2D(2, padding='valid', data_format='channels_last'))

        # Flatten the feature maps into one vector per sample
        self.model.add(Flatten())

        # Hidden fully connected layer
        self.model.add(Dense(128, use_bias=True, activation='relu'))

        # Output layer: one softmax unit per class
        self.model.add(Dense(10, use_bias=True, activation='softmax'))

        sgd = keras.optimizers.SGD(lr=0.01)
        # The metric is requested under the explicit name 'acc' so that its
        # validation value is logged as 'val_acc', the key ReduceLROnPlateau
        # monitors in train(). Keras >= 2.3 logs 'accuracy' as 'val_accuracy',
        # which would leave that callback without the metric it watches.
        self.model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['acc'])

        self.model.summary()

    def train(self, x_tr, y_tr, x_v, y_v, ep, bs=32, early=10):
        """Fit the model and store the result of ``fit`` in ``self.history``.

        Args:
            x_tr: training inputs passed to ``fit``.
            y_tr: training targets passed to ``fit``.
            x_v: validation inputs.
            y_v: validation targets.
            ep: maximum number of epochs.
            bs: batch size.
            early: patience, in epochs, of the early-stopping callback.
        """
        # EarlyStopping keeps its default monitored quantity (val_loss per the
        # Keras documentation); only its patience is configurable here.
        stopper = EarlyStopping(patience=early)
        # Halve the learning rate after 2 epochs without validation-accuracy
        # improvement, never going below 1e-5.
        lr_schedule = ReduceLROnPlateau(monitor='val_acc',
                                        patience=2,
                                        verbose=1,
                                        factor=0.5,
                                        min_lr=0.00001)

        self.history = self.model.fit(x_tr, y_tr, validation_data=(x_v, y_v), epochs=ep, batch_size=bs,
                                      callbacks=[stopper, lr_schedule])

In [20]:
# Build and compile the wider two-convolution model; the constructor also prints the layer summary.
mod4 = CNN4()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 28, 28, 64)        640       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 7, 7, 128)         0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 6272)              0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               802944    
_________________________________________________________________
dense_6 (Dense)              (None, 10)                1290      
Total para

In [22]:
# Train for at most 10 epochs with the default batch size and early-stopping patience.
mod4.train(x_tr=x_train, y_tr=y_train, x_v=x_val, y_v=y_val, ep=10)

Train on 50000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.004999999888241291.


In [23]:
# Score the trained model on the test split; with this compile configuration
# evaluate returns [loss, accuracy].
mod4.model.evaluate(x_test, y_test)



[0.04046680129289162, 0.9863]