https://www.kaggle.com/cdeotte/how-to-choose-cnn-architecture-mnist

In [15]:
# Import modules
# Add modules as needed
from sklearn.datasets import fetch_openml
import numpy as np

# For windows laptops add following 2 lines:
# import matplotlib
# matplotlib.use('agg')

import matplotlib.pyplot as plt

import tensorflow.keras as keras

In [3]:
# Load MNIST through tensorflow.keras -- the same package imported above as
# `keras` -- instead of the standalone `keras` package, so that every Keras
# object in this notebook comes from a single implementation.
from tensorflow.keras.datasets import mnist

# load_data() returns ((train images, train labels), (test images, test labels)).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# Report the array shapes of the training and test splits.
# (The bare '\n' string statements that used to sit between the two prints
# were no-ops and have been dropped; the printed output is unchanged.)
print('Shape of X_train:', X_train.shape, '\n', 'Shape of y_train:', y_train.shape)
print('Shape of X_test:', X_test.shape, '\n', 'Shape of y_test:', y_test.shape)

Shape of X_train: (60000, 28, 28) 
 Shape of y_train: (60000,)
Shape of X_test: (10000, 28, 28) 
 Shape of y_test: (10000,)


In [5]:
# Show the Python types of the training images and labels.
print('Type of X_train:', type(X_train), '\n', 'Type of y_train:', type(y_train))

Type of X_train: <class 'numpy.ndarray'> 
 Type of y_train: <class 'numpy.ndarray'>


In [6]:
# Show the Python types of the test images and labels.
print('Type of X_test:', type(X_test), '\n', 'Type of y_test:', type(y_test))

Type of X_test: <class 'numpy.ndarray'> 
 Type of y_test: <class 'numpy.ndarray'>


In [7]:
# NOTE(review): scratch arithmetic; nothing else visible in this notebook uses
# the result -- confirm whether this cell can be removed.
6/7

0.8571428571428571

### **The input shape that a CNN accepts should be in a specific format. If you are using Tensorflow, the format should be (batch, height, width, channels). If you are using Theano, the format should be (batch, channels, height, width).**

In [8]:
# Add a trailing channel axis of size 1 so every sample is 28 x 28 x 1, i.e. the
# (batch, height, width, channels) layout; the number of samples is preserved.
X_train = np.reshape(X_train, (len(X_train), 28, 28, 1))
X_test = np.reshape(X_test, (len(X_test), 28, 28, 1))

In [9]:
# Display the training array shape to confirm the added channel axis.
X_train.shape

(60000, 28, 28, 1)

In [10]:
# Data coming from mnist.load_data() has to be cast to float32 first; dividing
# by 255 then rescales the pixel values.
# NOTE(review): every run of this cell divides by 255 again, so re-running it
# on its own rescales already-scaled data -- re-run from the load cell instead.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

In [17]:
# Use the tensorflow.keras implementation, consistent with the models and
# layers used in this notebook (not the standalone `keras` package).
from tensorflow.keras.utils import to_categorical
number_of_classes = 10

# One-hot encode the integer class labels into vectors of length
# `number_of_classes`. The ndim guard keeps the cell idempotent: labels that
# are still a 1-D vector get encoded, while labels already encoded by an earlier
# run are left alone instead of being encoded a second time.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, number_of_classes)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, number_of_classes)

## **MODEL 1 :          784 - [24C5-P2] - 256 - 10**

This model has a convolution layer with 24 feature maps using a 5x5 filter and stride 1 PLUS a max pooling using 2x2 filter and stride 2.
Then it has a fully connected dense layer with 256 units together with a final output dense layer with 10 units.

In [25]:
# Take every model/layer class from tensorflow.keras. Conv2D and MaxPooling2D
# were previously imported from the standalone `keras` package, which mixes two
# Keras implementations inside a single Sequential model.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

In [26]:
# CNN model 1: a single conv + max-pool stage feeding a 256-unit dense layer
# and a 10-way softmax output.
model1 = Sequential([
    Conv2D(24, (5, 5), padding='same', activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    # Collapse the pooled feature maps into one vector per sample for the dense layers.
    # (Original note, translated: "maybe this is not necessary with fetch_openml".)
    Flatten(),
    Dense(256, activation='relu'),
    Dense(10, activation='softmax'),
])


  

In [27]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model1.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [28]:
# Print the layer-by-layer output shapes and parameter counts of model1.
model1.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 28, 28, 24)        624       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 24)        0         
_________________________________________________________________
flatten (Flatten)            (None, 4704)              0         
_________________________________________________________________
dense (Dense)                (None, 256)               1204480   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                2570      
Total params: 1,207,674
Trainable params: 1,207,674
Non-trainable params: 0
_________________________________________________________________


In [31]:
# Sanity check of the Flatten output size: 14 x 14 positions x 24 feature maps.
14*14*24

4704

In [29]:
# Parameters per unit of the 256-unit dense layer (total from the summary / 256):
# 4704 input weights plus 1 bias.
1204480/256

4705.0

In [32]:
# Train model1 on the training set for 12 epochs with mini-batches of 64;
# the object returned by fit() is kept in `history`.
history = model1.fit(
    X_train,
    y_train,
    epochs=12,
    batch_size=64,
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [33]:
# Evaluate model1 on the test set. Despite its name, Test_loss holds more than
# the loss: index 0 is read as the loss and index 1 as the accuracy below.
Test_loss = model1.evaluate(X_test, y_test)



In [35]:
# Report the evaluation results: entry 0 as the test loss, entry 1 as the test accuracy.
print('Test loss:', Test_loss[0])
print('Test accuracy:', Test_loss[1])

Test loss: 0.046124644577503204
Test accuracy: 0.9882000088691711


#### **Note that the level of overfitting can possibly be reduced by using a dropout layer**

In [36]:
# Model 1 rebuilt with dropout (rate 0.2) after the pooling stage and after the
# hidden dense layer. This rebinds `model1`, replacing the model trained above.
model1 = Sequential([
    Conv2D(24, (5, 5), padding='same', activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    # Collapse the feature maps into one vector per sample for the dense layers.
    # (Original note, translated: "maybe this is not necessary with fetch_openml".)
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),
])

In [37]:
# Configure training for the dropout variant of model1: Adam optimizer,
# categorical cross-entropy loss, accuracy metric.
model1.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [53]:
# Print the layer-by-layer output shapes and parameter counts of the dropout variant of model1.
model1.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 28, 28, 24)        624       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 24)        0         
_________________________________________________________________
dropout (Dropout)            (None, 14, 14, 24)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4704)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               1204480   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)               

In [38]:
# Train the dropout variant of model1 for 12 epochs with mini-batches of 64;
# `history` is overwritten with the object returned by this run.
history = model1.fit(
    X_train,
    y_train,
    epochs=12,
    batch_size=64,
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [39]:
# Evaluate the dropout variant of model1 on the test set. This overwrites
# Test_loss; index 0 is read as the loss and index 1 as the accuracy below.
Test_loss = model1.evaluate(X_test, y_test)



In [40]:
# Report the evaluation results: entry 0 as the test loss, entry 1 as the test accuracy.
print('Test loss:', Test_loss[0])
print('Test accuracy:', Test_loss[1])

Test loss: 0.0320826880633831
Test accuracy: 0.9907000064849854


## **MODEL 2 :          784 - [24C5-P2] - [48C5-P2] - 256 - 10**

This model has a (convolution layer with 24 feature maps using a 5x5 filter and stride 1 together with a max pooling using 2x2 filter and stride 2) PLUS a (convolutional layer with 48 feature maps using a 5x5 filter and stride 1 together with a max pooling using 2x2 filter and stride 2). Then it has a fully connected dense layer with 256 units together with a final output dense layer with 10 units.

In [41]:
# CNN model 2: two conv + max-pool stages (24 then 48 feature maps), each
# followed by dropout (rate 0.2), then a 256-unit dense layer with dropout and
# a 10-way softmax output.
model2 = Sequential([
    # Stage 1: 24 feature maps, 5x5 kernels.
    Conv2D(24, (5, 5), padding='same', activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    # Stage 2: 48 feature maps, 5x5 kernels.
    Conv2D(48, (5, 5), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    # Collapse the feature maps into one vector per sample for the dense layers.
    # (Original note, translated: "maybe this is not necessary with fetch_openml".)
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),
])

In [42]:
# Configure training for model2: Adam optimizer, categorical cross-entropy
# loss, accuracy metric.
model2.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [54]:
# Print the layer-by-layer output shapes and parameter counts of model2.
model2.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 28, 28, 24)        624       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 14, 14, 24)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 14, 14, 24)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 14, 14, 48)        28848     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 7, 7, 48)          0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 7, 7, 48)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 2352)             

In [43]:
# Train model2 for 12 epochs with mini-batches of 64; `history` is overwritten
# with the object returned by this run.
history = model2.fit(
    X_train,
    y_train,
    epochs=12,
    batch_size=64,
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [44]:
# Evaluate model2 on the test set. Despite its name, Test_loss2 holds more than
# the loss: index 0 is read as the loss and index 1 as the accuracy below.
Test_loss2 = model2.evaluate(X_test, y_test)



In [49]:
# Report model2's evaluation results: entry 0 as the test loss, entry 1 as the test accuracy.
print('Test loss:', Test_loss2[0])
print('Test accuracy:', Test_loss2[1])

Test loss: 0.02397640235722065
Test accuracy: 0.9937999844551086


## **MODEL 3 :          784 - [24C5-P2] - [48C5-P2] - [64C5-P2] - 256 - 10**

In [46]:
# CNN model 3: three conv + max-pool stages (24, 48, then 64 feature maps),
# each followed by dropout (rate 0.2), then a 256-unit dense layer with dropout
# and a 10-way softmax output.
model3 = Sequential([
    # Stage 1: 24 feature maps, 5x5 kernels.
    Conv2D(24, (5, 5), padding='same', activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    # Stage 2: 48 feature maps, 5x5 kernels.
    Conv2D(48, (5, 5), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    # Stage 3: 64 feature maps, 5x5 kernels.
    Conv2D(64, (5, 5), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    # Collapse the feature maps into one vector per sample for the dense layers.
    # (Original note, translated: "maybe this is not necessary with fetch_openml".)
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),
])

In [47]:
# Configure training for model3: Adam optimizer, categorical cross-entropy
# loss, accuracy metric.
model3.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [55]:
# Print the layer-by-layer output shapes and parameter counts of model3.
model3.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 28, 28, 24)        624       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 14, 14, 24)        0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 14, 14, 24)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 14, 14, 48)        28848     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 7, 7, 48)          0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 7, 7, 48)          0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 7, 7, 64)         

In [50]:
# Train model3 for 12 epochs with mini-batches of 64; `history` is overwritten
# with the object returned by this run.
history = model3.fit(
    X_train,
    y_train,
    epochs=12,
    batch_size=64,
)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


In [51]:
# Evaluate model3 on the test set. Despite its name, Test_loss3 holds more than
# the loss: index 0 is read as the loss and index 1 as the accuracy below.
Test_loss3 = model3.evaluate(X_test, y_test)



In [52]:
# Report model3's evaluation results: entry 0 as the test loss, entry 1 as the test accuracy.
print('Test loss:', Test_loss3[0])
print('Test accuracy:', Test_loss3[1])

Test loss: 0.025993548333644867
Test accuracy: 0.9936000108718872
