# Data mining HW4

author: Ali ArjomandBigdeli (Student Number: 9423008)

## Question 3: Comparing implementations of a fully connected NN vs. a CNN for classifying MNIST

## Fully connected NN

In [1]:
from keras.datasets import mnist
from keras.preprocessing.image import load_img, array_to_img
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense

import numpy as np
import pandas as pd

Using TensorFlow backend.


In [2]:
# Load the MNIST train/test splits provided by keras.datasets.
(X_train, Y_train),(X_test, Y_test) = mnist.load_data()

In [3]:
# Sanity-check the shape of every array: train images/labels, then test images/labels.
for split in (X_train, Y_train, X_test, Y_test):
    print(split.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


### Preprocessing the image data

In [4]:
# Dimensions (in pixels) of a single MNIST image.
image_height = 28
image_width = 28

In [5]:
# Flatten every 28x28 image into one row vector so it can feed the Dense layers.
# -1 lets NumPy infer the number of samples instead of hard-coding 60000 / 10000,
# so the cell keeps working if only a subset of the data is loaded.
X_train = X_train.reshape(-1, image_height * image_width)
X_test = X_test.reshape(-1, image_height * image_width)
print(X_train.shape)
print(X_test.shape)

(60000, 784)
(10000, 784)


In [6]:
# Convert the pixel values to float32 and rescale them by 1/255.
# NOTE: re-running this cell divides by 255 again, so run it once per data load.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [7]:
# One-hot encode the integer class labels into 10 columns.
Y_train = to_categorical(Y_train, num_classes=10)
Y_test = to_categorical(Y_test, num_classes=10)
for labels in (Y_train, Y_test):
    print(labels.shape)

(60000, 10)
(10000, 10)


### Building keras model

### Network Architecture:

![FullyConnectedNN_MNIST](img/FullyNN_MNIST.JPG)

In [8]:
# Start with an empty Sequential model; its layers are added in the next cell.
model = Sequential()

In [9]:
# Two 512-unit ReLU hidden layers followed by a 10-way softmax output layer.
# The input width is derived from the image dimensions instead of hard-coding 784,
# so it stays in sync with the flattening step in the preprocessing section.
model.add(Dense(512, activation='relu', input_shape=(image_height * image_width,)))
model.add(Dense(512, activation='relu'))
model.add(Dense(10, activation='softmax'))

### Compile the model

In [10]:
# Adam optimizer with categorical cross-entropy (labels are one-hot); track accuracy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [11]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


### Train the model

In [12]:
# Train the fully connected model for 20 epochs.
# NOTE(review): the test split doubles as the validation set here.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=20,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Evaluating the model

In [13]:
# Evaluate on the test split; with the compile() settings above the result is [loss, accuracy].
score = model.evaluate(X_test, Y_test)



In [14]:
# Display the fully connected model's [test loss, test accuracy].
score

[0.15124783596569877, 0.9803000092506409]

## CNN

### Network Architecture:

![CNN_Arch_MNIST](img/CNN_Arch_MNIST.JPG)

In [15]:
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical

### load data

In [16]:
# Reload the raw MNIST arrays: the earlier copies were flattened, rescaled and
# one-hot encoded for the fully connected model.
(X_train, Y_train),(X_test, Y_test) = mnist.load_data()

In [17]:
# Confirm the freshly loaded arrays are back to their raw shapes.
for split in (X_train, Y_train, X_test, Y_test):
    print(split.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


### Pre-processing

In [18]:
num_classes = 10
# Kept in sync with the 5 epochs actually passed to cnn.fit() in the training cell
# (this was 3, which matched neither the fit call nor the reported results).
epochs = 5

# Add a trailing channel axis so each image is (28, 28, 1), matching Conv2D's input_shape.
# -1 infers the sample count instead of hard-coding 60000 / 10000.
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

# Convert to float32 and rescale by 1/255.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255.0
X_test /= 255.0

# One-hot encode the integer labels.
Y_train = to_categorical(Y_train, num_classes)
Y_test = to_categorical(Y_test, num_classes)

In [19]:
# Verify the preprocessed shapes: images carry a channel axis, labels are one-hot.
for split in (X_train, Y_train, X_test, Y_test):
    print(split.shape)

(60000, 28, 28, 1)
(60000, 10)
(10000, 28, 28, 1)
(10000, 10)


### Create and Compile the model

In [20]:
# Start with an empty Sequential model for the CNN; layers are added cell by cell below.
cnn = Sequential()

In [21]:
# First convolution: 32 filters of size 5x5; 'same' padding keeps the 28x28 spatial size.
cnn.add(
    Conv2D(
        filters=32,
        kernel_size=(5, 5),
        padding='same',
        activation='relu',
        input_shape=(28, 28, 1),
    )
)

In [22]:
# Max pooling with default settings; per the model summary this reduces 28x28 to 14x14.
cnn.add(MaxPooling2D())

In [23]:
# Second convolution: 64 filters of size 5x5; 'same' padding keeps the 14x14 spatial size.
cnn.add(
    Conv2D(
        filters=64,
        kernel_size=(5, 5),
        padding='same',
        activation='relu',
    )
)

In [24]:
# Second max pooling with default settings; per the model summary this reduces 14x14 to 7x7.
cnn.add(MaxPooling2D())

In [25]:
# Flatten the 7x7x64 feature maps into a 3136-element vector for the Dense layers.
cnn.add(Flatten())

In [26]:
# Fully connected hidden layer with 1024 ReLU units on top of the flattened features.
cnn.add(Dense(units=1024, activation='relu'))

In [27]:
# Softmax output layer with one unit per class. It uses the notebook's num_classes
# constant (also used for one-hot encoding the labels) instead of a hard-coded 10.
cnn.add(Dense(num_classes, activation='softmax'))

In [28]:
# Same training setup as the fully connected model: Adam, categorical cross-entropy, accuracy.
cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [29]:
# summary() prints the table itself and returns None, so wrapping it in print()
# would emit a stray "None" line after the table.
cnn.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        51264     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 1024)              3212288   
_________________________________________________________________
dense_5 (Dense)              (None, 10)               

### Train the model

In [30]:
# Train the CNN for 5 epochs with per-epoch progress output.
# NOTE(review): the test split doubles as the validation set here.
history_cnn = cnn.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=5,
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [31]:
# Persist the trained weights to an HDF5 file in the working directory.
cnn.save_weights('cnn-model.h5')
# To skip retraining in a later session, rebuild the model above and restore them with:
# cnn.load_weights('cnn-model.h5')

In [32]:
# Evaluate the CNN on the test split; the result is [loss, accuracy].
# Note this overwrites the `score` from the fully connected model.
score = cnn.evaluate(X_test, Y_test)



In [33]:
# Display the CNN's [test loss, test accuracy].
score

[0.029352097202170854, 0.9923999905586243]

## Comparing the two models

We obtain 99.24% accuracy with the CNN model after 5 epochs, while we obtain 98.03% accuracy with the fully connected (FC) model after 20 epochs. This happens because the CNN, by using convolution and pooling layers, selects better features than the fully connected network.