# Classifying the MNIST Dataset with a CNN + DNN Model using tensorflow.keras

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras import datasets 
from tensorflow.keras.utils import to_categorical

In [2]:
# Load the MNIST dataset as (images, labels) pairs for the train and test splits.
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()

# Add a trailing channel axis so each image is 28x28x1, which is the
# input_shape the first Conv2D layer is built with. Using -1 lets NumPy infer
# the number of samples instead of hard-coding 60000 / 10000.
train_images = train_images.reshape((-1, 28, 28, 1))
test_images = test_images.reshape((-1, 28, 28, 1))

# Scale pixel values into the 0~1 range.
train_images, test_images = train_images / 255.0, test_images / 255.0

# One-hot encode the labels. The class count is passed explicitly so both
# splits always get 10 columns, rather than a width inferred from the largest
# label that happens to be present.
NUM_CLASSES = 10
train_y_onehot = to_categorical(train_labels, num_classes=NUM_CLASSES)
test_y_onehot = to_categorical(test_labels, num_classes=NUM_CLASSES)

# Show the first encoded label as a sanity check.
train_y_onehot[0]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

In [3]:
# Seed TensorFlow's global random generator before any layer is created so the
# initial weights are the same every time the notebook is re-run from this
# cell. (Some GPU kernels may still introduce small run-to-run differences.)
tf.random.set_seed(42)

# Three Conv2D + MaxPooling2D stages extract features; the flattened result
# feeds a dense classifier that ends in a 10-way softmax.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 64)          36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 3, 3, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 576)               0

1. Passing ```conv2d``` layer with 3*3 filter and 32 output.
    - output size after convolution: $28 \times 28$ (**padding='same'**)
    - parameters from weight: $3 \times 3 \times 1 \times 32 = 288$ (1 input, 9 from filter, 32 output)
    - parameters from bias: $32$ (32 output)
    - total parameter in this layer: $288 + 32 = 320$

2. Passing ```max_pooling2d``` layer with 2*2 pool_size.
    - output size: $14 \times 14$
    - No parameter

3. Passing ```conv2d_1``` layer with 3*3 filter and 64 output.
    - output size after convolution: $14 \times 14$ (**padding='same'**)
    - parameters from weight: $3 \times 3 \times 32 \times 64 = 18432$ (32 input, 9 from filter, 64 output)
    - parameters from bias: $64$ (64 output)
    - total parameter in this layer: $18432 + 64 = 18496$

4. Passing ```max_pooling2d_1``` layer with 2*2 pool_size.
    - output size: $7 \times 7$
    - No parameter

5. Passing ```conv2d_2``` layer with 3*3 filter and 64 output.
    - output size after convolution: $7 \times 7$ (**padding='same'**)
    - parameters from weight: $3 \times 3 \times 64 \times 64 = 36864$ (64 input, 9 from filter, 64 output)
    - parameters from bias: $64$ (64 output)
    - total parameter in this layer: $36864 + 64 = 36928$

6. Passing ```max_pooling2d_2``` layer with 2*2 pool_size.
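    - output size: $3 \times 3$ ($\lfloor 7 / 2 \rfloor = 3$; pooling uses the default **padding='valid'**, so the last row and column are dropped)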
    - No parameter

7. Passing ```flatten``` layer
    - size: $576 \quad (\because 3 \times 3 \times 64 = 576)$
    - No parameter

8. Passing ```dense``` layer
    - size: 128
    - parameters from weight: $576 \times 128 = 73728$ (576 input, 128 output)
    - parameters from bias: $128$ (128 output)
    - total parameter in this layer: $73728 + 128 = 73856$

9. Passing ```dense_1``` layer
    - size: 64
    - parameters from weight: $128 \times 64 = 8192$ (128 input, 64 output)
    - parameters from bias: $64$ (64 output)
    - total parameter in this layer: $8192 + 64 = 8256$

10. Passing ```dense_2``` layer
    - size: 10
    - parameters from weight: $64 \times 10 = 640$ (64 input, 10 output)
    - parameters from bias: $10$ (10 output)
    - total parameter in this layer: $640 + 10 = 650$

Total: $320 + 18496 + 36928 + 73856 + 8256 + 650 = 138506$ parameters

In [4]:
# Adam optimizer with categorical cross-entropy, which matches the one-hot
# encoded labels and the softmax output layer; accuracy is tracked as a metric.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Keep the History object returned by fit() so the per-epoch loss/accuracy can
# be inspected or plotted later. Assigning it also stops the bare History repr
# from being echoed as the cell's output.
history = model.fit(train_images, train_y_onehot, batch_size=100, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f355ee2fd10>

In [5]:
# Score the trained model on the test split; verbose=2 prints one summary line.
test_loss, test_acc = model.evaluate(
    x=test_images,
    y=test_y_onehot,
    verbose=2,
)

313/313 - 1s - loss: 0.0289 - accuracy: 0.9914


In [6]:
# Report both metrics returned by model.evaluate, labelled and rounded so the
# output is readable on its own.
print(f"Test loss: {test_loss:.4f} | Test accuracy: {test_acc:.4f}")

0.9914000034332275
