<a href="https://colab.research.google.com/github/ishankarve/NeuralNetworks/blob/main/cnn_mnist_keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D

In [4]:
# load MNIST; the loader returns one (images, labels) pair per split
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [5]:
# show the shape of each array returned by the loader,
# in the order: train images, train labels, test images, test labels
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [8]:
# sample only 20k images for training
# The indices are drawn WITHOUT replacement so that no image appears twice in
# the subset (np.random.randint draws with replacement and returns duplicates),
# and from a seeded generator so that Restart & Run All picks the same subset.
n_train_samples = 20000
sample_seed = 42
rng = np.random.default_rng(sample_seed)
idx = rng.choice(x_train.shape[0],
                 size=min(n_train_samples, x_train.shape[0]),  # never ask for more rows than exist
                 replace=False)
# the same index array is applied to images and labels to keep them aligned
x_train = x_train[idx]
y_train = y_train[idx]
print(x_train.shape)
print(y_train.shape)

(20000, 28, 28)
(20000,)


In [9]:
# per-image dimensions; the trailing 1 in input_shape is the channel axis
img_rows = img_cols = 28
input_shape = (img_rows, img_cols, 1)

# training settings: mini-batch size, number of output classes, number of epochs
batch_size, num_classes, epochs = 128, 10, 12

In [10]:
# append a channel axis to both image arrays: (n, rows, cols) -> (n, rows, cols, 1)
n_train, n_test = x_train.shape[0], x_test.shape[0]
x_train = x_train.reshape((n_train, img_rows, img_cols, 1))
x_test = x_test.reshape((n_test, img_rows, img_cols, 1))
for images in (x_train, x_test):
    print(images.shape)

(20000, 28, 28, 1)
(10000, 28, 28, 1)


In [11]:
# one-hot encode the digit labels, giving one column per class
to_one_hot = tf.keras.utils.to_categorical
y_train = to_one_hot(y_train, num_classes)
y_test = to_one_hot(y_test, num_classes)
print(y_train.shape)

(20000, 10)


In [12]:
# originally, the pixels are stored as ints
# (the cell output shows the exact dtype; it is converted to float in the next cell)
x_train.dtype

dtype('uint8')

In [13]:
# cast the integer pixels to float32 and divide by 255 in one expression per array
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [14]:
# model
# A small CNN for the digit images:
# conv -> conv -> max pooling -> dropout -> flatten -> dense -> dropout -> softmax.
# The per-layer notes below can be checked against the model summary printed
# at the end of this cell (the leading None in each output shape is the batch size).
model = Sequential()

# a keras convolutional layer is called Conv2D
# help(Conv2D)
# note that the first layer needs to be told the input shape explicitly

# first conv layer
# Layer-1 (Conv2D): 32 kernels of size (3, 3), each with a single bias, so
# 32 x 3 x 3 (weights) + 32 (biases) = 320 parameters (all trainable).
# The kernels have only one channel since the input images are grayscale.
# By default a convolutional layer uses a stride of 1 and no padding,
# so the output of this layer has shape 26 x 26 x 32.
model.add(Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape)) # input shape = (img_rows, img_cols, 1)

# second conv layer
# Layer-2 (Conv2D): 64 kernels of size (3, 3). Each kernel now convolves the
# (26, 26, 32) tensor from the previous layer, so each kernel has 32 channels,
# i.e. shape (3, 3, 32). That gives
# 64 x 3 x 3 x 32 (weights) + 64 (biases) = 18496 parameters (all trainable).
# The output shape is (24, 24, 64) since each kernel produces a (24, 24) feature map.
model.add(Conv2D(64, kernel_size=(3, 3),
                 activation='relu'))

# Max pooling: takes the (24, 24, 64) output of the previous conv layer and
# produces a (12, 12, 64) output (with a (2, 2) pool the stride is 2).
# There are no trainable parameters in the pooling layer.
model.add(MaxPooling2D(pool_size=(2, 2)))

# The Dropout layer does not alter the output shape
# and has no trainable parameters.
model.add(Dropout(0.25))

# flatten and put a fully connected layer
# The Flatten layer takes the (12, 12, 64) output of the previous layer
# and flattens it into a vector of length 12 x 12 x 64 = 9216.
model.add(Flatten())

# The Dense layer is a plain fully connected layer with 128 neurons.
# It takes the 9216-dimensional vector from the previous layer as input and has
# 128 x 9216 (weights) + 128 (biases) = 1179776 trainable parameters.
# The output of this layer is a 128-dimensional vector.
model.add(Dense(128, activation='relu')) # fully connected

# The second Dropout layer drops a fraction (rate 0.5) of the dense layer's outputs.
model.add(Dropout(0.5))

# softmax layer
# Finally, a Dense softmax layer with num_classes (10) neurons takes the
# 128-dimensional vector from the previous layer as input.
# It has 128 x 10 (weights) + 10 (biases) = 1290 trainable parameters.
model.add(Dense(num_classes, activation='softmax'))

# model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 64)        18496     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 64)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 12, 12, 64)        0         
                                                                 
 flatten (Flatten)           (None, 9216)              0         
                                                                 
 dense (Dense)               (None, 128)               1179776   
                                                        

In [15]:
# usual cross entropy loss (the labels were one-hot encoded above)
# choose any optimiser such as adam, rmsprop etc
# NOTE: tf.keras' Adadelta defaults to learning_rate=0.001, with which this
# network typically learns very slowly in the 12 epochs used here. The Keras
# documentation notes that Adadelta benefits from a higher initial rate and
# that 1.0 matches the original formulation, so the rate is set explicitly.
# metric is accuracy
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=tf.keras.optimizers.Adadelta(learning_rate=1.0),
              metrics=['accuracy'])

In [None]:
# train the network; the test split is passed as validation data
# (the original author's estimate: around 10-15 minutes on a local Windows/Mac PC)
model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

Epoch 1/12
Epoch 2/12

In [None]:
# evaluate the model on test data
# (the following cell prints model.metrics_names, which labels the values shown here)
model.evaluate(x_test, y_test)

In [None]:
# print the names of the model's metrics, for reading the evaluate() output above
print(model.metrics_names)