**CNN using Keras on MNIST dataset**

- Importing necessary Packages

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
import tensorflow as tf
import numpy as np
import time
import sys

- Architecture of the Network model
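 - One CONV => RELU => BN => POOL layer set with 16 filters of size 3x3.
 - One (CONV => RELU => BN) x 2 => POOL layer set with 32 filters of size 3x3, followed by a 256-unit fully connected layer and a softmax classifier.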

In [None]:

	model = Sequential([
		Conv2D(16, (3, 3), padding="same", input_shape=inputShape),
		Activation("relu"),
		BatchNormalization(axis=chanDim),
		MaxPooling2D(pool_size=(2, 2)),
		# (CONV => RELU => BN) * 2 => POOL layer set
		Conv2D(32, (3, 3), padding="same"),
		Activation("relu"),
		BatchNormalization(axis=chanDim),
		Conv2D(32, (3, 3), padding="same"),
		Activation("relu"),
		BatchNormalization(axis=chanDim),
		MaxPooling2D(pool_size=(2, 2)),
		# first (and only) set of FC => RELU layers
		Flatten(),
		Dense(256),
		Activation("relu"),
		BatchNormalization(),
		Dropout(0.5),def build_model(width, height, depth, classes):
	inputShape = (height, width, depth)
	chanDim = -1
		# softmax classifier
		Dense(classes),
		Activation("softmax")
	])
	# return the built model to the calling function
	return model

- A single optimization step: forward pass, loss, gradients and weight update

In [None]:
def train(X, y):
	"""Run one optimization step on a single batch.

	Uses the module-level `model` and `opt`.

	Args:
		X: batch of input images.
		y: batch of one-hot encoded labels matching `X`.
	"""
	# keep track of our gradients
	with tf.GradientTape() as tape:
		# forward pass in training mode: without training=True the Dropout
		# layer is a no-op and BatchNormalization neither uses nor updates
		# its batch statistics
		pred = model(X, training=True)
		# per-sample losses; tape.gradient() below differentiates their sum
		loss = categorical_crossentropy(y, pred)
	# calculate the gradients using our tape and then update the
	# model weights
	grads = tape.gradient(loss, model.trainable_variables)
	opt.apply_gradients(zip(grads, model.trainable_variables))

- Data Preprocessing

In [None]:
# Hyperparameters: `epochs` is compared against the mini-batch step counter
# in the training loop, `lr` is the initial learning rate for Adam.
epochs, lr = 3000, 0.01

# Load the MNIST train/test splits.
(trainX, trainY), (testX, testY) = mnist.load_data()

# Scale the pixel values from [0, 255] to [0, 1].
trainX, testX = (
    pixels.astype("float32") / 255.0 for pixels in (trainX, testX)
)

# One-hot encode the digit labels (10 classes).
trainY, testY = (
    to_categorical(digits, 10) for digits in (trainY, testY)
)

In [None]:
# 28x28 single-channel images, 10 digit classes.
model = build_model(28, 28, 1, 10)

# `lr=` and `decay=` are legacy Adam arguments that newer Keras releases
# deprecate or reject. InverseTimeDecay with decay_steps=1 reproduces the
# legacy schedule: lr / (1 + decay * step), with decay = lr / epochs.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=lr, decay_steps=1, decay_rate=lr / epochs)
opt = Adam(learning_rate=lr_schedule)

In [None]:
# trainX was already scaled to [0, 1] when it was loaded, so it must not be
# divided by 255 a second time here; doing so would feed the network
# training inputs on a different scale than the test inputs.
# Only the trailing channel axis is added.
train_images = trainX.reshape([-1, 28, 28, 1]).astype(np.float32)
train_labels = trainY.astype(np.int32)

data = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
# Shuffle over the whole training set, batch, and repeat indefinitely; the
# training loop decides when to stop.
data = data.shuffle(buffer_size=len(train_images)).batch(128).repeat()

- Training Phase 

In [None]:
# `epochs` counts mini-batch steps here; steps 0..epochs (inclusive) are run.
for step, (images, labels) in enumerate(data.take(epochs + 1)):
    train(images, labels)
    if step % 100 != 0:
        continue
    # Every 100th step: report loss and accuracy on the batch just used.
    probs = model(images)
    loss = tf.keras.backend.mean(
        tf.keras.losses.categorical_crossentropy(labels, probs))
    preds = tf.argmax(probs, axis=1, output_type=tf.int32)
    truth = tf.argmax(labels, axis=1, output_type=tf.int32)
    acc = tf.reduce_mean(tf.cast(tf.equal(preds, truth), tf.float32))
    print("Loss: {} Accuracy: {}".format(loss, acc))

Loss: 0.3243069648742676 Accuracy: 0.890625
Loss: 0.14792966842651367 Accuracy: 0.9453125
Loss: 0.010317481122910976 Accuracy: 1.0
Loss: 0.060455046594142914 Accuracy: 0.9765625
Loss: 0.028316056355834007 Accuracy: 1.0
Loss: 0.027193576097488403 Accuracy: 0.9921875
Loss: 0.07623466849327087 Accuracy: 0.984375
Loss: 0.026266302913427353 Accuracy: 1.0
Loss: 0.010583963245153427 Accuracy: 1.0
Loss: 0.020456988364458084 Accuracy: 0.9921875
Loss: 0.0022524348460137844 Accuracy: 1.0
Loss: 0.005889228545129299 Accuracy: 1.0
Loss: 0.0016094091115519404 Accuracy: 1.0
Loss: 0.051517244428396225 Accuracy: 0.984375
Loss: 0.006235375534743071 Accuracy: 1.0
Loss: 0.008865944109857082 Accuracy: 1.0
Loss: 0.004106742795556784 Accuracy: 1.0
Loss: 0.006427858490496874 Accuracy: 1.0
Loss: 0.0016038736794143915 Accuracy: 1.0
Loss: 0.0073527805507183075 Accuracy: 1.0
Loss: 0.0023270398378372192 Accuracy: 1.0
Loss: 0.0032665079925209284 Accuracy: 1.0
Loss: 0.001481209765188396 Accuracy: 1.0
Loss: 0.00557239

- Test Phase

In [None]:
# Labels as int32 one-hot vectors; images get a trailing channel axis so
# they match the (28, 28, 1) input the model was built with.
test_labels = testY.astype(np.int32)
test_images = testX.reshape((-1, 28, 28, 1))

In [None]:
# Forward pass over the whole test set (the model output is a softmax
# distribution per image).
logits = model(test_images)

# Predicted vs. true class index for every test image.
preds = tf.argmax(logits, axis=1, output_type=tf.int32)
truth = tf.argmax(test_labels, axis=1, output_type=tf.int32)

# Accuracy is the fraction of images whose predicted class matches.
hits = tf.cast(tf.equal(preds, truth), tf.float32)
acc = tf.reduce_mean(hits)
print("Test Accuracy: {}".format(acc))

Test Accuracy: 0.9882000088691711


- I have tried different sets of hyperparameters for the learning rate and the number of epochs.
- I have tried different activation functions (tanh, sigmoid, ReLU and LeakyReLU).
- I have also used different loss functions and the Adam optimizer.

- Because CIFAR-10 takes so much time to train, I was not able to run experiments with it.