In [3]:
import tensorflow
from tensorflow import keras
from keras.layers import Dense, Conv2D, Flatten
from keras import Sequential
from keras.datasets import mnist

In [4]:
# Load MNIST as (images, labels) pairs for the training split and the test split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
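# With padding="valid" (no padding applied before convolution) we may lose information, because the image size shrinks after every convolution.
# Central features of the image also get more weight than edge features, since the kernel covers central pixels at more positions than border pixels.
# For example, a 3x3 kernel with "valid" padding turns a 28x28 input into a 26x26 output.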

In [7]:
# CNN with padding="valid": no padding is applied, so each 3x3 convolution shrinks the
# feature map by 2 pixels per spatial dimension (28 -> 26 -> 24 -> 22, see the summary below).
# Only public Keras names are used here; nothing from the private `keras.engine` module is needed.
model = Sequential()
# 32 filters of size 3x3, without padding. Only the first layer declares `input_shape`;
# later layers infer their input from the previous layer's output.
model.add(Conv2D(32, kernel_size=(3, 3), padding="valid", activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(32, kernel_size=(3, 3), padding="valid", activation='relu'))
model.add(Conv2D(32, kernel_size=(3, 3), padding="valid", activation='relu'))  # 3 convolutional layers in total
model.add(Flatten())  # flatten the feature maps into a 1D vector to feed the fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))  # one output per digit class (0 to 9)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 conv2d_2 (Conv2D)           (None, 24, 24, 32)        9248      
                                                                 
 conv2d_3 (Conv2D)           (None, 22, 22, 32)        9248      
                                                                 
 flatten (Flatten)           (None, 15488)             0         
                                                                 
 dense (Dense)               (None, 128)               1982592   
                                                                 
 dense_1 (Dense)             (None, 10)                1290      
                                                                 
Total params: 2,002,698
Trainable params: 2,002,698
No

In [None]:
# First Conv2D layer:
# Number of Parameters = (Kernel Width * Kernel Height * Input Channels + 1) * Output Channels
# In this case:
# Kernel Width: 3
# Kernel Height: 3
# Input Channels: 1 (grayscale image)
# Output Channels: 32
# Substituting these values into the formula:
# Number of Parameters = (3 * 3 * 1 + 1) * 32 = 320


# Second Conv2D layer:
# Number of filters: 32
# Kernel size: (3, 3)
# Padding: "valid"
# Activation function: ReLU
# Input shape: (26, 26, 32)  (the output of the first Conv2D layer, so there are 32 input channels)
# Again, using the formula:
# Number of Parameters = (3 * 3 * 32 + 1) * 32 = 9248

# Third Conv2D layer:
# Number of filters: 32
# Kernel size: (3, 3)
# Padding: "valid"
# Activation function: ReLU
# Input shape: (24, 24, 32)  (the output of the second Conv2D layer, so there are 32 input channels)
# Applying the formula once more:
# Number of Parameters = (3 * 3 * 32 + 1) * 32 = 9248
# The third Conv2D layer also has 9,248 parameters.


# Flatten layer:
# No learnable parameters. It reshapes the output of the previous layer, i.e. the 3rd Conv2D layer, into a 1D vector.
# The input to the Flatten layer has shape (22, 22, 32), so its output holds 22 * 22 * 32 = 15,488 values (activations, not parameters).


# First Dense layer:
# Number of neurons: 128
# Activation function: ReLU
# Since this is a Dense layer, its input size is determined by the output shape of the previous layer.
# In this case, it is a flattened vector of shape (None, 15488), because that is the output of the previous Flatten layer.
# Using the formula Number of Parameters = (Number of Inputs + 1) * Number of Neurons:
# Number of Parameters = (15,488 + 1) * 128 = 1,982,592 parameters


# Second Dense layer:
# Number of neurons: 10
# Activation function: Softmax
# Using the formula:
# Number of Parameters = (128 + 1) * 10 = 1,290
# Therefore, the total number of parameters in the model can be obtained by summing the parameters from all the layers:


# Total Parameters = 320 + 9,248 + 9,248 + 1,982,592 + 1,290 = 2,002,698
# The model summary will display the same information, including the number of trainable parameters for each layer.

In [8]:
# With padding="same" (padding applied before convolution) we do not lose border information, because the image size is not reduced.
# Central and edge features therefore get more equal importance: adding padding around the image lets the kernel be centred on edge pixels too.

In [11]:
# CNN with padding='same': three 3x3 convolutions with 32 filters each, followed by a
# flatten step and a two-layer dense classifier ending in a 10-way softmax.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)),
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_7 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 conv2d_8 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 conv2d_9 (Conv2D)           (None, 28, 28, 32)        9248      
                                                                 
 flatten_1 (Flatten)         (None, 25088)             0         
                                                                 
 dense_2 (Dense)             (None, 128)               3211392   
                                                                 
 dense_3 (Dense)             (None, 10)                1290      
                                                                 
Total params: 3,231,498
Trainable params: 3,231,498
No

Here we can see that the spatial size of the output after every convolution (28 x 28) is the same as that of the input, and hence no border information is lost when we apply padding before convolution.