# **Convolutional Neural Network** - From Theory to Practice

Convolutional neural networks (CNNs, or ConvNets) are a class of deep neural networks that are most frequently used to analyze visual imagery. Unlike the fully connected networks that usually come to mind when we think of a neural network, a ConvNet does not rely solely on general matrix multiplication; it makes use of a special operation known as convolution. In mathematics, convolution is an operation on two functions that yields a third function describing how the shape of one is modified by the other.

<img src="images/cnnArchitecture.jpeg"/>

## **Building CNN from Scratch**

In [172]:
import numpy as np

The following image is used as a worked example to understand and verify how the CNN works:

<img src="images/conv.gif" width="350"/>

In [173]:
import numpy as np

class CNN:
    """Minimal NumPy building blocks for a CNN forward pass.

    Provides a single-filter convolution layer, max pooling, flattening and
    dropout. The object holds no state; each method is a pure function of its
    arguments (apart from printing and random number generation).
    """

    def __init__(self):
        pass

    def convLayer(self, input_shape, channels, strides, padding, filter_size, input_matrix, filter_matrix):
        """Slide one filter over ``input_matrix`` and return the response map.

        The filter is applied without flipping (cross-correlation). Each
        window yields a single number, the sum over the whole window
        including all channels, which is written to every output channel.

        Args:
            input_shape: ``(height, width)`` of ``input_matrix`` before
                padding. The output size is derived from this value, so it
                must match the matrix.
            channels: Number of channels in the returned array.
            strides: ``(row_stride, col_stride)``.
            padding: ``'same'`` (zero-pad) or ``'valid'`` (no padding),
                case-insensitive.
            filter_size: ``(filter_height, filter_width)``.
            input_matrix: 2-D ``(H, W)`` or 3-D ``(H, W, C)`` array-like.
            filter_matrix: 2-D ``(fh, fw)`` or 3-D ``(fh, fw, C)`` array-like.

        Returns:
            A float array of shape ``(out_height, out_width, channels)``, or
            the string ``"Invalid Padding!!"`` when ``padding`` is not
            recognised.
        """
        height, width = input_shape
        print("Input Shape (with channels):", (height, width, channels))

        # Add a trailing channel axis only when it is missing, so data that
        # already carries channels is not turned into a 4-D array.
        input_matrix = np.asarray(input_matrix)
        filter_matrix = np.asarray(filter_matrix)
        if input_matrix.ndim == 2:
            input_matrix = np.expand_dims(input_matrix, axis=-1)
        if filter_matrix.ndim == 2:
            filter_matrix = np.expand_dims(filter_matrix, axis=-1)

        print("\nInput Matrix:\n", input_matrix[:, :, 0])
        print("\nFilter Matrix:\n", filter_matrix[:, :, 0])

        filter_height, filter_width = filter_size[0], filter_size[1]
        padding = padding.lower()

        if padding == 'same':
            pad_height = filter_height // 2
            pad_width = filter_width // 2
            print("\nPad Height:", pad_height)
            print("Pad Width:", pad_width)

            # The total padding must be (filter - 1) so that a stride-1 output
            # keeps the input size. For odd filters both sides get f // 2; for
            # even filters the leading side gets one less, otherwise the
            # output would grow by one row/column.
            pad_top = (filter_height - 1) // 2
            pad_left = (filter_width - 1) // 2
            input_matrix = np.pad(
                input_matrix,
                ((pad_top, pad_height), (pad_left, pad_width), (0, 0)),
                mode='constant',
            )

            # The window positions below are computed on the padded extent.
            height += pad_top + pad_height
            width += pad_left + pad_width
        elif padding != 'valid':
            # Kept as a sentinel return value for backward compatibility.
            return "Invalid Padding!!"

        # Output dimensions
        conv_height = (height - filter_height) // strides[0] + 1
        conv_width = (width - filter_width) // strides[1] + 1
        output_matrix = np.zeros((conv_height, conv_width, channels))

        # Convolution operation, iterating over output positions.
        for out_row in range(conv_height):
            top = out_row * strides[0]
            for out_col in range(conv_width):
                left = out_col * strides[1]
                receptive_field = input_matrix[top:top + filter_height, left:left + filter_width, :]
                # Scalar response, broadcast to every output channel.
                output_matrix[out_row, out_col, :] = np.sum(receptive_field * filter_matrix, axis=(0, 1, 2))

        return output_matrix

    def maxPooling(self, input_matrix, pool_size=(2, 2), strides_pooling=(2, 2)):
        """Apply channel-wise max pooling without padding.

        Args:
            input_matrix: 3-D array of shape ``(height, width, channels)``.
            pool_size: ``(pool_height, pool_width)`` of the window.
            strides_pooling: ``(row_stride, col_stride)`` of the window.

        Returns:
            A float array of shape ``(pooled_height, pooled_width, channels)``
            holding the maximum of each window.
        """
        input_height, input_width, input_channels = input_matrix.shape
        pool_height, pool_width = pool_size
        stride_height, stride_width = strides_pooling

        # Only windows that fit completely inside the input are used.
        pooled_height = (input_height - pool_height) // stride_height + 1
        pooled_width = (input_width - pool_width) // stride_width + 1
        pooled_matrix = np.zeros((pooled_height, pooled_width, input_channels))

        for out_row in range(pooled_height):
            top = out_row * stride_height
            for out_col in range(pooled_width):
                left = out_col * stride_width
                patch = input_matrix[top:top + pool_height, left:left + pool_width, :]
                # Reduce over the two spatial axes; one maximum per channel.
                pooled_matrix[out_row, out_col, :] = patch.max(axis=(0, 1))

        return pooled_matrix

    def flatten(self, input_matrix):
        """Return a 1-D copy of ``input_matrix`` in row-major order."""
        return input_matrix.flatten()

    def dropout(self, input_matrix, dropout_rate=0.5):
        """Zero each element independently with probability ``dropout_rate``.

        Surviving elements are returned unchanged (no rescaling is applied).

        Args:
            input_matrix: Array to mask.
            dropout_rate: Drop probability; must satisfy ``0 <= rate < 1``.

        Returns:
            ``input_matrix`` multiplied element-wise by a random 0/1 mask.

        Raises:
            AssertionError: If ``dropout_rate`` is outside ``[0, 1)``.
        """
        # NOTE: the assert is kept so callers still see AssertionError; it is
        # skipped when Python runs with -O.
        assert 0 <= dropout_rate < 1, "Dropout rate must be in [0, 1)."
        keep_probability = 1 - dropout_rate
        dropout_mask = np.random.binomial(1, keep_probability, size=input_matrix.shape)
        return input_matrix * dropout_mask
    

In [174]:
# 7x5 binary example image; each string is one row of pixels.
input_matrix = np.array([
    [int(pixel) for pixel in row]
    for row in (
        "01110",
        "01010",
        "01110",
        "00010",
        "00010",
        "00010",
        "00010",
    )
])

# 3x3 filter of ones with a zero at the centre.
filter_matrix = np.ones((3, 3), dtype=int)
filter_matrix[1, 1] = 0

In [175]:
# Layer configuration for the 7x5 single-channel example.
input_shape, channels = (7, 5), 1
filter_size, strides = (3, 3), (1, 1)
padding = 'same'

cnn_model = CNN()

In [176]:
# Run the convolution layer on the example image with the example filter.
conv_output = cnn_model.convLayer(
    input_shape=input_shape,
    channels=channels,
    strides=strides,
    padding=padding,
    filter_size=filter_size,
    input_matrix=input_matrix,
    filter_matrix=filter_matrix,
)

Input Shape (with channels): (7, 5, 1)

Input Matrix:
 [[0 1 1 1 0]
 [0 1 0 1 0]
 [0 1 1 1 0]
 [0 0 0 1 0]
 [0 0 0 1 0]
 [0 0 0 1 0]
 [0 0 0 1 0]]

Filter Matrix:
 [[1 1 1]
 [1 0 1]
 [1 1 1]]

Pad Height: 1
Pad Width: 1


In [177]:
# Show the first (and only) channel of the convolution result.
first_channel = conv_output[:, :, 0]
print("\nConvolution Output:\n", first_channel)


Convolution Output:
 [[2. 2. 4. 2. 2.]
 [3. 4. 8. 4. 3.]
 [2. 2. 5. 3. 3.]
 [1. 2. 5. 3. 3.]
 [0. 0. 3. 2. 3.]
 [0. 0. 3. 2. 3.]
 [0. 0. 2. 1. 2.]]


In [178]:
# 3x3 pooling window moved one pixel at a time.
pool_size, strides_pooling = (3, 3), (1, 1)

maxPool_output = cnn_model.maxPooling(
    input_matrix=conv_output,
    pool_size=pool_size,
    strides_pooling=strides_pooling,
)
print("\nMax Pooling Output:\n", maxPool_output[:, :, 0])


Max Pooling Output:
 [[8. 8. 8.]
 [8. 8. 8.]
 [5. 5. 5.]
 [5. 5. 5.]
 [3. 3. 3.]]


In [179]:
# Collapse the pooled feature map into a 1-D vector.
flattened_output = cnn_model.flatten(input_matrix=maxPool_output)
print("\nFlattened Output:\n", flattened_output)


Flattened Output:
 [8. 8. 8. 8. 8. 8. 5. 5. 5. 5. 5. 5. 3. 3. 3.]


In [180]:
# Randomly zero elements of the flattened vector (drop probability 0.3).
dropout_output = cnn_model.dropout(input_matrix=flattened_output, dropout_rate=0.3)
print("\nDropout Output:\n", dropout_output)


Dropout Output:
 [8. 8. 0. 0. 8. 8. 5. 0. 0. 0. 5. 5. 0. 0. 3.]


In [189]:
import numpy as np

class CNN:
    """Minimal NumPy building blocks for a CNN forward pass on random data.

    ``convLayer`` generates its own random input and filter; the remaining
    methods (max pooling, flattening, dropout) operate on arrays passed in.
    The object holds no state.
    """

    def __init__(self):
        pass

    def convLayer(self, input_shape, channels, strides, padding, filter_size):
        """Convolve a random multi-channel input with one random filter.

        A random integer input of shape ``(height, width, channels)`` with
        values in ``[0, 10)`` and a random integer filter of shape
        ``(filter_height, filter_width, channels)`` with values in ``[0, 5)``
        are drawn, in that order. The filter is applied without flipping.
        Each window yields a single number, the sum over the whole window
        including all channels, which is written to every output channel.

        Args:
            input_shape: ``(height, width)`` of the generated input.
            channels: Number of channels of the input, the filter and the
                returned array.
            strides: ``(row_stride, col_stride)``.
            padding: ``'same'`` (zero-pad) or ``'valid'`` (no padding),
                case-insensitive.
            filter_size: ``(filter_height, filter_width)``.

        Returns:
            A float array of shape ``(out_height, out_width, channels)``, or
            the string ``"Invalid Padding!!"`` when ``padding`` is not
            recognised.
        """
        height, width = input_shape
        input_shape_with_channels = (height, width, channels)
        print("Input Shape (with channels):", input_shape_with_channels)

        filter_height, filter_width = filter_size[0], filter_size[1]

        # Generate random input and filter matrices (input first, then filter).
        input_matrix = np.random.randint(0, 10, size=input_shape_with_channels)
        filter_matrix = np.random.randint(0, 5, size=(filter_height, filter_width, channels))

        # Only channel 0 is printed.
        print("\nInput Matrix:\n", input_matrix[:, :, 0])
        print("\nFilter Matrix:\n", filter_matrix[:, :, 0])

        padding = padding.lower()

        if padding == 'same':
            pad_height = filter_height // 2
            pad_width = filter_width // 2
            print("\nPad Height:", pad_height)
            print("Pad Width:", pad_width)

            # The total padding must be (filter - 1) so that a stride-1 output
            # keeps the input size. For odd filters both sides get f // 2; for
            # even filters the leading side gets one less, otherwise the
            # output would grow by one row/column.
            pad_top = (filter_height - 1) // 2
            pad_left = (filter_width - 1) // 2
            input_matrix = np.pad(
                input_matrix,
                ((pad_top, pad_height), (pad_left, pad_width), (0, 0)),
                mode='constant',
            )

            # The window positions below are computed on the padded extent.
            height += pad_top + pad_height
            width += pad_left + pad_width
        elif padding != 'valid':
            # Kept as a sentinel return value for backward compatibility.
            return "Invalid Padding!!"

        # Output dimensions
        conv_height = (height - filter_height) // strides[0] + 1
        conv_width = (width - filter_width) // strides[1] + 1
        output_matrix = np.zeros((conv_height, conv_width, channels))

        # Convolution operation, iterating over output positions.
        for out_row in range(conv_height):
            top = out_row * strides[0]
            for out_col in range(conv_width):
                left = out_col * strides[1]
                receptive_field = input_matrix[top:top + filter_height, left:left + filter_width, :]
                # Scalar response, broadcast to every output channel.
                output_matrix[out_row, out_col, :] = np.sum(receptive_field * filter_matrix, axis=(0, 1, 2))

        return output_matrix

    def maxPooling(self, input_matrix, pool_size=(2, 2), strides_pooling=(2, 2)):
        """Apply channel-wise max pooling without padding.

        Args:
            input_matrix: 3-D array of shape ``(height, width, channels)``.
            pool_size: ``(pool_height, pool_width)`` of the window.
            strides_pooling: ``(row_stride, col_stride)`` of the window.

        Returns:
            A float array of shape ``(pooled_height, pooled_width, channels)``
            holding the maximum of each window.
        """
        input_height, input_width, input_channels = input_matrix.shape
        pool_height, pool_width = pool_size
        stride_height, stride_width = strides_pooling

        # Only windows that fit completely inside the input are used.
        pooled_height = (input_height - pool_height) // stride_height + 1
        pooled_width = (input_width - pool_width) // stride_width + 1
        pooled_matrix = np.zeros((pooled_height, pooled_width, input_channels))

        for out_row in range(pooled_height):
            top = out_row * stride_height
            for out_col in range(pooled_width):
                left = out_col * stride_width
                patch = input_matrix[top:top + pool_height, left:left + pool_width, :]
                # Reduce over the two spatial axes; one maximum per channel.
                pooled_matrix[out_row, out_col, :] = patch.max(axis=(0, 1))

        return pooled_matrix

    def flatten(self, input_matrix):
        """Return a 1-D copy of ``input_matrix`` in row-major order."""
        return input_matrix.flatten()

    def dropout(self, input_matrix, dropout_rate=0.5):
        """Zero each element independently with probability ``dropout_rate``.

        Surviving elements are returned unchanged (no rescaling is applied).

        Args:
            input_matrix: Array to mask.
            dropout_rate: Drop probability; must satisfy ``0 <= rate < 1``.

        Returns:
            ``input_matrix`` multiplied element-wise by a random 0/1 mask.

        Raises:
            AssertionError: If ``dropout_rate`` is outside ``[0, 1)``.
        """
        # NOTE: the assert is kept so callers still see AssertionError; it is
        # skipped when Python runs with -O.
        assert 0 <= dropout_rate < 1, "Dropout rate must be in [0, 1)."
        keep_probability = 1 - dropout_rate
        dropout_mask = np.random.binomial(1, keep_probability, size=input_matrix.shape)
        return input_matrix * dropout_mask

# Example usage: run the full pipeline on a random 4x4 input with 3 channels.
cnn_model = CNN()

# Convolution settings
input_shape, channels = (4, 4), 3
filter_size, strides = (3, 3), (1, 1)
padding = 'same'

conv_output = cnn_model.convLayer(
    input_shape=input_shape,
    channels=channels,
    strides=strides,
    padding=padding,
    filter_size=filter_size,
)
print("\nConvolution Output:\n", conv_output[:, :, 0])

# Pooling settings
pool_size, strides_pooling = (2, 2), (1, 1)

maxPool_output = cnn_model.maxPooling(
    input_matrix=conv_output,
    pool_size=pool_size,
    strides_pooling=strides_pooling,
)
print("\nMax Pooling Output:\n", maxPool_output[:, :, 0])

flattened_output = cnn_model.flatten(input_matrix=maxPool_output)
print("\nFlattened Output:\n", flattened_output)

dropout_output = cnn_model.dropout(input_matrix=flattened_output, dropout_rate=0.3)
print("\nDropout Output:\n", dropout_output)


Input Shape (with channels): (4, 4, 3)

Input Matrix:
 [[1 2 9 6]
 [2 5 9 9]
 [0 3 7 1]
 [0 3 6 0]]

Filter Matrix:
 [[3 4 1]
 [0 0 2]
 [4 1 4]]

Pad Height: 1
Pad Width: 1

Convolution Output:
 [[ 58. 142. 224. 152.]
 [104. 226. 261. 223.]
 [131. 247. 262. 235.]
 [ 78. 143. 178. 117.]]

Max Pooling Output:
 [[226. 261. 261.]
 [247. 262. 262.]
 [247. 262. 262.]]

Flattened Output:
 [226. 226. 226. 261. 261. 261. 261. 261. 261. 247. 247. 247. 262. 262.
 262. 262. 262. 262. 247. 247. 247. 262. 262. 262. 262. 262. 262.]

Dropout Output:
 [226. 226.   0. 261. 261.   0.   0.   0.   0.   0. 247. 247.   0.   0.
 262. 262. 262. 262. 247. 247.   0.   0. 262. 262. 262. 262. 262.]
