# Convolution from scratch

In [None]:
import numpy as np
image = np.array([
    [1, 2, 3, 0, 1],
    [0, 1, 2, 3, 2],
    [3, 0, 1, 2, 1],
    [2, 1, 3, 0, 0],
    [1, 2, 0, 1, 2]
])

image.shape

(5, 5)

In [None]:
5-3+1, 5-3+1

(3, 3)

In [None]:
import numpy as np

# Define a simple 5x5 grayscale image
image = np.array([
    [1, 2, 3, 0, 1],
    [0, 1, 2, 3, 2],
    [3, 0, 1, 2, 1],
    [2, 1, 3, 0, 0],
    [1, 2, 0, 1, 2]
])

# Define a 3x3 kernel (filter)
kernel = np.array([
    [0, 1, 0],
    [1, -4, 1],
    [0, 1, 0]
])

# Define convolution operation
def convolve(image, kernel):
    """Slide a 2-D kernel over a 2-D image with stride 1 and no padding.

    At every position the window under the kernel is multiplied element-wise
    by the kernel (the kernel is not flipped) and the products are summed.

    Parameters
    ----------
    image : 2-D array
        Input to filter.
    kernel : 2-D array
        Filter weights.

    Returns
    -------
    numpy.ndarray
        Float array of shape
        (image_h - kernel_h + 1, image_w - kernel_w + 1).
    """
    img_rows, img_cols = image.shape
    k_rows, k_cols = kernel.shape
    out_rows = img_rows - k_rows + 1
    out_cols = img_cols - k_cols + 1
    result = np.zeros((out_rows, out_cols))

    for r in range(out_rows):
        # Rows covered by the kernel at this vertical position.
        band = image[r:r + k_rows]
        for c in range(out_cols):
            window = band[:, c:c + k_cols]
            result[r, c] = np.sum(window * kernel)

    return result

# Apply the convolution
output = convolve(image, kernel)
print("Convolution Output:\n", output)


Convolution Output:
 [[  0.   0.  -6.]
 [  6.   3.  -3.]
 [  3. -10.   6.]]


In [None]:
padding = 1
# With one pixel of zero padding on each side, a 3x3 kernel at stride 1 keeps
# the 5x5 size: 5 + 2*padding - 3 + 1 = 5 along each dimension.
5 + 2 * padding - 3 + 1, 5 + 2 * padding - 3 + 1

# Padding with stride = 1

1. Padding: Zero-padding is added to the image to control the spatial dimensions of the output.
2. Stride: Controls how much the filter moves at each step. A stride of 1 means the filter moves one pixel at a time.
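3. Convolution with Padding and Stride: Together, padding and stride determine the spatial size of the output, so we can either preserve or reduce it. For an input of size $n$, kernel size $k$, padding $p$ and stride $s$, the output size along that dimension is $\lfloor (n + 2p - k) / s \rfloor + 1$.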

In [None]:
# Adding padding
def pad_image(image, pad):
    """Return `image` surrounded by a border of zeros.

    `pad` is forwarded to `np.pad` as the pad width, so an integer adds that
    many zeros before and after every axis.
    """
    zero_padded = np.pad(image, pad_width=pad, mode="constant", constant_values=0)
    return zero_padded

pad_image(image, 1)

array([[0, 0, 0, 0, 0, 0, 0],
       [0, 1, 2, 3, 0, 1, 0],
       [0, 0, 1, 2, 3, 2, 0],
       [0, 3, 0, 1, 2, 1, 0],
       [0, 2, 1, 3, 0, 0, 0],
       [0, 1, 2, 0, 1, 2, 0],
       [0, 0, 0, 0, 0, 0, 0]])

In [None]:
# Adding padding
def pad_image(image, pad):
    """Return `image` surrounded by a border of zeros.

    `pad` is forwarded to `np.pad` as the pad width, so an integer adds that
    many zeros before and after every axis.
    """
    zero_padded = np.pad(image, pad_width=pad, mode="constant", constant_values=0)
    return zero_padded

# Modify convolution to include stride and padding
def convolve_with_padding_and_stride(image, kernel, stride=1, padding=0):
    """Apply a 2-D kernel to a 2-D image with optional zero padding and stride.

    Parameters
    ----------
    image : 2-D array
        Input to filter.
    kernel : 2-D array
        Filter weights; multiplied element-wise with each window (no flip).
    stride : int, default 1
        Step, in pixels, between consecutive window positions.
    padding : int, default 0
        When greater than zero, the image is first zero-padded via
        `pad_image(image, padding)`.

    Returns
    -------
    numpy.ndarray
        Float array with (padded_size - kernel_size) // stride + 1 entries
        along each dimension.
    """
    padded = pad_image(image, padding) if padding > 0 else image

    in_rows, in_cols = padded.shape
    k_rows, k_cols = kernel.shape
    out_rows = (in_rows - k_rows) // stride + 1
    out_cols = (in_cols - k_cols) // stride + 1
    result = np.zeros((out_rows, out_cols))

    # Walk the output grid; each output index maps to a window whose top-left
    # corner is that index scaled by the stride.
    for r in range(out_rows):
        top = r * stride
        for c in range(out_cols):
            left = c * stride
            window = padded[top:top + k_rows, left:left + k_cols]
            result[r, c] = np.sum(window * kernel)

    return result

# Define a simple 5x5 grayscale image
image = np.array([
    [1, 2, 3, 0, 1],
    [0, 1, 2, 3, 2],
    [3, 0, 1, 2, 1],
    [2, 1, 3, 0, 0],
    [1, 2, 0, 1, 2]
])

# Apply the convolution with padding and stride
padded_output = convolve_with_padding_and_stride(image, kernel, stride=1, padding=1)
print("Padded Convolution Output:\n", padded_output)


Padded Convolution Output:
 [[ -2.  -3.  -8.   7.  -2.]
 [  5.   0.   0.  -6.  -3.]
 [-10.   6.   3.  -3.   0.]
 [ -3.   3. -10.   6.   3.]
 [  0.  -6.   6.  -2.  -7.]]


# Max Pooling

In [None]:
# Max Pooling function
def max_pooling(image, pool_size, stride):
    """Down-sample a 2-D image by taking the maximum of square windows.

    Parameters
    ----------
    image : 2-D array
        Input to pool.
    pool_size : int
        Side length of the square pooling window.
    stride : int
        Step, in pixels, between consecutive window positions.

    Returns
    -------
    numpy.ndarray
        Float array with (image_size - pool_size) // stride + 1 entries along
        each dimension; windows that would run past the edge are dropped.
    """
    in_rows, in_cols = image.shape
    out_rows = (in_rows - pool_size) // stride + 1
    out_cols = (in_cols - pool_size) // stride + 1
    pooled = np.zeros((out_rows, out_cols))

    for r in range(out_rows):
        top = r * stride
        for c in range(out_cols):
            left = c * stride
            window = image[top:top + pool_size, left:left + pool_size]
            pooled[r, c] = np.max(window)

    return pooled


image = np.array([
    [1, 2, 3, 0, 1],
    [0, 1, 2, 3, 2],
    [3, 0, 1, 2, 1],
    [2, 1, 3, 0, 0],
    [1, 2, 0, 1, 2]
])
# Apply max pooling
pooled_output = max_pooling(image, pool_size=2, stride=2)
print("Max Pooling Output:\n", pooled_output)


Max Pooling Output:
 [[2. 3.]
 [3. 3.]]


# Convolution on RGB Images

In [None]:
import numpy as np

# Define a simple 5x5x3 RGB image (3 channels)
image = np.array([
    [[1, 0, 2], [2, 1, 1], [3, 2, 0], [0, 1, 1], [1, 0, 2]],
    [[0, 1, 0], [1, 0, 1], [2, 2, 2], [3, 1, 3], [2, 0, 1]],
    [[3, 0, 2], [0, 1, 0], [1, 0, 1], [2, 2, 2], [1, 0, 0]],
    [[2, 1, 1], [1, 0, 2], [3, 3, 1], [0, 1, 0], [0, 2, 1]],
    [[1, 2, 2], [2, 1, 0], [0, 0, 1], [1, 2, 2], [2, 1, 1]]
])

image.shape

(5, 5, 3)

In [None]:
# Define a 3x3x3 filter (kernel) for each channel (RGB)
kernel = np.array([
    [[0, 1, 0], [1, -1, 1], [0, 1, 0]],
    [[1, 0, 1], [0, -1, 0], [1, 0, 1]],
    [[0, 1, 0], [1, 1, 1], [0, 1, 0]]
])

kernel.shape

(3, 3, 3)

In [None]:
# Unpack the three axes of the image; the names treat them as height, width, channels.
image_h, image_w, image_c = image.shape

print(image_h, image_w, image_c)
# The kernel's axes are unpacked in the same height, width, channels order.
kernel_h, kernel_w, kernel_c = kernel.shape
print(kernel_h, kernel_w, kernel_c )
# Output size with no padding and a stride of 1.
output_h = image_h - kernel_h + 1
output_w = image_w - kernel_w + 1
# One value per spatial position, stored along a trailing axis of length 1.
output = np.zeros((output_h, output_w, 1))
output.shape

5 5 3
3 3 3


(3, 3, 1)

In [None]:
# Multiply each 3-D patch (height x width x channels) by the 3-D kernel and sum
# over every axis, so that all channels contribute to each output value.
# (Looping over channels and assigning inside the loop would keep only the last
# channel, and a 2-D channel slice would broadcast against the whole 3-D kernel.)
for i in range(output_h):
    for j in range(output_w):
        output[i, j] = np.sum(image[i:i+kernel_h, j:j+kernel_w, :] * kernel)
output

array([[[10.],
        [12.],
        [12.]],

       [[16.],
        [15.],
        [11.]],

       [[ 8.],
        [11.],
        [15.]]])

In [None]:
import numpy as np

# Define a simple 5x5x3 RGB image (3 channels)
image = np.array([
    [[1, 0, 2], [2, 1, 1], [3, 2, 0], [0, 1, 1], [1, 0, 2]],
    [[0, 1, 0], [1, 0, 1], [2, 2, 2], [3, 1, 3], [2, 0, 1]],
    [[3, 0, 2], [0, 1, 0], [1, 0, 1], [2, 2, 2], [1, 0, 0]],
    [[2, 1, 1], [1, 0, 2], [3, 3, 1], [0, 1, 0], [0, 2, 1]],
    [[1, 2, 2], [2, 1, 0], [0, 0, 1], [1, 2, 2], [2, 1, 1]]
])

# Define a 3x3x3 filter (kernel) for each channel (RGB)
kernel = np.array([
    [[0, 1, 0], [1, -1, 1], [0, 1, 0]],
    [[1, 0, 1], [0, -1, 0], [1, 0, 1]],
    [[0, 1, 0], [1, 1, 1], [0, 1, 0]]
])

# Convolution operation
def convolve_rgb(image, kernel):
    """Apply one multi-channel kernel to a multi-channel image.

    Stride is 1 and no padding is used. At each spatial position the 3-D
    patch under the kernel is multiplied element-wise by the kernel and the
    products are summed over height, width and channels, giving a single
    value per position.

    Parameters
    ----------
    image : 3-D array, shape (image_h, image_w, channels)
        Input image.
    kernel : 3-D array, shape (kernel_h, kernel_w, channels)
        Filter weights, laid out like the image (channels last).

    Returns
    -------
    numpy.ndarray
        Float array of shape
        (image_h - kernel_h + 1, image_w - kernel_w + 1, 1).

    Raises
    ------
    ValueError
        If the kernel and the image have different channel counts.
    """
    image_h, image_w, image_c = image.shape
    kernel_h, kernel_w, kernel_c = kernel.shape
    if kernel_c != image_c:
        raise ValueError(
            f"kernel has {kernel_c} channels but image has {image_c}"
        )

    output_h = image_h - kernel_h + 1
    output_w = image_w - kernel_w + 1
    output = np.zeros((output_h, output_w, 1))

    for i in range(output_h):
        for j in range(output_w):
            # Take all channels at once: assigning per channel would leave only
            # the last channel's result in the output.
            patch = image[i:i+kernel_h, j:j+kernel_w, :]
            output[i, j, 0] = np.sum(patch * kernel)
    return output

# Apply the convolution
output = convolve_rgb(image, kernel)
print("Convolution Output:\n", output)

Convolution Output:
 [[[10.]
  [12.]
  [12.]]

 [[16.]
  [15.]
  [11.]]

 [[ 8.]
  [11.]
  [15.]]]


# Implementing the same with Keras

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Example input shape for a 100x100 RGB image
input_shape = (100, 100, 3)

# Define the model
model = models.Sequential()

# Add Input layer
model.add(layers.Input(shape=input_shape))

# Adding Convolution Layer: 16 filters of size 3x3, stride 1, 'same' padding, ReLU activation
model.add(layers.Conv2D(16, (3, 3), padding='same', strides=1, activation='relu'))

# Adding Max Pooling Layer: 2x2 window with a stride of 2
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))

# Second convolution + pooling stage, this time with 32 filters
model.add(layers.Conv2D(32, (3, 3), padding='same', strides=1, activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))

# Print model summary to see the structure
model.summary()


1. Learn how the number of parameters in each layer is calculated.
2. Convolution on RGB Images.