In [2]:
import numpy as np

def conv2d(input_mat, kernel, stride=1, padding=0):
    # Add padding
    if padding > 0:
        input_mat = np.pad(input_mat, ((padding, padding), (padding, padding)), mode='constant')
    
    # Get dimensions
    in_height, in_width = input_mat.shape
    k_height, k_width = kernel.shape
    
    # Calculate output dimensions
    out_height = (in_height - k_height) // stride + 1
    out_width = (in_width - k_width) // stride + 1
    
    # Initialize output matrix
    output = np.zeros((out_height, out_width))
    
    # Perform convolution
    for i in range(0, out_height):
        for j in range(0, out_width):
            h_start = i * stride
            h_end = h_start + k_height
            w_start = j * stride
            w_end = w_start + k_width
            
            # Extract the current window
            window = input_mat[h_start:h_end, w_start:w_end]
            
            # Convolve
            output[i, j] = np.sum(window * kernel)
    
    return output

def max_pool2d(input_mat, pool_size=2, stride=2):
    # Get dimensions
    in_height, in_width = input_mat.shape
    
    # Calculate output dimensions
    out_height = (in_height - pool_size) // stride + 1
    out_width = (in_width - pool_size) // stride + 1
    
    # Initialize output matrix
    output = np.zeros((out_height, out_width))
    
    # Perform max pooling
    for i in range(out_height):
        for j in range(out_width):
            h_start = i * stride
            h_end = h_start + pool_size
            w_start = j * stride
            w_end = w_start + pool_size
            
            # Extract the current window
            window = input_mat[h_start:h_end, w_start:w_end]
            
            # Apply max pooling
            output[i, j] = np.max(window)
    
    return output

def relu(x):
    return np.maximum(0, x)

# Define the filter/kernel
kernel = np.array([
    [1, 1, 0],
    [0, 1, 1],
    [-1, 0, 1],
    [-1, 0, 1],
    [0, 1, 1]
])

# Input matrices (3 channels)
matrix1_g = np.array([
    [112, 125, 25, 80, 220, 110],
    [150, 95, 15, 100, 115, 152],
    [200, 100, 48, 90, 70, 175],
    [187, 56, 43, 86, 180, 200],
    [190, 87, 70, 37, 24, 35],
    [80, 75, 65, 45, 32, 20]
])

matrix1_b = np.array([
    [150, 125, 38, 80, 20, 10],
    [130, 95, 25, 100, 115, 152],
    [80, 100, 148, 90, 70, 175],
    [170, 160, 43, 160, 170, 180],
    [100, 150, 70, 37, 124, 135],
    [85, 75, 65, 45, 232, 120]
])

matrix2_g = np.array([
    [200, 125, 25, 80, 220, 150],
    [50, 95, 15, 150, 115, 152],
    [90, 110, 48, 190, 70, 175],
    [180, 135, 43, 106, 180, 110],
    [55, 98, 70, 37, 24, 35],
    [78, 150, 65, 45, 32, 80]
])

# Process each channel through conv + ReLU + max pool
def process_channel(input_mat, kernel, stride=2, padding=1, pool_size=2):
    # Convolution
    conv_output = conv2d(input_mat, kernel, stride, padding)
    
    # ReLU activation
    relu_output = relu(conv_output)
    
    # Max pooling
    pool_output = max_pool2d(relu_output, pool_size, pool_size)
    
    return conv_output, relu_output, pool_output

# Process all channels
print("Processing first channel (G):")
conv1, relu1, pool1 = process_channel(matrix1_g, kernel)
print("Convolution output:\n", conv1)
print("ReLU output:\n", relu1)
print("Max pooling output:\n", pool1)

print("\nProcessing second channel (B):")
conv2, relu2, pool2 = process_channel(matrix1_b, kernel)
print("Convolution output:\n", conv2)
print("ReLU output:\n", relu2)
print("Max pooling output:\n", pool2)

print("\nProcessing third channel (G):")
conv3, relu3, pool3 = process_channel(matrix2_g, kernel)
print("Convolution output:\n", conv3)
print("ReLU output:\n", relu3)
print("Max pooling output:\n", pool3)

# Part 2: Calculate flatten layer size for complete CNN
def calculate_output_size(input_size, padding, kernel_size, stride):
    return ((input_size + 2*padding - kernel_size) // stride) + 1

# Initial input size: 6x6 (assuming single channel for simplicity)
input_size = 6

# First conv layer: filter=4x4, s=2, p=1
conv1_out = calculate_output_size(input_size, 1, 4, 2)
print(f"\nAfter 1st conv (4x4, s=2, p=1): {conv1_out}x{conv1_out}")

# First max pool: filter=2x2, s=2
pool1_out = calculate_output_size(conv1_out, 0, 2, 2)
print(f"After 1st pool (2x2, s=2): {pool1_out}x{pool1_out}")

# Second conv layer: filter=3x3, s=1
conv2_out = calculate_output_size(pool1_out, 0, 3, 1)
print(f"After 2nd conv (3x3, s=1): {conv2_out}x{conv2_out}")

# Second max pool: filter=2x2, s=2
pool2_out = calculate_output_size(conv2_out, 0, 2, 2)
print(f"After 2nd pool (2x2, s=2): {pool2_out}x{pool2_out}")

# Flatten layer size
flatten_size = pool2_out * pool2_out  # Assuming 1 channel
print(f"\nFlatten layer size: {flatten_size}")

Processing first channel (G):
Convolution output:
 [[675. 229. 847.]
 [748. 338. 624.]]
ReLU output:
 [[675. 229. 847.]
 [748. 338. 624.]]
Max pooling output:
 [[748.]]

Processing second channel (B):
Convolution output:
 [[800. 316. 517.]
 [780. 355. 930.]]
ReLU output:
 [[800. 316. 517.]
 [780. 355. 930.]]
Max pooling output:
 [[800.]]

Processing third channel (G):
Convolution output:
 [[845. 389. 647.]
 [711. 368. 624.]]
ReLU output:
 [[845. 389. 647.]
 [711. 368. 624.]]
Max pooling output:
 [[845.]]

After 1st conv (4x4, s=2, p=1): 3x3
After 1st pool (2x2, s=2): 1x1
After 2nd conv (3x3, s=1): -1x-1
After 2nd pool (2x2, s=2): -1x-1

Flatten layer size: 1
