In [7]:
import numpy as np

def conv2d_multichannel(input_tensor, kernel, padding=0, stride=1):
    """
    input_tensor: (H, W, C)
    kernel:       (kH, kW, C)
    output:       (H_out, W_out)
    """

    image_height, image_width, image_channels = input_tensor.shape
    kernel_height, kernel_width, kernel_channels = kernel.shape

    assert image_channels == kernel_channels, "kernel channels must equal image channels"

    padded_image = np.pad(input_tensor,
                         ((padding, padding), (padding, padding), (0,0)),
                          mode='constant')

    output_height = ((image_height + 2 * padding - kernel_height) // stride) + 1
    output_width = ((image_width + 2 * padding - kernel_width) // stride) + 1

    output_tensor = np.zeros((output_height, output_width))

    for i in range(output_height):
        for j in range(output_width):
            start_i = i * stride
            start_j = i * stride

            patch = padded_image[
                start_i : start_i + stride,
                start_j : start_j + stride,
                :
            ]

            output_tensor[i,j] = np.sum(patch * kernel)

    return output_tensor

In [8]:
X = np.random.rand(5, 5, 3)      # 5×5 RGB image
K = np.random.rand(3, 3, 3)      # 3×3 kernel across 3 channels

In [9]:
conv2d_multichannel(X, K)

array([[6.41458844, 6.41458844, 6.41458844],
       [7.15143976, 7.15143976, 7.15143976],
       [5.59722592, 5.59722592, 5.59722592]])