In [1]:
import torch
import torch.nn.functional as F
import numpy as np

# ------------------------------------------------------------------
# 1) Toy data: a 4 × 4 “image” holding 1..16 and a 3 × 3 kernel holding 1..9
v_img = torch.arange(1, 17, dtype=torch.float32).reshape(4, 4)
kernel = torch.arange(1, 10, dtype=torch.float32).reshape(3, 3)

# F.conv2d wants a batched input (N, C, H, W) and weights (O, I, kH, kW);
# indexing with None prepends the two singleton axes.
v_bchw = v_img[None, None]      # (1,1,4,4)
w_oihw = kernel[None, None]     # (1,1,3,3)

# No padding, unit stride: only the 2 × 2 "valid" positions are produced.
y_torch = F.conv2d(v_bchw, w_oihw, stride=1, padding=0)  # (1,1,2,2)
print("PyTorch output:\n", y_torch.squeeze())            # [[348, 393], [528, 573]]

# ------------------------------------------------------------------
# 2) Explicit convolution matrix A  (4 outputs × 16 input pixels)
H, W = 4, 4
out_h, out_w = 2, 2
A = torch.zeros((out_h * out_w, H * W))

# The conv is linear, so probing it with the one-hot image e_p yields
# A @ e_p, i.e. column p of A.
for p in range(H * W):
    r, c = p // W, p % W                      # row-major pixel coordinates
    delta = v_bchw.new_zeros(v_bchw.shape)    # same dtype/device as the input
    delta[0, 0, r, c] = 1.0
    A[:, p] = F.conv2d(delta, w_oihw).reshape(-1)

# Sanity check: multiplying A by the flattened image must reproduce y_torch
v_flat = v_img.reshape(-1)
y_from_A = torch.matmul(A, v_flat)
assert torch.allclose(y_from_A, y_torch.flatten())

print("\nConvolution matrix A (rows = outputs, cols = input pixels):\n")
print(A)

PyTorch output:
 tensor([[348., 393.],
        [528., 573.]])

Convolution matrix A (rows = outputs, cols = input pixels):

tensor([[1., 2., 3., 0., 4., 5., 6., 0., 7., 8., 9., 0., 0., 0., 0., 0.],
        [0., 1., 2., 3., 0., 4., 5., 6., 0., 7., 8., 9., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 2., 3., 0., 4., 5., 6., 0., 7., 8., 9., 0.],
        [0., 0., 0., 0., 0., 1., 2., 3., 0., 4., 5., 6., 0., 7., 8., 9.]])


In [3]:
import numpy as np

# -------------------------------------------------------------
# Helpers
# -------------------------------------------------------------
def conv2d_single_gray(im, ker, padding=0):
    """Single-channel 2-D convolution with zero-padding, stride = 1.

    "Convolution" is meant in the deep-learning sense: the kernel is slid
    over the image without being flipped, and each output pixel is the sum
    of the element-wise product of the kernel with the patch under it.

    Parameters
    ----------
    im : np.ndarray, shape (H, W)
        Input image.
    ker : np.ndarray, shape (kH, kW)
        Kernel.
    padding : int, default 0
        Number of zero rows/columns added on every side of ``im``.

    Returns
    -------
    np.ndarray, shape (H + 2*padding - kH + 1, W + 2*padding - kW + 1)
        Result with the dtype of ``im``.  With ``padding = (k - 1) / 2`` for
        an odd kernel size ``k`` this is the same size as the input.

    Raises
    ------
    ValueError
        If the kernel does not fit inside the padded image.
    """
    kH, kW = ker.shape

    # pad image with zeros on all four sides
    im_p = np.pad(im, ((padding, padding), (padding, padding)), mode='constant')

    # Number of positions where the whole kernel fits inside the padded
    # image.  Deriving this from the padded shape (rather than assuming the
    # output equals the input size) keeps every patch exactly (kH, kW).
    out_h = im_p.shape[0] - kH + 1
    out_w = im_p.shape[1] - kW + 1
    if out_h <= 0 or out_w <= 0:
        raise ValueError(
            f"kernel {ker.shape} does not fit in padded image {im_p.shape}"
        )

    out = np.zeros((out_h, out_w), dtype=im.dtype)
    for i in range(out_h):
        for j in range(out_w):
            patch = im_p[i:i+kH, j:j+kW]
            out[i, j] = np.sum(patch * ker)
    return out

def conv2d_rgb(im, ker, padding=0):
    """
    Multi-channel 2-D convolution (single output channel), zero-padding,
    stride = 1.  The kernel is not flipped; each output pixel is the sum of
    the element-wise product of the kernel with the patch under it, taken
    over all channels.

    im : (C, H, W)
    ker: (C, kH, kW) -- single output channel, C input channels
    padding: number of zero rows/columns added on every spatial side
             (the channel axis is never padded)
    returns (H + 2*padding - kH + 1, W + 2*padding - kW + 1), dtype of im;
            this equals (H, W) when padding = (k - 1) / 2 for odd kernel size k

    Raises AssertionError if the channel counts differ, and ValueError if
    the kernel does not fit inside the padded image.
    """
    C = im.shape[0]
    kC, kH, kW = ker.shape
    assert C == kC, f"channel mismatch: image has {C}, kernel has {kC}"

    im_p = np.pad(im, ((0,0), (padding, padding), (padding, padding)), mode='constant')

    # Number of positions where the whole kernel fits inside the padded
    # image.  Deriving this from the padded shape (rather than assuming the
    # output equals the input size) keeps every patch exactly (C, kH, kW).
    out_h = im_p.shape[1] - kH + 1
    out_w = im_p.shape[2] - kW + 1
    if out_h <= 0 or out_w <= 0:
        raise ValueError(
            f"kernel {ker.shape} does not fit in padded image {im_p.shape}"
        )

    out = np.zeros((out_h, out_w), dtype=im.dtype)
    for i in range(out_h):
        for j in range(out_w):
            patch = im_p[:, i:i+kH, j:j+kW]   # (C, kH, kW)
            out[i, j] = np.sum(patch * ker)
    return out

# -------------------------------------------------------------
# 1) Grayscale example with padding = 1
# -------------------------------------------------------------
# 4 × 4 image holding 1..16 and 3 × 3 kernel holding 1..9 (row-major)
v_img = np.arange(1, 17, dtype=np.float32).reshape(4, 4)
kernel_gray = np.arange(1, 10, dtype=np.float32).reshape(3, 3)

H, W = v_img.shape
out_gray = conv2d_single_gray(v_img, kernel_gray, padding=1)
print("➡️  Grayscale conv (padding=1) output shape:", out_gray.shape)
print(out_gray)
print()

# Build A (16 × 16): with padding = 1 and a 3 × 3 kernel the output has as
# many pixels as the input, so A is square.
num_in  = H * W
num_out = H * W
A_gray = np.zeros((num_out, num_in), dtype=np.float32)

# Column p of A is the flattened response to the one-hot image e_p.
for p in range(num_in):
    r, c = p // W, p % W                       # row-major pixel coordinates
    delta = np.zeros((H, W), dtype=v_img.dtype)
    delta[r, c] = 1.0
    A_gray[:, p] = conv2d_single_gray(delta, kernel_gray, padding=1).reshape(-1)

print("Matrix A_gray (16 × 16) — first 6 rows:\n")
for row in A_gray[:6]:
    print(" ".join(format(v, "4g") for v in row))
print("...")

# -------------------------------------------------------------
# 2) RGB example with padding = 1
# -------------------------------------------------------------
# Three channels that differ only by a constant offset, so they are easy
# to tell apart:
#   channel 0:  1..16,   channel 1: 11..26,   channel 2: 21..36
base = np.arange(1, 17, dtype=np.float32).reshape(4,4)
v_rgb = np.stack([base + offset for offset in (0, 10, 20)])    # shape (3,4,4)

# One constant 3 × 3 kernel per input channel: all 1s, all 2s, all -1s
kernel_rgb = np.stack([np.full((3, 3), weight, dtype=np.float32)
                       for weight in (1, 2, -1)])              # shape (3,3,3)

C, H, W = v_rgb.shape
out_rgb = conv2d_rgb(v_rgb, kernel_rgb, padding=1)
print("\n➡️  RGB conv (padding=1) output shape:", out_rgb.shape)
print(out_rgb)
print()

# Build A_rgb  (16 outputs × 48 inputs); the input is flattened
# channel-major, i.e. index p = channel * H * W + r * W + c.
num_in  = C * H * W
num_out = H * W
A_rgb = np.zeros((num_out, num_in), dtype=np.float32)

# Column p of A_rgb is the flattened response to the one-hot input e_p.
for p in range(num_in):
    channel, idx_in_ch = divmod(p, H * W)
    r, c = divmod(idx_in_ch, W)
    delta = np.zeros(v_rgb.shape, dtype=v_rgb.dtype)
    delta[channel, r, c] = 1.0
    A_rgb[:, p] = conv2d_rgb(delta, kernel_rgb, padding=1).reshape(-1)

print("Matrix A_rgb (16 × 48) — first 4 rows:\n")
for row in A_rgb[:4]:
    # only the first 20 columns are shown, for readability
    print(" ".join(format(v, "4g") for v in row[:20]), "...")


➡️  Grayscale conv (padding=1) output shape: (4, 4)
[[111. 178. 217. 145.]
 [231. 348. 393. 252.]
 [363. 528. 573. 360.]
 [197. 274. 295. 175.]]

Matrix A_gray (16 × 16) — first 6 rows:

   5    6    0    0    8    9    0    0    0    0    0    0    0    0    0    0
   4    5    6    0    7    8    9    0    0    0    0    0    0    0    0    0
   0    4    5    6    0    7    8    9    0    0    0    0    0    0    0    0
   0    0    4    5    0    0    7    8    0    0    0    0    0    0    0    0
   2    3    0    0    5    6    0    0    8    9    0    0    0    0    0    0
   1    2    3    0    4    5    6    0    7    8    9    0    0    0    0    0
...

➡️  RGB conv (padding=1) output shape: (4, 4)
[[ 28.  48.  60.  44.]
 [ 66. 108. 126.  90.]
 [114. 180. 198. 138.]
 [ 92. 144. 156. 108.]]

Matrix A_rgb (16 × 48) — first 4 rows:

   1    1    0    0    1    1    0    0    0    0    0    0    0    0    0    0    2    2    0    0 ...
   1    1    1    0    1    1    1    0    0