In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
import numpy as np

# Seed NumPy's legacy global RNG so the two random draws below are reproducible.
np.random.seed(1337)
# Draw order matters for reproducibility: the 3x3 image is sampled first, then the 2x2 kernel.
# Both are cast to float32, the same dtype used for the torch tensors built from them later.
input_image = np.random.randn(3, 3).astype(np.float32)
kernel = np.random.randn(2, 2).astype(np.float32)

In [3]:
input_image

array([[-0.7031873 , -0.49028236, -0.32181433],
       [-1.7550787 ,  0.20666447, -2.0112646 ],
       [-0.5572507 ,  0.337217  ,  1.548836  ]], dtype=float32)

In [4]:
kernel

array([[-1.3707366 ,  1.4252914 ],
       [-0.27946392, -0.5596279 ]], dtype=float32)

In [5]:
# Prepend two singleton axes, (H, W) -> (1, 1, H, W), giving the 4-D batch/channel
# layout that is passed to F.conv2d in the following cells.
input_image_tensor = torch.tensor(input_image[np.newaxis, np.newaxis, ...], dtype=torch.float32)
kernel_tensor = torch.tensor(kernel[np.newaxis, np.newaxis, ...], dtype=torch.float32)

In [6]:
# Reference result from PyTorch with default stride/padding/dilation: the 3x3 input
# and 2x2 kernel yield a 2x2 output.
F.conv2d(input_image_tensor, kernel_tensor)

tensor([[[[ 0.6399,  1.2812],
          [ 2.6673, -4.1109]]]])

In [7]:
# Pad asymmetrically: no padding on top/left, one row at the bottom and one column
# on the right (np.pad's default mode fills with zeros), turning the 3x3 image into 4x4.
a = np.pad(input_image, ((0, 1), (0, 1)))
a

array([[-0.7031873 , -0.49028236, -0.32181433,  0.        ],
       [-1.7550787 ,  0.20666447, -2.0112646 ,  0.        ],
       [-0.5572507 ,  0.337217  ,  1.548836  ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ]],
      dtype=float32)

In [8]:
# Hand-check: apply the kernel to the bottom-right 2x2 window of the padded image.
# The value equals the bottom-right entry of the padding="same" result in the next cell.
np.sum(a[-2:, -2:] * kernel)

-2.1230462

In [9]:
# With an even-sized (2x2) kernel, "same" padding cannot be symmetric; the output below
# agrees with padding only the bottom/right edge, as hand-checked in the previous cell.
F.conv2d(input_image_tensor, kernel_tensor, padding="same")

  F.conv2d(input_image_tensor, kernel_tensor, padding="same")


tensor([[[[ 0.6399,  1.2812,  1.0032],
          [ 2.6673, -4.1109,  2.3241],
          [ 1.2445,  1.7453, -2.1230]]]])

In [11]:
# NOTE(review): execution counts are out of order. This cell ran as In [11], but
# `conv2d` is defined in the following cell (In [10]); on a fresh kernel the
# definition cell must be run before this one.
conv2d(input_image, kernel, padding="same")

array([[ 0.63991527,  1.28117325,  1.00319855],
       [ 2.66732309, -4.11093248,  2.32407017],
       [ 1.2444764 ,  1.74530696, -2.12304618]])

In [10]:
import numpy as np
import typing as tp


def dilate(
    kernel,
    dilation: tp.Optional[tp.Union[int, tp.Tuple[int, int]]] = None,
):
    """Spread the entries of a 2-D kernel apart by inserting zeros between them.

    A dilation factor ``d`` along an axis leaves ``d - 1`` zeros between
    neighbouring kernel entries on that axis, so an axis of length ``k``
    grows to ``(k - 1) * d + 1``.

    Args:
        kernel: 2-D array (or array-like) of kernel weights.
        dilation: Spacing between kernel entries. ``None`` or any other falsy
            value (e.g. ``0``) means no dilation; an integer (Python or NumPy)
            is applied to both axes; a 2-sequence is read as
            ``(height, width)``.

    Returns:
        A newly allocated float64 ``np.ndarray`` containing the dilated
        kernel. A copy is returned even when no dilation is requested.

    Raises:
        AssertionError: If ``dilation`` is a sequence whose length is not 2.
        ValueError: If any dilation factor is smaller than 1, or if ``kernel``
            is not 2-D.
    """
    if not dilation:
        dilation_height = dilation_width = 1
    elif isinstance(dilation, (int, np.integer)):
        dilation_height = dilation_width = int(dilation)
    else:
        assert len(dilation) == 2
        dilation_height, dilation_width = dilation

    # Factors below 1 would otherwise collapse entries onto each other (0) or
    # produce invalid shapes (negative) instead of failing loudly.
    if dilation_height < 1 or dilation_width < 1:
        raise ValueError(f"dilation factors must be >= 1, got {dilation!r}.")

    kernel = np.asarray(kernel)
    kernel_height, kernel_width = kernel.shape

    # Guard the empty-axis case so the size formula never goes negative.
    dilated_height = (kernel_height - 1) * dilation_height + 1 if kernel_height else 0
    dilated_width = (kernel_width - 1) * dilation_width + 1 if kernel_width else 0
    dilated_kernel = np.zeros((dilated_height, dilated_width))

    # A strided view selects exactly the positions (i * dh, j * dw), so the
    # original entries can be written in a single assignment.
    dilated_kernel[::dilation_height, ::dilation_width] = kernel

    return dilated_kernel


def conv2d(
    input,
    kernel,
    stride: tp.Optional[tp.Union[int, tp.Tuple[int, int]]] = None,
    padding: tp.Optional[tp.Union[int, tp.Tuple[int, int], str]] = None,
    dilation: tp.Optional[tp.Union[int, tp.Tuple[int, int]]] = None,
):
    """Single-channel 2-D convolution (cross-correlation) implemented with NumPy.

    The kernel is slid over the zero-padded input without being flipped, and
    each output element is the sum of the element-wise product of the kernel
    and the window beneath it.

    Args:
        input: 2-D array (or array-like) to convolve.
        kernel: 2-D array (or array-like) of weights.
        stride: Step of the sliding window. ``None`` or any falsy value means
            1; an integer applies to both axes; a 2-sequence is
            ``(height, width)``.
        padding: Zero padding added around ``input``. ``None`` or any falsy
            value means no padding; an integer pads all four sides equally; a
            2-sequence is ``(height, width)`` applied symmetrically; the
            strings ``"valid"`` (none), ``"same"`` (output has the input's
            shape, stride 1 only) and ``"full"`` (kernel size minus one on
            every side) are also accepted.
        dilation: Spacing between kernel entries, forwarded to ``dilate``.

    Returns:
        A float64 ``np.ndarray`` of shape
        ``((H + pad_h - kH) // stride_h + 1, (W + pad_w - kW) // stride_w + 1)``
        where ``kH``/``kW`` are the kernel sizes after dilation and
        ``pad_h``/``pad_w`` the total padding along each axis.

    Raises:
        AssertionError: If ``input`` or ``kernel`` is not 2-D, if a sequence
            argument does not have length 2, or if ``"same"`` padding is
            combined with a stride other than 1.
        ValueError: If a stride is smaller than 1, if ``padding`` is an
            unrecognized string, or if the (dilated) kernel is larger than
            the padded input.
    """
    input = np.asarray(input)
    kernel = np.asarray(kernel)
    assert len(input.shape) == 2
    assert len(kernel.shape) == 2

    # Stride
    if not stride:
        stride_height = stride_width = 1
    elif isinstance(stride, (int, np.integer)):
        stride_height = stride_width = int(stride)
    else:
        assert len(stride) == 2
        stride_height, stride_width = stride
    if stride_height < 1 or stride_width < 1:
        raise ValueError(f"stride must be >= 1, got {stride!r}.")

    # All size computations below use the kernel's footprint after dilation.
    kernel = dilate(kernel, dilation)
    kernel_height, kernel_width = kernel.shape

    # Pad input
    # Four separate amounts are tracked so that "same" padding can be
    # asymmetric when the kernel size is even.
    if not padding:
        padding_up = padding_down = padding_left = padding_right = 0
    elif isinstance(padding, str):
        if padding == "valid":
            padding_up = padding_down = padding_left = padding_right = 0
        elif padding == "same":
            assert stride_height == 1 and stride_width == 1, "'same' padding can only be applied to stride == 1."
            # With stride 1: output = input + total_pad - kernel + 1, so keeping
            # output == input requires total_pad == kernel - 1. When that is
            # odd, the extra row/column goes to 'down' and 'right'.
            padding_up = (kernel_height - 1) // 2
            padding_down = (kernel_height - 1) - padding_up
            padding_left = (kernel_width - 1) // 2
            padding_right = (kernel_width - 1) - padding_left
        elif padding == "full":
            padding_up = padding_down = kernel_height - 1
            padding_left = padding_right = kernel_width - 1
        else:
            # ValueError is still caught by callers handling the generic Exception.
            raise ValueError(f"{padding} is not recognized. Can only be 'valid' or 'same' or 'full'.")
    elif isinstance(padding, (int, np.integer)):
        padding_up = padding_down = padding_left = padding_right = int(padding)
    else:
        assert len(padding) == 2
        padding_up = padding_down = padding[0]
        padding_left = padding_right = padding[1]

    padded = np.pad(
        input, ((padding_up, padding_down), (padding_left, padding_right)), mode="constant", constant_values=(0, 0)
    )
    padded_height, padded_width = padded.shape

    # Without this check the size formula yields zero or negative dimensions,
    # which surfaces as an empty result or an opaque NumPy allocation error.
    if kernel_height > padded_height or kernel_width > padded_width:
        raise ValueError(
            f"Kernel size ({kernel_height} x {kernel_width}) can't be greater than "
            f"padded input size ({padded_height} x {padded_width})."
        )

    output_height = (padded_height - kernel_height) // stride_height + 1
    output_width = (padded_width - kernel_width) // stride_width + 1
    output = np.zeros((output_height, output_width))

    for i in range(output_height):
        row = i * stride_height
        for j in range(output_width):
            col = j * stride_width
            window = padded[row : row + kernel_height, col : col + kernel_width]
            output[i, j] = np.sum(window * kernel)

    return output

In [15]:
# Reference result with dilation: dilation=3 stretches the 2x2 kernel to an effective
# 4x4 footprint ((2 - 1) * 3 + 1 = 4 per axis) while "same" padding keeps the 3x3 output shape.
F.conv2d(input_image_tensor, kernel_tensor, padding="same", dilation=3)

tensor([[[[-0.8668,  0.1557, -0.0942],
          [-0.4587,  0.9639,  0.6720],
          [-2.8666,  2.4058, -0.2833]]]])

In [16]:
# NumPy implementation with the same arguments; the values below agree with the
# PyTorch output of the previous cell to the printed precision.
conv2d(input_image, kernel, padding="same", dilation=3)

array([[-0.86677182,  0.15573146, -0.09423998],
       [-0.4586792 ,  0.96388455,  0.67204797],
       [-2.86663812,  2.40575057, -0.28328256]])