In [34]:
import numpy as np
from numpy.typing import NDArray
from typing import Tuple, Any
import cv2

In [35]:
def check_type(obj: Any, name: str):
    """Validate a hyper-parameter given as an int or as a short tuple of ints.

    Args:
        obj: Value to validate: an ``int``, a 1-tuple ``(int,)`` or a 2-tuple
            ``(int, int)``.
        name: Parameter name; used only to build the error messages.

    Returns:
        ``"int"`` when ``obj`` is a bare int or a 1-tuple holding an int (a
        single value), ``None`` when ``obj`` is a valid 2-tuple.

    Raises:
        ValueError: ``obj`` is neither an int nor a tuple, is an empty tuple,
            or contains an element that is not an int.
        RuntimeError: ``obj`` is a tuple with more than two elements.
    """
    if isinstance(obj, int):
        return "int"
    # Builtin ``tuple`` instead of the deprecated ``typing.Tuple`` alias.
    if not isinstance(obj, tuple):
        raise ValueError(f"{name} should be tuple[int, int] or int, give {type(obj)}")

    size = len(obj)
    if size == 0:
        raise ValueError(f"{name} should have len = 2, give {len(obj)}")
    if size > 2:
        raise RuntimeError(f"{name} should have len is 2, give {len(obj)}")

    if size == 1:
        # A 1-tuple counts as a single value, but its element must still be an
        # int; previously something like ("a",) slipped through unchecked.
        if not isinstance(obj[0], int):
            raise ValueError(f"{name} should be tuple[int, int] or int, give tuple of {type(obj[0])}")
        return "int"

    if not isinstance(obj[0], int) or not isinstance(obj[1], int):
        raise ValueError(f"{name} should be tuple[int, int], give {type(obj[0]), type(obj[1])}")

In [36]:
class Conv2D:
    def __init__(
        self,
        channel_in: int, 
        channel_out: int, 
        kernel_size: int | Tuple[int, int],
        stride: int | Tuple[int, int],
        padding: int | Tuple[int, int]
    ) -> None:
        if not isinstance(channel_in, int): 
            raise ValueError(f"channel_in should be int, give {type(channel_in)}")
        if not isinstance(channel_out, int):
            raise ValueError(f"channel_out should be int, give {type(channel_out)}")
        if check_type(kernel_size, "kernel_size") == "int":
            kernel_size = (kernel_size, kernel_size)
        if check_type(stride, "stride") == "int":
            stride = (stride, stride)
        if check_type(padding, "padding") == "int":
            padding = (padding, padding)
        print(f"kernel_size = {kernel_size}, stride = {stride}, padding = {padding}")
        self._channel_in = channel_in
        self._channel_out = channel_out
        self._kernel_size = kernel_size
        self._stride = stride
        self._padding = padding
        np.random.seed(42)

    def __call__(self, img: NDArray[np.uint8]):
        # create list of filters
        self._filters = []
        for _ in range(self._channel_out):
            self._filters.append(np.random.rand(self._kernel_size[0], self._kernel_size[1], self._channel_in))
        return self.forward(img)
    
    def forward(self, img: NDArray[np.uint8]):
        n_filters = len(self._filters)
        n_channels = self._channel_in

        # output dimensional
        out_height = ((img.shape[0] + 2 * self._padding[0] - self._kernel_size[0]) // self._stride[0]) + 1
        out_width = ((img.shape[1] + 2 * self._padding[1] - self._kernel_size[1]) // self._stride[1]) + 1
        out_channels = n_filters
        out = np.zeros((out_height, out_width, out_channels))

        # add padding to image
        padd_height = img.shape[0] + 2 * self._padding[0]
        padd_weight = img.shape[1] + 2 * self._padding[1]
        padd_img = np.zeros((padd_height, padd_weight, img.shape[2]))
        padd_img[self._padding[0]:img.shape[0] + self._padding[0], self._padding[1]:img.shape[1] + self._padding[1]] = img

        # check that kernel_size, stride and padding are proper

        for n in range(n_filters):
            for c in range(n_channels):
                for x in range(out_height):
                    for y in range(out_width):
                        for i in range(self._kernel_size[0]):
                            for j in range(self._kernel_size[1]):
                                out[x][y][n] += padd_img[self._stride[0] * x + i][self._stride[1] * y + j][c] * self._filters[n][i][j][c]

        return out

In [37]:
# Layer under test: 3 input channels, 8 filters, 3x3 kernel, stride 1, padding 1.
conv = Conv2D(
    channel_in=3,
    channel_out=8,
    kernel_size=3,
    stride=1,
    padding=1,
)

kernel_size = (3, 3), stride = (1, 1), padding = (1, 1)


In [38]:
# Small random input with 3 channels (values in [0, 1)) for a quick shape check.
dummy_shape = (10, 10, 3)
dummy = np.random.rand(*dummy_shape)

In [40]:
# Run the layer on the random input and display the shape of the result.
out = conv(dummy)
out.shape

(10, 10, 8)

In [41]:
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Fail here with a clear message instead of a later AttributeError.
img_path = "./data/chars1.png"
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"could not read image: {img_path}")

In [42]:
# Display the dimensions of the loaded image array.
img.shape

(545, 640, 3)

In [46]:
# Display the loaded pixel values.
# NOTE(review): this prints the repr of the whole array; a small slice such as
# img[:2, :3] would show the same information with far less output.
img

array([[[215, 217, 215],
        [221, 223, 221],
        [218, 220, 218],
        ...,
        [208, 208, 207],
        [207, 207, 205],
        [206, 206, 204]],

       [[217, 219, 217],
        [217, 219, 217],
        [217, 219, 217],
        ...,
        [208, 208, 205],
        [207, 207, 205],
        [205, 205, 203]],

       [[219, 220, 218],
        [218, 219, 217],
        [220, 221, 219],
        ...,
        [209, 209, 207],
        [207, 207, 205],
        [207, 207, 205]],

       ...,

       [[173, 171, 166],
        [175, 173, 168],
        [171, 170, 165],
        ...,
        [163, 163, 161],
        [162, 163, 160],
        [160, 161, 158]],

       [[174, 172, 167],
        [172, 171, 166],
        [171, 170, 165],
        ...,
        [165, 165, 165],
        [160, 160, 159],
        [159, 160, 158]],

       [[168, 167, 162],
        [170, 169, 164],
        [171, 170, 165],
        ...,
        [160, 160, 160],
        [160, 160, 160],
        [159, 159, 159]]

In [43]:
# Apply the layer to the loaded image; `out` is rebound from the dummy-input result.
out = conv(img)

In [44]:
# Display the dimensions of the convolution result for the image.
out.shape

(545, 640, 8)

In [45]:
# Display the convolution result.
# NOTE(review): this prints the repr of the whole array; consider out[:2, :2]
# or summary statistics instead.
out

array([[[1508.73211067, 1191.37103411, 1546.96192579, ...,
         1131.93640916, 1330.27724766, 1038.33302047],
        [2214.11851244, 1925.16109072, 2524.26885915, ...,
         1672.04225751, 2088.69838481, 1769.16644076],
        [2224.16703785, 1932.44643055, 2535.92726954, ...,
         1680.96163346, 2099.97869484, 1780.1372685 ],
        ...,
        [2098.21628381, 1829.58673292, 2398.01602364, ...,
         1587.36269186, 1984.93596455, 1682.06203486],
        [2088.62443333, 1821.37025271, 2386.56939971, ...,
         1578.75841473, 1976.74037349, 1675.21836147],
        [1237.47686229, 1279.79139436, 1704.08724325, ...,
          958.31261028, 1352.07646295, 1237.90511761]],

       [[2238.37683628, 1903.74584724, 2164.81321885, ...,
         1540.48549813, 1636.91591783, 1510.44297005],
        [3290.97570442, 2908.58513351, 3460.29349712, ...,
         2519.26058964, 2654.27663689, 2715.80009273],
        [3299.80727808, 2919.01116126, 3468.90901189, ...,
         2531.