In [1]:
import numpy as np
import math

class conv2d:
    """Unroll convolution windows of an image into rows (im2col style).

    Each window becomes one flat row of ``channels * k * k`` values, ordered
    channel-major, then window row, then window column. The class can unroll
    a whole image at once (``img_to_conv2d``) or one rectangular block of it
    (``img_block_to_conv2d``); per-block results are merged with
    ``aggregate``.

    Args:
        k: Side length of the square window.
        s: Stride between consecutive window origins.
        p: Zero padding added on every side of the image.
        verbose: When True (the default) progress/trace lines are printed
            for every window and every element read. Pass False to silence
            them.
    """

    def __init__(self, k, s, p, verbose=True):
        self.k = k
        self.p = p
        self.s = s
        self.verbose = verbose

    def _log(self, *args):
        """Print a trace line unless tracing has been switched off."""
        if self.verbose:
            print(*args)

    def _slides(self, extent):
        """Return ``floor((extent - k) / s) + 1`` for the given extent.

        The result can be zero or negative when ``extent`` is smaller than
        ``k``; callers clamp it where that matters.
        """
        return math.floor((extent - self.k) / self.s) + 1

    def get_x(self):
        """Return the full-image extent ``X`` plus padding on both sides."""
        return self.X + self.p * 2

    def get_y(self):
        """Return the full-image extent ``Y`` plus padding on both sides."""
        return self.Y + self.p * 2

    def get_slides_done(self):
        """Find slides that are done already.

        Uses ``x_id * x_size + p`` rows and ``y_id * y_size + p`` columns
        (presumably the padded area lying before the current block) and
        counts the slides that fit in each, clamped at zero.

        Returns:
            Tuple ``(v_slides_done, h_slides_done)``.
        """
        cols_done = self.y_id * self.y_size + self.p
        h_slides_done = max(self._slides(cols_done), 0)
        rows_done = self.x_id * self.x_size + self.p
        v_slides_done = max(self._slides(rows_done), 0)

        # Windows done so far
        return (v_slides_done, h_slides_done)

    def get_block_win_val(self, w, i, j, ch, imgs, img):
        """Read one element of window ``w`` from the current block.

        Args:
            w: Window index, row-major over the windows of the padded
                full image.
            i: Row offset inside the window.
            j: Column offset inside the window.
            ch: Channel index.
            imgs: Indexable as ``imgs[img][ch][x][y]``; holds the block data.
            img: Index of the image inside ``imgs``.

        Returns:
            The block value at that position, or 0 when the position falls
            outside the block's ``b_X`` x ``b_Y`` extent.
        """
        cols_done = self.y_id * self.y_size + self.p
        rows_done = self.x_id * self.x_size + self.p

        h_slides = self._slides(self.get_y())

        # Window index -> (window row, window column) -> padded coordinates.
        r, c = divmod(w, h_slides)
        x = (r * self.s) + i
        y = (c * self.s) + j

        self._log("ORIGIN: ", r * self.s, c * self.s, "COORD:", x, y,
                  "TCOORD: ", x - rows_done, y - cols_done)
        # Shift from padded full-image coordinates to block-local ones.
        x -= rows_done
        y -= cols_done

        if x < 0 or y < 0 or x >= self.b_X or y >= self.b_Y:
            return 0

        return imgs[img][ch][x][y]

    def img_block_to_conv2d(self, imgs, X, Y, x_id, y_id, x_size, y_size):
        """Unroll the windows that touch one block of a larger image.

        Args:
            imgs: Block data; ``imgs[0].shape`` must be
                ``(channels, block_rows, block_cols)``. Only image 0 is read.
            X: Row count of the full (unpadded) image.
            Y: Column count of the full (unpadded) image.
            x_id: Block index along the rows.
            y_id: Block index along the columns.
            x_size: Multiplied by ``x_id`` to get the block's row offset
                (presumably the nominal block height).
            y_size: Multiplied by ``y_id`` to get the block's column offset
                (presumably the nominal block width).

        Returns:
            Dict mapping window index to a 1-D array of
            ``channels * k * k`` values. Positions outside this block are
            filled with 0.
        """
        self.channels, self.b_X, self.b_Y = imgs[0].shape
        self.X, self.Y = X, Y
        self.x_id, self.y_id = x_id, y_id
        self.x_size, self.y_size = x_size, y_size

        cols_done = self.y_id * self.y_size
        rows_done = self.x_id * self.x_size

        h_slides = self._slides(self.get_y())

        # Extent covered up to and including this block, with leading padding;
        # trailing padding is added only when the block reaches the image edge.
        v_cols = cols_done + self.b_Y + self.p
        v_rows = rows_done + self.b_X + self.p
        if cols_done + self.b_Y >= Y:
            v_cols += self.p
        if rows_done + self.b_X >= X:
            v_rows += self.p

        h_vslides = max(self._slides(v_cols), 0)
        v_vslides = max(self._slides(v_rows), 0)

        # For blocks that stop short of the image edge, extend the slide count
        # (presumably to include windows that start here and spill over into
        # the next block -- TODO confirm).
        if cols_done + self.b_Y < Y:
            diff = math.ceil((v_cols - (h_vslides - 1) * self.s) / self.s) - 1
            h_vslides += diff
        if rows_done + self.b_X < X:
            diff = math.ceil((v_rows - (v_vslides - 1) * self.s) / self.s) - 1
            v_vslides += diff

        self._log("Virtual matrix slides ", h_vslides, v_vslides)

        v_done, h_done = self.get_slides_done()
        self._log("Starting window ", v_done, h_done)

        data = {}
        for x in range(v_done, v_vslides):
            for y in range(h_done, h_vslides):
                w = x * h_slides + y
                self._log("Looking at window ", w)
                data[w] = np.array([
                    self.get_block_win_val(w, i, j, c, imgs, 0)
                    for c in range(self.channels)
                    for i in range(self.k)
                    for j in range(self.k)
                ])

        return data

    def aggregate(self, datas):
        """Merge per-block window dicts into one 2-D array.

        Windows present in several partial results are summed element-wise;
        ``get_block_win_val`` yields 0 for positions outside a block, so each
        block contributes only the elements it owns.

        The input dicts and their arrays are left untouched.

        Args:
            datas: Iterable of dicts as returned by ``img_block_to_conv2d``.

        Returns:
            Array whose row ``d`` is the merged window ``d``.

        Raises:
            KeyError: If the merged window indices are not exactly
                ``0 .. n-1``.
        """
        fin = {}
        for partial in datas:
            for window, values in partial.items():
                if window in fin:
                    # Out-of-place add: an in-place ``+=`` would write into the
                    # array owned by the first partial that supplied this window.
                    fin[window] = fin[window] + values
                else:
                    fin[window] = values
        res = [fin[d] for d in range(len(fin))]
        return np.array(res)

    def get_win_val(self, w, i, j, ch, imgs, img):
        """Read one element of window ``w`` from a full image.

        Args:
            w: Window index, row-major over the windows of the padded image.
            i: Row offset inside the window.
            j: Column offset inside the window.
            ch: Channel index.
            imgs: Indexable as ``imgs[img][ch][x][y]``.
            img: Index of the image inside ``imgs``.

        Returns:
            The image value at that position, or 0 when the position lies in
            the padding.
        """
        h_slides = self._slides(self.get_y())

        r, c = divmod(w, h_slides)
        # Subtract the padding to go from padded to real image coordinates.
        x = (r * self.s) + i - self.p
        y = (c * self.s) + j - self.p

        self._log("ORIGIN: ", r * self.s, c * self.s, "COORD:", x, y)
        if x < 0 or y < 0 or x >= self.X or y >= self.Y:
            return 0

        return imgs[img][ch][x][y]

    def img_to_conv2d(self, imgs):
        """Unroll every window of a full image.

        Args:
            imgs: ``imgs[0].shape`` must be ``(channels, X, Y)``. Only image
                0 is read.

        Returns:
            Array with one row per window (row-major window order) and
            ``channels * k * k`` columns.
        """
        self.channels, self.X, self.Y = imgs[0].shape

        h_slides = self._slides(self.get_y())
        v_slides = self._slides(self.get_x())

        windows = h_slides * v_slides

        data = []

        for w in range(windows):
            self._log("Looking at window ", w)
            data.append([
                self.get_win_val(w, i, j, c, imgs, 0)
                for c in range(self.channels)
                for i in range(self.k)
                for j in range(self.k)
            ])

        return np.array(data)

In [7]:
# Seed the generator so the fixture written to disk, and every number derived
# from it in later cells, is the same on each Restart & Run All.
SEED = 0
np.random.seed(SEED)

imgs = np.random.rand(1, 2, 10, 10)
# The header records the array shape so a reader of the flat file can reshape it.
header = "{},{},{},{}".format(*imgs.shape)
np.savetxt("images_2_10_10.np", imgs.flatten(), header=header)

kernel_size = 3
stride = 4
padding = 0

# Unroll every convolution window of the image into one row of `data`.
con = conv2d(kernel_size, stride, padding)
data = con.img_to_conv2d(imgs)
print(data.shape)
for i in range(data.shape[0]):
    print(data[i].tolist())

Looking at window  0
ORIGIN:  0 0 COORD: 0 0
ORIGIN:  0 0 COORD: 0 1
ORIGIN:  0 0 COORD: 0 2
ORIGIN:  0 0 COORD: 1 0
ORIGIN:  0 0 COORD: 1 1
ORIGIN:  0 0 COORD: 1 2
ORIGIN:  0 0 COORD: 2 0
ORIGIN:  0 0 COORD: 2 1
ORIGIN:  0 0 COORD: 2 2
ORIGIN:  0 0 COORD: 0 0
ORIGIN:  0 0 COORD: 0 1
ORIGIN:  0 0 COORD: 0 2
ORIGIN:  0 0 COORD: 1 0
ORIGIN:  0 0 COORD: 1 1
ORIGIN:  0 0 COORD: 1 2
ORIGIN:  0 0 COORD: 2 0
ORIGIN:  0 0 COORD: 2 1
ORIGIN:  0 0 COORD: 2 2
Looking at window  1
ORIGIN:  0 4 COORD: 0 4
ORIGIN:  0 4 COORD: 0 5
ORIGIN:  0 4 COORD: 0 6
ORIGIN:  0 4 COORD: 1 4
ORIGIN:  0 4 COORD: 1 5
ORIGIN:  0 4 COORD: 1 6
ORIGIN:  0 4 COORD: 2 4
ORIGIN:  0 4 COORD: 2 5
ORIGIN:  0 4 COORD: 2 6
ORIGIN:  0 4 COORD: 0 4
ORIGIN:  0 4 COORD: 0 5
ORIGIN:  0 4 COORD: 0 6
ORIGIN:  0 4 COORD: 1 4
ORIGIN:  0 4 COORD: 1 5
ORIGIN:  0 4 COORD: 1 6
ORIGIN:  0 4 COORD: 2 4
ORIGIN:  0 4 COORD: 2 5
ORIGIN:  0 4 COORD: 2 6
Looking at window  2
ORIGIN:  4 0 COORD: 4 0
ORIGIN:  4 0 COORD: 4 1
ORIGIN:  4 0 COORD: 4 2
O

In [8]:
# Weights and biases are stored flat on disk; restore their shapes after loading.
kernel = np.reshape(np.loadtxt("kernel_3.np"), (3, 2, 3, 3))
bias = np.reshape(np.loadtxt("bias_3.np"), (3,))

In [9]:
# Flatten each kernel[i] into one column, then append the bias as a final row
# so a single matrix product applies weights and bias together.
arr = np.vstack((kernel.reshape(kernel.shape[0], -1).T, bias.reshape(1, 3)))
arr.shape

(19, 3)

In [11]:
# Append the constant-1 column that multiplies the bias row of `arr`.
# Guarded on the column count so re-running this cell does not keep stacking
# extra columns onto `data` (which would break the product below).
if data.shape[1] + 1 == arr.shape[0]:
    data = np.hstack((data, np.ones((data.shape[0], 1))))
data @ arr

array([[5.15348091, 3.73308533, 5.73540866],
       [5.26820541, 3.87565422, 5.33113346],
       [5.97792431, 4.34766819, 5.74530992],
       [5.37354933, 3.8488311 , 5.28647831]])

In [12]:
import torch

# Reference convolution layer; its randomly initialised parameters are
# replaced with the weights and biases loaded from disk.
m = torch.nn.Conv2d(
    in_channels=2,
    out_channels=3,
    kernel_size=3,
    stride=4,
    padding=0,
    bias=True,
)
m.weight = torch.nn.Parameter(torch.tensor(kernel))
m.bias = torch.nn.Parameter(torch.tensor(bias))

In [13]:
# Run the reference layer on the images; the bare name on the last line displays the result.
img_tensor = torch.tensor(imgs)
op = m(img_tensor)
op

tensor([[[[5.1535, 5.2682],
          [5.9779, 5.3735]],

         [[3.7331, 3.8757],
          [4.3477, 3.8488]],

         [[5.7354, 5.3311],
          [5.7453, 5.2865]]]], dtype=torch.float64,
       grad_fn=<ThnnConv2DBackward>)