In [None]:
# This notebook implements a traditional conv2d written as a 4-deep nested loop
# and a complementary im2col-based conv2d, and prints both results for comparison.

In [1]:
import numpy as np
import tensorflow as tf
import keras
from math import ceil

2023-10-13 11:09:28.075987: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
class ctx:
    """Shape bookkeeping for a 2D convolution over an (N, C, H, W) input."""

    def __init__(self, ifmap, kernels, stride=1, padding=0):
        """ Args:
                ifmap: input map with a 4-element shape (N, C, IH, IW)
                kernels: array of kernels, shape (KN, KH, KW)
                stride: step between neighbouring windows (same for both axes)
                padding: number of zero rows/columns assumed on every border
            Returns:
                ctx: object that collects N,C,IH,IW,KH,KW,KN,S,P,Hout,Wout
        """
        self.N, self.C, self.IH, self.IW = ifmap.shape
        self.KH, self.KW = kernels[0].shape
        self.S, self.P = (stride, padding)
        self.KN = kernels.shape[0]
        # Output height comes from the input/kernel HEIGHT and output width
        # from the WIDTH. Floor division counts only windows that fit entirely
        # inside the (padded) input; rounding up would index past its edge.
        self.Hout = (self.IH + 2 * padding - self.KH) // stride + 1
        self.Wout = (self.IW + 2 * padding - self.KW) // stride + 1

In [3]:
def _conv2d(ctx, ifmap, kernel):
    """ conv2d helper - conv ifmap[0,0,i,j] with kernel """
    out = np.zeros(ctx.Hout * ctx.Wout)
    out_index = 0
    for i in range(ctx.Hout):
        for j in range(ctx.Wout):
            for ii in range(ctx.KH):
                for jj in range(ctx.KW):
                     out[out_index] = out[out_index] + (ifmap[0,0,ii+i,jj+j] * kernel[ii, jj])                       
            out_index = out_index + 1
    return out.reshape(ctx.Hout, ctx.Wout)
    
def conv2d(ctx, ifmap, kernels):
    """ Run the loop-based _conv2d once per kernel.

        Returns:
            float array of shape (ctx.KN, ctx.Hout, ctx.Wout); slice n holds
            the result for kernels[n]
    """
    result_shape = (ctx.KN, ctx.Hout, ctx.Wout)
    feature_maps = np.empty(result_shape)
    for kernel_idx in range(ctx.KN):
        feature_maps[kernel_idx, :, :] = _conv2d(ctx, ifmap, kernels[kernel_idx])
    return feature_maps

def get_im2col_indices(ctx):
    """ Build the (channel, row, col) index arrays used to gather im2col columns.

        Returns:
            (k, i, j) where, with R = C*KH*KW and L = Hout*Wout:
              k has shape (R, 1) - channel index of each patch element
              i has shape (R, L) - row index of each patch element per window
              j has shape (R, L) - column index of each patch element per window
    """
    channels = ctx.C
    kh, kw = ctx.KH, ctx.KW
    step = ctx.S
    n_rows_out, n_cols_out = ctx.Hout, ctx.Wout

    # Offsets of every element inside one window, repeated for each channel.
    row_in_window = np.tile(np.repeat(np.arange(kh), kw), channels)
    col_in_window = np.tile(np.arange(kw), kh * channels)

    # Top-left corner of every window, enumerated row by row.
    window_row = step * np.repeat(np.arange(n_rows_out), n_cols_out)
    window_col = step * np.tile(np.arange(n_cols_out), n_rows_out)

    # Broadcast (R, 1) + (1, L) -> (R, L) absolute coordinates.
    rows = row_in_window[:, np.newaxis] + window_row[np.newaxis, :]
    cols = col_in_window[:, np.newaxis] + window_col[np.newaxis, :]

    chan = np.repeat(np.arange(channels), kh * kw)[:, np.newaxis]

    return (chan, rows, cols)


def im2col_indices(ctx, x):
    """ im2col via fancy indexing: gather every window of x into one column.

        Args:
            ctx: shape context providing C, KH, KW, P (and what
                 get_im2col_indices reads)
            x: 4D input; its last two axes are zero-padded by ctx.P per side
        Returns:
            2D array with C*KH*KW rows; the column count is inferred by reshape
    """
    pad = ctx.P
    # Pad only the two trailing (spatial) axes; the leading axes are untouched.
    pad_widths = ((0, 0), (0, 0), (pad, pad), (pad, pad))
    padded = np.pad(x, pad_widths, mode='constant')

    chan_idx, row_idx, col_idx = get_im2col_indices(ctx)
    patches = padded[:, chan_idx, row_idx, col_idx]

    # Move the leading axis last, then flatten everything except the
    # patch-element axis into columns.
    patch_len = ctx.KH * ctx.KW * ctx.C
    return patches.transpose(1, 2, 0).reshape(patch_len, -1)

def conv2d_im2col(ctx, ifmap, kernels):
    """ Convolve ifmap with every kernel using im2col plus a matrix product.

        Args:
            ctx: shape context providing KN, KH, KW, Hout, Wout
            ifmap: input array, passed unchanged to im2col_indices
            kernels: indexable collection of KN kernels, each holding KH*KW values
        Returns:
            float array of shape (ctx.KN, ctx.Hout, ctx.Wout); slice i holds
            the result for kernels[i]
    """
    out = np.empty((ctx.KN, ctx.Hout, ctx.Wout))
    # The column matrix depends only on the input and ctx, never on the
    # kernel, so build it once rather than once per kernel.
    cols = im2col_indices(ctx, ifmap)
    for i in range(ctx.KN):
        # Flatten the kernel to a 1 x (KH*KW) row so that `row @ cols` gives
        # one dot product per output position.
        kernel_row = kernels[i].flatten().reshape(1, ctx.KH * ctx.KW)
        out[i] = (kernel_row @ cols).reshape(ctx.Hout, ctx.Wout)
    return out

In [160]:
# Random array of shape (1, 3, 224, 224); presumably a VGG-sized input going by
# the name. It is not used by the rest of this cell.
vgg_image = np.random.random((1,3,224,224))

# Small hand-checkable case: one 4x4 single-channel input and one 3x3 kernel,
# reshaped to the 4D input / 3D kernel layouts that ctx unpacks.
ifm = np.array([[1,2,3,2],[2,3,2,1],[1,2,1,2],[2,1,3,2]]).reshape(1,1,4,4)
kernels = np.array([[1,2,2],[1,2,3],[4,2,3]]).reshape(1,3,3)
ctxo = ctx(ifm, kernels, stride=1, padding=0)
# Print the loop-based and the im2col-based results so they can be compared.
print(f"Conv2d without im2col: {conv2d(ctxo, ifm, kernels)}")
print(f"Conv2d with im2col: {conv2d_im2col(ctxo, ifm, kernels)}")

Conv2d without im2col: [[[36. 38.]
  [39. 35.]]]
Conv2d with im2col: [[[36. 38.]
  [39. 35.]]]


In [17]:
# Side length of the square input and of the square kernels.
isz = 6
ksz = 3
# Input holds 1..isz*isz laid out row by row as a single 1x1 batch/channel.
ifm = np.arange(1,isz*isz+1).reshape(1,1,isz,isz)
# Three single-kernel arrays with consecutive values: 1..9, 10..18 and 19..27.
kernels = np.arange(1,ksz*ksz+1).reshape(1,ksz,ksz)
k2 = np.arange(10,9+ksz*ksz+1).reshape(1,ksz,ksz)
k3 = np.arange(19,18+ksz*ksz+1).reshape(1,ksz,ksz)
print(kernels)
print(k2)
print(k3)
# The context is built from `kernels` only and then reused for k2 and k3; that
# works here because all three arrays have the same (1, ksz, ksz) shape.
ctxo = ctx(ifm, kernels, stride=1, padding=0)
res = conv2d(ctxo, ifm, kernels)
res2 = conv2d(ctxo, ifm, k2)
res3 = conv2d(ctxo, ifm, k3)
# Flatten so each result prints as one row-major list of output values.
print(res.flatten())
print(res2.flatten())
print(res3.flatten())

[[[1 2 3]
  [4 5 6]
  [7 8 9]]]
[[[10 11 12]
  [13 14 15]
  [16 17 18]]]
[[[19 20 21]
  [22 23 24]
  [25 26 27]]]
[ 474.  519.  564.  609.  744.  789.  834.  879. 1014. 1059. 1104. 1149.
 1284. 1329. 1374. 1419.]
[1122. 1248. 1374. 1500. 1878. 2004. 2130. 2256. 2634. 2760. 2886. 3012.
 3390. 3516. 3642. 3768.]
[1770. 1977. 2184. 2391. 3012. 3219. 3426. 3633. 4254. 4461. 4668. 4875.
 5496. 5703. 5910. 6117.]


In [7]:
# 3x3 matrix holding 0..8, multiplied by itself (matrix product, not element-wise).
ifm = np.arange(9).reshape(3, 3)
out = np.matmul(ifm, ifm)