# Convolution operation

In [2]:
import torch

def convolve2d(input, kernel, stride, padding=0, dilation=1):
    ih, iw = input.shape
    kh, kw = kernel.shape

    if padding:
        input = torch.nn.functional.pad(input, pad=[padding] * 4)

    # calculating output size
    oh = (ih - kh + 2 * padding - (kh - 1) * (dilation - 1)) // stride + 1
    ow = (iw - kw + 2 * padding - (kw - 1) * (dilation - 1)) // stride + 1    
    
    output = []
    for i in range(0, oh * stride, stride):
        row = []
        for j in range(0, ow * stride, stride):
            region = input[i:i+kh + (kh-1)*(dilation-1):dilation, j:j+kw + (kw-1)*(dilation-1):dilation]
            total = torch.sum(region * kernel)
            row.append(total)
        output.append(row)
    
    output = torch.tensor(output)
    assert output.shape == (oh, ow)

    return output

def print_tensor(tensor, title=""):
    shape_str = "x".join(str(s) for s in tensor.shape)
    print(f"{title.upper()} ({shape_str})")
    print(tensor)
    print()




## Input and Kernel

In [3]:
torch.manual_seed(42)
input = torch.randint(low=0, high=10, size=(6, 6))
kernel = torch.tensor([[0, 0, 0],
                       [0, 1, 0],
                       [0, 0, 0]])

print_tensor(input, title="input")
print_tensor(kernel, title="kernel")

INPUT (6x6)
tensor([[2, 7, 6, 4, 6, 5],
        [0, 4, 0, 3, 8, 4],
        [0, 4, 1, 2, 5, 5],
        [7, 6, 9, 6, 3, 1],
        [9, 3, 1, 9, 7, 9],
        [2, 0, 5, 9, 3, 4]])

KERNEL (3x3)
tensor([[0, 0, 0],
        [0, 1, 0],
        [0, 0, 0]])



## Convolutions

In [4]:
print_tensor(input, title="input")
print_tensor(kernel, title="kernel")

for stride, padding, dilation in [(1, 0, 1),
                                  (2, 0, 1),
                                  (1, 1, 1),
                                  (1, 1, 2)]:

    print("-" * 60)
    print(f"stride: {stride}  padding: {padding}  dilation: {dilation}")

    output = convolve2d(input, kernel, stride, padding, dilation)
    print_tensor(output, title="output")

    torch_output = torch.nn.functional.conv2d(input=input.view(1, 1, *input.shape), 
                                              weight=kernel.view(1, 1, *kernel.shape),
                                              stride=stride,
                                              padding=padding,
                                              dilation=dilation).squeeze()

    assert torch.equal(output, torch_output)

INPUT (6x6)
tensor([[2, 7, 6, 4, 6, 5],
        [0, 4, 0, 3, 8, 4],
        [0, 4, 1, 2, 5, 5],
        [7, 6, 9, 6, 3, 1],
        [9, 3, 1, 9, 7, 9],
        [2, 0, 5, 9, 3, 4]])

KERNEL (3x3)
tensor([[0, 0, 0],
        [0, 1, 0],
        [0, 0, 0]])

------------------------------------------------------------
stride: 1  padding: 0  dilation: 1
OUTPUT (4x4)
tensor([[4, 0, 3, 8],
        [4, 1, 2, 5],
        [6, 9, 6, 3],
        [3, 1, 9, 7]])

------------------------------------------------------------
stride: 2  padding: 0  dilation: 1
OUTPUT (2x2)
tensor([[4, 3],
        [6, 6]])

------------------------------------------------------------
stride: 1  padding: 1  dilation: 1
OUTPUT (6x6)
tensor([[2, 7, 6, 4, 6, 5],
        [0, 4, 0, 3, 8, 4],
        [0, 4, 1, 2, 5, 5],
        [7, 6, 9, 6, 3, 1],
        [9, 3, 1, 9, 7, 9],
        [2, 0, 5, 9, 3, 4]])

------------------------------------------------------------
stride: 1  padding: 1  dilation: 2
OUTPUT (4x4)
tensor([[4, 0, 3

# Pooling

In [5]:
def pool2d(input, size, stride=None, padding=0, dilation=1, mode="max"):
    if stride is None:
        stride = size
    
    ih, iw = input.shape
    oh = (ih - size + 2 * padding - (dilation - 1) * (size - 1)) // stride + 1
    ow = (iw - size + 2 * padding - (dilation - 1) * (size - 1)) // stride + 1
    print(oh, ow)

    if mode == "max":
        mode = torch.max
    elif mode == "min":
        mode = torch.min
    elif mode == "avg" or mode == "mean":
        mode = lambda t: torch.mean(t.float())

    if padding:
        input = torch.nn.functional.pad(input, pad=[padding] * 4)

    output = []
    for i in range(0, oh * stride, stride):
        row = []
        for j in range(0, ow * stride, stride):
            region = input[i:i+size + (size-1)*(dilation-1):dilation, j:j+size + (size-1)*(dilation-1):dilation]
            total = mode(region)
            row.append(total)
        output.append(row)
    
    output = torch.tensor(output)
    assert output.shape == (oh, ow)

    return output    

pool2d(input, 2, mode="avg")


3 3


tensor([[3.2500, 3.2500, 5.7500],
        [4.2500, 4.5000, 3.5000],
        [3.5000, 6.0000, 5.7500]])

## Pools

In [6]:
print_tensor(input, title="input")
for mode, size, stride, padding, dilation in [("max", 2, 2, 0, 1),
                                              ("max", 2, 2, 1, 1),
                                              ("max", 2, 2, 1, 2)]:

    print("-" * 60)
    print(f"mode: {mode}  kernel size: {size}  stride: {stride}  padding: {padding}  dilation: {dilation}")

    output = pool2d(input, size, stride=stride, padding=padding, dilation=dilation, mode=mode)
    print_tensor(output, title="output")

    torch_pool = torch.nn.functional.max_pool2d    

    torch_output = torch_pool(input=input.view(1, 1, *input.shape).float(), 
                              kernel_size=size,
                              stride=stride,
                              padding=padding,
                              dilation=dilation).squeeze()

    assert torch.equal(output.float(), torch_output)

INPUT (6x6)
tensor([[2, 7, 6, 4, 6, 5],
        [0, 4, 0, 3, 8, 4],
        [0, 4, 1, 2, 5, 5],
        [7, 6, 9, 6, 3, 1],
        [9, 3, 1, 9, 7, 9],
        [2, 0, 5, 9, 3, 4]])

------------------------------------------------------------
mode: max  kernel size: 2  stride: 2  padding: 0  dilation: 1
3 3
OUTPUT (3x3)
tensor([[7, 6, 8],
        [7, 9, 5],
        [9, 9, 9]])

------------------------------------------------------------
mode: max  kernel size: 2  stride: 2  padding: 1  dilation: 1
4 4
OUTPUT (4x4)
tensor([[2, 7, 6, 5],
        [0, 4, 8, 5],
        [9, 9, 9, 9],
        [2, 5, 9, 4]])

------------------------------------------------------------
mode: max  kernel size: 2  stride: 2  padding: 1  dilation: 2
3 3
OUTPUT (3x3)
tensor([[4, 4, 4],
        [6, 6, 6],
        [6, 9, 9]])



# Batches and Channels

In [7]:
torch.manual_seed(42)

iN, iC, iH, iW = 1, 2, 4, 4
iN, iC, iH, iW = 1, 2, 6, 6
input = torch.randint(low=0, high=10, size=(iN, iC, iH, iW))

groups = 1
# oC - number of kernels (output channels)
# fC - ic // groups
oC, fC, kH, kW = 4, iC // groups, 3, 3
filter = torch.zeros((oC, fC, kW, kH)).long()
k = 0
for i in range(oC):
    for j in range(fC):
        fi = k // kW
        fj = k % kW
        filter[i,j][fi,fj]=1
        k += 1
        
stride = 1
oH = (iH - kH) // stride + 1
oW = (iW - kW) // stride + 1
result = torch.zeros((iN, oC, oH, oW), dtype=torch.long)
for i in range(iN):
    for j in range(oC):
        for k in range(iC):
            channel = input[i, k]
            kernel = filter[j, k]
            total = convolve2d(channel, kernel, stride=stride)
            result[i, j] += total
            
            
print_tensor(input, title="input")
print_tensor(filter, title="filter")
print_tensor(result, title="output")


INPUT (1x2x6x6)
tensor([[[[2, 7, 6, 4, 6, 5],
          [0, 4, 0, 3, 8, 4],
          [0, 4, 1, 2, 5, 5],
          [7, 6, 9, 6, 3, 1],
          [9, 3, 1, 9, 7, 9],
          [2, 0, 5, 9, 3, 4]],

         [[9, 6, 2, 0, 6, 2],
          [7, 9, 7, 3, 3, 4],
          [3, 7, 0, 9, 0, 9],
          [6, 9, 5, 4, 8, 8],
          [6, 0, 0, 0, 0, 1],
          [3, 0, 1, 1, 7, 9]]]])

FILTER (4x2x3x3)
tensor([[[[1, 0, 0],
          [0, 0, 0],
          [0, 0, 0]],

         [[0, 1, 0],
          [0, 0, 0],
          [0, 0, 0]]],


        [[[0, 0, 1],
          [0, 0, 0],
          [0, 0, 0]],

         [[0, 0, 0],
          [1, 0, 0],
          [0, 0, 0]]],


        [[[0, 0, 0],
          [0, 1, 0],
          [0, 0, 0]],

         [[0, 0, 0],
          [0, 0, 1],
          [0, 0, 0]]],


        [[[0, 0, 0],
          [0, 0, 0],
          [1, 0, 0]],

         [[0, 0, 0],
          [0, 0, 0],
          [0, 1, 0]]]])

OUTPUT (1x4x4x4)
tensor([[[[ 8,  9,  6, 10],
          [ 9, 11,  3,  6],


In [8]:
input_size = (1, 17)

# ic, oc, kernel_size, stride, padding, dilation
layers = [(77, 256, (3, 1), 1, 0, 1),
          (256, 256, (3, 1), 2, 0, 1),
          (256, 256, (3, 1), 2, 0, 1),
          (256, 1, (3, 1), 1, 0, 1),]

ih, iw = input_size
for ic, oc, kernel_size, stride, padding, dilation in layers:
    kw, kh = kernel_size
    oh = (ih - kh + 2 * padding - (kh - 1) * (dilation - 1)) // stride + 1
    ow = (iw - kw + 2 * padding - (kw - 1) * (dilation - 1)) // stride + 1    
    print(f"#{ic} {ih}x{iw} -> #{oc} {oh}x{ow}")
    ih, iw = oh, ow


#77 1x17 -> #256 1x15
#256 1x15 -> #256 1x7
#256 1x7 -> #256 1x3
#256 1x3 -> #1 1x1


# 1D Convolutions DEMO

In [9]:
torch.manual_seed(42)

ic = 7
ih = 10
oc = 8
kh = 2
oh = (ih - kh) // 1 + 1

# input
x = torch.randint(low=0, high=10, size=(ic, ih)).float()
assert x.shape == (ic, ih)

# convolution
layer = torch.nn.Conv1d(in_channels=ic, out_channels=oc, kernel_size=kh)
print(layer)
w, b = layer.parameters()
assert w.shape == (oc, ic, kh)
assert b.shape == (oc, )

y = layer(x)
assert y.shape == (oc, oh)

# output
output = []
for kernel, bias in zip(w, b):
    kernel.shape == (kh, )
    channel = []
    for i in range(0, oh * 1, 1):
        region = x[:,i:i + kh]
        assert region.shape == (ic, kh)
        total = torch.sum(region * kernel) + bias
        channel.append(total)
    output.append(channel)

output = torch.tensor(output)
output, y



Conv1d(7, 8, kernel_size=(2,), stride=(1,))


(tensor([[ -1.4427,   2.1214,   1.0646,  -1.0073,  -2.6071,  -1.4694,  -2.3458,
           -5.0284,  -3.9909],
         [  1.0692,   0.2119,   0.6061,   1.5847,  -1.6509,  -2.1876,   0.4745,
           -1.4009,  -1.3466],
         [ -5.8700,  -7.9829,  -7.5512, -10.8062,  -8.2657,  -5.8836,  -6.4643,
           -6.1355,  -7.9130],
         [ -7.3901,  -3.5969,  -5.3637,  -7.3507,  -4.4702,  -4.2736,  -8.0131,
           -5.2857,  -7.0544],
         [ -0.4411,  -1.2163,  -4.0269,  -6.1607,  -3.6083,  -2.5938,  -2.4093,
           -1.5252,  -2.1961],
         [ -0.5494,   0.7373,   0.2268,   2.6194,   0.5533,   1.7961,   1.7508,
            0.7103,  -1.4017],
         [ -0.3983,   0.8388,   0.6003,   0.9702,   2.1276,   4.2901,   0.8479,
            3.4084,  -0.1641],
         [ -4.2917,   1.3800,  -4.2675,  -4.0642,  -2.1963,  -0.2951,  -3.6937,
           -2.2918,  -1.4031]]),
 tensor([[ -1.4427,   2.1214,   1.0646,  -1.0073,  -2.6071,  -1.4694,  -2.3458,
           -5.0284,  -3.9909],

# Batch Norm

*Not finished*

https://stackoverflow.com/questions/65613694/calculation-of-mean-and-variance-in-batch-normalization-in-convolutional-neural

In [49]:
# TODO
# Without Learnable Parameters
import torch
m = torch.nn.BatchNorm2d(2, affine=True)
input = torch.tensor([[[[1., 0.],
                        [0., 0.]],

                       [[0., 1.],
                        [0., 0.]]],

                      [[[0., 1.],
                        [2., 3.]],
                         
                       [[4., 5.],
                        [6., 7.]]]])



w, b = m.parameters()
print(w, b)       
output = m(input)
output

Parameter containing:
tensor([1., 1.], requires_grad=True) Parameter containing:
tensor([0., 0.], requires_grad=True)


tensor([[[[ 0.1187, -0.8307],
          [-0.8307, -0.8307]],

         [[-1.0422, -0.6797],
          [-1.0422, -1.0422]]],


        [[[-0.8307,  0.1187],
          [ 1.0681,  2.0175]],

         [[ 0.4078,  0.7703],
          [ 1.1329,  1.4954]]]], grad_fn=<NativeBatchNormBackward0>)

In [50]:
dims = (0, 2, 3)
mean = input.mean(dim=dims)
var = input.var(dim=dims)
print(mean, var)
(input - mean) / torch.sqrt(var + m.eps) * w + b



tensor([0.8750, 2.8750]) tensor([1.2679, 8.6964])


tensor([[[[ 0.1110, -0.9749],
          [-0.7771, -0.9749]],

         [[-0.7771, -0.6358],
          [-0.7771, -0.9749]]],


        [[[-0.7771, -0.6358],
          [ 0.9991,  0.0424]],

         [[ 2.7753,  0.7206],
          [ 4.5515,  1.3988]]]], grad_fn=<AddBackward0>)

# Discovering dimensions

In [51]:
input_size = (1, 17)

# ic, oc, kernel_size, stride, padding, dilation
layers = [(77, 256, (3, 1), 1, 0, 1),
          (256, 256, (3, 1), 2, 0, 1),
          (256, 256, (3, 1), 2, 0, 1),
          (256, 1, (3, 1), 1, 0, 1),]

ih, iw = input_size
for ic, oc, kernel_size, stride, padding, dilation in layers:
    kw, kh = kernel_size
    oh = (ih - kh + 2 * padding - (kh - 1) * (dilation - 1)) // stride + 1
    ow = (iw - kw + 2 * padding - (kw - 1) * (dilation - 1)) // stride + 1    
    print(f"#{ic} {ih}x{iw} -> #{oc} {oh}x{ow}")
    ih, iw = oh, ow

#77 1x17 -> #256 1x15
#256 1x15 -> #256 1x7
#256 1x7 -> #256 1x3
#256 1x3 -> #1 1x1
