# Convolutional Neural Network
```
INPUT -> [(Filter + Activation + Padding) -> Pooling] X n -> FCLayer -> OUTPUT
```

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [22]:
# Conv: hyper-parameters and dummy tensors for the hand-written CNN demo.
batch_size = 10

# Square RGB-like input images.
img_size, img_channel = 128, 3

# Filter bank: `filter_channel` square filters moved `stride` pixels at a time.
stride, filter_size, filter_channel = 1, 3, 64
conv_channel = filter_channel

# Max-pooling window and step.
pool_size, pool_stride = (2, 2), 2

output_category = 10

# Channels-last batch of all-ones images: [batch, height, width, channel].
src = torch.ones((batch_size, img_size, img_size, img_channel), dtype=torch.float32)
print(f'SRC : {src[0, ..., 0]}')

# Constant filter weights (2) and biases (10), both tracked by autograd.
W = torch.full((filter_size, filter_size, filter_channel), 2, dtype=torch.float32, requires_grad=True)
b = torch.full((1, filter_channel), 10, dtype=torch.float32, requires_grad=True)  # [1, channel]
print(f'Filter : {W[..., 0]}')
print(f'Bias : {b[:, 0]}')

assert not (img_size - filter_size) % stride, "Image, filter, and stride are not matched"
# NOTE(review): an unpadded convolution usually has
# (img_size - filter_size) // stride + 1 positions per axis; this formula omits
# the "+ 1" -- confirm whether that is intended.
conv_size = (img_size - filter_size) // stride
conv = torch.zeros((conv_size, conv_size, conv_channel), dtype=torch.float32)
# conv[0] is a single row of the map, so this prints [width, channel].
print(f'Convolution Layer size: {conv[0].shape}')


def calculate_convolution(img, W, b, stride):
    '''
    Slide a bank of filters over one image (no padding) and apply leaky ReLU.

    W has no input-channel axis, so every input channel is filtered with the
    same weights and the per-channel responses are summed. Each output value is

        leaky_relu(sum_{i,j,c} img[y*stride+i, x*stride+j, c] * W[i, j, k] + b[0, k])

    img : [height, width, channel]
    W : [filter_height, filter_width, filter_channel]
    b : [1, filter_channel]
    stride : the moving step
    return conv : [conv_height, conv_width, conv_channel(=filter_channel)]
        with conv_height = (height - filter_height) // stride + 1 (same for width)

    Raises AssertionError when the filter is larger than the image or does not
    tile the image exactly at the given stride.
    '''
    img_height, img_width, _ = img.shape
    filter_height, filter_width, filter_channel = W.shape

    assert img_height >= filter_height and img_width >= filter_width, \
        "Image, filter, and stride are not matched"
    assert (img_height - filter_height) % stride == 0, "Image, filter, and stride are not matched"
    assert (img_width - filter_width) % stride == 0, "Image, filter, and stride are not matched"

    # One output per filter position, hence the "+ 1".
    conv_height = (img_height - filter_height) // stride + 1
    conv_width = (img_width - filter_width) // stride + 1

    conv = torch.zeros(conv_height, conv_width, filter_channel, dtype=W.dtype, device=img.device)

    # The filter is shared across input channels, so summing the channels once
    # up front is equivalent to filtering each channel and adding the results.
    channel_sum = img.sum(dim=-1)
    bias = b.reshape(-1)

    # (y, x) index the OUTPUT map; the input window starts at (y, x) * stride.
    for y in range(conv_height):
        top = y * stride
        for x in range(conv_width):
            left = x * stride
            patch = channel_sum[top:top + filter_height, left:left + filter_width]
            # Element-wise product with every filter, reduced over the window;
            # the bias is added exactly once per output channel.
            response = (patch.unsqueeze(-1) * W).sum(dim=(0, 1)) + bias
            # Activation is applied once, after all contributions are summed.
            conv[y, x, :] = F.leaky_relu(response)
    return conv

def pad_zeros(conv, img_size):
    '''
    Zero-pad a feature map so its spatial size matches the original image.

    The map is centred in the output. When the size difference is odd, the
    extra row/column of zeros goes to the bottom/right. A map that already has
    the target size is returned as an unpadded copy.

    conv : [conv_height, conv_width, conv_channel]
    img_size : [img_height, img_width]
    return padded_conv : [img_height, img_width, conv_channel]

    Raises AssertionError when the map is larger than the target size.
    '''
    conv_h, conv_w, conv_c = conv.shape
    img_h, img_w = img_size
    assert img_h >= conv_h and img_w >= conv_w, "Convolution map is bigger than original"

    # Half of the gap goes above/left; the remainder ends up below/right.
    pad_upper = (img_h - conv_h) // 2
    pad_left = (img_w - conv_w) // 2

    padded_conv = torch.zeros(img_h, img_w, conv_c, dtype=conv.dtype, device=conv.device)
    # Slice by offset + size so the target window always matches conv's shape,
    # including when no padding is needed on a side.
    padded_conv[pad_upper:pad_upper + conv_h, pad_left:pad_left + conv_w] = conv
    return padded_conv

def max_pooling(conv, pool_size, pool_stride):
    '''
    Down-sample a feature map by taking the per-channel maximum of each window.

    conv : [conv_height, conv_width, conv_channel]
    pool_size : [pool_height, pool_width]
    pool_stride : the moving step
    return pooled_conv : [pooled_height, pooled_width, conv_channel]
        with pooled_height = (conv_height - pool_height) // pool_stride + 1
        (same for width)

    Raises AssertionError when the window is larger than the map or does not
    tile the map exactly at the given stride.
    '''
    conv_height, conv_width, conv_channel = conv.shape
    pool_height, pool_width = pool_size

    assert conv_height >= pool_height and conv_width >= pool_width, \
        "Conv, pool, and stride are not matched"
    assert (conv_height - pool_height) % pool_stride == 0, "Conv, pool, and stride are not matched"
    assert (conv_width - pool_width) % pool_stride == 0, "Conv, pool, and stride are not matched"

    # One output per window position, hence the "+ 1".
    pooled_height = (conv_height - pool_height) // pool_stride + 1
    pooled_width = (conv_width - pool_width) // pool_stride + 1

    pooled_conv = torch.zeros(pooled_height, pooled_width, conv_channel,
                              dtype=conv.dtype, device=conv.device)

    # (y, x) index the OUTPUT map; the input window starts at (y, x) * pool_stride
    # and spans pool_height x pool_width cells.
    for y in range(pooled_height):
        top = y * pool_stride
        for x in range(pooled_width):
            left = x * pool_stride
            window = conv[top:top + pool_height, left:left + pool_width]
            # Merge the two spatial axes, then take the max for each channel.
            pooled_conv[y, x, :] = window.flatten(0, -2).max(dim=0).values
    return pooled_conv

def fc_layer(conv, output_category, W=None, b=None):
    '''
    Fully connected layer: flatten the feature map and apply W @ x + b.

    conv : [conv_height, conv_width, conv_channel]
    output_category : The number of outputs
    W : [output_category, conv_height * conv_width * conv_channel], optional.
        Sampled uniformly from [0, 1) when omitted.
    b : [output_category], optional. Sampled uniformly from [0, 1) when omitted.
    return output : [output_category]
    '''
    flatten_conv = conv.flatten()

    in_size = flatten_conv.shape[0]
    out_size = output_category

    # Create the parameters directly on conv's device. Calling .to(device)
    # after requires_grad=True would return a non-leaf copy on other devices,
    # and gradients would not accumulate on it.
    if W is None:
        W = torch.rand(out_size, in_size, dtype=conv.dtype, device=conv.device,
                       requires_grad=True)
    if b is None:
        # One bias per output unit rather than a single shared scalar.
        b = torch.rand(out_size, dtype=conv.dtype, device=conv.device,
                       requires_grad=True)

    output = W @ flatten_conv + b
    return output

SRC : tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]])
Filter : tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]], grad_fn=<SelectBackward0>)
Bias : tensor([10.], grad_fn=<SelectBackward0>)
Convolution Layer size: torch.Size([125, 64])


In [23]:
# Forward pass for the first image of the batch:
# convolution -> zero padding back to the image size -> max pooling -> FC layer.
conv = calculate_convolution(img=src[0], W=W, b=b, stride=stride)
# src is [batch, height, width, channel]; axes 1-2 give the spatial size.
padded_conv = pad_zeros(conv=conv, img_size=src.shape[1:3])
pooled_conv = max_pooling(conv=padded_conv, pool_size=pool_size, pool_stride=pool_stride)
output = fc_layer(conv=pooled_conv, output_category=output_category)