# 220. CNN Basic

## Convolution

<img src='conv_layer.gif' height=60% width=60% />

In [1]:
import torch 
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

<img src="convolution.JPG" width="500" align="center">

In [2]:
# Single-channel 2x2 convolution: stride 1, no padding, no bias term.
conv = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=2,
    stride=1,
    padding=0,
    bias=False,
)
conv

Conv2d(1, 1, kernel_size=(2, 2), stride=(1, 1), bias=False)

- <code>nn.Conv2d</code> 의 parameter 는 random 하게 초기화되고, 훈련 과정에서 학습됨.  

In [3]:
# Show the layer's parameters; only 'weight' is present because the layer was built with bias=False.
conv.state_dict()

OrderedDict([('weight',
              tensor([[[[ 0.2371,  0.4073],
                        [-0.1145,  0.0169]]]]))])

- 간단한 계산 예시를 위해 kernel parameter 값을 임의로 수정

In [8]:
# Overwrite the 2x2 kernel with fixed values so the convolution result can be
# verified by hand. The parameter is updated in place under no_grad() rather
# than by writing through the tensors returned by state_dict(), so the
# assignment is not recorded by autograd.
with torch.no_grad():
    conv.weight[0, 0] = torch.tensor([
        [1., 2.],
        [3., 4.],
    ])
conv.state_dict()

OrderedDict([('weight',
              tensor([[[[1., 2.],
                        [3., 4.]]]]))])

- sample image를 4x4 크기로 생성  

- ``torch.nn`` 은 미니 배치(mini-batch)만 지원하므로, `nn.Conv2d` 는 `nSamples x nChannels x Height x Width` 의
    4차원 Tensor를 입력으로 한다. 하나의 샘플만 있다면, `input.unsqueeze(0)` 을 사용해서 가짜 차원을 추가한다.

In [9]:
# 4x4 binary test image; indexing with [None, None] prepends the batch and
# channel axes, giving the (1, 1, 4, 4) layout that nn.Conv2d expects.
sample_image = torch.tensor(
    [[1, 0, 1, 0],
     [0, 1, 1, 0],
     [1, 0, 1, 0],
     [1, 0, 1, 1]],
    dtype=torch.float32,
)[None, None]
sample_image.numpy()

array([[[[1., 0., 1., 0.],
         [0., 1., 1., 0.],
         [1., 0., 1., 0.],
         [1., 0., 1., 1.]]]], dtype=float32)

- convolution 결과

In [11]:
# Run the convolution on the sample image.
z = conv(sample_image)
# detach() separates the result from the autograd graph so it can be converted to a NumPy array.
z.detach().numpy()

array([[[[5., 9., 4.],
         [5., 7., 4.],
         [4., 6., 8.]]]], dtype=float32)

In [12]:
# Layout is (nSamples, nChannels, Height, Width): a 4x4 input with a 2x2 kernel yields 3x3.
z.shape

torch.Size([1, 1, 3, 3])

### Convolutional Layer 의 output size 계산

<img src="padding.JPG" width="500" align="center">

$$ \text{output width} = \lfloor\frac{W - F_w + 2P}{S_w} + 1\rfloor  $$
$$ \text{output height} = \lfloor\frac{H - F_h + 2P}{S_h} + 1 \rfloor$$
$W, H - \text{input width/height}$  
$F_{w,h} - \text{filter width/height}$  
$S_{w,h} - \text{stride width/height}$  
$P - \text{padding}$

`conv output size = (input size - filter size + 2 * padding size) / stride size + 1`

- padding 을 적용한 경우(아래 예시는 `P = 1`)의 output image — no padding(`P = 0`)이면 3x3 입력의 output 은 2x2

In [13]:
def output_size(W, F, P, S, poolsize=1):
    """Return the spatial output size of a convolution, optionally followed by pooling.

    Implements floor((W - F + 2P) / S) + 1. The floor matters whenever the
    stride does not evenly divide (W - F + 2P): a partial window is dropped,
    so the result must be a whole number rather than a fraction.

    Args:
        W: input width (or height).
        F: filter (kernel) size along the same axis.
        P: padding added to each side.
        S: stride.
        poolsize: size of a non-overlapping pooling window applied after the
            convolution; 1 (the default) means no pooling.

    Returns:
        The floored output size; an int when all arguments are ints.
    """
    size = (W - F + 2 * P) // S + 1
    # Pooling floors as well: a trailing partial window is discarded.
    return size if poolsize == 1 else size // poolsize

In [15]:
# 3x3 image holding 0..8 in row-major order, shaped (batch=1, channel=1, 3, 3).
image1 = torch.arange(9, dtype=torch.float32).reshape(1, 1, 3, 3)
image1.size()

torch.Size([1, 1, 3, 3])

In [28]:
# Convolution hyper-parameters: kernel size, stride, and padding.
F = 2
S = 1
P = 1

conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=F, stride=S, padding=P, bias=False)
# Set the kernel to known values. The parameter is modified in place under
# no_grad() rather than by writing through the tensors returned by
# state_dict(), so autograd does not record the assignment.
with torch.no_grad():
    conv1.weight[0, 0] = torch.tensor([
        [0., 1.],
        [2., 3.],
    ])
z = conv1(image1)
# Compare the input and output shapes side by side.
image1.size(), z.size()

(torch.Size([1, 1, 3, 3]), torch.Size([1, 1, 4, 4]))

In [29]:
# Cross-check the formula against the shape produced by conv1 (input size 3 with the F, P, S defined above).
output_size(3, F, P, S)

4.0

- padding=1 인 경우의 output image

In [25]:
# Same formula with the padding argument fixed to 1 explicitly.
output_size(3, F, 1, S)

4.0

## Pooling

- Max Pooling - <code>torch.nn.MaxPool2d</code>  
- Average Pooling - <code>torch.nn.AvgPool2d</code>

In [50]:
# 5x5 test image with batch and channel axes prepended -> shape (1, 1, 5, 5).
image2 = torch.tensor(
    [[1, 1, 1, 4, 5],
     [2, 2, 2, 4, 5],
     [2, 6, 5, 8, 5],
     [3, 2, 1, 0, 5],
     [1, 1, 3, 0, 0]],
    dtype=torch.float32,
)[None, None]

# 2x2 max pooling with stride 2; dilation=2 spaces the window elements two pixels apart.
max1 = nn.MaxPool2d(kernel_size=2, stride=2, dilation=2)
max1

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=2, ceil_mode=False)

In [51]:
# Apply the dilated max pooling to the 5x5 image.
max1(image2)

tensor([[[[5., 5.],
          [5., 5.]]]])

In [52]:
# 2x2 average pooling with stride 2. NOTE: the variable keeps the name `max2`
# even though it holds an average-pooling layer.
max2 = nn.AvgPool2d(kernel_size=2, stride=2)
max2(image2)

tensor([[[[1.5000, 2.7500],
          [3.2500, 3.5000]]]])

### Multiple Channels

In [53]:
# Three output channels: one 2x2 kernel per channel, so the result has shape (1, 3, 3, 3).
# This rebinds `conv`, replacing the single-channel layer defined earlier.
conv = nn.Conv2d(
    in_channels=1,
    out_channels=3,
    kernel_size=2,
    bias=False,
)
conv(sample_image)

tensor([[[[ 0.0877,  0.4432,  0.3424],
          [ 0.2790, -0.0523,  0.3424],
          [ 0.3424,  0.0243,  0.5066]],

         [[ 0.1423,  0.0221, -0.1192],
          [-0.1388,  0.1040, -0.1192],
          [-0.1192,  0.1227,  0.0417]],

         [[-0.0819,  0.0206, -0.6329],
          [-0.3957, -0.3429, -0.6329],
          [-0.6329,  0.1552, -0.2166]]]], grad_fn=<ConvolutionBackward0>)

### Flatten

- ``torch.flatten(x, 1)``

In [54]:
# 2x2x2 integer tensor holding 1..8 in row-major order.
t = torch.arange(1, 9).reshape(2, 2, 2)
print(t.shape)
print(t)

torch.Size([2, 2, 2])
tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])


In [55]:
# Flatten everything from dim 1 onward while keeping dim 0: (2, 2, 2) -> (2, 4).
torch.flatten(t, 1)

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])

In [56]:
# Reshape into a single column; -1 lets PyTorch infer the row count (8 here).
t.view(-1, 1)

tensor([[1],
        [2],
        [3],
        [4],
        [5],
        [6],
        [7],
        [8]])