# 220. CNN Basic

## Convolution

<img src='conv_layer.gif' height=60% width=60% />

In [1]:
import torch 
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np

<img src="convolution.JPG" width="500" align="center">

In [2]:
# Single-channel 2x2 convolution without bias; stride 1 and zero padding are
# spelled out even though they are the defaults.
conv = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=(2, 2),
    stride=(1, 1),
    padding=(0, 0),
    bias=False,
)
conv

Conv2d(1, 1, kernel_size=(2, 2), stride=(1, 1), bias=False)

- <code>nn.Conv2d</code> 의 parameter 는 random 하게 초기화되고, 훈련 과정에서 학습됨.  

In [3]:
# Inspect the layer's parameters; the layer was built with bias=False, so only 'weight' is listed.
conv.state_dict()

OrderedDict([('weight',
              tensor([[[[-0.4688, -0.0984],
                        [ 0.1677,  0.2118]]]]))])

- 간단한 계산 예시를 위해 kernel parameter 값을 임의로 수정

In [5]:
# Replace the randomly initialised kernel with fixed values so the convolution
# below can be checked by hand. The parameter is written directly (rather than
# through a state_dict() view) and under no_grad, because in-place edits of a
# leaf tensor that requires grad are rejected while autograd is recording.
with torch.no_grad():
    conv.weight[0, 0] = torch.tensor([[1., 2.],
                                      [3., 4.]])
conv.state_dict()

OrderedDict([('weight',
              tensor([[[[1., 2.],
                        [3., 4.]]]]))])

- sample image를 4x4 크기로 생성  

- ``torch.nn`` 은 미니 배치(mini-batch)만 지원하므로, `nn.Conv2d` 는 `nSamples x nChannels x Height x Width` 의
    4차원 Tensor를 입력으로 한다. 하나의 샘플만 있다면, `input.unsqueeze(0)` 을 사용해서 가짜 차원을 추가한다.

In [6]:
# 4x4 binary test image, lifted to (nSamples, nChannels, Height, Width) = (1, 1, 4, 4).
sample_image = torch.tensor(
    [[1, 0, 1, 0],
     [0, 1, 1, 0],
     [1, 0, 1, 0],
     [1, 0, 1, 1]],
    dtype=torch.float,
)[None, None]  # indexing with None inserts the two leading singleton axes
print(sample_image.numpy())

[[[[1. 0. 1. 0.]
   [0. 1. 1. 0.]
   [1. 0. 1. 0.]
   [1. 0. 1. 1.]]]]


- convolution 결과

In [7]:
z = conv(sample_image)

# Shape, a blank line, then the values (detached from autograd before the NumPy conversion).
print(z.shape, end="\n\n")
print(z.detach().numpy())

torch.Size([1, 1, 3, 3])

[[[[5. 9. 4.]
   [5. 7. 4.]
   [4. 6. 8.]]]]


### Convolutional Layer 의 output size 계산

<img src="padding.JPG" width="500" align="center">

$$ \text{output width} = \left\lfloor\frac{W - F_w + 2P}{S_w} + 1\right\rfloor  $$
$$ \text{output height} = \left\lfloor\frac{H - F_h + 2P}{S_h} + 1 \right\rfloor$$
$W, H - \text{input width/height}$  
$F_{w,h} - \text{filter width/height}$  
$S_{w,h} - \text{stride width/height}$  
$P - \text{padding}$

`conv output size = floor((input size - filter size + 2 * padding size) / stride size) + 1`

In [8]:
def output_size(W, F, P, S, poolsize=1):
    """Return the output size of a conv layer along one spatial dimension.

    Computes floor((W - F + 2P) / S) + 1, optionally followed by a pooling
    step that divides the result by ``poolsize`` (again rounding down).

    Args:
        W: input size (width or height).
        F: filter (kernel) size.
        P: padding added on each side.
        S: stride.
        poolsize: pooling window size applied after the convolution;
            1 (the default) means no pooling.

    Returns:
        The output size; an int when all arguments are ints.
    """
    # Floor division, matching the floor in the output-size formula: true
    # division would report fractional sizes such as 2.5 when the stride does
    # not divide (W - F + 2P) evenly.
    size = (W - F + 2 * P) // S + 1
    return size // poolsize

In [10]:
# 3x3 ramp of the values 0..8, shaped as a (1, 1, 3, 3) batch.
image1 = torch.arange(9, dtype=torch.float).reshape(1, 1, 3, 3)
print(image1.shape)
print(image1.numpy())

torch.Size([1, 1, 3, 3])
[[[[0. 1. 2.]
   [3. 4. 5.]
   [6. 7. 8.]]]]


- no padding 의 경우 output image

In [12]:
F = 2  # kernel size
S = 1  # stride
P = 0  # padding (none)

# padding=P is passed explicitly so the layer always agrees with the size
# formula printed below, even if P is edited.
conv1 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=F, stride=S, padding=P, bias=False)

# Fix the kernel values for a hand-checkable result. The parameter is written
# directly under no_grad instead of through a state_dict() view.
with torch.no_grad():
    conv1.weight[0, 0] = torch.tensor([[0., 1.], [2., 3.]])

z1 = conv1(image1)

print("입력 image size =", image1.size())
print("convolution 후 image size =", z1.size())
print("Convolution 출력 data =", z1.detach().numpy())
print()

# Input height/width are dims 2 and 3 of the (N, C, H, W) tensor.
H = image1.size()[2]
W = image1.size()[3]
print("출력 Hight = ", (H - F + 2*P) // S + 1)
print("출력 Width = ", (W - F + 2*P) // S + 1)

입력 image size = torch.Size([1, 1, 3, 3])
convolution 후 image size = torch.Size([1, 1, 2, 2])
Convolution 출력 data = [[[[19. 25.]
   [37. 43.]]]]

출력 Hight =  2
출력 Width =  2


In [13]:
# Cross-check the formula helper against the convolution above (width only, no pooling).
output_size(W, F, P, S, poolsize=1)

2.0

- padding=1 인 경우의 output image

In [14]:
F = 2  # kernel size
S = 1  # stride
P = 1  # padding of one pixel on every side

conv2 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=F, stride=S, padding=P, bias=False)

# Fix the kernel values for a hand-checkable result. The parameter is written
# directly under no_grad instead of through a state_dict() view.
with torch.no_grad():
    conv2.weight[0, 0] = torch.tensor([[0., 1.], [2., 3.]])

z2 = conv2(image1)

print("입력 image size =", image1.size())
print("convolution 후 image size =", z2.size())
print("Convolution 출력 data =", z2.detach().numpy())
print()

# Read the input height/width here rather than relying on values left over
# from an earlier cell.
H = image1.size()[2]
W = image1.size()[3]
print("출력 Hight = ", (H - F + 2*P) // S + 1)
print("출력 Width = ", (W - F + 2*P) // S + 1)

입력 image size = torch.Size([1, 1, 3, 3])
convolution 후 image size = torch.Size([1, 1, 4, 4])
Convolution 출력 data = [[[[ 0.  3.  8.  4.]
   [ 9. 19. 25. 10.]
   [21. 37. 43. 16.]
   [ 6.  7.  8.  0.]]]]

출력 Hight =  4
출력 Width =  4


In [15]:
# Cross-check the formula helper against the padded convolution above (width only, no pooling).
output_size(W, F, P, S, poolsize=1)

4.0

## Pooling

- Max Pooling - <code>torch.nn.MaxPool2d</code>  
- Average Pooling - <code>torch.nn.AvgPool2d</code>

In [16]:
# 4x4 input for the pooling examples, shaped as a (1, 1, 4, 4) batch.
image2 = torch.tensor(
    [[1, 1, 1, 4],
     [2, 6, 5, 8],
     [3, 2, 1, 0],
     [1, 1, 3, 5]],
    dtype=torch.float,
)[None, None]  # indexing with None inserts the two leading singleton axes

In [17]:
# 2x2 max pooling with stride 2, i.e. non-overlapping windows.
max1 = torch.nn.MaxPool2d(kernel_size=2, stride=2)

print(max1(image2).numpy())

[[[[6. 8.]
   [3. 5.]]]]


In [18]:
# 2x2 average pooling with stride 2. Note: despite its name, `max2` holds an
# *average* pooling layer.
max2 = torch.nn.AvgPool2d(kernel_size=2, stride=2)
max2(image2)

tensor([[[[2.5000, 4.5000],
          [1.7500, 2.2500]]]])

### Multiple Channels

In [19]:
# Three output channels: the layer holds three randomly initialised 2x2 kernels,
# so the result has one 3x3 feature map per kernel.
# NOTE: this rebinds `conv`, replacing the single-channel layer created earlier.
conv = nn.Conv2d(1, 3, kernel_size=(2, 2), bias=False)
conv(sample_image)

tensor([[[[-0.2617, -0.0764, -0.7037],
          [-0.2757, -0.2947, -0.7037],
          [-0.7037,  0.1663, -0.5044]],

         [[-0.6575, -0.4721, -0.4233],
          [-0.2129, -0.8454, -0.4233],
          [-0.4233, -0.4471, -0.6825]],

         [[ 0.2630,  0.0849,  0.7941],
          [ 0.2839,  0.2148,  0.7941],
          [ 0.7941, -0.2472,  0.5951]]]], grad_fn=<ConvolutionBackward0>)

### Flatten

- ``torch.flatten(x, 1)``

In [20]:
# Integers 1..8 arranged as a (2, 2, 2) tensor.
t = torch.arange(1, 9).reshape(2, 2, 2)
print(t.shape, t, sep="\n")

torch.Size([2, 2, 2])
tensor([[[1, 2],
         [3, 4]],

        [[5, 6],
         [7, 8]]])


In [21]:
# Flatten everything from dim 1 onward while keeping dim 0: (2, 2, 2) -> (2, 4).
torch.flatten(t, 1)

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])

In [22]:
# Same result as flatten(): keep dim 0 and let view() infer the remaining size.
t.view(t.shape[0], -1)

tensor([[1, 2, 3, 4],
        [5, 6, 7, 8]])