## Convolutional Neural Network

- commonly applied to analyze visual imagery
- adds convolution and pooling layers in front of a feedforward neural network


### convolutional layer

In [67]:
import torch
import torch.nn as nn

torch.manual_seed(0)

# torch.randint(low, high, size) draws integers from [low, high), i.e. 1..3 here,
# and size=[1, 5, 5] is the shape of the resulting tensor.
# img2 is not displayed, but drawing it advances the global RNG state.
img2 = torch.randint(low=1, high=4, size=[1, 5, 5]).float()

# Hand-written 5x5 image whose rows are cyclic shifts of 0..4.
# unsqueeze(0) adds the leading dimension, giving shape [1, 5, 5].
img = torch.tensor([
    [0., 1., 2., 3., 4.],
    [1., 2., 3., 4., 0.],
    [2., 3., 4., 0., 1.],
    [3., 4., 0., 1., 2.],
    [4., 0., 1., 2., 3.],
]).unsqueeze(0)

print('input image: \n', img)
print('shape image: \n', img.shape)

input image: 
 tensor([[[0., 1., 2., 3., 4.],
         [1., 2., 3., 4., 0.],
         [2., 3., 4., 0., 1.],
         [3., 4., 0., 1., 2.],
         [4., 0., 1., 2., 3.]]])
shape image: 
 torch.Size([1, 5, 5])


In [68]:
# Three single-channel 3x3 convolutions that differ only in stride/padding.
# They are built in this order on purpose: each constructor draws its initial
# weights from the global RNG, so the order affects the values printed below.
cnn3 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)  # stride=1, padding=0 (defaults)
cnn2 = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)  # stride=1
cnn = nn.Conv2d(
    in_channels=1,
    out_channels=1,
    kernel_size=3,
    stride=2,
    padding=1,
)
# Variant to experiment with (two output channels instead of one):
# cnn = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=3, stride=2, padding=1)

# Parameter shapes of a Conv2d layer:
#   weight: (out_channels, in_channels, kernel_size[0], kernel_size[1])
#   bias:   (out_channels,)
for name, param in cnn.named_parameters():
    if not param.requires_grad:
        continue  # only show trainable parameters
    print(name, ': \n', param.data)

weight : 
 tensor([[[[-0.1718,  0.2103,  0.1954],
          [-0.1478, -0.0120,  0.2132],
          [ 0.3314,  0.1323,  0.0450]]]])
bias : 
 tensor([0.2235])


In [69]:
# To make the convolution arithmetic easy to verify by hand, overwrite the
# randomly initialised parameters of `cnn` with small integer values.
w, b = cnn.weight, cnn.bias

# Row-wise kernel: +1 on the top row, -1 on the middle row, 0 on the bottom row.
kernel = torch.tensor([[ 1.,  1.,  1.],
                       [-1., -1., -1.],
                       [ 0.,  0.,  0.]])

# Write the values into the existing parameters in place. torch.no_grad()
# keeps the assignment out of the autograd graph, and updating in place
# (instead of going through NumPy and wrapping new nn.Parameter objects)
# keeps the same Parameter objects registered on the module and does not
# require the tensors to live on the CPU.
with torch.no_grad():
    w[0, 0].copy_(kernel)  # filter for output channel 0 / input channel 0
    b[0] = 2.

for name, param in cnn.named_parameters():
    if param.requires_grad:
        print(name, ': \n', param.data)

weight : 
 tensor([[[[ 1.,  1.,  1.],
          [-1., -1., -1.],
          [ 0.,  0.,  0.]]]])
bias : 
 tensor([2.])


#### Basic operators in convolutional layer
- step 1: element-wise matrix multiplication
$$
\begin{pmatrix}
3 & 1 & 3 \\
2 & 1 & 2 \\
2 & 1 & 3
\end{pmatrix}
\odot
\begin{pmatrix}
1 & 1 & 1 \\
-1 & -1 & -1 \\
0 & 0 & 0
\end{pmatrix}
=
\begin{pmatrix}
3 & 1 & 3 \\
-2 & -1 & -2 \\
0 & 0 & 0
\end{pmatrix}
$$

- step 2: summation of all the elements
$$
3 + 1 + 3 + (-2) + (-1) + (-2) + 0 + 0 + 0 = 2
$$

- step 3: add bias
$$
2 + 2 = 4
$$

### padding: adding zeros around the matrix
before padding: matrix of size 4 * 4
$$
\begin{pmatrix}
3 & 1 & 3 & 1 \\
2 & 1 & 2 & 2 \\
2 & 1 & 3 & 3 \\
1 & 1 & 2 & 3
\end{pmatrix}
$$

after padding: matrix of size 6 * 6
$$
\begin{pmatrix}
0 & 0 & 0 & 0 & 0 & 0 \\
0 & 3 & 1 & 3 & 1 & 0 \\
0 & 2 & 1 & 2 & 2 & 0 \\
0 & 2 & 1 & 3 & 3 & 0 \\
0 & 1 & 1 & 2 & 3 & 0 \\
0 & 0 & 0 & 0 & 0 & 0
\end{pmatrix}
$$

In [70]:
# Run the convolution layer `cnn` on `img` and show the resulting feature map.
# NOTE(review): this relies on `cnn` and `img` still being the objects defined
# in the cells above; re-run those cells first if `img` has been reassigned.
out = cnn(img)
print('output image: \n', out)

output image: 
 tensor([[[ 1., -4., -5.],
         [ 0.,  4.,  5.],
         [ 5.,  4.,  0.]]], grad_fn=<SqueezeBackward1>)


#### Output Formula for Convolution
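- $ O = \left\lfloor \frac {W - K + 2P}{S} \right\rfloor + 1$
    - $O$: output size
    - $W$: input size
    - $K$: kernel size
    - $P$: padding (number of zeros added on each side)
        - "same" padding (non-zero $P$ chosen so that the output size equals the input size when $S = 1$): $P = \frac{K - 1}{2}$, e.g. for $K = 5$, $P = \frac{5 - 1}{2} = 2$
    - $S$: stride
- example (the layer used above): $W = 5$, $K = 3$, $P = 1$, $S = 2$ gives $O = \left\lfloor \frac{5 - 3 + 2 \cdot 1}{2} \right\rfloor + 1 = 3$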

### Pooling layer
A pooling layer reduces the dimensions of the hidden layer by combining the outputs of neuron clusters in the previous layer into a single neuron in the next layer.

In [61]:
# Random 4x4 input for the pooling examples: integers from {1, 2, 3}
# (randint's upper bound is exclusive), converted to floats.
# Note: this rebinds `img`, replacing the 5x5 image defined earlier.
img = torch.randint(low=1, high=4, size=(1, 4, 4)).float()
print('input image: \n', img)

input image: 
 tensor([[[1., 2., 3., 3.],
         [2., 3., 1., 2.],
         [2., 2., 3., 2.],
         [2., 3., 2., 2.]]])


In [62]:
# Three max-pooling configurations over a 2x2 window:
#   maxpool  - stride left at its default
#   maxpool2 - stride 1, so neighbouring windows overlap
#   maxpool3 - stride 1 with one element of padding on each side
maxpool = nn.MaxPool2d(kernel_size=2)
maxpool2 = nn.MaxPool2d(kernel_size=2, stride=1)
maxpool3 = nn.MaxPool2d(kernel_size=2, stride=1, padding=1)

# Apply each layer to the same input, then print the results in order.
out, out2, out3 = (pool(img) for pool in (maxpool, maxpool2, maxpool3))
for pooled in (out, out2, out3):
    print('output image: \n', pooled)

output image: 
 tensor([[[3., 3.],
         [3., 3.]]])
output image: 
 tensor([[[3., 3., 3.],
         [3., 3., 3.],
         [3., 3., 3.]]])
output image: 
 tensor([[[1., 2., 3., 3., 3.],
         [2., 3., 3., 3., 3.],
         [2., 3., 3., 3., 2.],
         [2., 3., 3., 3., 2.],
         [2., 3., 3., 2., 2.]]])


#### Output Formula for Pooling
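- $ O = \left\lfloor \frac {W - K + 2P}{S} \right\rfloor + 1$
    - W: input size
    - K: kernel size
    - P: padding (0 by default; the last example above uses $P = 1$, giving $\frac{4 - 2 + 2}{1} + 1 = 5$)
    - S: stride size; PyTorch defaults the stride to the kernel size
        - If using the PyTorch default stride and no padding, this results in the formula $ O = \left\lfloor \frac {W}{K} \right\rfloor$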