# convolution network

## description in textbook

- continous form
  
$\begin{equation*}
(f*g)(n)=∫_{-∞}^{∞}f{τ}g(n-τ)dτ
\end{equation*}$

- discrete from

$\begin{equation*}
(f*g)(n)=∑_{τ=-∞}^{∞}f{τ}g(n-τ)
\end{equation*}$


对卷积这个名词的理解：**所谓两个函数的卷积，本质上就是先将一个函数翻转，然后进行滑动叠加。**

参考[知乎](https://www.zhihu.com/question/22298352)回答。

**瞬时行为的持续性后果**


In [1]:
# 2d convolution

import torch
from torch import nn

x = torch.rand(1, 3, 28, 28)
# parameters 1st: 3, input has 3 channels
# parameters 2nd: 3, output has 3 channels
layer = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=1)
out = layer.forward(x)
print("padding=0", out.size())

# [b,1,28,28] == kernel[3,1,3,3] ==> [1,2,28,28]
out = layer.forward(x)
print("padding=1", out.size())

# convenient way
out = layer(x)  #.__call__
print("convenient", out.size())

# some information
print("layer.weight", layer.weight)
print("layer.bias", layer.bias)


padding=0 torch.Size([1, 3, 28, 28])
padding=1 torch.Size([1, 3, 28, 28])
convenient torch.Size([1, 3, 28, 28])
layer.weight Parameter containing:
tensor([[[[-0.0315,  0.0885,  0.0659],
          [ 0.0216, -0.1016,  0.0011],
          [ 0.1141, -0.0933, -0.1203]],

         [[-0.1121,  0.0020,  0.1166],
          [-0.0271,  0.0315, -0.0622],
          [ 0.1611,  0.0671,  0.1478]],

         [[ 0.1047, -0.0065,  0.1638],
          [ 0.0945, -0.1282, -0.0828],
          [ 0.1281, -0.1003, -0.0865]]],


        [[[-0.1422, -0.1166, -0.0918],
          [-0.0167,  0.1351, -0.1817],
          [-0.0331,  0.1771, -0.1429]],

         [[ 0.1405,  0.0679, -0.0774],
          [-0.1561, -0.0875, -0.0400],
          [-0.0537,  0.0367, -0.1841]],

         [[-0.1595,  0.0247, -0.1373],
          [ 0.1016,  0.0223,  0.0657],
          [-0.0454,  0.1220, -0.1142]]],


        [[[ 0.0137, -0.0186, -0.0910],
          [ 0.1029,  0.1110,  0.1907],
          [ 0.0869,  0.0613,  0.1082]],

         [[ 0.11

In [2]:
# low-level usage

from torch.nn import functional as F
import torch

x = torch.rand(1, 3, 28, 28)
w = torch.rand(16, 3, 5, 5)
b = torch.rand(16)
out = F.conv2d(x, w, b, stride=1, padding=1)
print("out", out.shape)


out torch.Size([1, 16, 26, 26])


### Max pooling & Subsampling

## Batch Normalization


In [3]:
import torch

from torch import nn

x = torch.rand(100, 16, 784)  # 28*28
layer = nn.BatchNorm1d(16)
out = layer(x)
print("layer.running_mean", layer.running_mean)
print("layer.running_var", layer.running_var)


layer.running_mean tensor([0.0499, 0.0499, 0.0502, 0.0501, 0.0500, 0.0501, 0.0502, 0.0499, 0.0499,
        0.0500, 0.0502, 0.0500, 0.0499, 0.0501, 0.0499, 0.0501])
layer.running_var tensor([0.9083, 0.9083, 0.9083, 0.9083, 0.9083, 0.9084, 0.9084, 0.9083, 0.9083,
        0.9083, 0.9084, 0.9083, 0.9083, 0.9083, 0.9084, 0.9083])


## ResNet Implementation

In [4]:
from torch import nn
from torch.nn import functional as F


class ResBlk(nn.Module):
    def __init__(self, ch_in, ch_out):
        self.conv1 = nn.Conv2d(
            ch_in, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(
            ch_in, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)

        self.extra = nn.Sequential()
        if ch_out != ch_in:
            # [b,ch_in,h,w] ==> [b,ch_out,h,w]
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=1),
                nn.BatchNorm2d(ch_out))

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.extra(x) + out
        return out


### **nn.Module** 模块
`nn.Module` 是所有网络层的父类，即所有自己实现的和官方已经实现的网络层均需继承自`nn.Moudle`

In [1]:
# linear layer example
import torch
from torch import nn


class MyLinear(nn.Module):
    def __init__(self, inp, outp):
        super(MyLinear, self).__init__()
        # requires_grad = True
        self.w = nn.Parameter(torch.randn(outp, inp))
        self.b = nn.Parameter(torch.randn(outp))

    def forward(self, x):
        x = x@self.w.t()+self.b
        return x


#### Magic
- Every Layer is `nn.Module`
  - `nn.Linear`
  - `nn.BatchNorm2d`
  - `nn.Conv2s`
- `nn.Module` nested in `nn.Module`

1. embed current layers
  - Linear
  - ReLu
  - Sigmoid
  - Conv2d
  - ConvTransposed2d
  - Dropout
  - etc.
2. container
  - net(x)
  ```python
  self.net = nn.Sequential(
    nn.Conv2d(1,32,5,1,1),
    nn.MaxPool2d(2,2),
    nn.RelU(True),
    nn.BatchNorm2s(32),

    nn.Conv2d(32,63,3,1,1),
    nn.RelU(True),
    nn.BatchNorm2s(64),

    nn.Conv2d(64,64,3,1,1),
    nn.MaxPool2d(2,2),
    nn.RelU(True),
    nn.BatchNorm2s(64),

    nn.Conv2d(64,128,3,1,1),
    nn.RelU(True),
    nn.BatchNorm2s(128),
  )
  ```
3. parameters management

In [20]:
import torch
from torch import nn

net = nn.Sequential(nn.Linear(4, 2), nn.Linear(2, 2))
print("2-linear-layer network has parameters of",
      len(list(net.parameters())))  # 4 parameters group in total
for i in range(len(list(net.parameters()))):
    print(list(net.parameters())[i].shape)

print(list(net.named_parameters())[0])
print(list(net.named_parameters())[1])
print(dict(net.named_parameters()).items())

# the way to iterate parameters
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)


2-linear-layer network has parameters of 4
torch.Size([2, 4])
torch.Size([2])
torch.Size([2, 2])
torch.Size([2])
('0.weight', Parameter containing:
tensor([[ 0.1828,  0.2472, -0.3861, -0.0761],
        [ 0.3324, -0.3383,  0.0449,  0.1659]], requires_grad=True))
('0.bias', Parameter containing:
tensor([0.2517, 0.3928], requires_grad=True))
dict_items([('0.weight', Parameter containing:
tensor([[ 0.1828,  0.2472, -0.3861, -0.0761],
        [ 0.3324, -0.3383,  0.0449,  0.1659]], requires_grad=True)), ('0.bias', Parameter containing:
tensor([0.2517, 0.3928], requires_grad=True)), ('1.weight', Parameter containing:
tensor([[ 0.1360, -0.6089],
        [ 0.4348,  0.4154]], requires_grad=True)), ('1.bias', Parameter containing:
tensor([ 0.2304, -0.1044], requires_grad=True))])


4. modules
  - modules: all nodes
  - children: direct children

In [None]:
import torch
from torch import nn


class BasicNet(nn.Module):
    def __init__(self):
        super(BasicNet, self).__init__()
        self.net = nn.Linear(4, 3)

    def forward(self, x):
        return self.net(x)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.net = nn.Sequential(BasicNet(),
                                 nn.ReLU(),
                                 nn.Linear(3, 2))

    def forward(self, x):
        return self.net(x)


5. to(device)<br>
example:   
   ```python
   device=torch.device('cuda')
   net=Net()
   net.to(device)
   ```

6. save and load
   ```python
   device=torch.device('cuda')
   net=Net()
   net.to(device)

   net.load_state_dict(torch.load('ckpt.mdl'))

   # train...

   torch.save(net.state_dict(), 'cpkt.mdl')
   ```

7. train/test
   ```python
   device=torch.device('cuda')
   net=Net()
   net.to(device)

   # train
   net.train()

   # test
   net.eval()
   ```

8. implement own layer

In [None]:
import torch
from torch import nn


class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, input):
        return input.view(input.size(0), -1)


class TestNet(nn.Module):
    def __init__(self):
        super(TestNet, self).__init__()
        self.net = nn.Sequential(nn.Conv2d(1, 16, stride=1, padding=1),
                                 nn.MaxPool2d(2, 2),
                                 Flatten(),
                                 nn.Linear(1*14*14, 10))

    def forward(self, x):
        return self.net(x)


## data argumentation
### Big Data
- the key to prevent overfitting
##### sample more data?
- consuming
##### limited data
- small network capacity
- regularization
- data argumentation
### Recap

In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

cifar_train = datasets.CIFAR10(
    'cifar', True, transform=transforms.Compose([transforms.Resize(32, 32), transforms.ToTensor()]), download=True)
cifar_train = DataLoader(cifar_train, batch_size=10, shuffle=True)

cifar_test = datasets.CIFAR10(
    'cifar', False, transform=transforms.Compose([transforms.Resize(32, 32), transforms.ToTensor()]), download=True)
