<a href="https://colab.research.google.com/github/boshuaiYu/CaiCai_DL/blob/main/CaiCai_DL_Week8.2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **构筑自己的卷积神经网络模型**

## **VGG16的复现**

In [None]:
import torch
from torch import nn,optim

In [None]:
data = torch.ones(size=(100,3,224,224))

In [None]:
class VGG16(nn.Module):
  """VGG16 spelled out layer by layer: 13 convolutions, 5 max-pools, 3 linear layers.

  Every convolution is 3x3 with stride 1 and padding 1, so only the 2x2
  max-pools change the spatial size (each halves it). fc1 expects a flattened
  512x7x7 feature map, which is what a 3x224x224 input produces after the
  five pools. The classifier head emits 10 classes.
  """

  def __init__(self):
    super(VGG16,self).__init__()
    # block1: 3 -> 64 channels
    self.conv1 = nn.Conv2d(3,64,kernel_size=3,stride=1,padding=1)
    self.conv2 = nn.Conv2d(64,64,3,stride=1,padding=1)
    self.pool1 = nn.MaxPool2d(2)
    # block2: 64 -> 128 channels
    self.conv3 = nn.Conv2d(64,128,kernel_size=3,stride=1,padding=1)
    self.conv4 = nn.Conv2d(128,128,kernel_size=3,stride=1,padding=1)
    self.pool2 = nn.MaxPool2d(2)
    # block3: 128 -> 256 channels
    self.conv5 = nn.Conv2d(128,256,kernel_size=3,stride=1,padding=1)
    self.conv6 = nn.Conv2d(256,256,kernel_size=3,stride=1,padding=1)
    self.conv7 = nn.Conv2d(256,256,kernel_size=3,stride=1,padding=1)
    self.pool3 = nn.MaxPool2d(2)
    # block4: 256 -> 512 channels
    self.conv8 = nn.Conv2d(256,512,kernel_size=3,stride=1,padding=1)
    self.conv9 = nn.Conv2d(512,512,kernel_size=3,stride=1,padding=1)
    self.conv10 = nn.Conv2d(512,512,kernel_size=3,stride=1,padding=1)
    self.pool4 = nn.MaxPool2d(2)
    # block5: 512 -> 512 channels
    self.conv11 = nn.Conv2d(512,512,kernel_size=3,stride=1,padding=1)
    self.conv12 = nn.Conv2d(512,512,kernel_size=3,stride=1,padding=1)
    self.conv13 = nn.Conv2d(512,512,kernel_size=3,stride=1,padding=1)
    self.pool5 = nn.MaxPool2d(2)
    # fully connected layers
    self.fc1 = nn.Linear(7*7*512,4096)
    self.fc2 = nn.Linear(4096,4096)
    self.fc3 = nn.Linear(4096,10)
    # one Dropout module is shared by both hidden linear layers (it holds no weights)
    self.dropout = nn.Dropout(p=0.5)

  def forward(self,x):
    """Run the network on `x` and return softmax probabilities over the 10 classes.

    `x` must produce a 512x7x7 feature map after the five pools (e.g. a
    batch of 3x224x224 images); the result then has shape (batch, 10).
    """
    # block1
    x = torch.relu(self.conv1(x))
    x = torch.relu(self.conv2(x))
    x = self.pool1(x)
    # block2
    x = torch.relu(self.conv3(x))
    x = torch.relu(self.conv4(x))
    x = self.pool2(x)
    # block3
    x = torch.relu(self.conv5(x))
    x = torch.relu(self.conv6(x))
    x = torch.relu(self.conv7(x))
    x = self.pool3(x)
    # block4
    x = torch.relu(self.conv8(x))
    x = torch.relu(self.conv9(x))
    x = torch.relu(self.conv10(x))
    x = self.pool4(x)
    # block5
    x = torch.relu(self.conv11(x))
    x = torch.relu(self.conv12(x))
    x = torch.relu(self.conv13(x))
    x = self.pool5(x)
    # fully connected layers: flatten each sample's feature maps into one row
    x = x.reshape(-1,7*7*512)
    x = torch.relu(self.fc1(self.dropout(x)))
    x = torch.relu(self.fc2(self.dropout(x)))
    out = torch.softmax(self.fc3(x),1)
    # The result must be returned explicitly; without this line calling the
    # module evaluates to None.
    return out

In [None]:
net = VGG16()

In [None]:
!pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1


In [None]:
from torchinfo import summary

In [None]:
summary(net,input_size=(10,3,224,224),device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
VGG16                                    --                        --
├─Conv2d: 1-1                            [10, 64, 224, 224]        1,792
├─Conv2d: 1-2                            [10, 64, 224, 224]        36,928
├─MaxPool2d: 1-3                         [10, 64, 112, 112]        --
├─Conv2d: 1-4                            [10, 128, 112, 112]       73,856
├─Conv2d: 1-5                            [10, 128, 112, 112]       147,584
├─MaxPool2d: 1-6                         [10, 128, 56, 56]         --
├─Conv2d: 1-7                            [10, 256, 56, 56]         295,168
├─Conv2d: 1-8                            [10, 256, 56, 56]         590,080
├─Conv2d: 1-9                            [10, 256, 56, 56]         590,080
├─MaxPool2d: 1-10                        [10, 256, 28, 28]         --
├─Conv2d: 1-11                           [10, 512, 28, 28]         1,180,160
├─Conv2d: 1-12                           [10, 5

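每一层的感受野计算公式：
<font color="orange">这一层的感受野 = 上一层的感受野 + (这一层的核尺寸 - 1) × 连乘(从最初的一层到上一层的步长)</font>

即 $r_l = r_{l-1} + (k_l - 1)\prod_{i=1}^{l-1} s_i$，其中 $r_0 = 1$，$k_l$ 为第 $l$ 层的核尺寸，$s_i$ 为第 $i$ 层的步长。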

### **用receptive_field自动计算感受野大小**

In [None]:
import sys
sys.path.append("/content/drive/MyDrive")

In [None]:
from torch_receptive_field import receptive_field

In [None]:
data = torch.ones(size=(10,1,32,32))

In [None]:
class LeNet5(nn.Module):
  """Convolutional part of LeNet-5 (two conv + pool stages) with a softmax on top.

  The linear layers are deliberately left commented out, so the network
  consists only of the convolution and pooling layers. A 1x32x32 input ends
  up as a 16x5x5 feature map, which is flattened to 400 values per sample.
  """

  def __init__(self):
    super(LeNet5,self).__init__()
    self.conv1 = nn.Conv2d(1,6,5)
    self.pool1 = nn.MaxPool2d(kernel_size=2,stride=2) # alternative: nn.AvgPool2d(2)
    self.conv2 = nn.Conv2d(6,16,5)
    self.pool2 = nn.MaxPool2d(kernel_size=2,stride=2) # alternative: nn.AvgPool2d(2)
    # self.fc1 = nn.Linear(5*5*16,120)
    # self.fc2 = nn.Linear(120,84)

  def forward(self,x):
    """Return the softmax (over dim 1) of the flattened 16x5x5 feature map."""
    x = torch.tanh(self.conv1(x))
    x = self.pool1(x)
    x = torch.tanh(self.conv2(x))
    x = self.pool2(x)
    x = x.view(-1,5*5*16)
    # x = torch.tanh(self.fc1(x))
    out = torch.softmax(x,dim=1)
    # The result must be returned explicitly; without this line calling the
    # module evaluates to None.
    return out

In [None]:
net1 = LeNet5()
net1(data)

In [None]:
receptive = receptive_field(net1,(1,32,32))

------------------------------------------------------------------------------
        Layer (type)    map size      start       jump receptive_field 
        0               [32, 32]        0.5        1.0             1.0 
        1               [28, 28]        2.5        1.0             5.0 
        2               [14, 14]        3.0        2.0             6.0 
        3               [10, 10]        7.0        2.0            14.0 
        4                 [5, 5]        8.0        4.0            16.0 


In [None]:
import torch
from torch import nn

In [None]:
conv1 = nn.Conv2d(3,6,3) # 3*3*3*6+6=168
conv2 = nn.Conv2d(6,4,3) # 3*3*6*4+4=220

In [None]:
conv1.weight.numel()

162

In [None]:
conv1.bias.numel()

6

In [None]:
conv2.weight.numel()

216

In [None]:
conv2.bias.numel()

4

In [None]:
conv1 = nn.Conv2d(4,8,3) # 9*32+8=296
conv2 = nn.Conv2d(4,8,3,groups=2) # (9*32)/2+8=152

In [None]:
conv1.weight.numel()

288

In [None]:
conv1.bias.numel()

8

In [None]:
conv2.weight.numel()

144

In [None]:
conv2.bias.numel()

8

In [None]:
# 普通卷积 = (ksh * ksw)*c_in*c_out
# 深度可分离卷积 = (ksh * ksw)*c_out_depth + c_in_pair * c_out_pair  (逐深度卷积 groups=c_in_depth，其中 c_in_pair = c_out_depth)
# 比例 = 1/c_in_depth + c_out_pair/((ksh * ksw)*c_in_depth)

In [None]:
conv1 = nn.Conv2d(4,8,3,bias=False) # 3*3*4*8=288

In [None]:
conv1_depthwise = nn.Conv2d(4,8,3,groups=4,bias=False) # 1/4*288=72
conv11_pairwise = nn.Conv2d(8,8,1,bias=False) # 8*8=64
# ratio = 1/4 + 8/(3*3*4)= 0.4722222222222222

In [None]:
from torch.nn import functional as F

In [None]:
!pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1


In [None]:
from torchinfo import summary

In [None]:
data = torch.ones(size=(10,3,229,229))
class Model(nn.Module):
  """Small CNN: two conv blocks followed by a three-layer classifier head.

  linear1 expects a flattened 3x9x9 feature map, which is what a 3x229x229
  input produces after the two blocks. The head emits 10 classes.
  """

  def __init__(self):
    super().__init__()
    # block1: two unpadded 3x3 convolutions, then a 2x2 max-pool
    self.conv1 = nn.Conv2d(3,6,3)
    self.conv2 = nn.Conv2d(6,4,3)
    self.pool1 = nn.MaxPool2d(2)
    # block2: two strided 5x5 convolutions, then a 2x2 max-pool
    self.conv3 = nn.Conv2d(4,16,5,stride=2,padding=1)
    self.conv4 = nn.Conv2d(16,3,5,stride=3,padding=2)
    self.pool2 = nn.MaxPool2d(2)
    # fully connected layers
    self.linear1 = nn.Linear(9*9*3,256)
    self.linear2 = nn.Linear(256,256)
    self.linear3 = nn.Linear(256,10)

  def forward(self,x):
    """Run the network on `x` and return softmax probabilities, shape (batch, 10)."""
    x = F.relu(self.conv1(x))
    x = self.pool1(F.relu(self.conv2(x)))
    x = F.relu(self.conv3(x))
    x = self.pool2(F.relu(self.conv4(x)))
    x = x.view(-1,9*9*3)
    # F.dropout defaults to training=True, so the module's own flag has to be
    # passed in; otherwise dropout stays active after model.eval().
    x = F.relu(self.linear1(F.dropout(x,p=0.5,training=self.training)))
    x = F.relu(self.linear2(F.dropout(x,p=0.5,training=self.training)))
    output = F.softmax(self.linear3(x),dim=1)
    # The result must be returned explicitly; without this line calling the
    # module evaluates to None.
    return output

In [None]:
net = Model()

In [None]:
summary(net,input_size=(10,3,229,229))

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    --                        --
├─Conv2d: 1-1                            [10, 6, 227, 227]         168
├─Conv2d: 1-2                            [10, 4, 225, 225]         220
├─MaxPool2d: 1-3                         [10, 4, 112, 112]         --
├─Conv2d: 1-4                            [10, 16, 55, 55]          1,616
├─Conv2d: 1-5                            [10, 3, 19, 19]           1,203
├─MaxPool2d: 1-6                         [10, 3, 9, 9]             --
├─Linear: 1-7                            [10, 256]                 62,464
├─Linear: 1-8                            [10, 256]                 65,792
├─Linear: 1-9                            [10, 10]                  2,570
Total params: 134,033
Trainable params: 134,033
Non-trainable params: 0
Total mult-adds (M): 252.48
Input size (MB): 6.29
Forward/backward pass size (MB): 44.93
Params size (MB): 0.54
Estimated Total Size (

## **nn.Sequential构建神经网络写法(类似于pipeline)**

In [None]:
data = torch.ones(size=(10,3,229,229))

In [None]:
# Convolution + pooling pipeline: nn.Sequential applies the modules in the
# order listed, each one consuming the previous module's output.
net = nn.Sequential(
    # two unpadded 3x3 convolutions followed by a 2x2 max-pool
    nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3),
    nn.ReLU(inplace=True),
    nn.Conv2d(in_channels=6, out_channels=4, kernel_size=3),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),
    # two strided 5x5 convolutions followed by a 2x2 max-pool
    nn.Conv2d(in_channels=4, out_channels=16, kernel_size=5, stride=2, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(in_channels=16, out_channels=3, kernel_size=5, stride=3, padding=2),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),
)

In [None]:
net(data).shape # 卷积+池化操作之后的特征图尺寸的大小及特征图的数目

torch.Size([10, 3, 9, 9])

In [None]:
import sys
sys.path.append("/content/drive/MyDrive")
from torch_receptive_field import receptive_field

In [None]:
rfdict = receptive_field(net,(3,229,229))

------------------------------------------------------------------------------
        Layer (type)    map size      start       jump receptive_field 
        0             [229, 229]        0.5        1.0             1.0 
        1             [227, 227]        1.5        1.0             3.0 
        2             [227, 227]        1.5        1.0             3.0 
        3             [225, 225]        2.5        1.0             5.0 
        4             [225, 225]        2.5        1.0             5.0 
        5             [112, 112]        3.0        2.0             6.0 
        6               [55, 55]        5.0        4.0            14.0 
        7               [55, 55]        5.0        4.0            14.0 
        8               [19, 19]        5.0       12.0            30.0 
        9               [19, 19]        5.0       12.0            30.0 
        10                [9, 9]       11.0       24.0            42.0 


In [None]:
class VGG16(nn.Module):
  """VGG16 assembled from two nn.Sequential pipelines.

  `features_` holds the five conv blocks (3x3 convolutions with padding 1,
  each followed by ReLU, and a 2x2 max-pool closing every block). `clf_` is
  the classifier head: dropout + linear layers ending in a 1000-way softmax.
  The head expects a flattened 512x7x7 feature map.
  """

  def __init__(self):
    super().__init__()

    # (output channels, number of convolutions) for each of the five blocks
    stages = [(64,2),(128,2),(256,3),(512,3),(512,3)]
    extractor = []
    channels = 3
    for width,depth in stages:
      for _ in range(depth):
        extractor.append(nn.Conv2d(channels,width,3,padding=1))
        extractor.append(nn.ReLU(inplace=True))
        channels = width
      extractor.append(nn.MaxPool2d(2))
    self.features_ = nn.Sequential(*extractor)

    head = [
      nn.Dropout(0.5),
      nn.Linear(512*7*7,4096),
      nn.ReLU(inplace=True),
      nn.Dropout(0.5),
      nn.Linear(4096,4096),
      nn.ReLU(inplace=True),
      nn.Linear(4096,1000),
      nn.Softmax(dim=1),
    ]
    self.clf_ = nn.Sequential(*head)

  def forward(self,x):
    """Return class probabilities of shape (batch, 1000) for the images in `x`."""
    feature_maps = self.features_(x) # extract features with the conv pipeline
    flattened = feature_maps.view(-1,512*7*7) # one flat row per sample
    return self.clf_(flattened)

In [None]:
vgg = VGG16()

In [None]:
summary(vgg,input_size=(10,3,229,229),device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
VGG16                                    [10, 1000]                --
├─Sequential: 1-1                        [10, 512, 7, 7]           --
│    └─Conv2d: 2-1                       [10, 64, 229, 229]        1,792
│    └─ReLU: 2-2                         [10, 64, 229, 229]        --
│    └─Conv2d: 2-3                       [10, 64, 229, 229]        36,928
│    └─ReLU: 2-4                         [10, 64, 229, 229]        --
│    └─MaxPool2d: 2-5                    [10, 64, 114, 114]        --
│    └─Conv2d: 2-6                       [10, 128, 114, 114]       73,856
│    └─ReLU: 2-7                         [10, 128, 114, 114]       --
│    └─Conv2d: 2-8                       [10, 128, 114, 114]       147,584
│    └─ReLU: 2-9                         [10, 128, 114, 114]       --
│    └─MaxPool2d: 2-10                   [10, 128, 57, 57]         --
│    └─Conv2d: 2-11                      [10, 256, 57, 57]         29

In [None]:
# VGG16 feature extractor on its own (no classifier head): five blocks of
# padded 3x3 convolutions + ReLU, each block closed by a 2x2 max-pool.
net = nn.Sequential(
    # block 1: 3 -> 64 channels
    nn.Conv2d(3, 64, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(64, 64, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),
    # block 2: 64 -> 128 channels
    nn.Conv2d(64, 128, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(128, 128, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),
    # block 3: 128 -> 256 channels
    nn.Conv2d(128, 256, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),
    # block 4: 256 -> 512 channels
    nn.Conv2d(256, 512, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),
    # block 5: 512 -> 512 channels
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(512, 512, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2),
)

In [None]:
data = torch.ones(size=(10,3,224,224))

In [None]:
net(data).shape # 512个特征图，特征图尺寸为7*7

torch.Size([10, 512, 7, 7])

## **NiN网络架构**

In [3]:
!pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1


In [1]:
import torch
from torch import nn

In [4]:
from torchinfo import summary

In [7]:
data = torch.ones(size=(10,3,32,32))

In [8]:
class NiN(nn.Module):
  """Network-in-Network style classifier built from three conv blocks.

  Each block is one spatial convolution followed by two 1x1 convolutions,
  every convolution followed by ReLU. Blocks 1 and 2 end with a 3x3 / stride 2
  max-pool and dropout; block 3 reduces to 10 channels, average-pools with a
  7x7 window and applies softmax over the channel dimension. There are no
  linear layers, so a 3x32x32 input yields an output of shape (batch, 10, 1, 1).
  """

  @staticmethod
  def _conv_relu(c_in,c_out,k,pad=0):
    """Return a [Conv2d, in-place ReLU] pair, ready to unpack into nn.Sequential."""
    return [nn.Conv2d(c_in,c_out,k,padding=pad),nn.ReLU(inplace=True)]

  def __init__(self):
    super().__init__()
    pair = self._conv_relu

    # block1
    self.block1 = nn.Sequential(
      *pair(3,192,5,pad=2),
      *pair(192,160,1),
      *pair(160,96,1),
      nn.MaxPool2d(3,stride=2),
      # dropout p can be smaller after conv layers and larger after linear layers
      nn.Dropout(p=0.25),
    )
    # block2
    self.block2 = nn.Sequential(
      *pair(96,192,5,pad=2),
      *pair(192,192,1),
      *pair(192,192,1),
      nn.MaxPool2d(3,stride=2),
      nn.Dropout(p=0.25),
    )
    # block3
    self.block3 = nn.Sequential(
      *pair(192,192,3,pad=1),
      *pair(192,192,1),
      *pair(192,10,1),
      nn.AvgPool2d(7,stride=1),
      nn.Softmax(dim=1),
    )

  def forward(self,x):
    """Pass `x` through block1, block2 and block3 in turn and return the result."""
    for stage in (self.block1,self.block2,self.block3):
      x = stage(x)
    return x

net = NiN()

In [9]:
net(data).shape # 10个特征图，每个特征图尺寸是1x1

torch.Size([10, 10, 1, 1])

In [10]:
summary(net,input_size=(10,3,32,32))

Layer (type:depth-idx)                   Output Shape              Param #
NiN                                      [10, 10, 1, 1]            --
├─Sequential: 1-1                        [10, 96, 15, 15]          --
│    └─Conv2d: 2-1                       [10, 192, 32, 32]         14,592
│    └─ReLU: 2-2                         [10, 192, 32, 32]         --
│    └─Conv2d: 2-3                       [10, 160, 32, 32]         30,880
│    └─ReLU: 2-4                         [10, 160, 32, 32]         --
│    └─Conv2d: 2-5                       [10, 96, 32, 32]          15,456
│    └─ReLU: 2-6                         [10, 96, 32, 32]          --
│    └─MaxPool2d: 2-7                    [10, 96, 15, 15]          --
│    └─Dropout: 2-8                      [10, 96, 15, 15]          --
├─Sequential: 1-2                        [10, 192, 7, 7]           --
│    └─Conv2d: 2-9                       [10, 192, 15, 15]         460,992
│    └─ReLU: 2-10                        [10, 192, 15, 15]         -