<a href="https://colab.research.google.com/github/boshuaiYu/CaiCai_DL/blob/main/CaiCai_DL_Week8.3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <font color="orange">**SOTA Model(state of the art models)**

## **1.1 GoogleNet**

构建架构前要先构建几个模块：
<font color="orange">
1. **conv+BN+ReLU --> BasicConv**
2. **Inception**
3. **AuxClf(辅助分类器)**



In [None]:
import torch
from torch import nn

### &emsp;&emsp;模块一：BasicConv2d

In [None]:
class BasicConv2d(nn.Module):
  def __init__(self,in_channels,out_channels,**kwargs): # **kwargs表示可以调用小模块中的所有参数
    super(BasicConv2d,self).__init__()
    self.conv = nn.Sequential(nn.Conv2d(in_channels,out_channels,bias=False,**kwargs)
                  ,nn.BatchNorm2d(out_channels)
                  ,nn.ReLU(inplace=True))
  
  def forward(self,x):
    x = self.conv(x)
    return x

In [None]:
BasicConv2d(2,10,kernel_size=3,padding=1,stride=1)

BasicConv2d(
  (conv): Sequential(
    (0): Conv2d(2, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
)

### &emsp;&emsp;模块二：Inception

In [None]:
class Inception(nn.Module):
  def __init__(self
         ,in_channels : int
         ,ch1x1 : int
         ,ch3x3red : int
         ,ch3x3 : int
         ,ch5x5red : int
         ,ch5x5 : int
         ,pool_proj : int
         ):
    super(Inception,self).__init__()

    # 1x1
    self.branch1 = BasicConv2d(in_channels,ch1x1,kernel_size=1)

    # 1x1+3x3
    self.branch2 = nn.Sequential(BasicConv2d(in_channels,ch3x3red,kernel_size=1)
                   ,BasicConv2d(ch3x3red,ch3x3,kernel_size=3,padding=1))
    
    # 1x1+5x5
    self.branch3 = nn.Sequential(BasicConv2d(in_channels,ch5x5red,kernel_size=1)
                   ,BasicConv2d(ch5x5red,ch5x5,kernel_size=5,padding=2))
    # pool+1x1
    self.branch4 = nn.Sequential(nn.MaxPool2d(kernel_size=3,stride=1,padding=1,ceil_mode=True)
                   ,BasicConv2d(in_channels,pool_proj,kernel_size=1))
    
  def forward(self,x):
    branch1 = self.branch1(x) # 28x28xch1x1
    branch2 = self.branch2(x) # 28x28xch3x3
    branch3 = self.branch3(x) # 28x28xch5x5
    branch4 = self.branch4(x) # 28x28xpool_proj
    outputs = [branch1,branch2,branch3,branch4]
    return torch.cat(outputs,1)

In [None]:
in3a = Inception(192,64,96,128,16,32,32)
in3a

Inception(
  (branch1): BasicConv2d(
    (conv): Sequential(
      (0): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (branch2): Sequential(
    (0): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
    (1): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
  )
  (branch3): Sequential(
    (0): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(192, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): 

In [None]:
data = torch.ones(10,192,28,28)
in3a(data).shape

torch.Size([10, 256, 28, 28])

### &emsp;&emsp;模块三：AuxClf(辅助分类器)

In [None]:
class AuxClf(nn.Module):
  def __init__(self,in_channels:int,num_classes:int,**kwargs):
    super(AuxClf,self).__init__()
    self.features_ = nn.Sequential(nn.AvgPool2d(kernel_size=5,stride=3)
                         ,BasicConv2d(in_channels,128,kernel_size=1))
    self.clf_ = nn.Sequential(nn.Linear(4*4*128,1024)
                  ,nn.ReLU(inplace=True)
                  ,nn.Dropout(p=0.7)
                  ,nn.Linear(1024,num_classes))
  
  def forward(self,x):
    x = self.features_(x)
    x = x.view(-1,4*4*128)
    out = self.clf_(x)
    return out

In [None]:
# 4a后的
AuxClf(512,1000)

AuxClf(
  (features_): Sequential(
    (0): AvgPool2d(kernel_size=5, stride=3, padding=0)
    (1): BasicConv2d(
      (conv): Sequential(
        (0): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
      )
    )
  )
  (clf_): Sequential(
    (0): Linear(in_features=2048, out_features=1024, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.7, inplace=False)
    (3): Linear(in_features=1024, out_features=1000, bias=True)
  )
)

### &emsp;&emsp;模块三：完整架构

In [None]:
class GoogleNet(nn.Module):
  def __init__(self,num_classes:int=1000,blocks=None):
    super(GoogleNet,self).__init__()

    if blocks is None: # 对blocks进行处理，简化后续
      blocks = [BasicConv2d,Inception,AuxClf]
    conv_block = blocks[0]
    inception_block = blocks[1]
    auxclf_block = blocks[2]

    # block1
    self.conv1 = conv_block(3,64,kernel_size=7,stride=2,padding=3)
    self.maxpool1 = nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)  # 向上取整

    # block2
    self.conv2 = conv_block(64,64,kernel_size=1)
    self.conv3 = conv_block(64,192,kernel_size=3,padding=1)
    self.maxpool2 = nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)

    # block3
    self.inception3a = inception_block(192,64,96,128,16,32,32)
    self.inception3b = inception_block(256,128,128,192,32,96,64)
    self.maxpool3 = nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)

    # block4
    self.inception4a = inception_block(480,192,96,208,16,48,64)
    self.inception4b = inception_block(512,160,112,224,24,64,64)
    self.inception4c = inception_block(512,128,128,256,24,64,64)
    self.inception4d = inception_block(512,112,144,288,32,64,64)
    self.inception4e = inception_block(528,256,160,320,32,128,128)
    self.maxpool4 = nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True)

    # block5
    self.inception5a = inception_block(832,256,160,320,32,128,128)
    self.inception5b = inception_block(832,384,192,384,48,128,128)

    # clf
    self.avgpool = nn.AdaptiveAvgPool2d((1,1)) # 给出需要的特征图尺寸就可以
    self.dropout = nn.Dropout(0.4)
    self.fc = nn.Linear(1024,num_classes)

    # auxclf
    self.aux1 = auxclf_block(512,num_classes) # 4a
    self.aux2 = auxclf_block(528,num_classes) # 4d
  
  def forward(self,x):

    # block1
    x = self.maxpool1(self.conv1(x))

    # block2
    x = self.maxpool2(self.conv3(self.conv2(x)))

    # block3
    x = self.inception3a(x)
    x = self.inception3b(x)
    x = self.maxpool3(x)

    # block4
    x = self.inception4a(x)
    aux1 = self.aux1(x)

    x = self.inception4b(x)
    x = self.inception4c(x)
    x = self.inception4d(x)
    aux2 = self.aux2(x)

    x = self.inception4e(x)
    x = self.maxpool4(x)

    # block5
    x = self.inception5a(x)
    x = self.inception5b(x)

    # clf
    x = self.avgpool(x) # 全局平均池化之后，特征图变成1x1
    x = torch.flatten(x,1)
    x = self.dropout(x)
    out = self.fc(x)

    return out,aux2,aux1  

In [None]:
data = torch.ones(10,3,224,224)
net = GoogleNet(num_classes=1000)

In [None]:
fc2,fc1,fc0 = net(data)

In [None]:
for i in [fc2,fc1,fc0]:
  print(i.shape)

torch.Size([10, 1000])
torch.Size([10, 1000])
torch.Size([10, 1000])


In [None]:
!pip install torchinfo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1


In [None]:
from torchinfo import summary
summary(net,input_size=(10,3,224,224),depth=1)

Layer (type:depth-idx)                        Output Shape              Param #
GoogleNet                                     [10, 1000]                --
├─BasicConv2d: 1-1                            [10, 64, 112, 112]        9,536
├─MaxPool2d: 1-2                              [10, 64, 56, 56]          --
├─BasicConv2d: 1-3                            [10, 64, 56, 56]          4,224
├─BasicConv2d: 1-4                            [10, 192, 56, 56]         110,976
├─MaxPool2d: 1-5                              [10, 192, 28, 28]         --
├─Inception: 1-6                              [10, 256, 28, 28]         164,064
├─Inception: 1-7                              [10, 480, 28, 28]         389,376
├─MaxPool2d: 1-8                              [10, 480, 14, 14]         --
├─Inception: 1-9                              [10, 512, 14, 14]         376,800
├─AuxClf: 1-10                                [10, 1000]                3,188,968
├─Inception: 1-11                             [10, 512, 14, 14

## **1.2 ResNet**

### **模块一、小卷积层(3x3、1x1)**

In [3]:
import torch
from torch import nn
from typing import Type, Union, List, Optional

In [None]:
# basicConv(conv3x3,conv1x1) 
# Residual unit, Bottleneck

In [4]:
def conv3x3(in_, out_, stride=1 ,intialzero=False): # 大多数发生的情况用默认值
  bn = nn.BatchNorm2d(out_)
  # 判断是否对bn层进行0初始化：只有最后一层才进行0初始化；不是最后一层不改变gamma和belta
  if intialzero == True:
    nn.init.constant_(bn.weight,0)
  return nn.Sequential(nn.Conv2d(in_,out_,kernel_size=3,padding=1,stride=stride,bias=False)
             ,bn)

In [5]:
conv3x3(2,10)

Sequential(
  (0): Conv2d(2, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [6]:
def conv1x1(in_, out_, stride=1 ,intialzero=False): # 大多数发生的情况用默认值
  bn = nn.BatchNorm2d(out_)
  # 判断是否对bn层进行0初始化：只有最后一层才进行0初始化；不是最后一层不改变gamma和belta
  if intialzero == True:
    nn.init.constant_(bn.weight,0)
  return nn.Sequential(nn.Conv2d(in_,out_,kernel_size=1,padding=0,stride=stride,bias=False)
             ,bn)

In [7]:
conv1x1(2,10,1,True)[1].weight # 进行了0初始化

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

### **模块二、残差单元(Residual Unit)**

In [8]:
class ResidualUnit(nn.Module):
  def __init__(self,out_:int,stride1:int=1,in_:Optional[int]=None):
    super(ResidualUnit,self).__init__()

    # 判断stride1是否为2，如果为2，则特征图尺寸会发生变化，需要在跳跃连接中加上1x1卷积层

    self.stride1 = stride1 # 表示stride1的属性，否则forward无法调用

    # 判断特征图是否会缩小
    # 当特征图需要缩小时，卷积层的输出特征图数 out_= 2 * in_
    # 当特征图不需要缩小时，卷积层的输出特征图 out_ = in_

    if stride1 != 1:
      in_ = int(out_/2)
    else:
      in_ = out_

    # 拟合部分，输出F(x)
    self.fit_ = nn.Sequential(conv3x3(in_,out_,stride=stride1)
                  ,nn.ReLU(inplace=True)
                  ,conv3x3(out_,out_,intialzero=True))
    
    # 跳跃连接，输出x
    self.skipconv = conv1x1(in_,out_,stride=stride1)

    # 放在H(x)之后的激活函数
    self.relu = nn.ReLU(inplace=True)

  def forward(self,x):
    fx = self.fit_(x) # 拟合结果
    if self.stride1 != 1:
      x = self.skipconv(x) # 跳跃连接
    hx = self.relu(x+fx)
    return hx

In [9]:
data = torch.ones(size=(10,64,56,56))

In [10]:
conv3_x_18_0 = ResidualUnit(out_=128,stride1=2)

In [11]:
conv3_x_18_0(data).shape

torch.Size([10, 128, 28, 28])

In [12]:
conv2_x_18_0 = ResidualUnit(out_=64)
conv2_x_18_0(data).shape

torch.Size([10, 64, 56, 56])

### **模块三、Bottleneck**

In [13]:
class Bottleneck(nn.Module):
  def __init__(self,middle_out,stride1:int=1,in_:Optional[int]=None):
    super(Bottleneck,self).__init__()

    out_ = 4 * middle_out

    # 判断是否需要将特征图尺寸缩小
    if in_ == None:
      if stride1 != 1:
        in_ = middle_out * 2
      else:
        in_ = middle_out * 4


    self.fit_ = nn.Sequential(conv1x1(in_,middle_out,stride=stride1)
                  ,nn.ReLU(inplace=True)
                  ,conv3x3(middle_out,middle_out)
                  ,nn.ReLU(inplace=True)
                  ,conv1x1(middle_out,out_,intialzero=True))
    
    self.relu = nn.ReLU(inplace=True)

    self.skipconv = conv1x1(in_,out_,stride=stride1)
    
    self.stride1 = stride1

  
  def forward(self,x):
    fx = self.fit_(x)
    x = self.skipconv(x)
    hx = self.relu(x+fx)
    return hx

In [14]:
data1 = torch.ones(10,64,56,56)

In [15]:
# conv1后紧跟的第一个瓶颈结构
conv2_x_101_0 = Bottleneck(in_=64,middle_out=64)

In [16]:
conv2_x_101_0(data1).shape

torch.Size([10, 256, 56, 56])

In [17]:
# 不是conv1后紧跟的第一个瓶颈结构，但需要缩小特征图尺寸
data2 = torch.ones(10,256,56,56)
conv3_x_101_0 = Bottleneck(middle_out=128,stride1=2)
conv3_x_101_0(data2).shape

torch.Size([10, 512, 28, 28])

In [18]:
# 不是conv1后紧跟的第一个瓶颈结构，也不需要缩小特征图尺寸
data3 = torch.ones(10,512,28,28)
conv3_x_101_1 = Bottleneck(middle_out=128)
conv3_x_101_1(data3).shape # 输出翻不变，特征图尺寸不变

torch.Size([10, 512, 28, 28])

### **模块四、生成每一个layers**

In [59]:
def make_layers(block: Type[Union[ResidualUnit,Bottleneck]] # 块的类型，可选Residua或Bottleneck，根据选择的架构块类型，判断残差网络的深浅
        ,middle_out: int
        ,num_blocks: int
        ,afterconv1: bool=False
        ): # 有默认值的参数应该放在无默认值后面
  
  layers = []
  if afterconv1 == True:
    layers.append(block(middle_out,in_=64))
  else:
    layers.append(block(middle_out,stride1=2))

  for i in range(num_blocks-1):
    layers.append(block(middle_out))

  return nn.Sequential(*layers) # 星号解析列表/储存器,变为类

In [37]:
lay_4_conv4_x = make_layers(ResidualUnit,256,6,False)

In [49]:
# 34层网络，conv2_x，紧跟在conv1后的首个架构
# 不缩小特征图尺寸，每层的输出都是64，3个块、
conv2_x_34 = make_layers(ResidualUnit,64,3,True)
datashape = (10,64,56,56)

In [46]:
!pip install torchinfo
from torchinfo import summary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [50]:
summary(conv2_x_34,input_size=datashape,depth=1)

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [10, 64, 56, 56]          --
├─ResidualUnit: 1-1                      [10, 64, 56, 56]          78,208
├─ResidualUnit: 1-2                      [10, 64, 56, 56]          78,208
├─ResidualUnit: 1-3                      [10, 64, 56, 56]          78,208
Total params: 234,624
Trainable params: 234,624
Non-trainable params: 0
Total mult-adds (G): 6.94
Input size (MB): 8.03
Forward/backward pass size (MB): 192.68
Params size (MB): 0.89
Estimated Total Size (MB): 201.59

In [55]:
conv2_x_101 = make_layers(Bottleneck,64,3,afterconv1=True)
summary(conv2_x_101,datashape,depth=3,device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [10, 256, 56, 56]         --
├─Bottleneck: 1-1                        [10, 256, 56, 56]         --
│    └─Sequential: 2-1                   [10, 256, 56, 56]         --
│    │    └─Sequential: 3-1              [10, 64, 56, 56]          4,224
│    │    └─ReLU: 3-2                    [10, 64, 56, 56]          --
│    │    └─Sequential: 3-3              [10, 64, 56, 56]          36,992
│    │    └─ReLU: 3-4                    [10, 64, 56, 56]          --
│    │    └─Sequential: 3-5              [10, 256, 56, 56]         16,896
│    └─Sequential: 2-2                   [10, 256, 56, 56]         --
│    │    └─Conv2d: 3-6                  [10, 256, 56, 56]         16,384
│    │    └─BatchNorm2d: 3-7             [10, 256, 56, 56]         512
│    └─ReLU: 2-3                         [10, 256, 56, 56]         --
├─Bottleneck: 1-2                        [10, 256, 56, 56]         --

In [58]:
datashape3 = (10,512,28,28)
conv4_x_101 = make_layers(Bottleneck,256,23)
summary(conv4_x_101,datashape3,depth=1,device="cpu")

Layer (type:depth-idx)                   Output Shape              Param #
Sequential                               [10, 1024, 14, 14]        --
├─Bottleneck: 1-1                        [10, 1024, 14, 14]        1,512,448
├─Bottleneck: 1-2                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-3                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-4                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-5                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-6                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-7                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-8                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-9                        [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-10                       [10, 1024, 14, 14]        2,167,808
├─Bottleneck: 1-11                       [10, 1024, 14, 14]        2,167,808
├─Bottle

### **模块五、完整架构**

In [69]:
class ResNet(nn.Module):
  def __init__(self,block:Type[Union[ResidualUnit,Bottleneck]],layers:List[int],num_classes:int=1000):
    super(ResNet,self).__init__()

    # layer1:卷积+池化
    self.layer1 = nn.Sequential(nn.Conv2d(3,64,kernel_size=7,stride=2,padding=3,bias=False)
                   ,nn.BatchNorm2d(64)
                   ,nn.ReLU(inplace=True)
                   ,nn.MaxPool2d(kernel_size=3,stride=2,ceil_mode=True))
    # layer2-5:残差块/瓶颈结构
    self.layer2_x = make_layers(block,64,layers[0],afterconv1=True)
    self.layer3_x = make_layers(block,128,layers[1])
    self.layer4_x = make_layers(block,256,layers[2])
    self.layer5_x = make_layers(block,512,layers[3])

    # 全局平均池化
    self.avgpool = nn.AdaptiveAvgPool2d((1,1))

    if block == ResidualUnit:
      self.fc = nn.Linear(512,num_classes)
    else:
      self.fc = nn.Linear(2048,num_classes)
  
  def forward(self,x):
    x = self.layer1(x)
    x = self.layer5_x(self.layer4_x(self.layer3_x(self.layer2_x(x))))
    x = self.avgpool(x) # 特征图尺寸1x1(n_samples,fc,1,1)
    x = torch.flatten(x,1)
    x = self.fc(x)

In [67]:
datashape = (10,3,224,224)

In [72]:
resnet34 = ResNet(ResidualUnit,layers=[3,4,6,3],num_classes=1000)
summary(resnet34,datashape,depth=2,device="cpu")

Layer (type:depth-idx)                        Output Shape              Param #
ResNet                                        --                        --
├─Sequential: 1-1                             [10, 64, 56, 56]          --
│    └─Conv2d: 2-1                            [10, 64, 112, 112]        9,408
│    └─BatchNorm2d: 2-2                       [10, 64, 112, 112]        128
│    └─ReLU: 2-3                              [10, 64, 112, 112]        --
│    └─MaxPool2d: 2-4                         [10, 64, 56, 56]          --
├─Sequential: 1-2                             [10, 64, 56, 56]          --
│    └─ResidualUnit: 2-5                      [10, 64, 56, 56]          78,208
│    └─ResidualUnit: 2-6                      [10, 64, 56, 56]          78,208
│    └─ResidualUnit: 2-7                      [10, 64, 56, 56]          78,208
├─Sequential: 1-3                             [10, 128, 28, 28]         --
│    └─ResidualUnit: 2-8                      [10, 128, 28, 28]         230,144

In [74]:
resnet101 = ResNet(Bottleneck,layers=[3,4,23,3],num_classes=1000)
summary(resnet101,datashape,depth=2,device="cpu")

Layer (type:depth-idx)                        Output Shape              Param #
ResNet                                        --                        --
├─Sequential: 1-1                             [10, 64, 56, 56]          --
│    └─Conv2d: 2-1                            [10, 64, 112, 112]        9,408
│    └─BatchNorm2d: 2-2                       [10, 64, 112, 112]        128
│    └─ReLU: 2-3                              [10, 64, 112, 112]        --
│    └─MaxPool2d: 2-4                         [10, 64, 56, 56]          --
├─Sequential: 1-2                             [10, 256, 56, 56]         --
│    └─Bottleneck: 2-5                        [10, 256, 56, 56]         75,008
│    └─Bottleneck: 2-6                        [10, 256, 56, 56]         136,448
│    └─Bottleneck: 2-7                        [10, 256, 56, 56]         136,448
├─Sequential: 1-3                             [10, 512, 28, 28]         --
│    └─Bottleneck: 2-8                        [10, 512, 28, 28]         379,3