# LeNet5 与 AlexNet

### LeNet5 现代CNN的奠基者

In [None]:
# 输入-> (卷积+池化)-> (卷积+池化)-> (线性*2)-> 输出

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

In [None]:
# Dummy input batch of ones with shape (10, 1, 32, 32), used to exercise the LeNet5 model.
data = torch.ones(size=(10,1,32,32))

In [None]:
class Model(nn.Module):
    """LeNet-5 style CNN: (conv + avg-pool) x 2 followed by two linear layers.

    Expects input of shape (batch, 1, 32, 32); the flatten step hard-codes
    the resulting 16 x 5 x 5 feature map.

    forward() returns softmax probabilities of shape (batch, 84).
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(1,6,5)
        self.pool1 = nn.AvgPool2d(kernel_size=2,stride=2)
        self.conv2 = nn.Conv2d(6,16,5)
        self.pool2 = nn.AvgPool2d(2)
        self.fc1 = nn.Linear(5*5*16,120) # weight has shape (120, 400)
        self.fc2 = nn.Linear(120,84)

    def forward(self,x):
        """Run the forward pass and return per-sample softmax probabilities."""
        # torch.tanh replaces the deprecated F.tanh alias; the result is identical.
        x = torch.tanh(self.conv1(x))
        x = self.pool1(x)
        x = torch.tanh(self.conv2(x))
        x = self.pool2(x)
        # Flatten the feature maps before the linear layers.
        x = x.view(-1,5*5*16) # -1 is a placeholder; that dimension is inferred
        x = torch.tanh(self.fc1(x))
        output = F.softmax(self.fc2(x),dim=1) # (samples, features)
        # The original omitted this return, so calling the model produced None.
        return output

In [4]:
net = Model() # instantiate the model

In [5]:
# Run a forward pass of the dummy batch through the network.
net(data)



In [6]:
# torchinfo
# $ pip install torchinfo
# jupyter !pip install torchinfo

In [7]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.6.3-py3-none-any.whl (20 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.6.3


In [8]:
from torchinfo import summary

In [9]:
# Create a fresh model instance to pass to torchinfo's summary().
net = Model()

In [10]:
# Report per-layer output shapes and parameter counts for a (10, 1, 32, 32) input.
summary(net, input_size=(10,1,32,32))

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    --                        --
├─Conv2d: 1-1                            [10, 6, 28, 28]           156
├─AvgPool2d: 1-2                         [10, 6, 14, 14]           --
├─Conv2d: 1-3                            [10, 16, 10, 10]          2,416
├─AvgPool2d: 1-4                         [10, 16, 5, 5]            --
├─Linear: 1-5                            [10, 120]                 48,120
├─Linear: 1-6                            [10, 84]                  10,164
Total params: 60,856
Trainable params: 60,856
Non-trainable params: 0
Total mult-adds (M): 4.22
Input size (MB): 0.04
Forward/backward pass size (MB): 0.52
Params size (MB): 0.24
Estimated Total Size (MB): 0.81

In [12]:
# 单一的LeNet5在Fashion-MNIST数据集准确率超过91%，效果比只有线性层提升了5%
# 不能适用于大的数据集

### AlexNet 从浅层到深度

In [13]:
# “视觉界奥林匹克” 大规模视觉识别挑战比赛ILSVRC（2010–2017年举办），AlexNet于2012年夺冠
# ILSVRC 使用 ImageNet数据集

In [15]:
# AlexNet
# 输入-> (卷积+池化)-> (卷积+池化)-> (卷积*3+池化)-> (线性*3)-> 输出

相比较LeNet5的优化：
- 使用更深的网络
- 卷积核要小
- 增加通道数（特征图数）

使用了relu激活函数

防止过拟合
- FC前有 Dropout层
- 使用图像增强技术，扩充数据集

提出使用GPU训练神经网络、重叠池化

In [16]:
import torch
from torch import nn
from torch.nn import functional as F

In [18]:
data = torch.ones(size=(10,3,227,227)) # the original paper states 224*224 (this is the image size)

In [25]:
class Model(nn.Module):
    """AlexNet-style CNN.

    Architecture: (conv + pool) -> (conv + pool) -> (conv x 3 + pool) ->
    (linear x 3). Expects input of shape (batch, 3, 227, 227); the flatten
    step hard-codes the resulting 256 x 6 x 6 feature map.

    forward() returns softmax probabilities of shape (batch, 1000).
    """

    def __init__(self):
        super().__init__()

        # To cope with large raw images, start with an 11x11 kernel and a large
        # stride so the feature map shrinks quickly. A fairly large number of
        # channels compensates for the information lost by that downsizing.
        self.conv1 = nn.Conv2d(3,96,kernel_size=11,stride=4)
        self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2) # overlapping pooling windows

        # Kernel size and stride go back to commonly used values, and the
        # channel count is widened further to extract more features.
        # Padding keeps the feature map from shrinking, leaving more room for
        # the later layers. The map is already down to 27x27 here, so the
        # compute cost is manageable and feature extraction can start.
        self.conv2 = nn.Conv2d(96,256,kernel_size=5,padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3,stride=2)

        # Extract features aggressively with several consecutive conv layers.
        # With stride=1, kernel_size=5/padding=2 or kernel_size=3/padding=1
        # keeps the feature-map size unchanged.
        self.conv3 = nn.Conv2d(256,384,kernel_size=3,padding=1)
        self.conv4 = nn.Conv2d(384,384,kernel_size=3,padding=1)
        self.conv5 = nn.Conv2d(384,256,kernel_size=3,padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=3,stride=2)

        # The feature map is now 6x6 (classic CNNs usually bring it down to
        # roughly 5-7 before the fully connected part). The linear layers
        # reduce dimensionality and aggregate the information.
        self.fc1 = nn.Linear(6*6*256,4096) # every pixel of every feature map from the previous layer
        self.fc2 = nn.Linear(4096,4096)
        self.fc3 = nn.Linear(4096,1000)

    def forward(self,x):
        """Run the forward pass and return per-sample softmax probabilities."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.pool3(x)

        x = x.view(-1,6*6*256)
        # F.dropout defaults to training=True, so it must be tied to
        # self.training; otherwise dropout stays active after net.eval().
        x = F.relu(F.dropout(self.fc1(x),p=0.5,training=self.training))
        x = F.relu(F.dropout(self.fc2(x),p=0.5,training=self.training))
        output = F.softmax(self.fc3(x),dim=1)
        # The original omitted this return, so calling the model produced None.
        return output

In [26]:
# Instantiate the AlexNet-style model.
net = Model()

In [27]:
# Run a forward pass of the dummy batch through the network.
net(data)

In [28]:
from torchinfo import summary

In [29]:
# Report per-layer output shapes and parameter counts for a (10, 3, 227, 227) input.
summary(net,input_size=(10,3,227,227))

Layer (type:depth-idx)                   Output Shape              Param #
Model                                    --                        --
├─Conv2d: 1-1                            [10, 96, 55, 55]          34,944
├─MaxPool2d: 1-2                         [10, 96, 27, 27]          --
├─Conv2d: 1-3                            [10, 256, 27, 27]         614,656
├─MaxPool2d: 1-4                         [10, 256, 13, 13]         --
├─Conv2d: 1-5                            [10, 384, 13, 13]         885,120
├─Conv2d: 1-6                            [10, 384, 13, 13]         1,327,488
├─Conv2d: 1-7                            [10, 256, 13, 13]         884,992
├─MaxPool2d: 1-8                         [10, 256, 6, 6]           --
├─Linear: 1-9                            [10, 4096]                37,752,832
├─Linear: 1-10                           [10, 4096]                16,781,312
├─Linear: 1-11                           [10, 1000]                4,097,000
Total params: 62,378,344
Trainable p

In [None]:
# 步长，缩小特征图尺寸
# padding 小于 kernel_size的1/2
# 卷积核尺寸小
# 特征图最后尺寸5*5 7*7 9*9

In [38]:
class VGG16(nn.Module):
    """VGG16-style CNN: five conv blocks (13 conv layers) plus three linear layers.

    Every conv is 3x3 with padding=1, so only the 2x2 max-pools change the
    spatial size. Expects input of shape (batch, 3, 224, 224); the flatten
    step hard-codes the resulting 512 x 7 x 7 feature map.

    forward() returns softmax probabilities of shape (batch, 10).
    """

    def __init__(self):
        super().__init__()

        # block1
        self.conv1 = nn.Conv2d(3,64,3,padding=1)
        self.conv2 = nn.Conv2d(64,64,3,padding=1)
        self.pool1 = nn.MaxPool2d(2)

        # block2
        self.conv3 = nn.Conv2d(64,128,3,padding=1)
        self.conv4 = nn.Conv2d(128,128,3,padding=1)
        self.pool2 = nn.MaxPool2d(2)

        # block3
        self.conv5 = nn.Conv2d(128,256,3,padding=1)
        self.conv6 = nn.Conv2d(256,256,3,padding=1)
        self.conv7 = nn.Conv2d(256,256,3,padding=1)
        self.pool3 = nn.MaxPool2d(2)

        # block4
        self.conv8 = nn.Conv2d(256,512,3,padding=1)
        self.conv9 = nn.Conv2d(512,512,3,padding=1)
        self.conv10 = nn.Conv2d(512,512,3,padding=1)
        self.pool4 = nn.MaxPool2d(2)

        # block5
        self.conv11 = nn.Conv2d(512,512,3,padding=1)
        self.conv12 = nn.Conv2d(512,512,3,padding=1)
        self.conv13 = nn.Conv2d(512,512,3,padding=1)
        self.pool5 = nn.MaxPool2d(2)

        # fully connected layers
        self.fc1 = nn.Linear(512*7*7,4096)
        self.fc2 = nn.Linear(4096,4096)
        self.fc3 = nn.Linear(4096,10)

    def forward(self,x):
        """Run the forward pass and return per-sample softmax probabilities."""
        x = F.relu(self.conv1(x))
        x = self.pool1(F.relu(self.conv2(x)))

        x = F.relu(self.conv3(x))
        x = self.pool2(F.relu(self.conv4(x)))

        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool3(F.relu(self.conv7(x)))

        x = F.relu(self.conv8(x))
        x = F.relu(self.conv9(x))
        x = self.pool4(F.relu(self.conv10(x)))

        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = self.pool5(F.relu(self.conv13(x)))

        x = x.view(-1,512*7*7)
        # F.dropout defaults to training=True, so it must be tied to
        # self.training; otherwise dropout stays active after vgg.eval().
        x = F.relu(self.fc1(F.dropout(x,p=0.5,training=self.training)))
        x = F.relu(self.fc2(F.dropout(x,p=0.5,training=self.training)))
        output = F.softmax(self.fc3(x),dim=1)

        return output
    
# Create a VGG16 instance.
vgg = VGG16()

In [39]:
# Report per-layer output shapes and parameter counts for a (10, 3, 224, 224) input, run on CPU.
summary(vgg,input_size=(10,3,224,224),device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
VGG16                                    --                        --
├─Conv2d: 1-1                            [10, 64, 224, 224]        1,792
├─Conv2d: 1-2                            [10, 64, 224, 224]        36,928
├─MaxPool2d: 1-3                         [10, 64, 112, 112]        --
├─Conv2d: 1-4                            [10, 128, 112, 112]       73,856
├─Conv2d: 1-5                            [10, 128, 112, 112]       147,584
├─MaxPool2d: 1-6                         [10, 128, 56, 56]         --
├─Conv2d: 1-7                            [10, 256, 56, 56]         295,168
├─Conv2d: 1-8                            [10, 256, 56, 56]         590,080
├─Conv2d: 1-9                            [10, 256, 56, 56]         590,080
├─MaxPool2d: 1-10                        [10, 256, 28, 28]         --
├─Conv2d: 1-11                           [10, 512, 28, 28]         1,180,160
├─Conv2d: 1-12                           [10, 5