# CNN卷积神经网络

## 1.1 二维互相关运算

#### 虽然卷积层得名于卷积（convolution）运算，但我们通常在卷积层中使用更加直观的互相关（cross-correlation）

In [1]:
import torch
from torch import nn

def corr2d(X, K):
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1), (X.shape[1] - w + 1)) 
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i,j] = (X[i:i+h, j:j+w] * K).sum()
    return Y

# 1.2 二维卷积层

#### 二维卷积层将输入和卷积核做互相关运算，并加上一个标量偏差来得到输出。卷积层的模型参数包括了卷积核和标量偏差。在训练模型的时候，通常我们先对卷积核随机初始化，然后不断迭代卷积核和偏差

In [2]:
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(torch.randn(kernel_size), requires_grad = True)
        self.bias = nn.Parameter(torch.randn(1), requires_grad = True)
        
    def forward(self, x):
        return corr2d(x, self.weight) + self.bias

## 1.3通过数据学习核数组

#### 使用物体边缘检测中的输入数据X和输出数据Y来学习我们构造的核数组K。我们首先构造一个卷积层，其卷积核将被初始化成随机数组。接下来在每一次迭代中，我们使用平方误差来比较Y和卷积层的输出，然后计算梯度来更新权重。

In [28]:
X = torch.ones(6, 8)
X[:, 2:6] = 0
K = torch.tensor([[1000, -0.1]])
Y = corr2d(X, K)
print(X)
print(K)
print(Y)

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])
tensor([[ 1.0000e+03, -1.0000e-01]])
tensor([[ 9.9990e+02,  1.0000e+03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         -1.0000e-01,  9.9990e+02],
        [ 9.9990e+02,  1.0000e+03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         -1.0000e-01,  9.9990e+02],
        [ 9.9990e+02,  1.0000e+03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         -1.0000e-01,  9.9990e+02],
        [ 9.9990e+02,  1.0000e+03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         -1.0000e-01,  9.9990e+02],
        [ 9.9990e+02,  1.0000e+03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         -1.0000e-01,  9.9990e+02],
        [ 9.9990e+02,  1.0000e+03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         -1.0000e-01,  9.9990e+02]])


In [29]:
conv2d = Conv2D(kernel_size = (1,2))

step = 100
lr = 0.01

for i in range(step):
    Y_hat = conv2d(X)
    l = ((Y_hat - Y)**2).sum()
    l.backward()
    
    #梯度下降
    conv2d.weight.data -= lr * conv2d.weight.grad
    conv2d.bias.data -= lr * conv2d.bias.grad
    
    #梯度清0
    conv2d.weight.grad.fill_(0)
    conv2d.bias.grad.fill_(0)
    
    if (i + 1) % 5 == 0:
        print('Step %d, loss %.3f' % (i + 1, l.item()))

Step 5, loss 1607044.750
Step 10, loss 359850.531
Step 15, loss 90437.742
Step 20, loss 24095.795
Step 25, loss 6588.963
Step 30, loss 1821.451
Step 35, loss 505.761
Step 40, loss 140.686
Step 45, loss 39.162
Step 50, loss 10.905
Step 55, loss 3.037
Step 60, loss 0.846
Step 65, loss 0.235
Step 70, loss 0.066
Step 75, loss 0.018
Step 80, loss 0.005
Step 85, loss 0.001
Step 90, loss 0.000
Step 95, loss 0.000
Step 100, loss 0.000


In [30]:
print(conv2d.weight.data)
print(conv2d.bias.data)

tensor([[ 1.0000e+03, -9.8594e-02]])
tensor([-1.0499e-05])


# 2 填充和步幅

## 2.1 填充

In [31]:
import torch
from torch import nn

# 定义一个函数来计算卷积层。它对输入和输出做相应的升维和降维
def comp_conv2d(conv2d, X):
    # (1, 1)代表批量大小和通道数（“多输入通道和多输出通道”一节将介绍）均为1
    X = X.view((1, 1) + X.shape)
    Y = conv2d(X)
    return Y.view(Y.shape[2:])  # 排除不关心的前两维：批量和通道

In [32]:
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)

X = torch.rand(8, 8)
comp_conv2d(conv2d, X).shape

print(conv2d)

Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


## 2.2 步长

# 3 多输入和输出通道

## 3.1 多输入通道

#### 多输入，但是单输出。在每个通道上，二维输入数组与二维核数组做互相关运算，再按通道相加即得到输出

In [33]:
import torch
from torch import nn
import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l

def conv2d_multi_in(X, K):
    # 沿着x和k的第0维，分别计算再相加
    res = corr2d(X[0, :, :], K[0, :, :])
    for i in range(1, X.shape[0]):
        res += corr2d(X[i, :, :], K[i, :, :])
    return res

In [34]:
X = torch.tensor([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
              [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = torch.tensor([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])

print(X)
print(K)
print(K.shape)

conv2d_multi_in(X, K)

tensor([[[0, 1, 2],
         [3, 4, 5],
         [6, 7, 8]],

        [[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
tensor([[[0, 1],
         [2, 3]],

        [[1, 2],
         [3, 4]]])
torch.Size([2, 2, 2])


tensor([[ 56.,  72.],
        [104., 120.]])

## 3.2 多输出通道

In [35]:
def corr2d_multi_in_out(X, K):
    # 对K的第0维遍历，每次同输入X做互相关计算。所有结果使用stack函数合并在一起
    return torch.stack([corr2d_multi_in(X, k) for k in K])

# 4  池化层

In [36]:
import torch
from torch import nn

def pool2d(X, pool_size, mode='max'):
    X = X.float()
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i,j] = X[i: i + p_h, j: j + p_w].max()
            if mode == 'avg':
                Y[i,j] = X[i: i + p_h, j: j + p_w].mean()
    return Y
            

In [37]:
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
pool2d(X, (2, 2))

tensor([[4., 5.],
        [7., 8.]])

# 5 LeNet卷积神经网络


## 多层感知机问题:
- 图像在同一列邻近的像素在这个向量中可能相距较远。它们构成的模式可能难以被模型识别
- 对于大尺寸的输入图像，使用全连接层容易造成模型过大。假设输入是高和宽均为1000像素的彩色照片（含3个通道）。即使全连接层输出个数仍是256，该层权重参数的形状是3,000,000×256：它占用了大约3 GB的内存或显存。这带来过复杂的模型和过高的存储开销。

## 卷积层
- 卷积层保留输入形状，使图像的像素在高和宽两个方向上的相关性均可能被有效识别
- 卷积层通过滑动窗口将同一卷积核与不同位置的输入重复计算，从而避免参数尺寸过大


- 卷积层用来识别图像里的空间模式，如线条和物体局部
- 最大池化层则用来降低卷积层对位置的敏感性，池化窗口与步幅形状相同

In [56]:
import time 
import torch
from torch import nn, optim

import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class Flatten(torch.nn.Module):  #展平操作
    def forward(self, x):
        return x.view(x.shape[0], -1)

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5, padding=2),# 1@in_channels, 6@out_channels, 5*5@kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            
            nn.Conv2d(6, 16, 5),
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2)
            
            
        )
        
        self.fc = nn.Sequential(          
            Flatten(),
            nn.Linear(16*5*5, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10),
        )
    
    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature)
        return output
        
        

In [57]:
net = LeNet()
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Flatten()
    (1): Linear(in_features=400, out_features=120, bias=True)
    (2): Sigmoid()
    (3): Linear(in_features=120, out_features=84, bias=True)
    (4): Sigmoid()
    (5): Linear(in_features=84, out_features=10, bias=True)
  )
)


# 5.1 评估准确度

In [40]:
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # 如果没指定device就使用net的device
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式, 这会关闭dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU
                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            n += y.shape[0]
    return acc_sum / n

# 5.2 训练函数

In [60]:
def train_cnn(net, train_iter, test_iter, batch_size, optimizer, device,
             num_epochs):
    
    net = net.to(device)
    print('training on', device)
    
    loss = torch.nn.CrossEntropyLoss()
    
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X,y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
            test_acc = evaluate_accuracy(test_iter, net)
            print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

In [45]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

In [58]:
for X,y in train_iter:
    print(X.shape)
    print(y.shape)
    y_hat = net(X)
    print(y_hat)
    
    break

torch.Size([256, 1, 28, 28])
torch.Size([256])
tensor([[ 0.8553, -0.0314,  0.0197,  ...,  0.1979, -0.8129, -0.0661],
        [ 0.8554, -0.0314,  0.0196,  ...,  0.1978, -0.8129, -0.0662],
        [ 0.8553, -0.0313,  0.0197,  ...,  0.1979, -0.8129, -0.0661],
        ...,
        [ 0.8552, -0.0312,  0.0197,  ...,  0.1979, -0.8127, -0.0661],
        [ 0.8554, -0.0313,  0.0196,  ...,  0.1979, -0.8128, -0.0662],
        [ 0.8554, -0.0314,  0.0197,  ...,  0.1979, -0.8128, -0.0661]],
       grad_fn=<AddmmBackward>)


In [61]:
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_cnn(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on cpu
epoch 1, loss 2.3768, train acc 0.078, test acc 0.100, time 2.8 sec
epoch 1, loss 2.3707, train acc 0.092, test acc 0.100, time 5.4 sec
epoch 1, loss 2.3578, train acc 0.100, test acc 0.100, time 8.0 sec
epoch 1, loss 2.3458, train acc 0.105, test acc 0.100, time 10.5 sec
epoch 1, loss 2.3402, train acc 0.102, test acc 0.100, time 13.1 sec
epoch 1, loss 2.3350, train acc 0.104, test acc 0.100, time 15.6 sec
epoch 1, loss 2.3311, train acc 0.104, test acc 0.100, time 18.1 sec
epoch 1, loss 2.3283, train acc 0.107, test acc 0.100, time 20.6 sec
epoch 1, loss 2.3248, train acc 0.109, test acc 0.100, time 23.1 sec
epoch 1, loss 2.3225, train acc 0.107, test acc 0.100, time 26.2 sec
epoch 1, loss 2.3204, train acc 0.107, test acc 0.100, time 29.3 sec
epoch 1, loss 2.3194, train acc 0.106, test acc 0.100, time 33.1 sec
epoch 1, loss 2.3189, train acc 0.106, test acc 0.100, time 35.8 sec
epoch 1, loss 2.3176, train acc 0.106, test acc 0.100, time 39.2 sec
epoch 1, loss 2.3175,