In [1]:
# 用 nn 来建立神经网络。
import torch
import torch.nn as nn
import torch.nn.functional as F
#pytorch 的 network 与 layer 都用 nn.Module 拓展

前向传播

every PyTorch nn.Module has a forward() method

# 用 pytorch 实现一个神经网络
1. Extend the nn.Module base class.
2. Define layers as class attributes.
3. Implement the forward() method.


In [2]:
class Network(nn.Module):
    """Skeleton CNN: the layers are declared, but ``forward`` is still a stub.

    The layers are stored as attributes of the module; their weights are what
    ``parameters()`` / ``named_parameters()`` later enumerate.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers. out_channels is the number of filters (kernels);
        # conv2 takes the 6 feature maps that conv1 produces as its input.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        # Fully connected layers; their input has to be flattened first.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        # Output layer: one value per class, 10 classes in total.
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Placeholder forward pass: return the input tensor untouched.

        A real implementation would apply each layer by calling it,
        e.g. ``t = self.conv1(t)``.
        """
        return t

    def __repr__(self):
        """Return the default module representation prefixed with ``"my "``."""
        return f"my {super().__repr__()}"

每一个层都有权重张量和待重写的 `forward` 函数(向前传播函数)

同时在 `fashion-mnist` 中， 图片都是灰度图，通道(channel)是 `1`，所以第一卷积层 `in_channels = 1`，如果是三通道，那就是 `3`.

由于 `out_channels = 6`, 所以产生的 `feature map` 就有 `6` 个，下一层的输入也就是同样数量。

CNN Weights - Learnable Parameters In Neural Networks

CNN 中的权重，是不断学习变化的参数。神经网络的学习其实也是可学习参数的学习。

那么这些参数在 `pytorch` 中存放在哪。

In [3]:
# Instantiate the network; the bare expression on the last line displays it
# through the overridden __repr__.
network = Network()
network

my Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [4]:
print(network.conv1.weight)
print(network.conv1.weight.shape)
# shape = (6, 1, 5, 5): (number of filters, depth, height, width)
#   6 - out_channels, 1 - single input channel, 5 x 5 - kernel size
# The 1 is each filter's depth; it matches the number of input channels.

Parameter containing:
tensor([[[[-0.1911,  0.1132,  0.0521, -0.0519, -0.1630],
          [ 0.0017, -0.0731, -0.1816,  0.1704,  0.0231],
          [-0.1792,  0.1042, -0.0264,  0.1664,  0.0965],
          [-0.1085, -0.1906,  0.1135,  0.0865, -0.1134],
          [-0.0709,  0.0220, -0.1816,  0.0121, -0.1762]]],


        [[[ 0.0832, -0.1990, -0.0035, -0.1684, -0.0127],
          [ 0.0345,  0.0486,  0.0023, -0.0649, -0.0712],
          [ 0.1051,  0.1806,  0.0459, -0.0600, -0.1899],
          [ 0.0683,  0.1851,  0.1463, -0.1828,  0.0793],
          [ 0.0702, -0.1815, -0.0468,  0.1229, -0.1092]]],


        [[[ 0.1425,  0.1144, -0.1758, -0.0223, -0.0859],
          [-0.1328,  0.0022,  0.1742,  0.1733,  0.1452],
          [ 0.0505, -0.1239,  0.1721, -0.1935,  0.0584],
          [ 0.1759,  0.0215,  0.0017, -0.1775, -0.0105],
          [-0.1166,  0.1672, -0.0806, -0.0410, -0.1445]]],


        [[[ 0.1080,  0.0560,  0.1909,  0.1307,  0.1864],
          [ 0.0658, -0.0558,  0.1964,  0.0633, -0.1071

PyTorch Parameter Class

用于跟踪神经网络中参数的变化的一个类。The Parameter class extends the tensor class。

nn.Module 会搜索成员变量是否是 `Parameter` 的实例，是就追踪他的变化。

our filter has a depth that matches the number of channels.

在线性模型中， 有一个权重张量。他的 height 是代表预期的输出的特征数，width 是输入的特征数。

In [5]:
# A 4-element input vector and a 3 x 4 weight matrix
# (height 3 = output features, width 4 = input features).
in_features = torch.tensor([1, 2, 3, 4])
weight_matrix = torch.tensor([[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6]])

# Matrix-vector product: one output value per row of the weight matrix.
weight_matrix @ in_features

tensor([30, 40, 50])

如何访问神经网络的参数？

In [6]:
# Walk over every parameter tensor of the network and show its shape.
for param in network.parameters():
    print(param.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([12, 6, 5, 5])
torch.Size([12])
torch.Size([120, 192])
torch.Size([120])
torch.Size([60, 120])
torch.Size([60])
torch.Size([10, 60])
torch.Size([10])


In [7]:
# named_parameters() yields (name, parameter) pairs, so each shape can be labelled.
for name, param in network.named_parameters():
    print(name, '\t\t', param.shape)

conv1.weight 		 torch.Size([6, 1, 5, 5])
conv1.bias 		 torch.Size([6])
conv2.weight 		 torch.Size([12, 6, 5, 5])
conv2.bias 		 torch.Size([12])
fc1.weight 		 torch.Size([120, 192])
fc1.bias 		 torch.Size([120])
fc2.weight 		 torch.Size([60, 120])
fc2.bias 		 torch.Size([60])
out.weight 		 torch.Size([10, 60])
out.bias 		 torch.Size([10])


# 线性模型如何工作


线性模型中，有一个权重矩阵。在 `nn.Linear` 创建时就创建了，这个可以看源码。大概是以 `tensor(out_features, in_features)` 创建的。同时注意，权重矩阵是需要将张量传入 `nn.Parameter` 得到的对象。

```python
self.weight = Parameter(torch.Tensor(out_features, in_features))
```

然后我们可以直接调用这个对象

In [8]:
# The same four input values as before, this time as float32.
in_features = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
# Linear layer mapping 4 input features to 3 output features, without a bias term.
fc1 = nn.Linear(4, 3, bias=False)
# The layer implements __call__, so it is applied by calling it like a function.
fc1(in_features)

tensor([ 2.8247, -0.3079,  2.0328], grad_fn=<SqueezeBackward3>)

同时我们之后会发现，`forward()` 方法根本不需要我们显式调用，因为在 `nn.Module` 中，已经在 `__call__` 方法中调用了。

```python
def __call__(self, *input, **kwargs):
    for hook in self._forward_pre_hooks.values():
        hook(self, input)
    if torch._C._get_tracing_state():
        result = self._slow_forward(*input, **kwargs)
    else:
        result = self.forward(*input, **kwargs)
    ...
    ...
```

# 前向传播的实现


这个网络结构就是：输入层，2 个卷积层，2 个全连接层，一个输出层

So a network is just a function

在输入全连接层前，输入的张量需要是摊平的。

In [9]:
class Network(nn.Module):
    """CNN with two convolutional layers, two hidden linear layers and an output layer.

    The flatten step expects 12 feature maps of size 4 x 4 after the second
    pooling, which is what a 1 x 28 x 28 input produces.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers. out_channels is the number of filters (kernels);
        # conv2 takes the 6 feature maps that conv1 produces as its input.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        # Fully connected layers; their input has to be flattened first.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        # Output layer: one value per class, 10 classes in total.
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Transform an input batch into one row of 10 raw class scores per sample.

        Softmax is deliberately not applied here, so the returned values are
        unnormalised; callers apply ``F.softmax(..., dim=1)`` when they want
        probabilities that sum to 1.
        """
        # (1) Input layer: the tensor is used exactly as passed in.

        # (2) First convolutional block: convolution -> ReLU -> 2x2 max pooling.
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # (3) Second convolutional block, same structure.
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)

        # (4) First fully connected layer; flatten each sample beforehand.
        flat = t.reshape(-1, 12 * 4 * 4)
        t = F.relu(self.fc1(flat))

        # (5) Second fully connected layer.
        t = F.relu(self.fc2(t))

        # (6) Output layer.
        return self.out(t)

    def __repr__(self):
        """Return the default module representation prefixed with ``"my "``."""
        return f"my {super().__repr__()}"

conv1.weight.shape 是 `[6, 1, 5, 5]` 代表了 `[卷积核数量，输入通道，卷积核高，宽]`

虽然卷积核有 6 个，但是在代码中用一个张量就可以了。

# 前向传播解释

Forward propagation is the process of transforming an input tensor to an output tensor.

In [10]:
import torchvision
import torchvision.transforms as transforms

# Fashion-MNIST training split, stored under ./data (download=True requests a download).
# The transform pipeline converts every image to a tensor.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

train_set

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [11]:
# Take the first sample from the dataset; it unpacks into an (image, label) pair.
sample = next(iter(train_set)) 
image, label = sample
print(image.shape)
print(label)

torch.Size([1, 28, 28])
9


In [12]:
# NOTE(review): called as a plain function, set_grad_enabled(False) switches gradient
# tracking off globally, so it stays off for every cell executed after this one
# until it is explicitly re-enabled.
torch.set_grad_enabled(False) 
network = Network()
pred = network(image.unsqueeze(0)) # unsqueeze(0) adds the batch dimension
print(pred.shape)
print(pred.argmax(dim = 1))
print(F.softmax(pred, dim =1 ))

torch.Size([1, 10])
tensor([1])
tensor([[0.1036, 0.1122, 0.0897, 0.0899, 0.1097, 0.0959, 0.0966, 0.0911, 0.0993,
         0.1120]])


In [13]:
# Serve the training set in batches of 10 samples.
data_loader = torch.utils.data.DataLoader(train_set, batch_size=10)
data_loader

<torch.utils.data.dataloader.DataLoader at 0x24f960ef4e0>

In [18]:
# Pull the first batch from the loader and split it into images and labels.
batch = next(iter(data_loader))
images, labels = batch
print(images.shape)
print(labels)

torch.Size([10, 1, 28, 28])
tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5])


In [19]:
# Run the forward pass on the whole batch at once.
preds = network(images)
preds

tensor([[ 0.0302,  0.1091, -0.1142, -0.1120,  0.0870, -0.0477, -0.0403, -0.0987,
         -0.0128,  0.1079],
        [ 0.0232,  0.1072, -0.1060, -0.1141,  0.0910, -0.0543, -0.0390, -0.1154,
         -0.0088,  0.1051],
        [ 0.0216,  0.1122, -0.1137, -0.1137,  0.0830, -0.0516, -0.0430, -0.1031,
         -0.0095,  0.1100],
        [ 0.0236,  0.1094, -0.1166, -0.1153,  0.0853, -0.0498, -0.0439, -0.1025,
         -0.0108,  0.1094],
        [ 0.0278,  0.1089, -0.1245, -0.1145,  0.0883, -0.0501, -0.0401, -0.0967,
         -0.0103,  0.1072],
        [ 0.0283,  0.1058, -0.1097, -0.1135,  0.0877, -0.0568, -0.0423, -0.1094,
         -0.0077,  0.1053],
        [ 0.0265,  0.1070, -0.1085, -0.1134,  0.0869, -0.0551, -0.0382, -0.1094,
         -0.0112,  0.1123],
        [ 0.0304,  0.1024, -0.1141, -0.1191,  0.0925, -0.0587, -0.0411, -0.1050,
         -0.0093,  0.1047],
        [ 0.0278,  0.1025, -0.1116, -0.1086,  0.0926, -0.0580, -0.0355, -0.1027,
         -0.0052,  0.0987],
        [ 0.0297,  

In [20]:
# One row of class scores per image in the batch.
preds.shape

torch.Size([10, 10])

`[batch_size, 各个类别的可能性]`

利用 `argmax` 对第二个轴查找最大可能的 `index`。

In [21]:
# Index of the highest score along the second axis, i.e. the predicted class per image.
preds.argmax(dim = 1 )

tensor([1, 1, 1, 1, 1, 1, 9, 9, 1, 9])

In [22]:
# Labels of the same batch, shown for comparison with the predicted indices.
labels

tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5])

In [29]:
# Element-wise comparison of the predicted class indices with the labels.
matches = preds.argmax(dim=1).eq(labels)
print(matches)
print(matches.sum())  # number of True entries
print(matches.sum().item())  # the same count, extracted as a plain Python scalar

tensor([False, False, False, False, False, False, False, False, False, False])
tensor(0)
0


# 一层一层讲解

## 第一层卷积
操作前
```python
torch.Size([1, 1, 28, 28])
```

经过卷积操作后
```python
torch.Size([1, 6, 24, 24])
```

`batch_size` 依旧是 `1`。The batch_size is fixed as we move through the forward pass.

而这里只是刚弄完卷积操作。然后看一下各个轴长。

`1` 是 batch_size.

`6` 是  out_channels

`24` 是卷积后的高/宽：`conv1` 的 stride 为 1、没有 padding，卷积核大小为 5，所以 `(28 - 5) / 1 + 1 = 24`，即 `(width - kernel_size) / stride + 1`。

In [33]:
# Shape of the first convolutional layer's weight tensor.
print(network.conv1.weight.shape)

torch.Size([6, 1, 5, 5])


其实这个层里头的权重张量，就是卷积核们。（还记得每个层都有权重张量吗？）

```
The filters are the weight tensors.
```

# CNN Output Size Formula