In [1]:
# 用 nn 来建立神经网络。
import torch
import torch.nn as nn
import torch.nn.functional as F
#pytorch 的 network 与 layer 都用 nn.Module 拓展

前向传播

every PyTorch nn.Module has a forward() method

# 用 pytorch 实现一个神经网络
1. Extend the nn.Module base class.
2. Define layers as class attributes.
3. Implement the forward() method.


In [2]:
class Network(nn.Module):
    """Skeleton CNN: two convolutional layers followed by three linear layers.

    Only the layers are declared at this stage; ``forward`` is still a
    placeholder that hands its input back untouched.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers. out_channels is the number of kernels (filters);
        # conv2's in_channels equals conv1's out_channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        # Fully connected layers; these operate on a flattened tensor.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        # Output layer: 10 features, one per target class.
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Placeholder forward pass: return ``t`` unchanged."""
        # A real implementation would call each layer on the tensor
        # (layers are callable), e.g. t = self.layer(t).
        return t

    def __repr__(self):
        """Return the default module repr prefixed with ``"my "``."""
        default_repr = super().__repr__()
        return "my " + default_repr

每一个层都有权重张量和待重写的 `forward` 函数(向前传播函数)

同时在 `fashion-mnist` 中， 图片都是灰度图，通道(channel)是 `1`，所以第一卷积层 `in_channels = 1`，如果是三通道，那就是 `3`.

由于 `out_channels = 6`, 所以产生的 `feature map` 就有 `6` 个，下一层的输入也就是同样数量。

CNN Weights - Learnable Parameters In Neural Networks

CNN 中的权重，是不断学习变化的参数。神经网络的学习其实也是可学习参数的学习。

那么这些参数在 `pytorch` 中存放在哪。

In [3]:
# Instantiate the network and display it; the cell's last expression is
# rendered through Network.__repr__, hence the "my " prefix in the output.
network = Network()
network

my Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)

In [4]:
# Inspect the learnable weight tensor of the first convolutional layer.
print(network.conv1.weight)
print(network.conv1.weight.shape)
# conv1.weight has shape (6, 1, 5, 5) = (number of filters, depth, height, width):
#   6    - out_channels, i.e. the number of filters
#   1    - in_channels; the input is single-channel, so each filter has depth 1
#   5, 5 - kernel height and width
# For conv2.weight, shape (12, 6, 5, 5), the 6 is the depth of each of its 12 filters.

Parameter containing:
tensor([[[[-0.1968,  0.1354,  0.1235, -0.1097, -0.1329],
          [-0.0807, -0.0398,  0.1980, -0.0680, -0.1397],
          [-0.1877, -0.1606,  0.0424, -0.0468,  0.1424],
          [ 0.1185,  0.1408, -0.0512, -0.0086, -0.1582],
          [-0.1598,  0.0316, -0.1682,  0.1693, -0.1927]]],


        [[[ 0.1234, -0.0658, -0.1321,  0.1996,  0.0850],
          [ 0.0431, -0.0631,  0.1728, -0.1278, -0.0669],
          [-0.1900, -0.1297,  0.1975,  0.0087, -0.0751],
          [ 0.1848,  0.0414, -0.0640,  0.1636,  0.1566],
          [ 0.0964,  0.0875, -0.0754,  0.0454,  0.1420]]],


        [[[ 0.1297, -0.1086, -0.1922, -0.0826, -0.1018],
          [ 0.1822, -0.1366,  0.0171,  0.1331, -0.1064],
          [ 0.0989, -0.1914,  0.0126,  0.1144,  0.1912],
          [ 0.0798,  0.1088, -0.0054,  0.1352,  0.0976],
          [-0.0578,  0.0412,  0.1312,  0.0756,  0.0594]]],


        [[[-0.1784, -0.0966, -0.0325,  0.0220, -0.0628],
          [-0.1778,  0.0267,  0.0457, -0.0408, -0.1348

PyTorch Parameter Class

用于跟踪神经网络中参数的变化的一个类。The Parameter class extends the tensor class。

nn.Module 会搜索成员变量是否是 `Parameter` 的实例，是就追踪他的变化。

our filter has a depth that matches the number of channels.

在线性模型中，有一个权重张量（矩阵）。它的 height（行数）代表输出的特征数，width（列数）代表输入的特征数。

In [5]:
# A linear layer viewed as a matrix product: the weight matrix has one row per
# output feature and one column per input feature (3 x 4 here).
in_features = torch.tensor([1, 2, 3, 4])
weight_matrix = torch.tensor([[1, 2, 3, 4], [2, 3, 4, 5], [3, 4, 5, 6]])

# (3 x 4) matrix times a length-4 vector gives a length-3 vector.
torch.matmul(weight_matrix, in_features)

tensor([30, 40, 50])

如何访问神经网络的参数？

In [7]:
# parameters() iterates over every learnable tensor of the module; the printed
# shapes alternate weight, bias for each layer in the order the layers were defined.
for param in network.parameters():
    print(param.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([12, 6, 5, 5])
torch.Size([12])
torch.Size([120, 192])
torch.Size([120])
torch.Size([60, 120])
torch.Size([60])
torch.Size([10, 60])
torch.Size([10])


In [8]:
# named_parameters() yields (name, parameter) pairs, e.g. "conv1.weight",
# so each shape can be matched to the layer attribute it belongs to.
for name, param in network.named_parameters():
    print(name, '\t\t', param.shape)

conv1.weight 		 torch.Size([6, 1, 5, 5])
conv1.bias 		 torch.Size([6])
conv2.weight 		 torch.Size([12, 6, 5, 5])
conv2.bias 		 torch.Size([12])
fc1.weight 		 torch.Size([120, 192])
fc1.bias 		 torch.Size([120])
fc2.weight 		 torch.Size([60, 120])
fc2.bias 		 torch.Size([60])
out.weight 		 torch.Size([10, 60])
out.bias 		 torch.Size([10])


# 线性模型如何工作


线性模型中，有一个权重矩阵。它在 `nn.Linear` 创建时就被创建了，这个可以看源码：大致是以 `torch.Tensor(out_features, in_features)` 创建的。同时注意，权重矩阵是将该张量传入 `nn.Parameter` 后得到的对象。

```python
self.weight = Parameter(torch.Tensor(out_features, in_features))
```

然后我们可以直接调用这个对象

In [10]:
# A bias-free linear layer mapping 4 input features to 3 output features;
# nn.Linear creates its own (3 x 4) weight matrix.
fc1 = nn.Linear(4, 3, bias=False)
in_features = torch.tensor([1, 2, 3, 4]).float()

# nn.Module implements __call__, so the layer object is invoked like a function.
fc1(in_features)

tensor([ 2.5112, -2.1990,  2.7608], grad_fn=<SqueezeBackward3>)

同时我们之后会发现，`forward()` 方法根本不需要我们显式调用，因为 `nn.Module` 已经在 `__call__` 方法中调用了它。

```python
def __call__(self, *input, **kwargs):
    for hook in self._forward_pre_hooks.values():
        hook(self, input)
    if torch._C._get_tracing_state():
        result = self._slow_forward(*input, **kwargs)
    else:
        result = self.forward(*input, **kwargs)
    ...
    ...
```

# 前向传播的实现


这个网络结构就是：输入层，2 个卷积层，2 个全连接层，一个输出层

So a network is just a function

在输入全连接层前，输入的张量需要是摊平的。

In [15]:
class Network(nn.Module):
    """CNN with two conv -> ReLU -> max-pool stages followed by three linear layers.

    ``forward`` returns the raw output of the last linear layer (10 values
    per row); no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers. out_channels is the number of kernels (filters);
        # conv2's in_channels equals conv1's out_channels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        # Fully connected layers; their input must be flattened first.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        # Output layer: 10 features, one per target class.
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run the input tensor ``t`` through every layer and return the result."""
        # (1) Input layer: the tensor is used exactly as given.

        # (2) First convolutional stage: convolution, ReLU, 2x2 max pooling.
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # (3) Second convolutional stage, same pattern.
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)

        # (4) First fully connected layer; flatten to rows of 12 * 4 * 4 features.
        flat = t.reshape(-1, 12 * 4 * 4)
        t = F.relu(self.fc1(flat))

        # (5) Second fully connected layer.
        t = F.relu(self.fc2(t))

        # (6) Output layer. F.softmax(t, dim=1) would turn these values into
        # positive per-class probabilities that sum to 1; it is left out here.
        return self.out(t)

    def __repr__(self):
        """Return the default module repr prefixed with ``"my "``."""
        default_repr = super().__repr__()
        return "my " + default_repr

# 前向传播解释

Forward propagation is the process of transforming an input tensor to an output tensor.

In [17]:
import torchvision
import torchvision.transforms as transforms

# FashionMNIST training split rooted at ./data; download=True asks torchvision
# to fetch the files, and ToTensor() converts every image to a tensor.
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

train_set

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [28]:
# Take the first (image, label) pair from the training set.
sample = next(iter(train_set)) 
image, label = sample
# A single image tensor has shape (1, 28, 28); the label prints as an integer.
print(image.shape)
print(label)

torch.Size([1, 28, 28])
9


In [34]:
# Fresh, untrained network: no training has happened, so the predicted class
# shown below is not meaningful yet.
network = Network()
# Add a leading batch dimension of size 1: (1, 28, 28) -> (1, 1, 28, 28).
pred = network(image.unsqueeze(0))
# Print the shape explicitly: a bare expression in the middle of a cell is
# evaluated but never displayed, only the cell's last expression is.
print(pred.shape)
# Index of the largest output value in each row, i.e. the predicted class.
pred.argmax(dim = 1)

tensor([1])