主要介绍nn模块

# 5.3.1 nn.Conv1d

In [1]:
import torch
from torch import nn

# 1-D convolution: 256 input channels -> 100 output channels, kernel width 3.
conv1 = nn.Conv1d(
    in_channels=256,
    out_channels=100,
    kernel_size=3,
    stride=1,
    padding=0,
)
# Input features laid out as (batch_size, max_length, feature_dim).
input = torch.randn(32, 35, 256)
# The kernel slides along the last dimension, so move the length axis there:
# (batch_size, feature_dim, max_length).
input = input.transpose(1, 2)
# Output shape is [batch_size, out_channels, (L + 2 * padding - kernel_size) / stride + 1];
# here (35 + 2 * 0 - 3) / 1 + 1 = 33.
out = conv1(input)
print(out.shape)

torch.Size([32, 100, 33])


In [2]:
# Print the current values of all parameters of the 1-D convolution.
conv1_params = list(conv1.parameters())
print(conv1_params)

[Parameter containing:
tensor([[[ 0.0079, -0.0332,  0.0343],
         [-0.0286, -0.0011, -0.0111],
         [ 0.0156,  0.0100,  0.0314],
         ...,
         [ 0.0304, -0.0355,  0.0035],
         [-0.0308,  0.0218,  0.0012],
         [-0.0290, -0.0116, -0.0175]],

        [[ 0.0227, -0.0354, -0.0066],
         [ 0.0249,  0.0147, -0.0351],
         [-0.0139, -0.0212,  0.0295],
         ...,
         [ 0.0016,  0.0317,  0.0038],
         [-0.0248, -0.0359, -0.0059],
         [-0.0251,  0.0065, -0.0181]],

        [[-0.0088,  0.0324, -0.0150],
         [-0.0157,  0.0344,  0.0255],
         [ 0.0317,  0.0332, -0.0192],
         ...,
         [ 0.0311,  0.0292,  0.0238],
         [-0.0346, -0.0097, -0.0293],
         [ 0.0227,  0.0093, -0.0051]],

        ...,

        [[-0.0266,  0.0141, -0.0359],
         [-0.0345,  0.0206, -0.0151],
         [-0.0309, -0.0004, -0.0119],
         ...,
         [-0.0185,  0.0020, -0.0111],
         [-0.0341,  0.0107,  0.0256],
         [ 0.0263,  0.0335,

# nn.Conv2d

卷积核从左到右，从上到下对图像进行卷积操作

二维卷积输出特征图的形状为[batch_size, out_channels, H_out, W_out]，其中H_out和W_out均按 (L + 2 * padding - kernel_size) / stride + 1 计算（结果向下取整），L分别为输入矩阵数据的高或宽。本例中stride都为1，padding默认为0，所以输出张量纵向和横向的维度均为(64+2*0-3)/1 + 1 = 62

In [3]:
# Input batch laid out as (batch_size, channel, height, width).
x = torch.randn(10, 16, 64, 64)
# 2-D convolution: 16 -> 32 channels, 3x3 kernel, stride 1 in both directions.
m = nn.Conv2d(
    in_channels=16,
    out_channels=32,
    kernel_size=(3, 3),
    stride=(1, 1),
)
y = m(x)
print(y.shape)

torch.Size([10, 32, 62, 62])


In [4]:
# Print the current values of all parameters of the 2-D convolution.
conv2d_params = list(m.parameters())
print(conv2d_params)

[Parameter containing:
tensor([[[[ 0.0312,  0.0599,  0.0217],
          [ 0.0624,  0.0422,  0.0084],
          [ 0.0078,  0.0817, -0.0359]],

         [[-0.0771, -0.0167,  0.0064],
          [-0.0540,  0.0435,  0.0106],
          [ 0.0257, -0.0201,  0.0173]],

         [[ 0.0419,  0.0138, -0.0548],
          [ 0.0191,  0.0438,  0.0200],
          [ 0.0299, -0.0613, -0.0237]],

         ...,

         [[-0.0149, -0.0584,  0.0164],
          [-0.0688,  0.0410,  0.0627],
          [-0.0152,  0.0444,  0.0588]],

         [[-0.0189,  0.0058, -0.0275],
          [-0.0404,  0.0725, -0.0242],
          [ 0.0479, -0.0612, -0.0368]],

         [[ 0.0452, -0.0783,  0.0655],
          [ 0.0196,  0.0742,  0.0597],
          [ 0.0483,  0.0034, -0.0283]]],


        [[[-0.0522, -0.0470,  0.0036],
          [ 0.0775,  0.0184,  0.0620],
          [ 0.0809,  0.0075,  0.0564]],

         [[ 0.0207, -0.0596,  0.0609],
          [ 0.0455,  0.0793, -0.0639],
          [ 0.0288, -0.0567, -0.0696]],

        

# 全连接 nn.Linear

nn.Linear(in_features, out_features, bias=True) 

参数in_features表示输入维度的大小，out_features表示输出维度的大小，bias表示是否带偏置，默认是带偏置的。

In [5]:
# Fully connected layer mapping a flattened 3x64x64 input to a single output.
connected_layer = nn.Linear(in_features=64 * 64 * 3, out_features=1)
input = torch.randn(10, 3, 64, 64)
# Collapse everything except the batch dimension into one axis of length 64*64*3;
# input.reshape((10, 64 * 64 * 3)) would work as well.
input = input.view(10, -1)
output = connected_layer(input)
print(output.shape)

torch.Size([10, 1])


In [10]:
# Print the weight and bias of the fully connected layer.
linear_params = list(connected_layer.parameters())
print(linear_params)

[Parameter containing:
tensor([[-0.0048, -0.0056,  0.0074,  ..., -0.0005,  0.0024, -0.0065]],
       requires_grad=True), Parameter containing:
tensor([-0.0084], requires_grad=True)]


# 平坦化类 nn.Flatten

input = input.view(10, 64*64*3) 语句实现了对张量形状的调整。实际上，nn模块还提供了Flatten类，也可以直接把指定的连续几维数据展平为连续的一维数据，默认从第1维到最后一维进行平坦化，第0维常表示batch_size，因此不进行展平。

In [9]:
# The view() call above can equivalently be written with nn.Flatten.
# By default it flattens dimensions 1 through -1 and keeps dimension 0 (the batch).
flatten = nn.Flatten()  # instantiate the module
input = flatten(input)

nn.Flatten作为一种操作，可以放到顺序化容器（nn.Sequential）中，更具有通用性。

## 非线性激活函数

用来对模型的输入和输出构建复杂的映射。例如ReLU、Softmax、Sigmoid、Tanh、LogSigmoid、LogSoftmax等。激活函数常用于在线性变换后，通过加入非线性变换使得模型能进行更复杂的表示。以nn.ReLU为例。

In [11]:
# Construct a ReLU activation module; as the cell's last expression, its repr is displayed.
nn.ReLU(inplace = False)

ReLU()

In [13]:
# Example: follow a linear layer with a non-linear activation.
input = torch.randn(4, 3, 64, 64)
flatten = nn.Flatten()  # instantiate the flattening module
flat_image = flatten(input)
layer1 = nn.Linear(in_features=64 * 64 * 3, out_features=5)
hidden1 = layer1(flat_image)
print(hidden1.size())
print(f"Before ReLU: {hidden1}\n\n")
# ReLU zeroes out every negative entry and leaves the others unchanged.
relu = nn.ReLU()
hidden1 = relu(hidden1)
print(f"After ReLU: {hidden1}")

torch.Size([4, 5])
Before ReLU: tensor([[ 0.0543,  0.8503, -0.2232,  0.6797,  0.7454],
        [-0.5675,  0.5837,  0.0578, -0.2704,  0.0245],
        [ 0.4450,  1.0845,  0.7304, -0.3484,  0.3928],
        [-0.2471, -0.0282, -0.1664, -0.2699,  0.0721]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0543, 0.8503, 0.0000, 0.6797, 0.7454],
        [0.0000, 0.5837, 0.0578, 0.0000, 0.0245],
        [0.4450, 1.0845, 0.7304, 0.0000, 0.3928],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0721]], grad_fn=<ReluBackward0>)


可见负数都变成0了

# 顺序化容器 nn.Sequential

## （1）定义时直接加入模块

In [14]:
input = torch.randn(4, 3, 64, 64)
# Layers are applied in the order given. A 3x3 kernel with stride 1 and no
# padding turns each 64x64 map into 62x62, hence 32 * 62 * 62 linear inputs.
layers = [
    nn.Conv2d(3, 32, (3, 3), (1, 1)),
    nn.ReLU(),
    nn.Flatten(),
    nn.Linear(32 * 62 * 62, 2),
]
net = nn.Sequential(*layers)
output = net(input)
print(f"output: {output}")
print(net)

output: tensor([[ 0.1262,  0.1395],
        [-0.1480, -0.0952],
        [ 0.0540, -0.2990],
        [ 0.5258,  0.2561]], grad_fn=<AddmmBackward0>)
Sequential(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (1): ReLU()
  (2): Flatten(start_dim=1, end_dim=-1)
  (3): Linear(in_features=123008, out_features=2, bias=True)
)


## （2）先定义对象，后加入模块

In [16]:
# Start from an empty container and register each layer by name via add_module.
net = nn.Sequential()
for layer_name, layer in (
    ('conv1', nn.Conv2d(16, 32, (3, 3), (1, 1))),
    ('relu', nn.ReLU()),
    ('flatten', nn.Flatten()),
    ('linear', nn.Linear(32 * 62 * 62, 1)),
):
    net.add_module(layer_name, layer)

## （3）定义时传入有序字典作为参数

In [17]:
from collections import OrderedDict
# Build the container from an OrderedDict so every layer gets an explicit name;
# modules run in the dictionary's insertion order.
net = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(16, 32, (3, 3), (1, 1))),
    ('relu', nn.ReLU()),
    ('flatten', nn.Flatten()),  # same layer name as in the add_module example
    ('linear', nn.Linear(32 * 62 * 62, 1)),
]))

该方法将有序字典作为参数传入，各个神经网络模块作为有序字典的元素。

# Loss 损失函数

## L1

In [18]:
import torch.nn as nn
import torch
# L1 loss between a random prediction and a random target of the same shape;
# the result is a scalar tensor.
loss = nn.L1Loss()
predict_value, target = torch.randn(1, 23), torch.randn(1, 23)
output = loss(predict_value, target)
print(output)

tensor(1.2593)


## CrossEntropyLoss

In [19]:
import torch.nn as nn
import torch

# Fix the seed so the random scores and labels are reproducible.
torch.manual_seed(0)
# Scores for 3 samples over 5 classes; gradients are tracked.
p = torch.randn(3, 5, requires_grad=True)
# True class label of each sample, drawn from {0, ..., 4}.
target = torch.empty(3, dtype=torch.long).random_(5)
print(f"p: {p}\ny:{target}")

loss = nn.CrossEntropyLoss()
output = loss(p, target)  # scalar loss value
print(f"loss: {output}")

p: tensor([[ 1.5410, -0.2934, -2.1788,  0.5684, -1.0845],
        [-1.3986,  0.4033,  0.8380, -0.7193, -0.4033],
        [-0.5966,  0.1820, -0.8567,  1.1006, -1.0712]], requires_grad=True)
y:tensor([3, 0, 0])
loss: 2.272947072982788
