In [2]:
import torch

# PyTorch基础

## 张量

In [4]:
# Row vector: the 12 consecutive integers 0..11 as a 1-D tensor.
x = torch.arange(0, 12)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [5]:
# Shape of x, reported as a torch.Size.
x.size()

torch.Size([12])

In [6]:
# Change the shape: lay the 12 elements of x out as 3 rows by 4 columns.
X = torch.reshape(x, (3, 4))
X

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [7]:
# All-zeros tensor of shape (2, 3, 4).
torch.zeros(2, 3, 4)

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [8]:
# All-ones tensor of shape (2, 3, 4).
torch.ones(2, 3, 4)

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [10]:
# Random tensor of shape (3, 4, 3) drawn from the standard normal distribution.
# No seed is set in this notebook, so the values change on every run.
torch.randn((3, 4, 3))

tensor([[[ 0.3449,  0.0794,  1.0447],
         [ 2.0958, -0.7025,  1.1874],
         [ 0.5229,  0.5692,  0.1305],
         [-0.9977,  1.2755, -0.8895]],

        [[-0.3588,  0.5355, -0.1566],
         [-1.0604,  0.2361,  0.6992],
         [ 0.1355,  1.3135,  0.6317],
         [-2.2999, -1.0423,  0.0731]],

        [[-0.8746, -1.5783, -0.1517],
         [-0.0120, -0.3353, -1.3789],
         [-0.0203, -0.4476,  0.6016],
         [-2.3270,  0.1138,  0.1052]]])

In [12]:
# Build a tensor from a nested Python list: the outer list runs along
# axis 0 (rows) and each inner list runs along axis 1 (columns).
torch.tensor([
    [2, 1, 4, 3],
    [1, 2, 3, 4],
    [4, 3, 2, 1],
])

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

## 运算

In [16]:
# Elementwise arithmetic: each operator combines matching positions of x and y.
# `/` is true division, so the quotient is floating point even for integer inputs.
x, y = torch.tensor([1, 2, 4, 8]), torch.tensor([2, 2, 2, 2])
(x + y, x / y)

(tensor([ 3,  4,  6, 10]), tensor([0.5000, 1.0000, 2.0000, 4.0000]))

In [17]:
# Concatenate two 3x4 tensors along axis 0 (stacking rows) into a 6x4 tensor.
# X holds integers and Y holds floats; the concatenated result is float.
X = torch.arange(12).reshape((3, 4))
Y = torch.tensor([
    [2.0, 1, 4, 3],
    [1, 2, 3, 4],
    [4, 3, 2, 1],
])
torch.cat([X, Y], dim=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

In [18]:
# Sum of every element of X, returned as a zero-dimensional tensor.
torch.sum(X)

tensor(66)

## 广播机制

In [20]:
# Two operands with different shapes for the broadcasting demo:
# a is a (3, 1) column and b is a (1, 2) row.
a = torch.arange(3).reshape(-1, 1)
b = torch.arange(2).reshape(1, -1)
(a, b)

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [22]:
# Broadcasting: the shapes differ ((3, 1) vs (1, 2)), so a is replicated
# across columns and b across rows to a common (3, 2) shape before adding.
a + b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

## 节省内存

In [23]:
# Identity (memory address) of the object currently bound to Y,
# recorded so it can be compared after Y is reassigned.
id(Y)

140658216723328

In [24]:
# Y + X builds a new tensor and rebinds the name Y to it, so id(Y) no longer
# matches the value recorded before. Re-running this cell adds X again.
Y = Y + X
id(Y)

140658217418304

In [27]:
# In-place update: allocate Z once, then write the result of X + Y into the
# existing tensor with slice assignment instead of rebinding the name Z.
Z = torch.zeros_like(Y)
id_before = id(Z)
Z[:] = X + Y
# True shows that Z is still the same object after the update.
id(Z) == id_before

## 转化为其他对象

In [29]:
# Convert the tensor X to a NumPy array.
# NOTE(review): Tensor.numpy() on a CPU tensor is documented to share memory
# with the tensor — confirm before mutating A or X.
A = X.numpy()
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [30]:
# Scalar conversion: a one-element tensor becomes a plain Python number via item().
a = torch.tensor([3.5])
a.item()

3.5

# 线性代数

In [31]:
# Scalars, represented here as one-element 1-D tensors.
x, y = torch.tensor([3.0]), torch.tensor([2.0])
(x, y)

(tensor([3.]), tensor([2.]))

In [32]:
# Matrix: the integers 0..19 arranged as 5 rows by 4 columns.
A = torch.arange(0, 20).reshape((5, 4))
A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])

In [34]:
# Transpose of the 2-D tensor A: rows and columns are swapped.
A.t()

tensor([[ 0,  4,  8, 12, 16],
        [ 1,  5,  9, 13, 17],
        [ 2,  6, 10, 14, 18],
        [ 3,  7, 11, 15, 19]])

In [36]:
# Tensors: rebuild the 5x4 matrix A and make B a deep copy of it.
A = torch.arange(20).reshape((5, 4))
B = torch.clone(A)  # deep copy

In [37]:
# Elementwise multiplication, i.e. the Hadamard product: each entry of A is
# multiplied by the entry of B at the same position (not a matrix product).
A * B

tensor([[  0,   1,   4,   9],
        [ 16,  25,  36,  49],
        [ 64,  81, 100, 121],
        [144, 169, 196, 225],
        [256, 289, 324, 361]])

In [40]:
# Sums: over every element of the vector x, and along axis 0 of A
# (one total per column).
x = torch.arange(4)
(x, torch.sum(x), A.sum(dim=0))

(tensor([0, 1, 2, 3]), tensor(6), tensor([40, 45, 50, 55]))

In [46]:
# Mean of all elements of A. A holds integers, so it is cast to float first
# (presumably because mean() needs a floating-point dtype — confirm).
torch.mean(A.float())

tensor(9.5000)

In [52]:
# Vector dot product: sum of the elementwise products of x and y.
x = torch.arange(4, dtype=torch.float32)
y = torch.ones_like(x)
x.dot(y)

tensor(6.)

In [64]:
# Matrix-vector product: (5, 4) matrix times length-4 vector gives a length-5
# vector. A is cast to float so both operands share a dtype.
(A.size(), x.size(), A.float().mv(x))

(torch.Size([5, 4]), torch.Size([4]), tensor([ 14.,  38.,  62.,  86., 110.]))

In [66]:
# Matrix-matrix product: (5, 4) times (4, 3) gives a (5, 3) result.
B = torch.ones((4, 3))
A.float().mm(B.float())

tensor([[ 6.,  6.,  6.],
        [22., 22., 22.],
        [38., 38., 38.],
        [54., 54., 54.],
        [70., 70., 70.]])

In [67]:
# Norm: for a 1-D tensor the default is the Euclidean (L2) norm,
# sqrt(3^2 + (-4)^2) = 5. torch.linalg.norm replaces the deprecated torch.norm.
u = torch.tensor([3.0, -4.0])
torch.linalg.norm(u)

tensor(5.)

In [68]:
# Frobenius norm: the square root of the sum of squares of a matrix's entries.
# For a 4x9 matrix of ones that is sqrt(36) = 6. torch.linalg.norm replaces the
# deprecated torch.norm; ord='fro' names the norm explicitly.
torch.linalg.norm(torch.ones(4, 9), ord='fro')

tensor(6.)

# 自动求导

In [76]:
# Float vector [0., 1., 2., 3.] used as the input for the autograd examples.
x = torch.arange(4, dtype=torch.float32)
x

tensor([0., 1., 2., 3.])

In [77]:
# Turn on gradient tracking for x in place, then show its gradient slot.
x.requires_grad_(True), x.grad  # x.grad is None by default (no backward pass has run yet)

(tensor([0., 1., 2., 3.], requires_grad=True), None)

In [78]:
# y = 2 * <x, x>, a scalar that depends on x and so carries a grad_fn.
y = 2 * x.dot(x)
y

tensor(28., grad_fn=<MulBackward0>)

In [79]:
# Backpropagate from the scalar y; the gradient of 2 * <x, x> is 4 * x.
# retain_graph is not needed: y is rebuilt before any later backward call, and
# keeping the graph would let a re-run of this cell silently accumulate into x.grad.
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [80]:
# PyTorch accumulates gradients into x.grad by default, so clear the previous
# values before computing a new gradient.
x.grad.zero_()
y = x.sum()
# A single backward pass over a freshly built graph; retain_graph is not needed.
y.backward()
x.grad

tensor([1., 1., 1., 1.])

In [82]:
# In deep learning we usually want the sum of the partial derivatives computed
# separately for each example in the batch, so the non-scalar y is reduced with
# sum() before calling backward().
x.grad.zero_()
y = x * x
y.sum().backward()
x.grad  # i.e. each example's own partial derivative, 2 * x

tensor([0., 2., 4., 6.])

## 分离计算

In [85]:
# Detach part of the computation so gradients do not flow back through it.
x.grad.zero_()
y = x * x
u = y.detach()  # same values as y, but treated as a constant by autograd
z = u * x
z.sum().backward()
x.grad  # with u constant, d(u * x)/dx is u, i.e. the values of x * x

tensor([0., 1., 4., 9.])

In [86]:
# Then differentiate y itself. Only the detached copy u was used in the
# previous backward pass, so y's own graph is still available.
x.grad.zero_()
y.sum().backward()
x.grad  # the derivative of y = x * x is 2 * x

tensor([0., 2., 4., 6.])