In [2]:
import torch

# PyTorch基础

## 张量

In [3]:
# Row vector: the integers 0 through 11 as a 1-D tensor
x = torch.arange(0, 12)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [4]:
# Shape of x, reported as a torch.Size
x.size()

torch.Size([12])

In [5]:
# Change the shape: lay the 12 elements of x out as 3 rows by 4 columns
X = torch.reshape(x, (3, 4))
X

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [6]:
# All-zeros tensor of shape (2, 3, 4)
torch.zeros(2, 3, 4)

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [7]:
# All-ones tensor of shape (2, 3, 4)
torch.ones(2, 3, 4)

tensor([[[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]],

        [[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]])

In [8]:
# Random tensor of shape (3, 4, 3) drawn from the standard normal distribution.
# NOTE(review): no seed is set in this notebook, so the values differ on every run.
torch.randn((3, 4, 3))

tensor([[[-0.3340, -0.4388, -0.1683],
         [-2.1561, -0.6048, -1.9206],
         [-0.7382,  1.6729,  0.9507],
         [-0.9718,  0.1321, -0.6283]],

        [[-2.0756,  0.6226,  0.1155],
         [ 1.1870,  0.3528, -0.4045],
         [ 0.8364,  0.8946, -0.1226],
         [-0.3641,  0.4853,  0.9198]],

        [[ 0.6292, -0.2504,  0.0718],
         [-1.5598, -1.8521,  0.2623],
         [-0.1267,  0.1338, -0.8041],
         [-1.3804,  0.0369, -0.7286]]])

In [9]:
# Tensor built from explicit values: the outer sequence is axis 0 (rows),
# each inner sequence is axis 1 (columns)
torch.tensor((
    (2, 1, 4, 3),
    (1, 2, 3, 4),
    (4, 3, 2, 1),
))

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

## 运算

In [10]:
# Elementwise operations: each operator is applied position by position
x = torch.tensor([2 ** k for k in range(4)])
y = torch.tensor([2] * 4)
x + y, x / y

(tensor([ 3,  4,  6, 10]), tensor([0.5000, 1.0000, 2.0000, 4.0000]))

In [11]:
# Concatenate two 3x4 tensors along axis 0, stacking Y's rows under X's rows
X = torch.reshape(torch.arange(12), (3, 4))
Y = torch.tensor([
    [2.0, 1, 4, 3],
    [1, 2, 3, 4],
    [4, 3, 2, 1],
])
torch.cat([X, Y], dim=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

In [12]:
# Sum of every element of X, returned as a zero-dimensional tensor
torch.sum(X)

tensor(66)

## 广播机制

In [13]:
# A 3x1 column and a 1x2 row, used to demonstrate broadcasting
a = torch.arange(3).unsqueeze(1)
b = torch.arange(2).unsqueeze(0)
a, b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [14]:
# The shapes do not match, so both operands are broadcast:
# a is replicated across columns and b is replicated across rows
torch.add(a, b)

tensor([[0, 1],
        [1, 2],
        [2, 3]])

## 节省内存

In [15]:
# Identity (the memory address in CPython) of the object Y currently refers to
id(Y)

140228164484288

In [16]:
# Y + X produces a new tensor and rebinds the name Y to it,
# so id(Y) no longer matches the value printed before
Y = Y + X
id(Y)

140228164499072

In [17]:
# In-place update: slice assignment writes X + Y into Z's existing elements
# instead of rebinding the name Z to a new tensor
Z = torch.zeros_like(Y)
Z[:] = X + Y

## 转化为其他对象

In [18]:
# Convert the tensor X into a NumPy array
# NOTE(review): PyTorch documents that the array shares memory with a CPU tensor — confirm before mutating A
A = X.numpy()
A

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [19]:
# Convert a one-element tensor into a plain Python number
a = torch.tensor((3.5,))
a.item()

3.5

# 线性代数

In [20]:
# "Scalars": each value is stored here as a one-element 1-D tensor (shape [1]),
# not as a zero-dimensional tensor
x, y = torch.tensor([3.0]), torch.tensor([2.0])
x, y

(tensor([3.]), tensor([2.]))

In [21]:
# Matrix: the integers 0..19 arranged as 5 rows by 4 columns
A = torch.reshape(torch.arange(20), (5, 4))
A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])

In [22]:
# Transpose of the 2-D tensor A (rows and columns swapped)
A.t()

tensor([[ 0,  4,  8, 12, 16],
        [ 1,  5,  9, 13, 17],
        [ 2,  6, 10, 14, 18],
        [ 3,  7, 11, 15, 19]])

In [23]:
# Tensors: rebuild A and make B an independent deep copy of it
A = torch.arange(20).reshape((5, 4))
B = torch.clone(A)

In [24]:
# Elementwise multiplication, i.e. the Hadamard product
torch.mul(A, B)

tensor([[  0,   1,   4,   9],
        [ 16,  25,  36,  49],
        [ 64,  81, 100, 121],
        [144, 169, 196, 225],
        [256, 289, 324, 361]])

In [25]:
# Sums: over every element of x, and over axis 0 of A (one total per column)
x = torch.arange(0, 4)
x, torch.sum(x), torch.sum(A, dim=0)

(tensor([0, 1, 2, 3]), tensor(6), tensor([40, 45, 50, 55]))

In [26]:
# Mean of all elements; A is converted to floating point first
torch.mean(A.float())

tensor(9.5000)

In [27]:
# Vector dot product: the sum of the elementwise products of x and y
x = torch.arange(4, dtype=torch.float32)
y = torch.ones_like(x)
x.dot(y)

tensor(6.)

In [28]:
# Matrix-vector product; A is converted to floating point before multiplying by x
A.size(), x.size(), A.float().mv(x)

(torch.Size([5, 4]), torch.Size([4]), tensor([ 14.,  38.,  62.,  86., 110.]))

In [29]:
# Matrix-matrix product of A (as floating point) with a 4x3 matrix of ones
B = torch.ones((4, 3))
A.float().mm(B.float())

tensor([[ 6.,  6.,  6.],
        [22., 22., 22.],
        [38., 38., 38.],
        [54., 54., 54.],
        [70., 70., 70.]])

In [30]:
# Norm: the L2 (Euclidean) norm of the vector u.
# torch.linalg.norm is used because the PyTorch docs mark torch.norm as deprecated.
u = torch.tensor([3.0, -4.0])
torch.linalg.norm(u)

tensor(5.)

In [31]:
# Frobenius norm: the square root of the sum of the squared matrix elements.
# torch.linalg.norm with ord='fro' is used because the PyTorch docs mark torch.norm as deprecated.
torch.linalg.norm(torch.ones(4, 9), ord='fro')

tensor(6.)

# 自动求导

In [32]:
# Floating-point vector [0., 1., 2., 3.] that the gradients below are taken with respect to
x = torch.arange(0.0, 4.0)
x

tensor([0., 1., 2., 3.])

In [33]:
# Turn on gradient tracking for x in place; x.grad is None until a backward pass runs
x.requires_grad_(), x.grad

(tensor([0., 1., 2., 3.], requires_grad=True), None)

In [34]:
# y = 2 * (x . x), a scalar function of x recorded in the autograd graph
y = 2 * x.dot(x)
y

tensor(28., grad_fn=<MulBackward0>)

In [35]:
# Backpropagate from y and read the gradient accumulated on x
# retain_graph=True asks autograd to keep the graph after this pass
# NOTE(review): y is rebuilt before the next backward call, so retaining the graph may be unnecessary — confirm
y.backward(retain_graph=True)
x.grad

tensor([ 0.,  4.,  8., 12.])

In [36]:
# PyTorch accumulates gradients into x.grad by default, so clear it before the next pass
x.grad.zero_()
y = torch.sum(x)
y.backward(retain_graph=True)
x.grad

tensor([1., 1., 1., 1.])

In [37]:
# In deep learning we compute the sum of the partial derivatives
# that are calculated separately for each sample in the batch
x.grad.zero_()
y = torch.mul(x, x)
torch.sum(y).backward()
x.grad  # i.e. the partial derivative for each sample

tensor([0., 2., 4., 6.])

## 分离计算

In [38]:
# Detach an intermediate result so that gradients do not flow back through it
x.grad.zero_()
y = x * x
u = y.detach()  # remove y's gradient influence: u is treated as a constant
z = u * x
z.sum().backward()
x.grad  # the derivative of z = u * x is u, whose values equal y = x * x

tensor([0., 1., 4., 9.])

In [39]:
# 然后再计算y的导数
x.grad.zero_()
y.sum().backward()
x.grad  # y的导数为2*x

tensor([0., 2., 4., 6.])

# 概率

In [40]:
from torch.distributions import multinomial

In [43]:
# Six equally likely outcomes, each with probability 1/6
fair_probs = torch.ones(6) / 6
# One draw: the result is a one-hot vector marking the sampled outcome.
# NOTE(review): no seed is set in this notebook, so the sampled outcome differs on every run.
multinomial.Multinomial(total_count=1, probs=fair_probs).sample()

tensor([0., 0., 1., 0., 0., 0.])

In [45]:
# 1000 draws: the result holds how many times each of the six outcomes occurred
multinomial.Multinomial(total_count=1000, probs=fair_probs).sample()

tensor([165., 177., 179., 175., 157., 147.])