In [1]:
import torch
import numpy as np

加：add

减：sub

乘：mul

除：div

# 基本运算（加、减、乘、除）

In [2]:
# Broadcasting demo: a has shape [3, 4] and b has shape [4].
a = torch.rand(3, 4)
b = torch.rand(4) # b is broadcast: [4] ==> [1, 4] ==> [3, 4]

# Elementwise addition with the + operator.
a + b

tensor([[0.8086, 1.1837, 0.8097, 1.1421],
        [1.1543, 1.0996, 1.0537, 1.3399],
        [1.3184, 0.9018, 0.7067, 0.7804]])

In [3]:
# Function form of a + b; the printed values match the operator form.
torch.add(a, b)

tensor([[0.8086, 1.1837, 0.8097, 1.1421],
        [1.1543, 1.0996, 1.0537, 1.3399],
        [1.3184, 0.9018, 0.7067, 0.7804]])

In [4]:
# torch.eq compares the operator result and the function result element by element;
# .all() is True only when every one of those comparisons is True.
torch.eq(a - b, torch.sub(a, b)).all()

tensor(True)

In [5]:
# The * operator and torch.mul must agree on every element.
torch.eq(a * b, torch.mul(a, b)).all()

tensor(True)

In [6]:
# The / operator and torch.div must agree on every element.
torch.eq(a / b, torch.div(a, b)).all()

tensor(True)

### 建议直接使用运算符

# 矩阵相乘

* : element wise（相同位置的元素进行相乘）

矩阵相乘：matrix multiplication

注意：torch.matmul 与 * 不一样：前者是矩阵乘法，后者是逐元素相乘

## 矩阵相乘的三种表达形式

1. torch.mm（只适用于二维矩阵，不推荐）

2. torch.matmul

3. @

torch.matmul与@是完全一样的

In [24]:
# 2x2 float tensor with every entry equal to 3.
# Alternative spelling kept from the original notes: torch.Tensor(2, 2).fill_(3)
a = torch.tensor([
    [3.0, 3.0],
    [3.0, 3.0],
])
a

tensor([[3., 3.],
        [3., 3.]])

In [15]:
# 2x2 tensor of ones, used as the right-hand operand of the products below.
b = torch.ones((2, 2))
b

tensor([[1., 1.],
        [1., 1.]])

In [16]:
# torch.mm: matrix product of two 2-D tensors.
torch.mm(a, b)

tensor([[6., 6.],
        [6., 6.]])

In [17]:
# torch.matmul: prints the same result as torch.mm for these 2-D inputs.
torch.matmul(a, b)

tensor([[6., 6.],
        [6., 6.]])

In [18]:
# The @ operator: prints the same result as torch.matmul(a, b).
a @ b

tensor([[6., 6.],
        [6., 6.]])

In [26]:
# Example: the matrix multiply performed by a linear layer.
x = torch.rand(4, 784)    # x: 4 flattened images, 784 values each
w = torch.rand(512, 784)  # w: [512, 784] weight that reduces each 784-vector to 512 values

# w is transposed to [784, 512] so the inner dimensions line up:
# [4, 784] @ [784, 512] -> [4, 512]
# Note: .t() only works on 2-D tensors; use transpose() for higher dimensions.
(x @ w.t()).shape

torch.Size([4, 512])

# 对于>2d的矩阵的运算

In [33]:
# Two batches of matrices: a is [4, 3, 28, 64], b is [4, 3, 64, 32].
a = torch.rand(4, 3, 28, 64)
b = torch.rand(4, 3, 64, 32)

# torch.mm only accepts 2-D matrices, so this call is expected to fail.
# The failure is the point of the cell: catch it and print it so that
# Restart & Run All keeps going instead of stopping here.
try:
    torch.mm(a, b)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: self must be a matrix

In [34]:
torch.matmul(a, b).shape  # only the last two dimensions take part in the matrix product
# In effect the leading dimensions are batch dimensions: many matrix pairs are multiplied in parallel.

torch.Size([4, 3, 28, 32])

In [35]:
# b's size-1 second dimension is broadcast before the batched product
# (assumes a is still the [4, 3, 28, 64] tensor from the earlier cell).
b = torch.rand(4, 1, 64, 32)
torch.matmul(a, b).shape

torch.Size([4, 3, 28, 32])

In [36]:
# Batch dimensions are aligned from the right: b's [4] is treated as [1, 4],
# which is compared with the [4, 3] batch dimensions of a (assumed to be the
# [4, 3, 28, 64] tensor from the earlier cell). 4 and 3 differ and neither
# is 1, so the shapes cannot be broadcast.
b = torch.rand(4, 64, 32)

# The failure is the point of the cell: catch it and print it so that
# Restart & Run All keeps going instead of stopping here.
try:
    torch.matmul(a, b)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: The size of tensor a (3) must match the size of tensor b (4) at non-singleton dimension 1

# 次方运算(power)

In [38]:
# 2x2 tensor filled with 3; the printed result has no decimal points, i.e. an integer tensor.
a = torch.full([2, 2], 3)
a.pow(2) # elementwise square

tensor([[9, 9],
        [9, 9]])

In [39]:
# Operator form of pow; prints the same result as a.pow(2).
a**2

tensor([[9, 9],
        [9, 9]])

In [40]:
aa = a**2
aa.sqrt() # elementwise square root

tensor([[3., 3.],
        [3., 3.]])

In [41]:
aa.rsqrt() # reciprocal of the square root

tensor([[0.3333, 0.3333],
        [0.3333, 0.3333]])

In [42]:
# A fractional exponent works too: ** 0.5 prints the same values as sqrt().
aa**(0.5)

tensor([[3., 3.],
        [3., 3.]])

# 指数与对数运算（exp / log）

In [43]:
# 2x2 tensor whose entries are all e ** 1.
a = torch.ones(2, 2).exp()
a

tensor([[2.7183, 2.7183],
        [2.7183, 2.7183]])

In [44]:
torch.log(a) # natural logarithm: ln(e) = 1

tensor([[1., 1.],
        [1., 1.]])

# 近似值

In [45]:
a = torch.tensor(3.14)
a.floor(), a.ceil(), a.trunc(), a.frac()
# floor: round down
# ceil:  round up
# trunc: keep the integer part
# frac:  keep the fractional part

(tensor(3.), tensor(4.), tensor(3.), tensor(0.1400))

In [46]:
a = torch.tensor(3.499)
a.round() # round to the nearest integer (3.499 -> 3)

tensor(3.)

In [47]:
a = torch.tensor(3.5)
# 3.5 rounds to 4 here.
# NOTE(review): the PyTorch docs describe torch.round as round-half-to-even,
# so ties do not always round up (e.g. 2.5 would give 2) — confirm for the installed version.
a.round()

tensor(4.)

# clamp（裁剪）

多用于梯度裁剪

梯度弥散：梯度非常小，接近于0

梯度爆炸：梯度非常大（例如达到 100 以上）

In [48]:
# Stand-in gradient values: random numbers scaled by 15.
grad = torch.rand(2, 3) * 15
grad.max() # largest element

tensor(14.5829)

In [49]:
grad.median() # median; with 6 elements the value printed is the lower of the two middle values

tensor(2.0717)

In [50]:
grad.clamp(min=10) # every value below 10 is raised to 10

tensor([[10.0000, 10.0000, 14.5829],
        [10.0000, 10.0000, 10.0000]])

In [51]:
# Displaying grad again: the values are unchanged, so clamp returned a new tensor.
grad

tensor([[ 0.3026,  0.8506, 14.5829],
        [ 9.5310,  2.0717,  7.2538]])

In [52]:
grad.clamp(min=0, max=10) # lower bound 0, upper bound 10

tensor([[ 0.3026,  0.8506, 10.0000],
        [ 9.5310,  2.0717,  7.2538]])