refer to: 
- [PyTorch API](https://pytorch.org/docs/stable/torch.html)
- [《动手学深度学习》第二版](https://zh.d2l.ai/)

todo: 
- [线性代数](https://d2l.ai/chapter_appendix-mathematics-for-deep-learning/geometry-linear-algebraic-ops.html)
- [随机变量](https://d2l.ai/chapter_appendix-mathematics-for-deep-learning/random-variables.html)

In [3]:
import torch
import numpy

# 数学运算

## torch.add 加法
[torch.add(input, other, \*, alpha=1, out=None)](https://pytorch.org/docs/stable/generated/torch.add.html)  
$out_i = input_i + alpha \times other_i$

In [66]:
tensorX = torch.arange(1, 7).reshape(2, 3)
tensorY = torch.ones_like(tensorX)
print(tensorX)
print(tensorY)

tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1, 1, 1],
        [1, 1, 1]])


In [67]:
torch.add(tensorX, tensorY, alpha=2)

tensor([[3, 4, 5],
        [6, 7, 8]])

### X数值不变, 内存不变, 开辟新的内存地址存储结果

In [None]:
"""
tensorZ = tensorX + tensorY
tensorZ = torch.add(tensorX, tensorY)
tensorZ = tensorX.add(tensorY)
"""

In [68]:
tensorX = torch.arange(1, 7).reshape(2, 3)
tensorY = torch.ones_like(tensorX)
print(tensorX)
print(tensorY)

tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1, 1, 1],
        [1, 1, 1]])


In [69]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
tensorZ = tensorX + tensorY
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))
print('\nafter add: Z =\n', tensorZ, "\nid=", id(tensorZ))

before add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026638576

after add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026638576

after add: Z =
 tensor([[2, 3, 4],
        [5, 6, 7]]) 
id= 140415026230992


In [70]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
tensorZ = torch.add(tensorX, tensorY)
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))
print('\nafter add: Z =\n', tensorZ, "\nid=", id(tensorZ))

before add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026638576

after add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026638576

after add: Z =
 tensor([[2, 3, 4],
        [5, 6, 7]]) 
id= 140415026377488


In [71]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
tensorZ = tensorX.add(tensorY)
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))
print('\nafter add: Z =\n', tensorZ, "\nid=", id(tensorZ))

before add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026638576

after add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026638576

after add: Z =
 tensor([[2, 3, 4],
        [5, 6, 7]]) 
id= 140415026616576


### X数值改变, 内存改变(不推荐)
1. 首先，我们不想总是不必要地分配内存。在机器学习中，我们可能有数百兆的参数，并且在一秒内多次更新所有参数。通常情况下，我们希望原地执行这些更新;
2. 如果我们不原地更新，其他引用仍然会指向旧的内存位置，这样我们的某些代码可能会无意中引用旧的参数

In [None]:
"""
tensorX = tensorX + tensorY
"""

In [72]:
tensorX = torch.arange(1, 7).reshape(2, 3)
tensorY = torch.ones_like(tensorX)
print(tensorX)
print(tensorY)

tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1, 1, 1],
        [1, 1, 1]])


In [73]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
tensorX = tensorX + tensorY
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))

before add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026637776

after add: X =
 tensor([[2, 3, 4],
        [5, 6, 7]]) 
id= 140415026230592


### X数值改变, 内存不变, 节约内存开销(推荐)

In [None]:
"""
tensorX += tensorY
tensorX[:] = tensorX + tensorY
tensorX.add_(tensorY)
torch.add(tensorX, tensorY, out=tensorX)
"""

In [74]:
tensorX = torch.arange(1, 7).reshape(2, 3)
tensorY = torch.ones_like(tensorX)
print(tensorX)
print(tensorY)

tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[1, 1, 1],
        [1, 1, 1]])


In [78]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
tensorX += tensorY
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))

before add: X =
 tensor([[4, 5, 6],
        [7, 8, 9]]) 
id= 140415026638736

after add: X =
 tensor([[ 5,  6,  7],
        [ 8,  9, 10]]) 
id= 140415026638736


In [75]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
tensorX[:] = tensorX + tensorY
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))

before add: X =
 tensor([[1, 2, 3],
        [4, 5, 6]]) 
id= 140415026638736

after add: X =
 tensor([[2, 3, 4],
        [5, 6, 7]]) 
id= 140415026638736


In [76]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
torch.add(tensorX, tensorY, out=tensorX)
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))

before add: X =
 tensor([[2, 3, 4],
        [5, 6, 7]]) 
id= 140415026638736

after add: X =
 tensor([[3, 4, 5],
        [6, 7, 8]]) 
id= 140415026638736


In [77]:
print("before add: X =\n", tensorX, "\nid=", id(tensorX))
tensorX.add_(tensorY)
print("\nafter add: X =\n", tensorX, "\nid=", id(tensorX))

before add: X =
 tensor([[3, 4, 5],
        [6, 7, 8]]) 
id= 140415026638736

after add: X =
 tensor([[4, 5, 6],
        [7, 8, 9]]) 
id= 140415026638736


### 广播机制(broadcasting mechanism)
todo: https://pytorch.org/docs/stable/notes/broadcasting.html#broadcasting-semantics   

1. 通过适当复制元素来扩展一个或两个数组，以便在转换之后，两个张量具有相同的形状;
2. 对生成的数组执行按元素操作。

In [83]:
# Broadcasting demo: a (3, 1) column plus a (3,) row expands to a (3, 3) grid.
# Build each operand once so the printed operands are the ones actually added.
column = torch.arange(1, 4).reshape(3, 1)
row = torch.arange(1, 4)
print("X:", column)
print("Y:", row)
print("X + Y =", column + row)

X: tensor([[1],
        [2],
        [3]])
Y: tensor([1, 2, 3])
X + Y = tensor([[2, 3, 4],
        [3, 4, 5],
        [4, 5, 6]])


In [84]:
torch.equal(torch.tensor([[1], [2], [3]]) + torch.tensor([1, 2, 3]),
            torch.tensor([1, 2, 3]) + torch.tensor([[1], [2], [3]]))

True

## 乘法

### 按元素乘法
i.e.,  element-wise multiplication/Hadamard积/Hadamard product  
$\begin{split}\mathbf{A} \odot \mathbf{B} =
\begin{bmatrix}
    a_{11}  b_{11} & a_{12}  b_{12} & \dots  & a_{1n}  b_{1n} \\
    a_{21}  b_{21} & a_{22}  b_{22} & \dots  & a_{2n}  b_{2n} \\
    \vdots & \vdots & \ddots & \vdots \\
    a_{m1}  b_{m1} & a_{m2}  b_{m2} & \dots  & a_{mn}  b_{mn}
\end{bmatrix}.\end{split}$
#### [torch.mul(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.mul.html) $\iff$ [torch.multiply(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.multiply.html)

1. other is int

In [30]:
tensor_1 = torch.arange(1, 7).reshape(2, 3)
print(tensor_1)

tensor([[1, 2, 3],
        [4, 5, 6]])


In [None]:
"""
tensor_1 * 3
tensor_1.mul(3)
torch.mul(tensor_1, 3)
torch.multiply(tensor_1, 3)
"""

In [32]:
tensor_1 * 3

tensor([[ 3,  6,  9],
        [12, 15, 18]])

In [33]:
tensor_1.mul(3)

tensor([[ 3,  6,  9],
        [12, 15, 18]])

In [34]:
torch.mul(tensor_1, 3)

tensor([[ 3,  6,  9],
        [12, 15, 18]])

In [35]:
torch.multiply(tensor_1, 3)

tensor([[ 3,  6,  9],
        [12, 15, 18]])

2. other is Tensor

In [None]:
"""
tensor_1 * tensor_2
tensor_1.mul(tensor_2)
torch.mul(tensor_1, tensor_2)
torch.multiply(tensor_1, tensor_2)
torch.mul(tensor_1, tensor_2, out=torch.zeros((tensor_1.shape[0], tensor_1.shape[1]), dtype=tensor_1.dtype))
"""

In [254]:
tensor_1 = torch.arange(1, 7).reshape(2, 3)
tensor_2 = torch.arange(6, 0, step=-1).reshape(2, 3)
print(tensor_1)
print(tensor_2)

tensor([[1, 2, 3],
        [4, 5, 6]])
tensor([[6, 5, 4],
        [3, 2, 1]])


In [255]:
tensor_1 * tensor_2

tensor([[ 6, 10, 12],
        [12, 10,  6]])

In [256]:
tensor_1.mul(tensor_2)

tensor([[ 6, 10, 12],
        [12, 10,  6]])

In [257]:
torch.mul(tensor_1, tensor_2)

tensor([[ 6, 10, 12],
        [12, 10,  6]])

In [258]:
torch.multiply(tensor_1, tensor_2)

tensor([[ 6, 10, 12],
        [12, 10,  6]])

In [259]:
torch.mul(tensor_1, tensor_2, out=torch.zeros((tensor_1.shape[0], tensor_1.shape[1]), dtype=tensor_1.dtype))

tensor([[ 6, 10, 12],
        [12, 10,  6]])

#### torch.addcmul
[torch.addcmul(input, tensor1, tensor2, *, value=1, out=None)](https://pytorch.org/docs/stable/generated/torch.addcmul.html)
$$ out_i = input_i + value \times tensor1_i * tensor2_i $$

In [261]:
torch.addcmul(torch.ones_like(tensor_1), tensor_1, tensor_2, value=2)

tensor([[13, 21, 25],
        [25, 21, 13]])

In [262]:
torch.ones_like(tensor_1).addcmul(tensor_1, tensor_2, value=2)

tensor([[13, 21, 25],
        [25, 21, 13]])

#### broadcasting

In [93]:
# Broadcasting demo: a (3, 1) column times a (3,) row expands to a (3, 3) grid.
# Build each operand once so the printed operands are the ones actually multiplied.
column = torch.arange(1, 4).reshape(3, 1)
row = torch.arange(1, 4)
print("X:", column)
print("Y:", row)
print("X * Y =", column * row)

X: tensor([[1],
        [2],
        [3]])
Y: tensor([1, 2, 3])
X * Y = tensor([[1, 2, 3],
        [2, 4, 6],
        [3, 6, 9]])


In [94]:
torch.equal(torch.tensor([[1], [2], [3]]) * torch.tensor([1, 2, 3]),
            torch.tensor([1, 2, 3]) * torch.tensor([[1], [2], [3]]))

True

### 向量-向量积/vector(n) × vector(n)
i.e., 点积/Dot Product/内积 = 按元素(elementwise)乘法之和  
$\mathbf{x}^\top \mathbf{y} = \sum_{i=1}^{d} x_i y_i, \mathbf{x},\mathbf{y}\in\mathbb{R}^d$  
#### [torch.dot(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.dot.html)

In [None]:
"""
tensor_1 @ tensor_2
tensor_1.matmul(tensor_2)
torch.matmul(tensor_1, tensor_2)
tensor_1.dot(tensor_2)
torch.dot(tensor_1, tensor_2)
torch.sum(tensor_1 * tensor_2)
torch.sum(torch.mul(tensor_1, tensor_2))
"""

In [107]:
tensor_1 = torch.tensor([1, 2, 3])
tensor_2 = torch.tensor([3, 2, 1])
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([3])
size of tensor_2 = torch.Size([3])
size of tensor_1 @ tensor_2 = torch.Size([])


In [102]:
tensor_1 @ tensor_2

tensor(10)

In [103]:
tensor_1.matmul(tensor_2)

tensor(10)

In [104]:
torch.matmul(tensor_1, tensor_2)

tensor(10)

In [68]:
tensor_1.dot(tensor_2)

tensor(10)

In [67]:
torch.dot(tensor_1, tensor_2)

tensor(10)

In [60]:
torch.sum(tensor_1 * tensor_2)

tensor(10)

In [61]:
torch.sum(torch.mul(tensor_1, tensor_2))

tensor(10)

### 矩阵-向量积/matrix(m * n) × vector(n)
i.e., matrix-vector product  
$\begin{split}\mathbf{A}\mathbf{x}
= \begin{bmatrix}
\mathbf{a}^\top_{1} \\
\mathbf{a}^\top_{2} \\
\vdots \\
\mathbf{a}^\top_m \\
\end{bmatrix}\mathbf{x}
= \begin{bmatrix}
 \mathbf{a}^\top_{1} \mathbf{x}  \\
 \mathbf{a}^\top_{2} \mathbf{x} \\
\vdots\\
 \mathbf{a}^\top_{m} \mathbf{x}\\
\end{bmatrix}\end{split}, \mathbf{A} \in \mathbb{R}^{m \times n}, \mathbf{x} \in \mathbb{R}^n$

#### [torch.mv(input, vec, *, out=None)](https://pytorch.org/docs/stable/generated/torch.mv.html)
> **note**: This function does not broadcast.

In [None]:
"""
tensor_1 @ tensor_2
tensor_1.matmul(tensor_2)
torch.matmul(tensor_1, tensor_2)
torch.mv(tensor_1, tensor_2)
"""

In [236]:
tensor_1 = torch.arange(20., dtype=torch.float).reshape(5,4)
tensor_2 = torch.tensor([3., 1., 2., 0.])
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([5, 4])
size of tensor_2 = torch.Size([4])
size of tensor_1 @ tensor_2 = torch.Size([5])


In [237]:
tensor_1 @ tensor_2

tensor([  5.,  29.,  53.,  77., 101.])

In [238]:
tensor_1.matmul(tensor_2)

tensor([  5.,  29.,  53.,  77., 101.])

In [239]:
torch.matmul(tensor_1, tensor_2)

tensor([  5.,  29.,  53.,  77., 101.])

In [240]:
torch.mv(tensor_1, tensor_2)

tensor([  5.,  29.,  53.,  77., 101.])

In [241]:
# Cross-check: each entry of the matrix-vector product is the dot product of
# one row of tensor_1 with tensor_2.
for row in tensor_1:
    print("验证:", row @ tensor_2)

验证: tensor(5.)
验证: tensor(29.)
验证: tensor(53.)
验证: tensor(77.)
验证: tensor(101.)


#### [torch.addmv(input, mat, vec, *, beta=1, alpha=1, out=None)](https://pytorch.org/docs/stable/generated/torch.addmv.html)
$out=\beta \cdot input+\alpha (mat@vec)$

In [243]:
torch.ones(tensor_1.size(0))

tensor([1., 1., 1., 1., 1.])

In [244]:
torch.addmv(torch.ones(tensor_1.size(0)), tensor_1, tensor_2, beta=2, alpha=1)

tensor([  7.,  31.,  55.,  79., 103.])

### 矩阵-矩阵乘法/[torch.matmul(input, other, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.matmul.html)
i.e., matrix-matrix multiplication

#### matrix(m * n) × matrix(n * p)
##### [torch.mm(input, mat2, *, out=None)](https://pytorch.org/docs/stable/generated/torch.mm.html)

In [None]:
"""
tensor_1 @ tensor_2
tensor_1.matmul(tensor_2)
torch.matmul(tensor_1, tensor_2)
torch.mm(tensor_1, tensor_2)
"""

In [114]:
tensor_1 = torch.arange(1., 7.).reshape(2, 3)
tensor_2 = tensor_1.T
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([2, 3])
size of tensor_2 = torch.Size([3, 2])
size of tensor_1 @ tensor_2 = torch.Size([2, 2])


In [115]:
tensor_1 @ tensor_2

tensor([[14., 32.],
        [32., 77.]])

In [116]:
tensor_1.matmul(tensor_1.T)

tensor([[14., 32.],
        [32., 77.]])

In [117]:
torch.matmul(tensor_1, tensor_1.T)

tensor([[14., 32.],
        [32., 77.]])

In [118]:
torch.mm(tensor_1, tensor_1.T)

tensor([[14., 32.],
        [32., 77.]])

##### [torch.addmm(input, mat1, mat2, *, beta=1, alpha=1, out=None)](https://pytorch.org/docs/stable/generated/torch.addmm.html)  
$out=\beta \cdot input + \alpha (mat1_i @ mat2_i)$

In [93]:
torch.addmm(-5 * torch.ones(tensor_1.size(0), tensor_2.size(1)), mat1=tensor_1, mat2=tensor_2, beta=2, alpha=1)

tensor([[ 4., 22.],
        [22., 67.]])

#### batched matrix(b * m * n) × broadcasted vector(n)

In [131]:
tensor_1 = torch.arange(1, 25).reshape(2, 3, 4)
tensor_2 = torch.arange(1, 5)
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([2, 3, 4])
size of tensor_2 = torch.Size([4])
size of tensor_1 @ tensor_2 = torch.Size([2, 3])


In [132]:
tensor_1 @ tensor_2

tensor([[ 30,  70, 110],
        [150, 190, 230]])

In [133]:
torch.matmul(tensor_1, tensor_2)

tensor([[ 30,  70, 110],
        [150, 190, 230]])

In [134]:
# Cross-check: for every matrix in the batch, dot each of its rows with tensor_2.
for matrix in tensor_1:
    print("验证:", [row @ tensor_2 for row in matrix])

验证: [tensor(30), tensor(70), tensor(110)]
验证: [tensor(150), tensor(190), tensor(230)]


#### batched matrix(b * m * n) × broadcasted matrix(n * p)

In [139]:
tensor_1 = torch.arange(1, 25).reshape(2, 3, 4)
tensor_2 = torch.arange(1, 21).reshape(4, 5)
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([2, 3, 4])
size of tensor_2 = torch.Size([4, 5])
size of tensor_1 @ tensor_2 = torch.Size([2, 3, 5])


In [140]:
tensor_1 @ tensor_2

tensor([[[ 110,  120,  130,  140,  150],
         [ 246,  272,  298,  324,  350],
         [ 382,  424,  466,  508,  550]],

        [[ 518,  576,  634,  692,  750],
         [ 654,  728,  802,  876,  950],
         [ 790,  880,  970, 1060, 1150]]])

In [141]:
torch.matmul(tensor_1, tensor_2)

tensor([[[ 110,  120,  130,  140,  150],
         [ 246,  272,  298,  324,  350],
         [ 382,  424,  466,  508,  550]],

        [[ 518,  576,  634,  692,  750],
         [ 654,  728,  802,  876,  950],
         [ 790,  880,  970, 1060, 1150]]])

In [142]:
# Cross-check: the same tensor_2 is multiplied with every matrix in the batch.
for matrix in tensor_1:
    print("验证:", matrix @ tensor_2)

验证: tensor([[110, 120, 130, 140, 150],
        [246, 272, 298, 324, 350],
        [382, 424, 466, 508, 550]])
验证: tensor([[ 518,  576,  634,  692,  750],
        [ 654,  728,  802,  876,  950],
        [ 790,  880,  970, 1060, 1150]])


#### batched matrix(b * m * n) × batched matrix(b * n * p)
##### torch.bmm
[torch.bmm(input, mat2, *, out=None)](https://pytorch.org/docs/stable/generated/torch.bmm.html)  

In [None]:
"""
tensor_1 @ tensor_2
tensor_1.matmul(tensor_2)
torch.matmul(tensor_1, tensor_2)
tensor_1.bmm(tensor_2)
torch.bmm(tensor_1, tensor_2)
"""

In [171]:
tensor_1 = torch.arange(1., 25.).reshape(2, 3, 4)
tensor_2 = torch.arange(1., 41.).reshape(2, 4, 5)
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([2, 3, 4])
size of tensor_2 = torch.Size([2, 4, 5])
size of tensor_1 @ tensor_2 = torch.Size([2, 3, 5])


In [172]:
tensor_1 @ tensor_2

tensor([[[ 110.,  120.,  130.,  140.,  150.],
         [ 246.,  272.,  298.,  324.,  350.],
         [ 382.,  424.,  466.,  508.,  550.]],

        [[1678., 1736., 1794., 1852., 1910.],
         [2134., 2208., 2282., 2356., 2430.],
         [2590., 2680., 2770., 2860., 2950.]]])

In [173]:
tensor_1.matmul(tensor_2)

tensor([[[ 110.,  120.,  130.,  140.,  150.],
         [ 246.,  272.,  298.,  324.,  350.],
         [ 382.,  424.,  466.,  508.,  550.]],

        [[1678., 1736., 1794., 1852., 1910.],
         [2134., 2208., 2282., 2356., 2430.],
         [2590., 2680., 2770., 2860., 2950.]]])

In [174]:
torch.matmul(tensor_1, tensor_2)

tensor([[[ 110.,  120.,  130.,  140.,  150.],
         [ 246.,  272.,  298.,  324.,  350.],
         [ 382.,  424.,  466.,  508.,  550.]],

        [[1678., 1736., 1794., 1852., 1910.],
         [2134., 2208., 2282., 2356., 2430.],
         [2590., 2680., 2770., 2860., 2950.]]])

In [175]:
tensor_1.bmm(tensor_2)

tensor([[[ 110.,  120.,  130.,  140.,  150.],
         [ 246.,  272.,  298.,  324.,  350.],
         [ 382.,  424.,  466.,  508.,  550.]],

        [[1678., 1736., 1794., 1852., 1910.],
         [2134., 2208., 2282., 2356., 2430.],
         [2590., 2680., 2770., 2860., 2950.]]])

In [176]:
torch.bmm(tensor_1, tensor_2)

tensor([[[ 110.,  120.,  130.,  140.,  150.],
         [ 246.,  272.,  298.,  324.,  350.],
         [ 382.,  424.,  466.,  508.,  550.]],

        [[1678., 1736., 1794., 1852., 1910.],
         [2134., 2208., 2282., 2356., 2430.],
         [2590., 2680., 2770., 2860., 2950.]]])

In [177]:
# Cross-check: bmm multiplies the matrices of the two batches pairwise.
for lhs, rhs in zip(tensor_1, tensor_2):
    print(lhs @ rhs)

tensor([[110., 120., 130., 140., 150.],
        [246., 272., 298., 324., 350.],
        [382., 424., 466., 508., 550.]])
tensor([[1678., 1736., 1794., 1852., 1910.],
        [2134., 2208., 2282., 2356., 2430.],
        [2590., 2680., 2770., 2860., 2950.]])


##### torch.baddbmm
[torch.baddbmm(input, batch1, batch2, *, beta=1, alpha=1, out=None)](https://pytorch.org/docs/stable/generated/torch.baddbmm.html)  
$out_i = \beta \cdot input_i + \alpha(batch1_i @ batch2_i)$

In [188]:
print("size of output =", torch.ones(tensor_1.size(0), tensor_1.size(1), tensor_2.size(2)).size())

size of output = torch.Size([2, 3, 5])


In [189]:
torch.baddbmm(-50 * torch.ones(tensor_1.size(0), tensor_1.size(1), tensor_2.size(2)), tensor_1, tensor_2, beta=2, alpha=1)

tensor([[[  10.,   20.,   30.,   40.,   50.],
         [ 146.,  172.,  198.,  224.,  250.],
         [ 282.,  324.,  366.,  408.,  450.]],

        [[1578., 1636., 1694., 1752., 1810.],
         [2034., 2108., 2182., 2256., 2330.],
         [2490., 2580., 2670., 2760., 2850.]]])

##### torch.addbmm
[torch.addbmm(input, batch1, batch2, *, beta=1, alpha=1, out=None)](https://pytorch.org/docs/stable/generated/torch.addbmm.html)  
$out=\beta \cdot input + \alpha(\sum_{i=0}^{b-1} batch1_i @ batch2_i)$

In [204]:
print("size of output =", torch.ones(tensor_1.size(1), tensor_2.size(2)).size())

size of output = torch.Size([3, 5])


In [205]:
torch.addbmm(-100 * torch.ones(tensor_1.size(1), tensor_2.size(2)), tensor_1, tensor_2, beta=2, alpha=1)

tensor([[1588., 1656., 1724., 1792., 1860.],
        [2180., 2280., 2380., 2480., 2580.],
        [2772., 2904., 3036., 3168., 3300.]])

In [219]:
# Cross-check addbmm against baddbmm: accumulating the per-batch baddbmm results
# must equal addbmm when addbmm's input is scaled by the batch size.
batches = tensor_1.size(0)
rows, cols = tensor_1.size(1), tensor_2.size(2)
tensor_4 = torch.baddbmm(
    -50 * torch.ones(batches, rows, cols), tensor_1, tensor_2, beta=2, alpha=1
)
tensor_5 = torch.addbmm(
    -50 * tensor_4.size(0) * torch.ones(rows, cols), tensor_1, tensor_2, beta=2, alpha=1
)
tensor_3 = torch.zeros(rows, cols)
for per_batch in tensor_4:
    tensor_3 += per_batch
print("验证:", torch.equal(tensor_3, tensor_5))

验证: True


#### broadcasting
##### (j×1×n×n) × (k×n×n) = (j×k×n×n)
if input is a $(j×1×n×n)$ tensor and other is a $(k×n×n)$ tensor, out will be a $(j×k×n×n)$ tensor.

In [220]:
tensor_1 = torch.arange(1, 3 * 4 * 4 + 1).reshape(3, 1, 4, 4)
tensor_2 = torch.arange(1, 2 * 4 * 4 + 1).reshape(2, 4, 4)
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([3, 1, 4, 4])
size of tensor_2 = torch.Size([2, 4, 4])
size of tensor_1 @ tensor_2 = torch.Size([3, 2, 4, 4])


In [221]:
torch.matmul(tensor_1, tensor_2)

tensor([[[[  90,  100,  110,  120],
          [ 202,  228,  254,  280],
          [ 314,  356,  398,  440],
          [ 426,  484,  542,  600]],

         [[ 250,  260,  270,  280],
          [ 618,  644,  670,  696],
          [ 986, 1028, 1070, 1112],
          [1354, 1412, 1470, 1528]]],


        [[[ 538,  612,  686,  760],
          [ 650,  740,  830,  920],
          [ 762,  868,  974, 1080],
          [ 874,  996, 1118, 1240]],

         [[1722, 1796, 1870, 1944],
          [2090, 2180, 2270, 2360],
          [2458, 2564, 2670, 2776],
          [2826, 2948, 3070, 3192]]],


        [[[ 986, 1124, 1262, 1400],
          [1098, 1252, 1406, 1560],
          [1210, 1380, 1550, 1720],
          [1322, 1508, 1694, 1880]],

         [[3194, 3332, 3470, 3608],
          [3562, 3716, 3870, 4024],
          [3930, 4100, 4270, 4440],
          [4298, 4484, 4670, 4856]]]])

In [222]:
# Cross-check: every (singleton-batch) matrix of tensor_1 is paired with every
# matrix of tensor_2, in the same order as the broadcast result.
for lhs in tensor_1:
    for rhs in tensor_2:
        print("验证:", lhs[0] @ rhs)

验证: tensor([[ 90, 100, 110, 120],
        [202, 228, 254, 280],
        [314, 356, 398, 440],
        [426, 484, 542, 600]])
验证: tensor([[ 250,  260,  270,  280],
        [ 618,  644,  670,  696],
        [ 986, 1028, 1070, 1112],
        [1354, 1412, 1470, 1528]])
验证: tensor([[ 538,  612,  686,  760],
        [ 650,  740,  830,  920],
        [ 762,  868,  974, 1080],
        [ 874,  996, 1118, 1240]])
验证: tensor([[1722, 1796, 1870, 1944],
        [2090, 2180, 2270, 2360],
        [2458, 2564, 2670, 2776],
        [2826, 2948, 3070, 3192]])
验证: tensor([[ 986, 1124, 1262, 1400],
        [1098, 1252, 1406, 1560],
        [1210, 1380, 1550, 1720],
        [1322, 1508, 1694, 1880]])
验证: tensor([[3194, 3332, 3470, 3608],
        [3562, 3716, 3870, 4024],
        [3930, 4100, 4270, 4440],
        [4298, 4484, 4670, 4856]])


##### (j×1×n×m) × (k×m×p) = (j×k×n×p)
if input is a $(j×1×n×m)$ tensor and other is a $(k×m×p)$ tensor, out will be a $(j×k×n×p)$ tensor.

In [224]:
tensor_1 = torch.arange(1, 3 * 4 * 5 + 1).reshape(3, 1, 4, 5)
tensor_2 = torch.arange(1, 2 * 5 * 6 + 1).reshape(2, 5, 6)
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([3, 1, 4, 5])
size of tensor_2 = torch.Size([2, 5, 6])
size of tensor_1 @ tensor_2 = torch.Size([3, 2, 4, 6])


In [225]:
torch.matmul(tensor_1, tensor_2)

tensor([[[[  255,   270,   285,   300,   315,   330],
          [  580,   620,   660,   700,   740,   780],
          [  905,   970,  1035,  1100,  1165,  1230],
          [ 1230,  1320,  1410,  1500,  1590,  1680]],

         [[  705,   720,   735,   750,   765,   780],
          [ 1780,  1820,  1860,  1900,  1940,  1980],
          [ 2855,  2920,  2985,  3050,  3115,  3180],
          [ 3930,  4020,  4110,  4200,  4290,  4380]]],


        [[[ 1555,  1670,  1785,  1900,  2015,  2130],
          [ 1880,  2020,  2160,  2300,  2440,  2580],
          [ 2205,  2370,  2535,  2700,  2865,  3030],
          [ 2530,  2720,  2910,  3100,  3290,  3480]],

         [[ 5005,  5120,  5235,  5350,  5465,  5580],
          [ 6080,  6220,  6360,  6500,  6640,  6780],
          [ 7155,  7320,  7485,  7650,  7815,  7980],
          [ 8230,  8420,  8610,  8800,  8990,  9180]]],


        [[[ 2855,  3070,  3285,  3500,  3715,  3930],
          [ 3180,  3420,  3660,  3900,  4140,  4380],
          [ 3505

In [226]:
# Cross-check: every (singleton-batch) matrix of tensor_1 is paired with every
# matrix of tensor_2, in the same order as the broadcast result.
for lhs in tensor_1:
    for rhs in tensor_2:
        print("验证:", lhs[0] @ rhs)

验证: tensor([[ 255,  270,  285,  300,  315,  330],
        [ 580,  620,  660,  700,  740,  780],
        [ 905,  970, 1035, 1100, 1165, 1230],
        [1230, 1320, 1410, 1500, 1590, 1680]])
验证: tensor([[ 705,  720,  735,  750,  765,  780],
        [1780, 1820, 1860, 1900, 1940, 1980],
        [2855, 2920, 2985, 3050, 3115, 3180],
        [3930, 4020, 4110, 4200, 4290, 4380]])
验证: tensor([[1555, 1670, 1785, 1900, 2015, 2130],
        [1880, 2020, 2160, 2300, 2440, 2580],
        [2205, 2370, 2535, 2700, 2865, 3030],
        [2530, 2720, 2910, 3100, 3290, 3480]])
验证: tensor([[5005, 5120, 5235, 5350, 5465, 5580],
        [6080, 6220, 6360, 6500, 6640, 6780],
        [7155, 7320, 7485, 7650, 7815, 7980],
        [8230, 8420, 8610, 8800, 8990, 9180]])
验证: tensor([[2855, 3070, 3285, 3500, 3715, 3930],
        [3180, 3420, 3660, 3900, 4140, 4380],
        [3505, 3770, 4035, 4300, 4565, 4830],
        [3830, 4120, 4410, 4700, 4990, 5280]])
验证: tensor([[ 9305,  9520,  9735,  9950, 10165, 10380],

##### (j×n×m) × (k×m×p): 批量维度 j≠k 时无法直接广播, 需先 unsqueeze 成 (j×1×n×m) 才能得到 (j×k×n×p)

In [227]:
tensor_1 = torch.arange(1, 3 * 4 * 5 + 1).reshape(3, 4, 5)
tensor_2 = torch.arange(1, 2 * 5 * 3 + 1).reshape(2, 5, 3)
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", torch.matmul(tensor_1, tensor_2).size())

size of tensor_1 = torch.Size([3, 4, 5])
size of tensor_2 = torch.Size([2, 5, 3])


RuntimeError: The size of tensor a (3) must match the size of tensor b (2) at non-singleton dimension 0

In [228]:
# Insert a singleton dimension at position 1 so (3, 4, 5) becomes (3, 1, 4, 5);
# the batch dimensions (3, 1) and (2,) can then broadcast against each other.
tensor_1 = torch.arange(1, 3 * 4 * 5 + 1).reshape(3, 4, 5).unsqueeze(1)
tensor_2 = torch.arange(1, 2 * 5 * 3 + 1).reshape(2, 5, 3)
product = tensor_1 @ tensor_2
print("size of tensor_1 =", tensor_1.size())
print("size of tensor_2 =", tensor_2.size())
print("size of tensor_1 @ tensor_2 =", product.size())

size of tensor_1 = torch.Size([3, 1, 4, 5])
size of tensor_2 = torch.Size([2, 5, 3])
size of tensor_1 @ tensor_2 = torch.Size([3, 2, 4, 3])


## 除法

### torch.div 除法
torch.div(input, other, \*, rounding_mode=None, out=None)  
$\iff$ torch.divide(input, other, *, rounding_mode=None, out=None)
1. rounding_mode=None: 不做舍入, 即真除法(true division) $\iff$ torch.true_divide(dividend, divisor, *, out) 
2. rounding_mode="trunc": 向0取整
3. rounding_mode="floor": 向下取整

* other is int, rounding_mode=None

In [None]:
"""
torch.div(torch.tensor([-4., 1., 4.]), 3)
torch.divide(torch.tensor([-4., 1., 4.]), 3)
torch.true_divide(torch.tensor([-4., 1., 4.]), 3)
torch.tensor([-4., 1., 4.]).div(3)
torch.tensor([-4., 1., 4.]).divide(3)
torch.tensor([-4., 1., 4.]).true_divide(3)
"""

In [312]:
torch.div(torch.tensor([-4., 1., 4.]), 3)

tensor([-1.3333,  0.3333,  1.3333])

In [313]:
torch.divide(torch.tensor([-4., 1., 4.]), 3)

tensor([-1.3333,  0.3333,  1.3333])

In [314]:
torch.true_divide(torch.tensor([-4., 1., 4.]), 3)

tensor([-1.3333,  0.3333,  1.3333])

In [315]:
torch.tensor([-4., 1., 4.]).div(3)

tensor([-1.3333,  0.3333,  1.3333])

In [316]:
torch.tensor([-4., 1., 4.]).divide(3)

tensor([-1.3333,  0.3333,  1.3333])

In [317]:
torch.tensor([-4., 1., 4.]).true_divide(3)

tensor([-1.3333,  0.3333,  1.3333])

* other is int, rounding_mode="trunc"

In [306]:
torch.div(torch.tensor([-4., 1., 4.]), 3, rounding_mode="trunc")

tensor([-1.,  0.,  1.])

* other is int, rounding_mode="floor"

In [307]:
torch.div(torch.tensor([-4., 1., 4.]), 3, rounding_mode="floor")

tensor([-2.,  0.,  1.])

* other is Tensor

In [261]:
tensor_1 = torch.arange(-6, 6).reshape(4, 3)
tensor_2 = torch.ones(3, dtype=torch.int) * 3

In [265]:
torch.div(tensor_1, tensor_2)

tensor([[-2.0000, -1.6667, -1.3333],
        [-1.0000, -0.6667, -0.3333],
        [ 0.0000,  0.3333,  0.6667],
        [ 1.0000,  1.3333,  1.6667]])

In [267]:
# Cross-check: dividing the whole matrix by tensor_2 is the same as dividing
# each row by tensor_2.
for row in tensor_1:
    print(row.div(tensor_2))

tensor([-2.0000, -1.6667, -1.3333])
tensor([-1.0000, -0.6667, -0.3333])
tensor([0.0000, 0.3333, 0.6667])
tensor([1.0000, 1.3333, 1.6667])


### torch.addcdiv
torch.addcdiv(input, tensor1, tensor2, *, value=1, out=None)
$$ out_i = input_i + value \times \frac{tensor1_i}{tensor2_i} $$

In [353]:
torch.addcdiv(input=torch.tensor([4.,5.]),
              tensor1=torch.tensor([3.,4.]),
              tensor2=torch.tensor([2.,3.]),
              value=2)

tensor([7.0000, 7.6667])

In [354]:
torch.tensor([4.,5.]).addcdiv(tensor1=torch.tensor([3.,4.]),
                              tensor2=torch.tensor([2.,3.]),
                              value=2)

tensor([7.0000, 7.6667])

## 绝对值/近似

In [280]:
tensor_1 = torch.tensor([-4.2, -0.1, 0.2, 4.6])

### torch.abs 绝对值
torch.abs(input, \*, out=None)  
$\iff$ torch.absolute(input, \*, out=None)

In [None]:
"""
torch.abs(tensor_1)
tensor_1.abs()
torch.absolute(tensor_1)
tensor_1.absolute()
"""

In [281]:
torch.abs(tensor_1)

tensor([4.2000, 0.1000, 0.2000, 4.6000])

In [282]:
tensor_1.abs()

tensor([4.2000, 0.1000, 0.2000, 4.6000])

In [283]:
torch.absolute(tensor_1)

tensor([4.2000, 0.1000, 0.2000, 4.6000])

In [284]:
tensor_1.absolute()

tensor([4.2000, 0.1000, 0.2000, 4.6000])

### torch.round 四舍五入
torch.round(input, \*, decimals=0, out=None)  

In [319]:
torch.round(tensor_1)

tensor([-4., -0.,  0.,  5.])

>**notice**: 与两个整数等距的值, 四舍五入到最近的偶数值<br>
Values equidistant from two integers are rounded towards the nearest even value 

In [323]:
tensor_2 = torch.arange(-2.5, 3.0, step=1)
print(tensor_2)

tensor([-2.5000, -1.5000, -0.5000,  0.5000,  1.5000,  2.5000])


In [324]:
torch.round(tensor_2)

tensor([-2., -2., -0.,  0.,  2.,  2.])

In [320]:
torch.round(torch.tensor([0.1234567]), decimals=3)

tensor([0.1230])

In [321]:
torch.round(torch.tensor([1200.1234567]), decimals=-3)

tensor([1000.])

### torch.ceil 向上取整
torch.ceil(input, *, out=None)

In [286]:
print(tensor_1)

tensor([-4.2000, -0.1000,  0.2000,  4.6000])


In [285]:
torch.ceil(tensor_1)

tensor([-4., -0.,  1.,  5.])

In [287]:
tensor_1.ceil()

tensor([-4., -0.,  1.,  5.])

### torch.floor 向下取整
torch.floor(input, *, out=None)

In [288]:
torch.floor(tensor_1)

tensor([-5., -1.,  0.,  4.])

In [289]:
tensor_1.floor()

tensor([-5., -1.,  0.,  4.])

### torch.trunc 向0取整
torch.trunc(input, \*, out=None)  
$\iff$ torch.fix(input, \*, out=None)

In [290]:
print(tensor_1)

tensor([-4.2000, -0.1000,  0.2000,  4.6000])


In [None]:
"""
torch.trunc(tensor_1)
tensor_1.trunc()
torch.fix(tensor_1)
tensor_1.fix()
"""

In [291]:
torch.trunc(tensor_1)

tensor([-4., -0.,  0.,  4.])

In [292]:
tensor_1.trunc()

tensor([-4., -0.,  0.,  4.])

In [293]:
torch.fix(tensor_1)

tensor([-4., -0.,  0.,  4.])

In [294]:
tensor_1.fix()

tensor([-4., -0.,  0.,  4.])

## sum

### torch.sum
torch.sum(input, *, dtype=None)  
torch.sum(input, dim, keepdim=False, *, dtype=None)

In [39]:
tensor_1 = torch.arange(1, 2 * 3 * 4 + 1, dtype=torch.float).view(2, 3, 4)
print(tensor_1)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[13., 14., 15., 16.],
         [17., 18., 19., 20.],
         [21., 22., 23., 24.]]])


In [17]:
torch.sum(tensor_1)

tensor(300.)

In [18]:
tensor_1.sum()

tensor(300.)

In [21]:
torch.sum(tensor_1, dim=2)

tensor([[10., 26., 42.],
        [58., 74., 90.]])

In [19]:
tensor_1.sum(axis=2)

tensor([[10., 26., 42.],
        [58., 74., 90.]])

In [23]:
torch.sum(tensor_1, (1, 2)) # 对dim=1和dim=2求和

tensor([ 78., 222.])

In [34]:
tensor_1.sum(axis=(1, 2))

tensor([ 78., 222.])

In [24]:
print("验证:", (1 + 12) * 6, ",", (13 + 24) * 6)

验证: 78 , 222


In [16]:
torch.sum(tensor_1, (2, 0)) # 对dim=0和dim=2求和

tensor([ 68., 100., 132.])

In [25]:
print("验证:", (1 + 2 + 3 + 4) + (13 + 14 + 15 + 16), ",",
      (5 + 6 + 7 + 8) + (17 + 18 + 19 + 20), ",",
      (9 + 10 + 11 + 12) + (21 + 22 + 23 + 24))

验证: 68 , 100 , 132


In [18]:
torch.sum(tensor_1, (1, 2), keepdim=True)

tensor([[[ 78.]],

        [[222.]]])

### torch.nansum
torch.nansum(input, *, dtype=None)  
torch.nansum(input, dim, keepdim=False, *, dtype=None)

In [19]:
tensor_2 = torch.tensor([[1., 2., float('nan'), 4.], [5., float('nan'), 7., 8.]])

In [20]:
torch.nansum(tensor_2)

tensor(27.)

In [21]:
tensor_2.nansum()

tensor(27.)

In [22]:
torch.nansum(tensor_2, dim=(0, 1))

tensor(27.)

In [23]:
torch.sum(tensor_2)

tensor(nan)

In [24]:
torch.nansum(tensor_2, dim=0)

tensor([ 6.,  2.,  7., 12.])

In [25]:
torch.nansum(tensor_2, dim=1)

tensor([ 7., 20.])

### torch.cumsum
torch.cumsum(input, dim, *, dtype=None, out=None)
$$y_i = x_1 + x_2 + x_3 + \cdots + x_i $$

In [40]:
print(tensor_1)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[13., 14., 15., 16.],
         [17., 18., 19., 20.],
         [21., 22., 23., 24.]]])


In [41]:
torch.cumsum(tensor_1, dim=0)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[14., 16., 18., 20.],
         [22., 24., 26., 28.],
         [30., 32., 34., 36.]]])

In [42]:
tensor_1.cumsum(axis=0)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[14., 16., 18., 20.],
         [22., 24., 26., 28.],
         [30., 32., 34., 36.]]])

In [28]:
torch.cumsum(tensor_1, dim=2)

tensor([[[ 1.,  3.,  6., 10.],
         [ 5., 11., 18., 26.],
         [ 9., 19., 30., 42.]],

        [[13., 27., 42., 58.],
         [17., 35., 54., 74.],
         [21., 43., 66., 90.]]])

### torch.logsumexp
torch.logsumexp(input, dim, keepdim=False, *, out=None)
$$logsumexp(x)_i = log\sum_j \exp(x_{ij}) $$ 

In [29]:
torch.logsumexp(tensor_1/100, dim=2)

tensor([[1.4114, 1.4514, 1.4914],
        [1.5314, 1.5714, 1.6114]])

In [30]:
import math
print("上式的[0, 0]元素 =", math.log(math.exp(0.0100) + math.exp(0.0200) + math.exp(0.0300) + math.exp(0.0400)))

上式的[0, 0]元素 = 1.4113568602344964


### torch.logcumsumexp
torch.logcumsumexp(input, dim, *, out=None)
$$logcumsumexp(x)_{ij} = \log\sum_{k=0}^{j} \exp(x_{ik}) $$

In [31]:
torch.logcumsumexp(tensor_1/100, dim=2)

tensor([[[0.0100, 0.7082, 1.1186, 1.4114],
         [0.0500, 0.7482, 1.1586, 1.4514],
         [0.0900, 0.7882, 1.1986, 1.4914]],

        [[0.1300, 0.8282, 1.2386, 1.5314],
         [0.1700, 0.8682, 1.2786, 1.5714],
         [0.2100, 0.9082, 1.3186, 1.6114]]])

In [32]:
# Hand-check of torch.logcumsumexp(tensor_1/100, dim=2) for the first row:
# element [0, 0, k] is log(exp(x_0) + ... + exp(x_k)).
# The label is built from the same index k that selects the terms, so the
# two-term sum is reported as [0, 0, 1] and the three-term sum as [0, 0, 2]
# (the hard-coded labels used to be off by one).
first_row = (0.0100, 0.0200, 0.0300, 0.0400)
for k in (1, 2):
    print(f"上式的[0, 0, {k}]元素 =", math.log(sum(math.exp(v) for v in first_row[:k + 1])))

上式的[0, 0, 1]元素 = 0.7081596805078623
上式的[0, 0, 2]元素 = 1.118645621723669


### torch.trace(input) 迹
**迹**: 二维矩阵对角线上元素的总和

In [33]:
torch.arange(1., 10.).view(3, 3)

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [34]:
torch.trace(torch.arange(1., 10.).view(3, 3))

tensor(15.)

In [35]:
1 + 5 + 9

15

## mean

### torch.mean
torch.mean(input, *, dtype=None)  
torch.mean(input, dim, keepdim=False, *, dtype=None, out=None)

In [36]:
print(tensor_1)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[13., 14., 15., 16.],
         [17., 18., 19., 20.],
         [21., 22., 23., 24.]]])


In [37]:
torch.mean(tensor_1)

tensor(12.5000)

In [38]:
tensor_1.mean()

tensor(12.5000)

In [39]:
torch.mean(tensor_1, (1, 2)) # 对dim=1和dim=2求平均

tensor([ 6.5000, 18.5000])

In [38]:
tensor_1.mean(axis=(1, 2))

tensor([ 6.5000, 18.5000])

In [36]:
print("验证:", (1 + 12) * 6 / 12, ",", (13 + 24) * 6 / 12)

验证: 6.5 , 18.5


In [41]:
torch.mean(tensor_1, (2, 0)) # average (not sum) over dim=0 and dim=2

tensor([ 8.5000, 12.5000, 16.5000])

In [37]:
print("验证:", ((1 + 2 + 3 + 4) + (13 + 14 + 15 + 16)) / 8, ",",
      ((5 + 6 + 7 + 8) + (17 + 18 + 19 + 20)) / 8, ",",
      ((9 + 10 + 11 + 12) + (21 + 22 + 23 + 24)) / 8)

验证: 8.5 , 12.5 , 16.5


### torch.nanmean
torch.nanmean(input, dim=None, keepdim=False, *, dtype=None, out=None)

In [43]:
print(tensor_2)

tensor([[1., 2., nan, 4.],
        [5., nan, 7., 8.]])


In [44]:
torch.nanmean(tensor_2)

tensor(4.5000)

In [45]:
tensor_2.nanmean()

tensor(4.5000)

In [46]:
torch.nanmean(tensor_2, dim=(0, 1))

tensor(4.5000)

In [47]:
(1 + 2 + 4 + 5 + 7 + 8) / 6

4.5

In [48]:
torch.mean(tensor_2)

tensor(nan)

In [49]:
torch.nanmean(tensor_2, dim=0)

tensor([3., 2., 7., 6.])

In [50]:
torch.nanmean(tensor_2, dim=1)

tensor([2.3333, 6.6667])

### torch.var_mean 方差和均值
torch.var_mean(input, dim=None, \*, correction=1, keepdim=False, out=None)
$$\sigma^2 = \frac{1}{\max(0,\ N-\delta N)}\sum_{i=1}^{N} (x_i-\bar{x})^2$$
where $x_i$ are the sample elements, $\bar{x}$ is the sample mean, $N$ is the number of samples, and $\delta N$ is the correction.

Returns: A tuple (var, mean) containing the variance and mean.

In [51]:
print(tensor_1)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[13., 14., 15., 16.],
         [17., 18., 19., 20.],
         [21., 22., 23., 24.]]])


In [52]:
variance, mean = torch.var_mean(tensor_1, dim=2, correction=1)

In [53]:
variance

tensor([[1.6667, 1.6667, 1.6667],
        [1.6667, 1.6667, 1.6667]])

In [54]:
# Hand-check of variance[0, 0]: the row 1..4 has mean 2.5, and correction=1
# means the sum of squared deviations is divided by N - 1 = 3.
count = sum((ii - 2.5)**2 for ii in range(1, 5))
print("上式的[0, 0]元素 =", count / (4 - 1))

上式的[0, 0]元素 = 1.6666666666666667


In [55]:
mean

tensor([[ 2.5000,  6.5000, 10.5000],
        [14.5000, 18.5000, 22.5000]])

In [56]:
torch.mean(tensor_1, dim=2)

tensor([[ 2.5000,  6.5000, 10.5000],
        [14.5000, 18.5000, 22.5000]])

In [57]:
torch.var_mean(tensor_1, dim=(1, 2), correction=1)

(tensor([13., 13.]), tensor([ 6.5000, 18.5000]))

In [58]:
# Hand-check of the first variance over dim=(1, 2): the 12 values 1..12 have
# mean 6.5, and correction=1 divides the squared deviations by N - 1 = 11.
count = sum((ii - 6.5)**2 for ii in range(1, 13))
print("方差的第一元素 =", count / (12 - 1))

方差的第一元素 = 13.0


### torch.std_mean 标准差和均值
torch.std_mean(input, dim=None, *, correction=1, keepdim=False, out=None)
$$\sigma = \sqrt{\frac{1}{\max(0,\ N-\delta N)}\sum_{i=1}^{N} (x_i-\bar{x})^2}$$
where $x_i$ are the sample elements, $\bar{x}$ is the sample mean, $N$ is the number of samples, and $\delta N$ is the correction.

Returns: A tuple (std, mean) containing the standard deviation and mean.

In [59]:
std, mean = torch.std_mean(tensor_1, dim=2, correction=1)

In [60]:
std

tensor([[1.2910, 1.2910, 1.2910],
        [1.2910, 1.2910, 1.2910]])

In [61]:
# Hand-check of std[0, 0]: square root of the corrected variance of 1..4
# (mean 2.5, divisor N - 1 = 3).
count = sum((ii - 2.5)**2 for ii in range(1, 5))
print("上式的[0, 0]元素 =", (count / (4 - 1))**0.5)

上式的[0, 0]元素 = 1.2909944487358056


In [62]:
mean

tensor([[ 2.5000,  6.5000, 10.5000],
        [14.5000, 18.5000, 22.5000]])

In [63]:
torch.mean(tensor_1, dim=2)

tensor([[ 2.5000,  6.5000, 10.5000],
        [14.5000, 18.5000, 22.5000]])

In [64]:
torch.std_mean(tensor_1, dim=(1, 2), correction=1)

(tensor([3.6056, 3.6056]), tensor([ 6.5000, 18.5000]))

In [65]:
# Hand-check of the first standard deviation over dim=(1, 2): the 12 values
# 1..12 have mean 6.5; correction=1 divides by N - 1 = 11 before the square
# root.  The printed label now says "standard deviation" (标准差); it used to
# say "variance" (方差) although the value printed is the square root.
count = sum((ii - 6.5)**2 for ii in range(1, 13))
print("标准差的第一元素 =", (count / (12 - 1))**0.5)

标准差的第一元素 = 3.605551275463989


## max/min

### torch.max
- [torch.max(input)](https://pytorch.org/docs/stable/generated/torch.max.html)<br>
returns the maximum value 
- [torch.max(input, dim, keepdim=False, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.max.html)<br>
returns tuple of two output tensors (max, max_indices)

In [66]:
print(tensor_1)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[13., 14., 15., 16.],
         [17., 18., 19., 20.],
         [21., 22., 23., 24.]]])


In [67]:
torch.max(tensor_1)

tensor(24.)

In [68]:
torch.max(tensor_1, dim=2)

torch.return_types.max(
values=tensor([[ 4.,  8., 12.],
        [16., 20., 24.]]),
indices=tensor([[3, 3, 3],
        [3, 3, 3]]))

### torch.amax
[torch.amax(input, dim, keepdim=False, *, out=None)](https://pytorch.org/docs/stable/generated/torch.amax.html)

In [69]:
torch.amax(tensor_1)

tensor(24.)

In [70]:
torch.amax(tensor_1, dim=2)

tensor([[ 4.,  8., 12.],
        [16., 20., 24.]])

In [71]:
torch.amax(tensor_1, dim=(1, 2))

tensor([12., 24.])

### torch.min
- [torch.min(input)](https://pytorch.org/docs/stable/generated/torch.min.html)<br>
returns the minimum value 
- [torch.min(input, dim, keepdim=False, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.min.html)<br>
returns tuple of two output tensors (min, min_indices)

In [72]:
torch.min(tensor_1)

tensor(1.)

In [73]:
torch.min(tensor_1, dim=2)

torch.return_types.min(
values=tensor([[ 1.,  5.,  9.],
        [13., 17., 21.]]),
indices=tensor([[0, 0, 0],
        [0, 0, 0]]))

### torch.amin
[torch.amin(input, dim, keepdim=False, *, out=None)](https://pytorch.org/docs/stable/generated/torch.amin.html)

In [74]:
torch.amin(tensor_1)

tensor(1.)

In [75]:
torch.amin(tensor_1, dim=2)

tensor([[ 1.,  5.,  9.],
        [13., 17., 21.]])

In [76]:
torch.amin(tensor_1, dim=(1, 2))

tensor([ 1., 13.])

### torch.aminmax
[torch.aminmax(input, *, dim=None, keepdim=False, out=None)](https://pytorch.org/docs/stable/generated/torch.aminmax.html)

returns minimum and maximum values

In [77]:
torch.aminmax(tensor_1)

torch.return_types.aminmax(
min=tensor(1.),
max=tensor(24.))

In [78]:
torch.aminmax(tensor_1, dim=2)

torch.return_types.aminmax(
min=tensor([[ 1.,  5.,  9.],
        [13., 17., 21.]]),
max=tensor([[ 4.,  8., 12.],
        [16., 20., 24.]]))

### torch.argmax 最大值的索引
[torch.argmax(input, dim=None, keepdim=False)](https://pytorch.org/docs/stable/generated/torch.argmax.html)

In [79]:
torch.argmax(tensor_1)

tensor(23)

In [80]:
torch.argmax(tensor_1, dim=2)

tensor([[3, 3, 3],
        [3, 3, 3]])

### torch.argmin 最小值的索引
[torch.argmin(input, dim=None, keepdim=False)](https://pytorch.org/docs/stable/generated/torch.argmin.html)

In [81]:
torch.argmin(tensor_1)

tensor(0)

In [82]:
torch.argmin(tensor_1, dim=2)

tensor([[0, 0, 0],
        [0, 0, 0]])

### 取大/取小
1. [torch.maximum(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.maximum.html) $\iff$ [torch.max(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.max.html)
2. [torch.minimum(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.minimum.html) $\iff$ [torch.min(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.min.html)
3. [torch.fmax(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.fmax.html)
4. [torch.fmin(input, other, *, out=None)](https://pytorch.org/docs/stable/generated/torch.fmin.html)

In [83]:
torch.maximum(torch.tensor((1, 2, -1)),
              torch.tensor((3, 0, 4)))

tensor([3, 2, 4])

In [84]:
torch.max(torch.tensor((1, 2, -1)),
          torch.tensor((3, 0, 4)))

tensor([3, 2, 4])

In [85]:
torch.minimum(torch.tensor((1, 2, -1)),
              torch.tensor((3, 0, 4)))

tensor([ 1,  0, -1])

In [86]:
torch.min(torch.tensor((1, 2, -1)),
          torch.tensor((3, 0, 4)))

tensor([ 1,  0, -1])

In [87]:
torch.fmax(torch.tensor([9.7, float('nan'), 3.1, float('nan')]),
           torch.tensor([-2.2, 0.5, float('nan'), float('nan')]))

tensor([9.7000, 0.5000, 3.1000,    nan])

In [88]:
torch.fmin(torch.tensor([9.7, float('nan'), 3.1, float('nan')]),
           torch.tensor([-2.2, 0.5, float('nan'), float('nan')]))

tensor([-2.2000,  0.5000,  3.1000,     nan])

### torch.cummax
[torch.cummax(input, dim, *, out=None)](https://pytorch.org/docs/stable/generated/torch.cummax.html)
$$y_i=max(x_1, x_2, x_3, \dots, x_i)$$ 

In [89]:
print(tensor_1)

tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[13., 14., 15., 16.],
         [17., 18., 19., 20.],
         [21., 22., 23., 24.]]])


In [93]:
torch.cummax(tensor_1, dim=2)

torch.return_types.cummax(
values=tensor([[[ 1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.],
         [ 9., 10., 11., 12.]],

        [[13., 14., 15., 16.],
         [17., 18., 19., 20.],
         [21., 22., 23., 24.]]]),
indices=tensor([[[0, 1, 2, 3],
         [0, 1, 2, 3],
         [0, 1, 2, 3]],

        [[0, 1, 2, 3],
         [0, 1, 2, 3],
         [0, 1, 2, 3]]]))

### torch.cummin
[torch.cummin(input, dim, *, out=None)](https://pytorch.org/docs/stable/generated/torch.cummin.html)
$$y_i=min(x_1, x_2, x_3, \dots, x_i)$$ 

In [95]:
torch.cummin(tensor_1, dim=2)

torch.return_types.cummin(
values=tensor([[[ 1.,  1.,  1.,  1.],
         [ 5.,  5.,  5.,  5.],
         [ 9.,  9.,  9.,  9.]],

        [[13., 13., 13., 13.],
         [17., 17., 17., 17.],
         [21., 21., 21., 21.]]]),
indices=tensor([[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],

        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]]]))

## 角度(degree)与弧度(radian)

### torch.deg2rad
[torch.deg2rad(input, *, out=None)](https://pytorch.org/docs/stable/generated/torch.deg2rad.html) 角度$\to$弧度
### torch.rad2deg
[torch.rad2deg(input, *, out=None)](https://pytorch.org/docs/stable/generated/torch.rad2deg.html#torch.rad2deg) 弧度$\to$角度

In [66]:
torch.pi

3.141592653589793

In [67]:
degrees = torch.tensor([0., 30., 45., 60., 90.])

In [68]:
torch.deg2rad(degrees)

tensor([0.0000, 0.5236, 0.7854, 1.0472, 1.5708])

In [69]:
radians = torch.tensor([0., torch.pi/6., torch.pi/4., torch.pi/3., torch.pi/2.])
print(radians)

tensor([0.0000, 0.5236, 0.7854, 1.0472, 1.5708])


In [70]:
radians / torch.pi * 180

tensor([ 0., 30., 45., 60., 90.])

In [71]:
torch.rad2deg(torch.tensor([0., torch.pi/6., torch.pi/4., torch.pi/3., torch.pi/2.]))

tensor([ 0.0000, 30.0000, 45.0000, 60.0000, 90.0000])

## 初等函数

命令 | 用法 | 公式
:- | :- | :-:
torch.sin | [torch.sin(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.sin.html) |  $$out_i = \sin(input_i)$$
torch.asin | [torch.asin(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.asin.html)<br>$\iff$ [torch.arcsin(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.arcsin.html) | $$out_i = \sin^{-1}(input_i)$$
torch.cos | [torch.cos(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.cos.html) |  $$out_i = \cos(input_i)$$
torch.acos | [torch.acos(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.acos.html)<br>$\iff$ [torch.arccos(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.arccos.html) | $$out_i = \cos^{-1}(input_i)$$
torch.tan | [torch.tan(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.tan.html) |  $$out_i = \tan(input_i)$$
torch.atan<span id='id_atan'></span> | [torch.atan(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.atan.html)<br>$\iff$ [torch.arctan(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.arctan.html) | $$out_i = \tan^{-1}(input_i), input_i = \frac{y_i}{x_i} = \frac{\sin(out_i)}{\cos(out_i)}$$
torch.atan2<span id='id_atan2'></span> | [torch.atan2(input, other, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.atan2.html)<br>$\iff$[torch.arctan2(input, other, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.arctan2.html) | $$out_i = \tan^{-1}(\frac{input_i}{other_i})$$ $$input_i = y_i = \sin(out_i), other_i = x_i = \cos(out_i)$$
torch.angle<span id='id_angle'></span> | [torch.angle(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.angle.html) | $$ out_i = angle(input_i) \iff out_i = \tan^{-1}\frac{y_i}{x_i}, input_i = x_i + y_i*j$$ $$x_i = \cos(out_i), y_i = \sin(out_i)$$
---- | ---- | ---- 
torch.sinh | [torch.sinh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.sinh.html) |  $$out_i = \sinh(input_i)$$
torch.asinh | [torch.asinh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.asinh.html)<br>$\iff$ [torch.arcsinh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.arcsinh.html) | $$out_i = \sinh^{-1}(input_i)$$
torch.cosh | [torch.cosh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.cosh.html) |  $$out_i = \cosh(input_i)$$
torch.acosh | [torch.acosh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.acosh.html)<br>$\iff$ [torch.arccosh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.arccosh.html) | $$out_i = \cosh^{-1}(input_i)$$
torch.tanh | [torch.tanh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.tanh.html) |  $$out_i = \tanh(input_i)$$
torch.atanh | [torch.atanh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.atanh.html)<br>$\iff$ [torch.arctanh(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.arctanh.html) | $$out_i = \tanh^{-1}(input_i)$$
---- | ---- | ---- 
torch.sqrt | [torch.sqrt(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.sqrt.html) | $$out_i = \sqrt{input_i} = {input_i}^{\frac{1}{2}}$$
torch.rsqrt | [torch.rsqrt(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.rsqrt.html) | $$out_i = \frac{1}{\sqrt{input_i}} = {input_i}^{-\frac{1}{2}}$$
torch.square | [torch.square(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.square.html) | $$out_i = input_i^2$$
torch.pow<span id='id_pow'></span> | [torch.pow(input, exponent, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.pow.html) | $$out_i = {input_i}^{exponent},\ \ i.e., y = x^a$$
torch.float_power<span id='id_float_power'></span> | [torch.float_power(input, exponent, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.float_power.html) | 如果两个输入都不是复数, 则返回torch.float64张量;<br>如果一个或多个输入是复数, 则会返回torch.complex128张量
---- | ---- | ---- 
torch.exp | [torch.exp(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.exp.html) | $$out_i = e^{input_i}$$
torch.exp2 | [torch.exp2(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.exp2.html)<br>$\iff$ [torch.special.exp2(input, \*, out=None)](https://pytorch.org/docs/stable/special.html#torch.special.exp2) | $$out_i = 2^{input_i}$$
torch.ldexp | [torch.ldexp(input, other, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.ldexp.html) | $$out_i = input_i * 2^{other_i}$$
torch.frexp<span id='id_frexp'></span> | [torch.frexp(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.frexp.html) | 将输入分解为mantissa和exponent张量, 即 $$input_i=mantissa_i * 2^{exponent_i}$$
torch.logaddexp | [torch.logaddexp(input, other, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.logaddexp.html) | $$out_i = \log_e(e^{input_i} + e^{other_i})$$
torch.logaddexp2 | [torch.logaddexp2(input, other, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.logaddexp2.html) | $$out_i = \log_2(2^{input_i} + 2^{other_i})$$
torch.expm1 | [torch.expm1(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.expm1.html)<br>$\iff$ [torch.special.expm1(input, \*, out=None)](https://pytorch.org/docs/stable/special.html#torch.special.expm1) | $$out_i = e^{input_i} - 1$$
torch.sigmoid | [torch.sigmoid(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.sigmoid.html)<br>$\iff$ [torch.special.expit(input, \*, out=None)](https://pytorch.org/docs/stable/special.html#torch.special.expit) | $$out_i = \frac{1}{1+e^{-input_i}}$$
---- | ---- | ---- 
torch.log | [torch.log(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.log.html) | $$out_i = \log_e(input_i)$$
torch.log2 | [torch.log2(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.log2.html) | $$out_i = \log_2(input_i)$$
torch.log10 | [torch.log10(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.log10.html) | $$out_i = \log_{10}(input_i)$$
torch.log1p | [torch.log1p(input, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.log1p.html) | $$out_i = \log_e(input_i+1)$$
torch.logit | [torch.logit(input, eps=None, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.logit.html)<br>$\iff$ [torch.special.logit(input, eps=None, \*, out=None)](https://pytorch.org/docs/stable/special.html#torch.special.logit) | $$out_i = \log_e(\frac{z_i}{1-z_i})$$, $$z_i = \begin{cases} \begin{array}{l,l}
input_i, & if\ \ eps=None \\
eps, & if\ \ input_i<eps \\
input_i, & if\ \ eps\leq input_i\leq 1-eps \\
1-eps, & if\ \ input_i>1-eps \\
\end{array} \end{cases}$$
torch.xlogy | [torch.xlogy(input, other, \*, out=None)](https://pytorch.org/docs/stable/generated/torch.xlogy.html)<br>$\iff$ [torch.special.xlogy(input, other, \*, out=None)](https://pytorch.org/docs/stable/special.html#torch.special.xlogy) | $$out_i = \begin{cases} \begin{array}{l,l}
NaN, & if\ \ other_i=NaN \\
0, & if\ \ input_i=0.0 \\
input_i * \log(other_i), & otherwise \\
\end{array} \end{cases}$$
note: | $\log_e(x) = \ln(x)$, $\log_{10}(x) = \lg(x)$

* [torch.atan](#id_atan) vs [torch.atan2](#id_atan2) vs [torch.angle](#id_angle)

In [80]:
print("radians =", radians)

radians = tensor([0.0000, 0.5236, 0.7854, 1.0472, 1.5708])


In [90]:
x = torch.cos(radians)

In [91]:
y = torch.sin(radians)

In [92]:
torch.atan(torch.abs(y/x)) / torch.pi * 180

tensor([ 0.0000, 30.0000, 45.0000, 60.0000, 90.0000])

In [93]:
torch.atan2(y, x) / torch.pi * 180

tensor([ 0., 30., 45., 60., 90.])

In [95]:
torch.angle(torch.complex(x, y)) / torch.pi * 180

tensor([ 0., 30., 45., 60., 90.])

* [torch.pow](#id_pow) vs [torch.float_power](#id_float_power)

In [15]:
tensor_1 = torch.arange(1., 9.)

In [16]:
tensor_1

tensor([1., 2., 3., 4., 5., 6., 7., 8.])

In [17]:
torch.pow(tensor_1, tensor_1)

tensor([1.0000e+00, 4.0000e+00, 2.7000e+01, 2.5600e+02, 3.1250e+03, 4.6656e+04,
        8.2354e+05, 1.6777e+07])

In [18]:
torch.float_power(tensor_1, tensor_1)

tensor([1.0000e+00, 4.0000e+00, 2.7000e+01, 2.5600e+02, 3.1250e+03, 4.6656e+04,
        8.2354e+05, 1.6777e+07], dtype=torch.float64)

* [torch.frexp](#id_frexp)

In [19]:
torch.frexp(tensor_1)

torch.return_types.frexp(
mantissa=tensor([0.5000, 0.5000, 0.7500, 0.5000, 0.6250, 0.7500, 0.8750, 0.5000]),
exponent=tensor([1, 2, 2, 3, 3, 3, 3, 4], dtype=torch.int32))

## 阶跃函数

### torch.heaviside
torch.heaviside(input, values, *, out=None)
$$ heaviside(input,values) = \begin{cases} \begin{array}{l,l}
0, & input < 0 \\
values, & input = 0\\
1, & input > 0 \\
\end{array} \end{cases} $$

In [329]:
torch.heaviside(input=torch.tensor([-1.5, 0, 2.0]), values=torch.tensor([3.]))

tensor([0., 3., 1.])

In [330]:
torch.heaviside(input=torch.tensor([-1.5, 0, 2.0]), values=torch.tensor([1.2, -2.0, 3.5]))

tensor([ 0., -2.,  1.])

### torch.clamp
torch.clamp(input, min=None, max=None, *, out=None)  
$\iff$ torch.clip(input, min=None, max=None, *, out=None)  
$$ out_i = \begin{cases} \begin{array}{l,l}
min_i, & input_i <= min_i \\
input_i, &  min_i < input_i < max_i \\
max_i, & input_i >= max_i \\
\end{array} \end{cases} $$

In [332]:
torch.clamp(input=torch.tensor([-1.5, 0, 2.0]), min=-0.5, max=0.5)

tensor([-0.5000,  0.0000,  0.5000])

In [347]:
torch.clamp(input=torch.tensor([-1.5, 0, 2.0]), min=torch.tensor([-0.5, -1.0, 0.0]), max=torch.tensor([0.5, 1.0, 1.0]))

tensor([-0.5000,  0.0000,  1.0000])

### torch.sign
torch.sign(input, *, out=None)
$$ out_i = \begin{cases} \begin{array}{l,l}
-1, & input_i < 0 \\
0, & input_i = 0\\
1, & input_i > 0 \\
\end{array} \end{cases} $$

In [348]:
torch.sign(input=torch.tensor([-1.5, -0.0, 0.0, 2.0]))

tensor([-1.,  0.,  0.,  1.])

### torch.signbit
torch.signbit(input, *, out=None)
$$ out_i = \begin{cases} \begin{array}{l,l}
True, & input_i < 0 \\
False, & input_i >= 0 \\
\end{array} \end{cases} $$

In [340]:
torch.signbit(input=torch.tensor([-1.5, -0.0, 0.0, 2.0]))

tensor([ True, False, False, False])

### torch.copysign
torch.copysign(input, other, *, out=None)
$$ out_i = \begin{cases} \begin{array}{l,l}
-\left| input_i \right|, & other_i<= -0.0 \\
\left| input_i \right|, & other_i >= 0.0\\
\end{array} \end{cases} $$

In [349]:
torch.copysign(input=torch.tensor([-1.5, 1.5]), other=1.0)

tensor([1.5000, 1.5000])

In [345]:
torch.copysign(input=torch.tensor([-1.5, -1.5, -1.5, -1.5]),
               other=torch.tensor([-1.5, -0.0, 0.0, 2.0]))

tensor([-1.5000, -1.5000,  1.5000,  1.5000])

In [346]:
torch.copysign(input=torch.tensor([2.0, 2.0, 2.0, 2.0]),
               other=torch.tensor([-1.5, -0.0, 0.0, 2.0]))

tensor([-2., -2.,  2.,  2.])

## 范数

### torch.linalg.vector_norm
[torch.linalg.vector_norm(x, ord=2, dim=None, keepdim=False, *, dtype=None, out=None)](https://pytorch.org/docs/stable/generated/torch.linalg.vector_norm.html)

ord | vector norm
:- | :-
2 (default) | $$\|\mathbf{x}\|_2 = \sqrt{\sum_{i=1}^n x_i^2}$$
1 | $$\|\mathbf{x}\|_1 = \sum_{i=1}^n \left|x_i \right|$$
float('inf') | $$\max_{i}(\left|x_i\right|)$$
-float('inf') | $$\min_{i}(\left|x_i\right|)$$
0 | $$\sum_{i=1}^n(x_i \ne 0)$$
other int or float | $$\|\mathbf{x}\|_{ord} = \left(\sum_{i=1}^n \left|x_i \right|^{ord} \right)^{\frac{1}{ord}}$$

* $L_2$范数/欧几里得距离  

In [339]:
tensor_1 = torch.arange(9, dtype=torch.float) - 4
print(tensor_1)

tensor([-4., -3., -2., -1.,  0.,  1.,  2.,  3.,  4.])


In [340]:
torch.linalg.vector_norm(tensor_1, ord=2)

tensor(7.7460)

In [341]:
(tensor_1**2).sum()**0.5

tensor(7.7460)

* 无穷范数

In [328]:
torch.linalg.vector_norm(tensor_1, ord=float('inf'))

tensor(4.)

In [336]:
tensor_1.abs().max()

tensor(4.)

In [329]:
torch.linalg.vector_norm(tensor_1, ord=-float('inf'))

tensor(0.)

In [337]:
tensor_1.abs().min()

tensor(0.)

* 0范数

In [330]:
torch.linalg.vector_norm(tensor_1, ord=0)

tensor(8.)

* $L_p$范数

In [331]:
torch.linalg.vector_norm(tensor_1, ord=3.5)

tensor(5.4345)

In [332]:
torch.linalg.vector_norm(tensor_1.reshape(3, 3), ord=3.5) # 如果dim=None，则在计算范数之前，x将被展平

tensor(5.4345)

In [333]:
(torch.abs(tensor_1)**3.5).sum()**(1/3.5)

tensor(5.4345)

In [335]:
# Hand-check of the L_p norm (p = 3.5) of tensor_1: (sum_i |x_i|**p) ** (1/p).
# The absolute value is taken BEFORE exponentiation: in Python a negative
# float raised to a fractional power is a complex number, so the previous
# abs(x**p) only recovered the right magnitude indirectly.
# The exponent is kept in `p` rather than `ord` so the builtin ord() is not
# shadowed at notebook scope.
p = 3.5
count = sum(abs(num.item())**p for num in tensor_1)
print("验证:", count**(1/p))

验证: 5.434488008821392


### torch.linalg.matrix_norm
[torch.linalg.matrix_norm(A, ord='fro', dim=(- 2, - 1), keepdim=False, *, dtype=None, out=None)](https://pytorch.org/docs/stable/generated/torch.linalg.matrix_norm.html#torch.linalg.matrix_norm)

ord | matrix norm
:- | :-
'fro' (default) | Frobenius norm
'nuc' | nuclear norm
float('inf') | max(sum(abs(x), dim=1))
-float('inf') | min(sum(abs(x), dim=1))
1 | max(sum(abs(x), dim=0))
-1 | min(sum(abs(x), dim=0))
2 | largest singular value
-2 | smallest singular value

* 矩阵的Frobenius范数  
$\|\mathbf{X}\|_F = \sqrt{\sum_{i=1}^m \sum_{j=1}^n x_{ij}^2}$

In [344]:
tensor_1 = torch.arange(9, dtype=torch.float).reshape(3, 3)
print(tensor_1)

tensor([[0., 1., 2.],
        [3., 4., 5.],
        [6., 7., 8.]])


In [345]:
torch.linalg.matrix_norm(tensor_1)

tensor(14.2829)

In [347]:
(tensor_1**2).sum()**0.5

tensor(14.2829)

* ord=float("inf")

In [314]:
torch.linalg.matrix_norm(tensor_1, ord=float("inf"))

tensor(21.)

In [348]:
tensor_1.abs().sum(dim=1).max()

tensor(21.)

* ord = -1

In [350]:
torch.linalg.matrix_norm(tensor_1, ord=-1)

tensor(9.)

In [352]:
tensor_1.abs().sum(dim=0).min()

tensor(9.)

* Multiple matrices

In [355]:
tensor_2 = tensor_1.expand(2, -1, -1)
print(tensor_2)

tensor([[[0., 1., 2.],
         [3., 4., 5.],
         [6., 7., 8.]],

        [[0., 1., 2.],
         [3., 4., 5.],
         [6., 7., 8.]]])


In [356]:
torch.linalg.matrix_norm(tensor_2)

tensor([14.2829, 14.2829])

In [357]:
torch.linalg.matrix_norm(tensor_2, dim=(0, 2)) # 对第0维和第2维组成的矩阵，计算其Frobenius norm

tensor([ 3.1623, 10.0000, 17.2627])

In [363]:
# Cross-check of matrix_norm(tensor_2, dim=(0, 2)): take each slice along
# dim 1 and compute its Frobenius norm by hand.
for jj, matrix in enumerate(tensor_2.unbind(dim=1)):
    print(f"\n{jj}-th matrix:")
    print(matrix)
    print("its Frobenius norm =", (matrix**2).sum()**0.5)


0-th matrix:
tensor([[0., 1., 2.],
        [0., 1., 2.]])
its Frobenius norm = tensor(3.1623)

1-th matrix:
tensor([[3., 4., 5.],
        [3., 4., 5.]])
its Frobenius norm = tensor(10.)

2-th matrix:
tensor([[6., 7., 8.],
        [6., 7., 8.]])
its Frobenius norm = tensor(17.2627)


todo:   
https://pytorch.org/docs/stable/generated/torch.linalg.norm.html#torch.linalg.norm  
https://pytorch.org/docs/stable/generated/torch.dist.html#torch.dist  
https://pytorch.org/docs/stable/generated/torch.cdist.html#torch.cdist  
https://pytorch.org/docs/stable/generated/torch.renorm.html#torch.renorm  

## todo: 复数
- https://pytorch.org/docs/stable/generated/torch.sgn.html
- https://pytorch.org/docs/stable/generated/torch.view_as_complex.html
- https://pytorch.org/docs/stable/generated/torch.is_complex.html
- https://pytorch.org/docs/stable/generated/torch.complex.html
- https://pytorch.org/docs/stable/generated/torch.polar.html
- 
- https://pytorch.org/docs/stable/generated/torch.is_conj.html
- https://pytorch.org/docs/stable/generated/torch.conj.html
- https://pytorch.org/docs/stable/generated/torch.adjoint.html
- https://pytorch.org/docs/stable/generated/torch.conj_physical.html
- https://pytorch.org/docs/stable/generated/torch.resolve_conj.html
- 
- https://pytorch.org/docs/stable/generated/torch.rot90.html
-
- https://pytorch.org/docs/stable/torch.html

In [None]:
# torch.complex(real, imag, *, out=None): out = real + imag*j
real = torch.tensor([1, 2], dtype=torch.float32)
imag = torch.tensor([3, 4], dtype=torch.float32)
print(torch.complex(real, imag))

# torch.polar(abs, angle, *, out=None): out = abs*cos(angle) + abs*sin(angle)*j
# The tensors are deliberately not named `abs` / `angle`: binding `abs` at
# notebook scope would shadow the builtin abs() that other cells call.
# torch.pi replaces the truncated literal 3.1415926.
modulus = torch.tensor([1, 2], dtype=torch.float64)
phase = torch.tensor([torch.pi / 2, 5 * torch.pi / 4], dtype=torch.float64)
print(torch.polar(modulus, phase))

# torch.conj(input, *, out=None): complex conjugate -- same real part,
# imaginary part negated.  Left as the last expression so the notebook
# displays its value.
torch.conj(torch.tensor([-1 + 1j, -2 + 2j, 3 - 3j]))

## todo: 位运算

In [None]:
# Element-wise bitwise operators, each demonstrated on an integer pair and
# on a boolean pair.
int_lhs, int_rhs = torch.tensor([-1, -2, 3]), torch.tensor([1, 0, 3])
bool_lhs, bool_rhs = torch.tensor([True, True, False]), torch.tensor([False, True, False])

# torch.bitwise_and / torch.bitwise_or / torch.bitwise_xor
# share the signature (input, other, *, out=None): AND, OR, XOR.
for binary_op in (torch.bitwise_and, torch.bitwise_or, torch.bitwise_xor):
    print(binary_op(int_lhs, int_rhs))
    print(binary_op(bool_lhs, bool_rhs))

# torch.bitwise_not(input, *, out=None): bitwise NOT, i.e. ~x = -(x + 1).
print(torch.bitwise_not(int_lhs))


todo:
https://pytorch.org/docs/stable/generated/torch.logical_and.html#torch.logical_and
https://pytorch.org/docs/stable/generated/torch.logical_or.html#torch.logical_or
https://pytorch.org/docs/stable/generated/torch.logical_not.html#torch.logical_not
https://pytorch.org/docs/stable/generated/torch.logical_xor.html#torch.logical_xor