**pytorch实用技能**

@ Follow: "动手学深度学习-第二章 预备知识"

In [2]:
from pathlib import Path
import numpy as np
import pandas as pd
import torch

dir_chapter = Path("/Users/rui/Code/Astronote/32_PyTorch/data/chapter2")
dir_chapter.mkdir(exist_ok=True, parents=True)

---
# 基本数据结构-Tensor
- Tensor与numpy array非常相似, 但有2个新功能
- Tensor支持GPU, 支持自动微分

## 创建Tensor

In [3]:
x = torch.arange(12)  # 创建一个行向量
X_all0 = torch.zeros((2, 3))  # 创建一个2行3列的矩阵，元素全为0
X_all1 = torch.ones((2, 3))  # 创建一个2行3列的矩阵，元素全为1
X_rand = torch.randn(3, 2, 1)  # 创建一个2行3列的矩阵，元素为随机数

print(x)
print(X_all0)
print(X_all1)
print(X_rand)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[[ 0.3124],
         [ 0.3354]],

        [[ 2.6748],
         [ 0.4485]],

        [[-0.1018],
         [-1.1281]]])


In [4]:
ls = [[2, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]]
X = torch.tensor(ls)  # 通过列表创建张量

arr = np.array(ls)
X_arr = torch.from_numpy(arr)  # 通过numpy数组创建张量

print(X)
print(X_arr)

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])
tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])


## 查看Tensor的属性

In [5]:
print(x.shape)  # 查看形状, 即沿每个轴的元素数量
print(x.numel())  # 查看元素数量

torch.Size([12])
12


## Tensor的操作与运算

### 更改张量形状

In [6]:
X = x.reshape(3, 4)  # 将行向量x的形状改为(3, 4), 即3行4列的矩阵
X1 = x.reshape(-1, 3)  # 指定形状为3列, 行数自动推断

print(X)
print(X1)

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11]])


### 按元素运算
- 按元素位置单独操作或运算

In [7]:
x = torch.tensor([1.0, 2, 4, 8])
y = torch.tensor([2, 2, 2, 2])
x + y, x - y, x * y, x / y, x ** y  # 加减乘除乘方

(tensor([ 3.,  4.,  6., 10.]),
 tensor([-1.,  0.,  2.,  6.]),
 tensor([ 2.,  4.,  8., 16.]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1.,  4., 16., 64.]))

In [8]:
torch.exp(x)  # 指数运算

tensor([2.7183e+00, 7.3891e+00, 5.4598e+01, 2.9810e+03])

广播机制: 即使两个张量的形状不同, 也可以执行元素操作

In [9]:
a = torch.arange(3).reshape((3, 1))
b = torch.arange(2).reshape((1, 2))
a, b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

In [10]:
a + b

tensor([[0, 1],
        [1, 2],
        [2, 3]])

### 张量连接

In [11]:
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
Y = torch.tensor([[2.0, 1, 4, 3], [1, 2, 3, 4], [4, 3, 2, 1]])
print(X)
print(Y)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])
tensor([[2., 1., 4., 3.],
        [1., 2., 3., 4.],
        [4., 3., 2., 1.]])


In [12]:
torch.cat((X, Y), dim=0)  # 沿行（轴0）拼接X和Y

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

In [13]:
torch.cat((X, Y), dim=1)  # 沿列（轴1）拼接X和Y

tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
        [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
        [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]])

## 节省内存

In [14]:
"""
直接赋值相同的变量名仍然分配了不同的内存
"""
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
before = id(X)
print(f"id(X): {before}")

X = X + 1
print(f"id(X): {id(X)}")  # id(X)与before不同

id(X): 5064406064
id(X): 5064403504


In [15]:
"""
使用切片表示法覆盖内存
"""
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
before = id(X)
print(f"id(X): {before}")

X[:] = X + 1
print(f"id(X): {id(X)}")

id(X): 5064394864
id(X): 5064394864


In [16]:
"""
使用 += 覆盖内存
"""
X = torch.arange(12, dtype=torch.float32).reshape((3, 4))
before = id(X)
print(f"id(X): {before}")

X += 1
print(f"id(X): {id(X)}")

id(X): 5064607536
id(X): 5064607536


## Tensor和Numpy array转换

In [17]:
A = torch.arange(12, dtype=torch.float32).reshape((3, 4))
B = A.numpy()
print(type(A), type(B))
print(id(A), id(B))  # 内存地址不同
print(A)
print(B)

# tensor和numpy数组共享内存, 修改一个会影响另一个(仅限就地操作, 如 +=, A[:])
B[:] = B + 1
print(A)
print(B)

<class 'torch.Tensor'> <class 'numpy.ndarray'>
5064408464 5043389232
tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])
[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]
tensor([[ 1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.],
        [ 9., 10., 11., 12.]])
[[ 1.  2.  3.  4.]
 [ 5.  6.  7.  8.]
 [ 9. 10. 11. 12.]]


---
# 通过Pandas读取数据
- 步骤: 数据读取、缺失值处理、转换成Tensor

In [18]:
"""
创建测试数据文件
"""
data_file = dir_chapter / "house_tiny.csv"
with open(data_file, 'w') as f:
    f.write('NumRooms,Alley,Price\n')  # 列名
    f.write('NA,Pave,127500\n')  # 每行表示一个数据样本
    f.write('2,NA,106000\n')
    f.write('4,NA,178100\n')
    f.write('NA,NA,140000\n')

In [19]:
"""
load the data
"""
data = pd.read_csv(data_file)
print(data)

   NumRooms Alley   Price
0       NaN  Pave  127500
1       2.0   NaN  106000
2       4.0   NaN  178100
3       NaN   NaN  140000


In [20]:
"""
replace the missing value
"""
# 均值填充数值
inputs, outputs = data.iloc[:, 0:2], data.iloc[:, 2]
inputs['NumRooms'] = inputs['NumRooms'].fillna(inputs['NumRooms'].mean())

# 单一类别分类缺失处理
inputs = pd.get_dummies(inputs, dummy_na=True)
# bool转换
inputs['Alley_Pave'] = inputs['Alley_Pave'].astype('int')
inputs['Alley_nan'] = inputs['Alley_nan'].astype('int')
inputs

Unnamed: 0,NumRooms,Alley_Pave,Alley_nan
0,3.0,1,0
1,2.0,0,1
2,4.0,0,1
3,3.0,0,1


In [21]:
"""
数据转换成Tensor
"""
X, y = torch.tensor(inputs.values), torch.tensor(outputs.values)
X, y

(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))

---
# 线性代数的pytorch实现

## 向量
- 列向量是向量的默认方向

In [25]:
x = torch.arange(4)
x

tensor([0, 1, 2, 3])

## 矩阵
- 在表格数据集中, 一般将每个样本作为矩阵中的行向量, 这样沿着张量的最外轴可以访问或遍历小批量样本

In [28]:
A = torch.arange(20).reshape(5, 4)
A_trans = A.T

print(A)
print(f"Transpose of A: \n{A_trans}")

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15],
        [16, 17, 18, 19]])
Transpose of A: 
tensor([[ 0,  4,  8, 12, 16],
        [ 1,  5,  9, 13, 17],
        [ 2,  6, 10, 14, 18],
        [ 3,  7, 11, 15, 19]])


## 哈达玛积(Hadamard product): 两个矩阵的按元素乘法

In [30]:
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
B = A.clone()

print(A)
print(A + B)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])
tensor([[ 0.,  2.,  4.,  6.],
        [ 8., 10., 12., 14.],
        [16., 18., 20., 22.],
        [24., 26., 28., 30.],
        [32., 34., 36., 38.]])


## 求和

In [33]:
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
print(A)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])


In [39]:
print(f"对所有元素求和: {A.sum()}")
print(f"沿着行方向对列求和: {A.sum(axis=0)}")
print(f"沿着列方向对行求和: {A.sum(axis=1)}")
print(f"沿着行列对矩阵求和: {A.sum(axis=[0, 1])}")  # 等价于A.sum()

对所有元素求和: 190.0
沿着行方向对列求和: tensor([40., 45., 50., 55.])
沿着列方向对行求和: tensor([ 6., 22., 38., 54., 70.])
沿着行列对矩阵求和: 190.0


In [44]:
"""
以上求和后都是向量, 如果需要保持矩阵形状, 可以指定keepdims=True
"""
print(f"对所有元素求和: \n{A.sum(axis=0, keepdims=True)}")
print(f"沿着行方向对列求和: \n{A.sum(axis=1, keepdims=True)}")

对所有元素求和: 
tensor([[40., 45., 50., 55.]])
沿着行方向对列求和: 
tensor([[ 6.],
        [22.],
        [38.],
        [54.],
        [70.]])


In [46]:
"""
计算累计求和
"""
A = torch.ones(3, 4)
print(A)
print(A.cumsum(axis=0))
print(A.cumsum(axis=1))

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([[1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.]])
tensor([[1., 2., 3., 4.],
        [1., 2., 3., 4.],
        [1., 2., 3., 4.]])


## 点积
- 对应元素相乘再求和

In [50]:
x = torch.arange(4, dtype=torch.float32)
y = torch.ones(4, dtype=torch.float32)

dot = torch.dot(x, y)

print(f"x: {x}")
print(f"y: {y}")
print(f"dot: {dot}")
print(f"sum(x * y): {torch.sum(x * y)}")  # 等价于torch.dot(x, y)

x: tensor([0., 1., 2., 3.])
y: tensor([1., 1., 1., 1.])
dot: 6.0
sum(x * y): 6.0


## 矩阵-向量积

In [52]:
A = torch.arange(20, dtype=torch.float32).reshape(5, 4)
x = torch.arange(4, dtype=torch.float32)
print(A)
print(x)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])
tensor([0., 1., 2., 3.])


In [53]:
torch.mv(A, x)

tensor([ 14.,  38.,  62.,  86., 110.])

## 矩阵-矩阵乘法

In [55]:
A = torch.arange(4).reshape(2, 2)
B = torch.arange(4).reshape(2, 2)
print(A)
print(B)

tensor([[0, 1],
        [2, 3]])
tensor([[0, 1],
        [2, 3]])


In [56]:
torch.mm(A, B)

tensor([[ 2,  3],
        [ 6, 11]])

## 范数
- 非正式的说, 范数表示张量的长度
- 范数$L_P$的一般形式:<br>
    $||x||_p = (\Sigma^n_{i=1}|x_i|^P)^{1/P}$
- L2范数即欧式距离: $||x||_2 = \sqrt{\Sigma^n_{i=1}|x_i|^2}$
- L1范数即元素的绝对值求和, 与L2范数相比, L1范数受异常值影响更小

In [74]:
A = torch.arange(4, dtype=torch.float16).reshape(2, -1)
print(A)
print(torch.norm(A, p=1))  # L1范数
print(torch.norm(A, p=2))  # L2范数

tensor([[0., 1.],
        [2., 3.]], dtype=torch.float16)
tensor(6., dtype=torch.float16)
tensor(3.7422, dtype=torch.float16)


---
# 自动微分
- pytorch可以自动计算导数

In [87]:
x = torch.arange(4, dtype=torch.float16, requires_grad=True)
# 定义函数形式
y = 2 * torch.dot(x, x)
# 反向传播计算函数y关于每个参数x的梯度
y.backward()
print(x)
print(y)
print(x.grad)
print(x.grad == 4 * x)  # 与手动计算的梯度值比较

tensor([0., 1., 2., 3.], dtype=torch.float16, requires_grad=True)
tensor(28., dtype=torch.float16, grad_fn=<MulBackward0>)
tensor([ 0.,  4.,  8., 12.], dtype=torch.float16)
tensor([True, True, True, True])
