+ 朴素线性回归类
1. 导入torch.nn库
2. 继承nn.Module类
3. forward 运算

$120 \times 92$  
$\mathrm{cm} \times \mathrm{cm}$

In [2]:
import torch
import torch.nn as nn 

class LinearModel(nn.Module):
    """Plain linear regression module computing ``x @ W + b``.

    ``weight`` has shape ``(ndim, 1)`` and ``bias`` has shape ``(1,)``. Both
    are initialised from a standard normal distribution and registered as
    trainable parameters of the module.
    """

    def __init__(self, ndim):
        """Create the weight and bias for ``ndim`` input features."""
        super(LinearModel, self).__init__()
        self.ndim = ndim

        # Draw the initial values first, then wrap them as parameters.
        # Weight is registered before bias, which is also the order in which
        # parameters() / named_parameters() yield them.
        initial_weight = torch.randn(ndim, 1)
        initial_bias = torch.randn(1)
        self.weight = nn.Parameter(initial_weight)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Return ``x @ W + b`` for a 2-D input ``x`` of shape ``(n, ndim)``."""
        projected = torch.mm(x, self.weight)
        return projected + self.bias


# Build a linear regression model with 5 input features and evaluate it on a
# random batch of 4 samples; the result has shape (4, 1).
lm = LinearModel(ndim=5)
x = torch.randn((4, 5))
lm(x)

tensor([[-1.9276],
        [-1.7968],
        [-0.1114],
        [ 0.5583]], grad_fn=<AddBackward0>)

+ 模块方法调用

In [28]:
# named_parameters() returns a generator of (name, parameter) pairs;
# materialise it with list() so the contents can be printed.
print(list(lm.named_parameters()), '\n')

# parameters() returns a generator of the parameters only (no names).
print(list(lm.parameters()))

# Move the parameters to the GPU. Module.cuda() changes the module in place,
# so once this cell has run, re-running it prints GPU tensors from the very
# first print onwards. The call is guarded so the cell also runs on machines
# without CUDA (the parameters then simply stay on the CPU).
if torch.cuda.is_available():
    lm.cuda()
print(list(lm.parameters()))

# Cast the parameters to half precision (float16), in place.
lm.half()
print(list(lm.parameters()))

# Cast the parameters to double precision (float64), in place.
lm.double()
print(list(lm.named_parameters()))

[('weight', Parameter containing:
tensor([[ 0.9766],
        [ 0.0713],
        [-0.0754],
        [-0.3015],
        [-0.4956]], device='cuda:0', dtype=torch.float64, requires_grad=True)), ('bias', Parameter containing:
tensor([-0.4626], device='cuda:0', dtype=torch.float64, requires_grad=True))] 

[Parameter containing:
tensor([[ 0.9766],
        [ 0.0713],
        [-0.0754],
        [-0.3015],
        [-0.4956]], device='cuda:0', dtype=torch.float64, requires_grad=True), Parameter containing:
tensor([-0.4626], device='cuda:0', dtype=torch.float64, requires_grad=True)]
[Parameter containing:
tensor([[ 0.9766],
        [ 0.0713],
        [-0.0754],
        [-0.3015],
        [-0.4956]], device='cuda:0', dtype=torch.float64, requires_grad=True), Parameter containing:
tensor([-0.4626], device='cuda:0', dtype=torch.float64, requires_grad=True)]
[Parameter containing:
tensor([[ 0.9766],
        [ 0.0713],
        [-0.0754],
        [-0.3015],
        [-0.4956]], device='cuda:0', dtype=tor

+ 自动求导机制

In [2]:
# Leaf tensor whose operations are recorded by autograd.
t1 = torch.randn((3, 3), requires_grad=True)
print(t1)
t2 = torch.sum(t1 ** 2)
print(t2)

# Backpropagate from the scalar t2; t1.grad then holds d(t2)/d(t1),
# which for a sum of squares is 2 * t1.
t2.backward()
print(t1.grad)

# Gradient accumulation: a second backward pass adds to t1.grad instead of
# overwriting it. t2 has to be recomputed first because backward() frees the
# graph's buffers by default.
t2 = torch.sum(t1 ** 2)
t2.backward()
print(t1.grad)

# Reset the gradient of this single tensor to zero, in place.
t1.grad.zero_()

tensor([[-1.5095,  0.1374, -0.8820],
        [ 0.9715, -0.0199,  0.2490],
        [ 0.5697,  0.4074,  0.5472]], requires_grad=True)
tensor(4.8715, grad_fn=<SumBackward0>)
tensor([[-3.0189,  0.2749, -1.7639],
        [ 1.9430, -0.0398,  0.4980],
        [ 1.1394,  0.8147,  1.0944]])
tensor([[-6.0379,  0.5497, -3.5278],
        [ 3.8861, -0.0796,  0.9959],
        [ 2.2788,  1.6294,  2.1889]])


tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [4]:
# Functional form of differentiation: torch.autograd.grad returns the
# gradient of t2 with respect to t1 as a tuple instead of accumulating it
# into t1.grad.
t2 = torch.sum(t1 ** 2)
torch.autograd.grad(outputs=t2, inputs=t1)

(tensor([[-3.0189,  0.2749, -1.7639],
         [ 1.9430, -0.0398,  0.4980],
         [ 1.1394,  0.8147,  1.0944]]),)

+ 控制计算图的方法示例

In [12]:
t1 = torch.randn((3, 3), requires_grad=True)
t2 = torch.sum(t1)
print('t2: ', t2)  # computed with graph recording on: the result carries a grad_fn

# Inside no_grad() no computation graph is built.
with torch.no_grad():
    t3 = torch.sum(t1)
print('t3: ', t3)  # computed without a graph: the result has no grad_fn

torch.sum(t1)  # stays attached to the computation graph
torch.sum(t1).detach()  # same value, detached from the computation graph

t2:  tensor(4.3507, grad_fn=<SumBackward0>)
t3:  tensor(4.3507)


tensor(4.3507)

+ 损失函数以及优化器

In [42]:
# --- Regression: mean squared error ---------------------------------------
mse = nn.MSELoss()  # loss objects are callables, created once and reused
t1 = torch.randn(5, requires_grad=True)
t2 = torch.randn(5, requires_grad=True)
mse(t1, t2)  # MSE between t1 and t2

# --- Binary classification: binary cross entropy --------------------------
t1 = torch.randn(5, requires_grad=True)
t1s = torch.sigmoid(t1)  # sigmoid(x) = 1 / (1 + exp(-x))
print('t1: ', t1, '\n', 't1s: ', t1s)

t2 = torch.randint(0, 2, (5, )).float()  # 0/1 labels stored as floats
# BCELoss takes two arguments: the predicted probability of the positive
# class (i.e. values that have already gone through a sigmoid) and the labels.
bce = nn.BCELoss()
print('bce: ', bce(t1s, t2))  # computed from the sigmoid outputs and t2

# BCEWithLogitsLoss applies the sigmoid internally, so it takes raw logits.
bce_logits = nn.BCEWithLogitsLoss()
print('bce_logit: ', bce_logits(t1, t2))  # computed from the raw t1 and t2

# --- Multi-class classification -------------------------------------------
N = 10  # number of classes
t1 = torch.randn(5, N, requires_grad=True)
t2 = torch.randint(0, N, (5, ))  # integer class indices, not one-hot vectors
t1s = torch.nn.functional.log_softmax(t1, -1)  # log(softmax(t1))

# Negative log likelihood loss: takes log-probabilities and class-index
# targets.
nll = nn.NLLLoss()
nll(t1s, t2)
# CrossEntropyLoss combines log_softmax and NLLLoss, so it takes the raw
# scores together with the class-index targets.
ce = nn.CrossEntropyLoss()
# Compare with a tolerance: exact == on floating-point results of two
# separately computed losses is fragile.
torch.isclose(ce(t1, t2), nll(t1s, t2))

t1:  tensor([-0.5820,  1.4371,  1.6877, -0.1407, -0.4090], requires_grad=True) 
 t1s:  tensor([0.3585, 0.8080, 0.8439, 0.4649, 0.3992], grad_fn=<SigmoidBackward>)
bce:  tensor(0.6797, grad_fn=<BinaryCrossEntropyBackward>)
bce_logit:  tensor(0.6797, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)


tensor(True)

+ Optimizer 


In [5]:
from sklearn.datasets import load_boston
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2, so
# this cell needs an older scikit-learn release to run as written.
boston = load_boston()

lm = LinearModel(13)  # linear model with 13 input features
criterion = nn.MSELoss()
# The optimizer's first argument is the iterable of parameters to update.
# The original passed `lm, parameters()`, which raises a NameError.
optim = torch.optim.SGD(lm.parameters(), lr=1e-6)

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
         9.1400e+00],
        [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
         4.0300e+00],
        ...,
        [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         5.6400e+00],
        [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
         6.4800e+00],
        [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]]),
 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
        18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
        15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
        13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
        21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
        35.4, 24.7, 3