In [4]:
import torch

print(torch.__version__)
print(torch.cuda.is_available())
print(torch.cuda.device_count())
# print(torch.cuda.get_device_name(0))
import torchvision
print(torchvision.__version__)

1.10.0
False
0
0.11.1


numpy版
同时一个三阶多项式来预测y=sin(x)的值

In [10]:
import numpy as np
import math

# 需要预测的值以及输入的值
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# 随机初始会权重参数
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

# 学习率
learning_rate = 1e-6

for t in range(2000):
    # 前向传播：计算预测值
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # 损失函数：计算损失值并打印
    loss = np.square(y_pred - y)
    if t % 100 == 55:
        print(t, loss)

    # 反向传播计算参数的梯度值
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # 更新权重参数
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')




55 [6.22953421 6.14295877 6.05715161 ... 3.0037072  3.05216095 3.10110646]
155 [3.94455685 3.89056732 3.83704235 ... 1.89982415 1.92964967 1.95976566]
255 [2.51367811 2.48017972 2.44695612 ... 1.15768187 1.17538698 1.1932562 ]
355 [1.58350267 1.56312736 1.54290837 ... 0.69114965 0.70133617 0.71161037]
455 [0.98267178 0.97061106 0.95863415 ... 0.40136987 0.40697448 0.4126219 ]
555 [0.59783226 0.59096298 0.58413441 ... 0.22429055 0.22717272 0.23007256]
655 [0.35410177 0.350408   0.34673043 ... 0.11854267 0.11986778 0.12119755]
755 [0.20210331 0.20029437 0.19848874 ... 0.05750884 0.05799725 0.05848461]
855 [0.10935456 0.10861307 0.1078691  ... 0.02414477 0.02423331 0.02431938]
955 [0.05455155 0.05436751 0.05417941 ... 0.00760254 0.00754893 0.00749376]
1055 [0.02377578 0.02383754 0.02389603 ... 0.00103201 0.000982   0.00093254]
1155 [0.00797915 0.00810202 0.00822364 ... 0.00015204 0.00018245 0.00021599]
1255 [0.00131783 0.00139747 0.00147845 ... 0.00232399 0.00247002 0.00262187]
1355 [4.96

tensor 版本：
numpy 数组和 PyTorch 张量之间的最大区别在于，PyTorch 张量可以在 CPU 或 GPU 上运行

In [12]:
import torch
import math

# 设置张量在CPU还是在GPU上运行
dtype = torch.float
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# 需要预测的值以及输入的值
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# 初始化权重参数
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

# 设置超参数
learning_rate = 1e-6
for t in range(2000):
    # 前向传播：计算预测值
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    
    # 损失函数计算loss
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 88:
        print(t, loss)

    # 反向传播：计算参数的梯度
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # 更新权重参数
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

88 160.5891571044922
188 109.75704956054688
288 75.97692108154297
388 53.5190315246582
488 38.58336639404297
588 28.646530151367188
688 22.03302001953125
788 17.629478454589844
888 14.696233749389648
988 12.741451263427734
1088 11.438112258911133
1188 10.568672180175781
1288 9.988371849060059
1388 9.600838661193848
1488 9.341889381408691
1588 9.168758392333984
1688 9.052924156188965
1788 8.97536849975586
1888 8.923412322998047
1988 8.888579368591309
Result: y = 0.004361842758953571 + 0.8497730493545532 x + -0.0007524921675212681 x^2 + -0.09233927726745605 x^3


PyTorch Autograd 来计算梯度值
- 张量表示计算图的一个节点
- x为张量，该张量自带一个requires_grad属性，当该属性设置为True的时候表示可以通过autograd的方式来计算梯度
- x.grad来保存x张量下的梯度值

In [14]:
import torch
import math

# 设置张量在CPU还是在GPU上运行
dtype = torch.float
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# 需要预测的值以及输入的值
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# 设置反向传播需要计算对应参数对应的梯度值grad
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # 前向传播：计算预测值
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # 损失函数计算loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 88:
        print(t, loss.item())

    # 利用PyTorch de 自动梯度机制来进行反向传播计算
    # 当调用backward后就会自动计算所有tensor中设置了requires_grad=true的张量的梯度
    # 当调用a.grad, b.grad...就可以获得对应的张量的梯度的值，而且该值会自动保留
    # 如果不清空，会累加上去
    loss.backward()

    # 需要使用torch.no_grad()
    # 不希望在下一步的梯度计算中记录这些操作
    with torch.no_grad():
        # 调用 grad来获得对应的张量的梯度的值
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

         # 如果不清空，会累加上去   
         # 将梯度设置为零，以便为下一个循环做好准备。 否则，我们的梯度会记录所有已发生操作的运行记录
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None
        

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

88 3805.037841796875
188 2520.091796875
288 1670.126220703125
388 1107.87353515625
488 735.93310546875
588 489.880859375
688 327.1033020019531
788 219.41323852539062
888 148.16537475585938
988 101.0260238647461
1088 69.83599853515625
1188 49.19840621948242
1288 35.5422477722168
1388 26.505373001098633
1488 20.524967193603516
1588 16.56706428527832
1688 13.947528839111328
1788 12.213678359985352
1888 11.065972328186035
1988 10.30620002746582
Result: y = 0.005787726957350969 + 0.8204581141471863 x + -0.000998479314148426 x^2 + -0.08816948533058167 x^3


自定义新的 Autograd 函数
对于比较复杂的传播函数来说的，需要自己计算梯度公式，创建该对应的公式的自动反向传播的类，该类是继承自torch.autograd.Function

In [15]:
import torch
import math


# 自定义的autograd 函数
# 这个是用来对于自己比较复杂的传播函数来说的
class LegendrePolynomial3(torch.autograd.Function):
    # 继承torch.autograd.Function
    # 需要自己实现forward 和 backward

    @staticmethod
    def forward(ctx,input):
        """
        输入：
            input: 是一个输入的Tensor
            ctx: 表示上下文的对象，用来存放反向传播计算的信息
        输出：输出Tensor是前向传播的结果

        ctx使用save_for_backward来保存反向传播的张量的结果
        """
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)

    @staticmethod
    def backward(ctx, grad_output):
        """
        输入：
            grad_output: 损失值的梯度对应输出
            ctx: 表示上下文的对象，用来存放反向传播计算的信息
        输出：将损失值的的梯度乘以input的梯度值

        ctx使用save_for_backward来保存反向传播的张量的结果
        """
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)

# 设置张量在CPU还是在GPU上运行
dtype = torch.float
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# 需要预测的值以及输入的值
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# 设置反向传播需要计算对应参数对应的梯度值的属性requires_grad
# 这里使用了full来初始化对应的参数，需要注意的是这个值不能偏离太远了
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    # 使用Function中apply来实现自己定义的方法
    # 将这个方法设置为P3
    P3 = LegendrePolynomial3.apply

    # 前向传播来计算预测值y_pred
    # P3将会使用自己定义的autograd 操作来实现
    y_pred = a + b * P3(c + d * x)

        # 损失函数计算loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 88:
        print(t, loss.item())

    # 利用PyTorch de 自动梯度机制来进行反向传播计算
    # 当调用backward后就会自动计算所有tensor中设置了requires_grad=true的张量的梯度
    # 当调用a.grad, b.grad...就可以获得对应的张量的梯度的值，而且该值会自动保留
    # 如果不清空，会累加上去
    loss.backward()

    # 这里手动更新weights, 所以需要使用torch.no_grad()
    # 如果是自动更新就不需要torch.no_grad()
    with torch.no_grad():
        # 调用 grad来获得对应的张量的梯度的值
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

         # 如果不清空，会累加上去
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None
        

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')
    



88 218.8590850830078
188 150.64144897460938
288 104.73460388183594
388 73.75899505615234
488 52.82105255126953
588 38.650856018066406
688 29.052364349365234
788 22.546472549438477
888 18.134544372558594
988 15.141712188720703
1088 13.11081314086914
1188 11.73245620727539
1288 10.796770095825195
1388 10.1614990234375
1488 9.730156898498535
1588 9.437203407287598
1688 9.238287925720215
1788 9.103202819824219
1888 9.011453628540039
1988 8.949139595031738
Result: y = -5.423830273798558e-09 + -2.208526849746704 * P3(1.3320399228078372e-09 + 0.2554861009120941 x)


nn构建模型的版本
- 建立模型： 通过nn建立一个线性传播的网络
- 构建损失函数： nn.MSELoss
- 自动反向传播：backward
- 更新模型参数：model.parameters()

In [19]:
import torch
import math


# 需要预测的值以及输入的值
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# 这里开始构建模型
# 该模型是一个线性的神经网络模型
# x.unsqueeze(-1) 将会组成一个张量维度为 (2000, 1)
# p 张量的维度为（3,）但是当与维度为 (2000, 1)进行操作的时候
# 通过广播机制扩展到(2000,3)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# 定义模型为Sequential，表示按顺序进行的
# 使用Linear表现使用线性函数，将会自带有权重参数和偏置值
# 这里输入的是3表示有3神经元，输出是1
#Flatten这里是展平到1D tensor,为了与y的shape进行匹配
model = torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1)
)

# nn 自带的常用的损失函数
loss_fn = torch.nn.MSELoss(reduction='sum')


learning_rate = 1e-6
for t in range(2000):

    # Module对象重载了__call__操作，这里可以像函数那样使用
    # 当传入一个tensor的时候，这里也会自动返回一个tensor
    y_pred = model(xx)

    # 损失函数计算loss
    loss = loss_fn(y_pred,y)
    if t % 100 == 88:
        print(t, loss.item())


    # 将模型中反向传播计算的梯度值设置为零
    # 这里是避免上一次的结果累加
    model.zero_grad()

    # 反向传播：将会计算模型中的参数对应的梯度
    loss.backward()

    # 这里手动更新weights, 所以需要使用torch.no_grad()
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
        

# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')


88 160.43678283691406
188 112.2736587524414
288 79.49169921875
388 57.152862548828125
488 41.912803649902344
588 31.503929138183594
688 24.38653564453125
788 19.51413917541504
888 16.1748104095459
988 13.883589744567871
1088 12.309727668762207
1188 11.227375030517578
1288 10.482193946838379
1388 9.968621253967285
1488 9.614269256591797
1588 9.369522094726562
1688 9.200289726257324
1788 9.083160400390625
1888 9.002005577087402
1988 8.945728302001953
Result: y = 0.010208604857325554 + 0.8513708114624023 x + -0.0017611539224162698 x^2 + -0.09256654232740402 x^3


使用optim来进行相关进行更新权重，Pytorch 优化器中已经集成很多较为常用的优化算法：SGD,RMSProp,Adam等
- 梯度清零：optimizer.zero_grad() 
- 自动反向传播：loss.backward()
- 更新权重：optimizer.step()

In [20]:
import torch
import math


# 需要预测的值以及输入的值
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# 这里开始构建模型
# 该模型是一个线性的神经网络模型
# x.unsqueeze(-1) 将会组成一个张量维度为 (2000, 1)
# p 张量的维度为（3,）但是当与维度为 (2000, 1)进行操作的时候
# 通过广播机制扩展到(2000,3)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# 定义模型为Sequential，表示按顺序进行的
# 使用Linear表现使用线性函数，将会自带有权重参数和偏置值
# 这里输入的是3表示有3神经元，输出是1
#Flatten这里是展平到1D tensor,为了与y的shape进行匹配
model = torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1)
)

# nn 自带的常用的损失函数
loss_fn = torch.nn.MSELoss(reduction='sum')

#利用优化器来自动更新参数
# 第一个参数：需要自动更新的参数张量
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
learning_rate = 1e-3

for t in range(2000):

    # Module对象重载了__call__操作，这里可以像函数那样使用
    # 当传入一个tensor的时候，这里也会自动返回一个tensor
    y_pred = model(xx)

    # 损失函数计算loss
    loss = loss_fn(y_pred,y)
    if t % 100 == 88:
        print(t, loss.item())


    # 优化器将需要反向传播中张量的梯度值清理
    # 因为梯度值会累积到缓存中
    optimizer.zero_grad()
    # model.zero_grad()

    # 反向传播：将会计算模型中的参数对应的梯度
    loss.backward()

    # 优化器调用step()将会自动更新参数
    optimizer.step()
    # 这里手动更新weights, 所以需要使用torch.no_grad()
    # with torch.no_grad():
    #     for param in model.parameters():
    #         param -= learning_rate * param.grad
        

# You can access the first layer of `model` like accessing the first item of a list
linear_layer = model[0]

# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')


88 95971.625
188 95925.65625
288 95884.1484375
388 95844.2109375
488 95804.296875
588 95764.3828125
688 95724.4921875
788 95684.5859375
888 95644.703125
988 95604.8359375
1088 95564.96875
1188 95525.109375
1288 95485.265625
1388 95445.421875
1488 95405.578125
1588 95365.765625
1688 95325.9453125
1788 95286.125
1888 95246.328125
1988 95206.546875
Result: y = -0.5215063095092773 + -0.35876762866973877 x + 0.5146413445472717 x^2 + -0.48071953654289246 x^3


PyTorch：自定义nn模块

In [30]:
import torch
import math

class Polynomial3(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate four parameters and assign them as
        member parameters.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        In the forward function we accept a Tensor of input data and we must return
        a Tensor of output data. We can use Modules defined in the constructor as
        well as arbitrary operators on Tensors.
        """
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        """
        Just like any class in Python, you can also define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
# 需要预测的值以及输入的值
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# 这里开始构建模型
model = Polynomial3()

# nn 自带的常用的损失函数
criterion = torch.nn.MSELoss(reduction='sum')

#利用优化器来自动更新参数
# 第一个参数：需要自动更新的参数张量
learning_rate = 1e-6
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)


for t in range(2000):

    # Module对象重载了__call__操作，这里可以像函数那样使用
    # 当传入一个tensor的时候，这里也会自动返回一个tensor
    y_pred = model(x)

    # 损失函数计算loss
    loss = criterion(y_pred,y)
    if t % 100 == 88:
        print(t, loss.item())


    # 优化器将需要反向传播中张量的梯度值清理
    # 因为梯度值会累积到缓存中
    optimizer.zero_grad()
    # model.zero_grad()

    # 反向传播：将会计算模型中的参数对应的梯度
    loss.backward()

    # 优化器调用step()将会自动更新参数
    optimizer.step()
        


# For linear layer, its parameters are stored as `weight` and `bias`.
print(f'Result: y = {model.string()}')



88 76240.4921875
188 76201.359375
288 76165.9609375
388 76131.859375
488 76098.1484375
588 76064.4375
688 76030.7421875
788 75997.0546875
888 75963.3671875
988 75929.6953125
1088 75896.0234375
1188 75862.375
1288 75828.7265625
1388 75795.078125
1488 75761.4453125
1588 75727.8125
1688 75694.203125
1788 75660.5859375
1888 75626.984375
1988 75593.390625
Result: y = y = 0.6693885326385498 + 1.1657319068908691 x + -0.17604434490203857 x^2 + 0.3843775689601898 x^3


PyTorch：控制流 + 权重共享

In [29]:
import random
import torch
import math

class DynamicNet(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate five parameters and assign them as members.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        For the forward pass of the model, we randomly choose either 4, 5
        and reuse the e parameter to compute the contribution of these orders.

        Since each forward pass builds a dynamic computation graph, we can use normal
        Python control-flow operators like loops or conditional statements when
        defining the forward pass of the model.

        Here we also see that it is perfectly safe to reuse the same parameter many
        times when defining a computational graph.
        """
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y

    def string(self):
        """
        Just like any class in Python, you can also define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'

# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
model = DynamicNet()

# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 1466.094970703125
3999 746.9537353515625
5999 346.7632751464844
7999 172.6957550048828
9999 87.82992553710938
11999 44.260196685791016
13999 26.115705490112305
15999 16.93872833251953
17999 12.612433433532715
19999 10.663186073303223
21999 9.607438087463379
23999 9.23609733581543
25999 8.787160873413086
27999 8.930931091308594
29999 8.887451171875
Result: y = -0.006020872853696346 + 0.8541386723518372 x + 0.0005413547623902559 x^2 + -0.09321240335702896 x^3 + 1.148824958363548e-05 x^4 ? + 1.148824958363548e-05 x^5 ?
