## LEARNING PYTORCH WITH EXAMPLES



In [1]:
import torch
import math

In [2]:
class LegendrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backword(input) 
        return 0.5 * (5 * input ** 3 - 3 * input)
        # 1/2(5x^3 - 3x)
    
    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        # 一次求导
        return grad_output * 1.5 * (5 * input ** 2 - 1)

In [3]:
dtype = torch.float
# >>> torch.device('cuda:0')
# device(type='cuda', index=0) 第0个GPU
device = torch.device("cuda:0") # current cuda device

In [4]:
# create random to hold input and outputs
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)
x[:10], y[:10]

(tensor([-3.1416, -3.1384, -3.1353, -3.1322, -3.1290, -3.1259, -3.1227, -3.1196,
         -3.1164, -3.1133], device='cuda:0'),
 tensor([ 8.7423e-08, -3.1430e-03, -6.2863e-03, -9.4292e-03, -1.2572e-02,
         -1.5715e-02, -1.8858e-02, -2.2000e-02, -2.5143e-02, -2.8285e-02],
        device='cuda:0'))

In [5]:
# create random Tensors for weights
# torch.full 类似声明一个数组 tensor a = 0(cuda,require_grad=True)
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
a, b

(tensor(0., device='cuda:0', requires_grad=True),
 tensor(-1., device='cuda:0', requires_grad=True))

In [6]:
# 设置超参数
learning_rate = 5e-6
for t in range(2000):
    # use own function (Function.apply)
    P3 = LegendrePolynomial3.apply
    
    # 计算预测值
    y_pred = a + b * P3(c + d * x)
    
    # compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99: # 199，...
        print(t, loss.item())
        
    # autograd to backward
    loss.backword()

    # Update weights using gradient descent
    with torch.no_grad:
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually zero the gradients after updating weights
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

AttributeError: 'LegendrePolynomial3Backward' object has no attribute 'save_for_backword'

In [7]:
print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

Result: y = 0.0 + -1.0 * P3(0.0 + 0.30000001192092896 x)


In [9]:
x = torch.linspace(-math.pi, math.pi, 2000) # 从开始到结束的均匀分布
y = torch.sin(x)

In [23]:
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)   # x x^2 x^3
xx

tensor([[ -3.1416,   9.8696, -31.0063],
        [ -3.1384,   9.8499, -30.9133],
        [ -3.1353,   9.8301, -30.8205],
        ...,
        [  3.1353,   9.8301,  30.8205],
        [  3.1384,   9.8499,  30.9133],
        [  3.1416,   9.8696,  31.0063]])

In [12]:
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)

In [13]:
# MSE LOSS
loss_fn = torch.nn.MSELoss(reduction="sum")

In [24]:
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
learning_rate = 1e-6
for t in range(2000):
    y_pred = model(xx)

    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Zero the gradients
    # 不应该先更新权重后把梯度归0？
    # model.zero_grad()
    # 使用优化器
    optimizer.zero_grad()

    # backward
    loss.backward()

    # update the weights
    # with torch.no_grad():
    #     for param in model.parameters():
    #         param -= learning_rate * param.grad
    # update parameters
    optimizer.step()

99 8.817173957824707
199 8.817168235778809
299 8.817167282104492
399 8.817168235778809
499 8.817167282104492
599 8.817167282104492
699 8.817168235778809
799 8.817167282104492
899 8.817168235778809
999 8.817168235778809
1099 8.817168235778809
1199 8.817168235778809
1299 8.817168235778809
1399 8.817168235778809
1499 8.817168235778809
1599 8.817167282104492
1699 8.817168235778809
1799 8.817167282104492
1899 8.817167282104492
1999 8.817167282104492


In [25]:
linear_layer = model[0]
linear_layer

Linear(in_features=3, out_features=1, bias=True)

In [28]:
# weights and bias
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

Result: y = 5.01128909036197e-07 + 0.8567413687705994 x + 5.009318329030066e-07 x^2 + -0.09332989156246185 x^3


In [29]:
model

Sequential(
  (0): Linear(in_features=3, out_features=1, bias=True)
  (1): Flatten(start_dim=0, end_dim=1)
)

In [32]:
class Polynomial3(torch.nn.Module):
    def __init__(self):
        """
        Instantiate four parameters
        """
        super().__init__()
        # 正态分布
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        Input: a Tensor
        Return: a Tensor of output data
        """
        return self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3

    def string(self):
        """
        define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} * x + {self.c.item()} * x^2 + {self.d.item()} x^3'

In [34]:
# 使用定义的模型
model = Polynomial3()

criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)
for t in range(2000):
    y_pred = model(x)

    # 预测 - 实际
    loss = criterion(y_pred, y)

    # Zero gradients perform a backward pass , and update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result : {model.string()}')


Result : y = -0.01847038045525551 + 0.8218784928321838 * x + 0.0031864470802247524 * x^2 + -0.08837152272462845 x^3


In [35]:
# -*- coding: utf-8 -*-
import random
import torch
import math


class DynamicNet(torch.nn.Module):
    def __init__(self):
        """
        In the constructor we instantiate five parameters and assign them as members.
        """
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        self.e = torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        For the forward pass of the model, we randomly choose either 4, 5
        and reuse the e parameter to compute the contribution of these orders.

        Since each forward pass builds a dynamic computation graph, we can use normal
        Python control-flow operators like loops or conditional statements when
        defining the forward pass of the model.

        Here we also see that it is perfectly safe to reuse the same parameter many
        times when defining a computational graph.
        """
        y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
        for exp in range(4, random.randint(4, 6)):
            y = y + self.e * x ** exp
        return y

    def string(self):
        """
        Just like any class in Python, you can also define custom method on PyTorch modules
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'


# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Construct our model by instantiating the class defined above
model = DynamicNet()

# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
for t in range(30000):
    # Forward pass: Compute predicted y by passing x to the model
    y_pred = model(x)

    # Compute and print loss
    loss = criterion(y_pred, y)
    if t % 2000 == 1999:
        print(t, loss.item())

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')


1999 1076.6103515625
3999 578.549072265625
5999 278.77447509765625
7999 223.59188842773438
9999 71.91000366210938
11999 39.38667297363281
13999 23.1971492767334
15999 16.803993225097656
17999 12.429099082946777
19999 9.967989921569824
21999 9.736001014709473
23999 9.311260223388672
25999 9.080161094665527
27999 8.593727111816406
29999 8.908875465393066
Result: y = 0.0074686575680971146 + 0.8604275584220886 x + -0.0019180409144610167 x^2 + -0.09422893822193146 x^3 + 0.0001222019927809015 x^4 ? + 0.0001222019927809015 x^5 ?
