In [16]:
%%time
import numpy as np
import math
# Fit the cubic y = a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] using
# plain NumPy and hand-written gradient descent.

# Create input and output data: 2000 evenly spaced points and their sine.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change during training, so compute them once here
# instead of re-evaluating x**2 and x**3 several times in every iteration.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

learning_rate = 1e-6
for t in range(2000):
    # Run 2000 iterations of gradient descent.
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print the loss. The loss value is only used for reporting,
    # so it is evaluated only on the iterations that actually print it.
    if t % 100 == 99:
        loss = np.square(y_pred - y).sum()
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss.
    # grad_y_pred is the derivative of the squared-error loss with respect
    # to y_pred; the gradients below follow from it by the chain rule.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result:y={a}+{b}x+{c}x^2+{d}x^3')

99 1614.9081429692362
199 1101.5135659195748
299 753.0656880145582
399 516.309736198957
499 355.2649457482324
599 245.5967702270655
699 170.8302544745673
799 119.7997819568718
899 84.92992363748047
999 61.075440693313766
1099 44.73781266584258
1199 33.535544451059536
1299 25.84568226961672
1399 20.560945985353396
1499 16.925014223518026
1599 14.420692092799854
1699 12.693899590145714
1799 11.501949396224997
1899 10.678312720340706
1999 10.108589816779588
Result:y=0.032214758966576086+0.8381651865978047x+-0.005557581977637425x^2+-0.0906881636651305x^3
Wall time: 322 ms


In [15]:
%%time
import torch
import math
# Fit the cubic y = a + b*x + c*x^2 + d*x^3 to sin(x) on [-pi, pi] using
# PyTorch tensors and hand-written gradient descent (no autograd).
dtype = torch.float
# Use the first GPU when one is present; otherwise fall back to the CPU so
# the cell still runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Create input and output data: 2000 evenly spaced points and their sine.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once here
# instead of re-evaluating x**2 and x**3 several times in every iteration.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights (0-dim tensors on the selected device)
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print the loss. .item() copies the value to a Python float,
    # which on a GPU forces a host/device synchronization, so it is done only
    # on the iterations that actually report the loss.
    if t % 100 == 99:
        loss = (y_pred - y).pow(2).sum().item()
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss.
    # grad_y_pred is the derivative of the squared-error loss w.r.t. y_pred.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 2303.4462890625
199 1526.611328125
299 1012.7960205078125
399 672.93701171875
499 448.1352844238281
599 299.4358825683594
699 201.07339477539062
799 136.00643920898438
899 92.96334838867188
999 64.4886245727539
1099 45.651084899902344
1199 33.18854522705078
1299 24.943374633789062
1399 19.488147735595703
1499 15.878701210021973
1599 13.490352630615234
1699 11.909989356994629
1799 10.864179611206055
1899 10.17208194732666
1999 9.714048385620117
Result: y = -0.0040124827064573765 + 0.8278629779815674 x + 0.0006922206957824528 x^2 + -0.08922275900840759 x^3
Wall time: 691 ms


In [20]:
import random
import torch
import math


class DynamicNet(torch.nn.Module):
    """
    Cubic polynomial model whose forward pass randomly appends higher-order
    terms that all share a single coefficient.
    """

    def __init__(self):
        """
        Register the five scalar parameters a, b, c, d and e, each initialised
        from torch.randn with an empty shape (a 0-dim tensor).
        """
        super().__init__()
        # Assigning a Parameter as an attribute registers it on the module;
        # the loop keeps the creation order a, b, c, d, e.
        for coeff_name in ("a", "b", "c", "d", "e"):
            setattr(self, coeff_name, torch.nn.Parameter(torch.randn(())))

    def forward(self, x):
        """
        Evaluate a + b*x + c*x^2 + d*x^3 and then add e * x^k for every
        integer k from 4 up to (but excluding) a bound drawn with
        random.randint(4, 6). Depending on the draw this adds no extra term,
        the x^4 term only, or both the x^4 and x^5 terms.

        The graph is rebuilt on every call, so ordinary Python control flow
        can decide its shape, and the same parameter (e) may appear in it
        more than once.
        """
        linear = self.a + self.b * x
        quadratic = linear + self.c * x ** 2
        y = quadratic + self.d * x ** 3

        stop = random.randint(4, 6)
        exp = 4
        while exp < stop:
            y = y + self.e * x ** exp
            exp += 1
        return y

    def string(self):
        """
        Return a human-readable formula built from the current parameter
        values; e is shown for both of the optional x^4 and x^5 terms.
        """
        a, b, c, d, e = (p.item() for p in (self.a, self.b, self.c, self.d, self.e))
        return f'y = {a} + {b} x + {c} x^2 + {d} x^3 + {e} x^4 ? + {e} x^5 ?'


# Training data: 2000 evenly spaced inputs on [-pi, pi] with sin(x) as target.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Build the model from the DynamicNet class defined above.
model = DynamicNet()

# Loss is the summed squared error. The optimizer is SGD with momentum; the
# original author's note is that plain SGD struggles with this unusual model.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)

for t in range(30000):
    # Forward pass and loss for the current parameters.
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report progress on every 5000th iteration.
    if (t + 1) % 5000 == 0:
        print(t, loss.item())

    # Clear old gradients, backpropagate, then take one optimizer step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

4999 448.6660461425781
9999 65.90621948242188
14999 16.77259063720703
19999 9.877023696899414
24999 8.979098320007324
29999 8.59366226196289
Result: y = -0.0021008430048823357 + 0.8537216186523438 x + -0.00015774245548527688 x^2 + -0.0932685062289238 x^3 + 0.00011178691784152761 x^4 ? + 0.00011178691784152761 x^5 ?
