In [1]:
# -*- coding: utf-8 -*-
import numpy as np
import math

# Fit a cubic polynomial to sin(x) using nothing but numpy: the forward pass,
# the loss and every gradient are written out by hand.

# Training data: 2000 evenly spaced points on [-pi, pi] and their sines
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change during training, so compute them once
x_squared = x ** 2
x_cubed = x ** 3

# Coefficients of y = a + b x + c x^2 + d x^3, each drawn from N(0, 1)
a, b, c, d = (np.random.randn() for _ in range(4))

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the polynomial at every sample point
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Sum-of-squares loss, reported on every 100th step
    residual = y_pred - y
    loss = np.square(residual).sum()
    if (t + 1) % 100 == 0:
        print(t, loss)

    # Backward pass: d(loss)/d(y_pred) first, then the chain rule gives the
    # gradient with respect to each coefficient
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_squared).sum()
    grad_d = (grad_y_pred * x_cubed).sum()

    # Plain gradient-descent step
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 4831.929689202033
199 3275.474959028179
299 2223.6535142922944
399 1512.1280324370045
499 1030.3032978073493
599 703.6800300254064
699 482.0273729942425
799 331.4450182313207
899 229.03153453688395
999 159.30024314781744
1099 111.76777655338714
1199 79.33018883726987
1299 57.168407294679575
1399 42.00977755502011
1499 31.62936857432123
1599 24.512856824502933
1699 19.628404565186457
1799 16.272131793204316
1899 13.963328593477577
1999 12.373320177386764
Result: y = 0.05117064437243873 + 0.8227998466414344 x + -0.008827787637443402 x^2 + -0.08850257785889436 x^3


In [2]:
# -*- coding: utf-8 -*-

import torch
import math

# A PyTorch Tensor is conceptually the same as a numpy array: an n-dimensional
# array, with many PyTorch functions available to operate on it.
# Tensors can also use a GPU to speed up numeric computation; to run on a GPU
# you only need to specify the right device.

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Training data: 2000 evenly spaced points on [-pi, pi] and their sines
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x are constant, so build them once outside the loop
x_squared = x ** 2
x_cubed = x ** 3

# Coefficients of y = a + b x + c x^2 + d x^3 as 0-dim tensors drawn from N(0, 1)
a, b, c, d = (torch.randn((), device=device, dtype=dtype) for _ in range(4))

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the polynomial at every sample point
    y_pred = a + b * x + c * x_squared + d * x_cubed

    # Sum-of-squares loss as a Python float, reported on every 100th step
    residual = y_pred - y
    loss = residual.pow(2).sum().item()
    if (t + 1) % 100 == 0:
        print(t, loss)

    # Backward pass by hand: d(loss)/d(y_pred), then chain rule per coefficient
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_squared).sum()
    grad_d = (grad_y_pred * x_cubed).sum()

    # Gradient-descent step; `-=` updates each weight tensor in place
    for weight, grad in ((a, grad_a), (b, grad_b), (c, grad_c), (d, grad_d)):
        weight -= learning_rate * grad


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 6095.591796875
199 4116.40087890625
299 2783.484619140625
399 1884.982666015625
499 1278.7406005859375
599 869.2943115234375
699 592.4845581054688
799 405.15325927734375
899 278.2442321777344
999 192.17706298828125
1099 133.74461364746094
1199 94.03007507324219
1299 67.00733947753906
1399 48.5997314453125
1499 36.04637908935547
1599 27.475698471069336
1699 21.61733627319336
1799 17.608327865600586
1899 14.86177921295166
1999 12.977968215942383
Result: y = -0.052901774644851685 + 0.817099928855896 x + 0.009126436896622181 x^2 + -0.08769181370735168 x^3


In [4]:
# -*- coding: utf-8 -*-
import torch
import math

# Let PyTorch's autograd package compute the backward-pass gradients
# automatically instead of deriving them by hand.

dtype = torch.float
device = torch.device("cpu")

# Training data: 2000 evenly spaced points on [-pi, pi] and their sines
x = torch.linspace(-math.pi, math.pi, 2000, device = device, dtype = dtype)
y = torch.sin(x)

# Coefficients of y = a + b x + c x^2 + d x^3; requires_grad=True asks
# autograd to track operations on them so gradients can be computed
a, b, c, d = (
    torch.randn((), device = device, dtype = dtype, requires_grad = True)
    for _ in range(4)
)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass and sum-of-squares loss (kept as a 0-dim tensor)
    y_pred = a + b * x + c * x ** 2 + d * x ** 3
    residual = y_pred - y
    loss = residual.pow(2).sum()

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Backward pass via autograd: fills in .grad for every tensor that was
    # created with requires_grad=True
    loss.backward()

    # The weight update itself must not be tracked by autograd, hence no_grad()
    with torch.no_grad():
        for weight in (a, b, c, d):
            weight -= learning_rate * weight.grad
            # Reset the gradient after the update; otherwise the next
            # backward() would accumulate on top of the old value
            weight.grad = None


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1420.5611572265625
199 942.403076171875
299 626.2010498046875
399 417.09765625
499 278.81787109375
599 187.372802734375
699 126.89958190917969
799 86.9080810546875
899 60.4609489440918
999 42.971092224121094
1099 31.40462303161621
1199 23.755273818969727
1299 18.696590423583984
1399 15.35107707977295
1499 13.138484954833984
1599 11.675180435180664
1699 10.707425117492676
1799 10.067357063293457
1899 9.644065856933594
1999 9.364091873168945
Result: y = -0.001541219069622457 + 0.8340513110160828 x + 0.00026588552282191813 x^2 + -0.09010300040245056 x^3


In [8]:
# -*- coding: utf-8 -*-
import torch
import math

# Custom autograd operators are defined by subclassing torch.autograd.Function
# and implementing its forward and backward static methods.

class LegendrePolynomial3(torch.autograd.Function):
    """
    Autograd function for the degree-3 Legendre polynomial
    P3(x) = 0.5 * (5 x^3 - 3 x).

    Notes on ``ctx`` (short for "context"):

    * forward() and backward() are static methods, so they are called through
      the class without instantiating it, and ``ctx`` must be their first
      parameter.
    * ``ctx.save_for_backward(a, b)`` stores tensors during forward(); backward()
      gets them back with ``a, b = ctx.saved_tensors``.
    * ``ctx.needs_input_grad`` is a tuple of booleans, one per forward() input,
      telling whether that input requires a gradient.
    """

    @staticmethod
    def forward(ctx, input):
        """Return P3(input) and stash ``input`` on ``ctx`` for backward()."""
        ctx.save_for_backward(input)
        cubic_term = 5 * input ** 3
        linear_term = 3 * input
        return 0.5 * (cubic_term - linear_term)

    @staticmethod
    def backward(ctx, grag_output):
        """
        Return the gradient with respect to forward()'s input: the incoming
        gradient times dP3/dx = 1.5 * (5 x^2 - 1).
        """
        (saved_input,) = ctx.saved_tensors
        derivative = 5 * saved_input ** 2 - 1
        return grag_output * 1.5 * derivative
        
dtype = torch.float
device = torch.device("cpu")

# Training data: 2000 evenly spaced points on [-pi, pi] and their sines
x = torch.linspace(-math.pi, math.pi, 2000, device = device, dtype = dtype)
y = torch.sin(x)

# Weights of y = a + b * P3(c + d * x), started from fixed values rather than
# random ones
a, b, c, d = (
    torch.full((), start, device = device, dtype = dtype,requires_grad = True)
    for start in (0.0, -1.0, 0.0, 0.3)
)

learning_rate = 5e-6

# A custom Function is invoked through its .apply attribute; bind it once
P3 = LegendrePolynomial3.apply

for t in range(2000):
    # Forward pass through the custom autograd op, then sum-of-squares loss
    y_pred = a + b * P3(c + d * x)
    residual = y_pred - y
    loss = residual.pow(2).sum()

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Autograd fills in .grad for a, b, c and d
    loss.backward()

    # Gradient-descent step, done outside of autograd tracking
    with torch.no_grad():
        for weight in (a, b, c, d):
            weight -= learning_rate * weight.grad
            # Clear the gradient so the next backward() starts from scratch
            weight.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 209.95834350585938
199 144.66018676757812
299 100.70249938964844
399 71.03519439697266
499 50.97850799560547
599 37.403133392333984
699 28.206867218017578
799 21.973188400268555
899 17.7457275390625
999 14.877889633178711
1099 12.931766510009766
1199 11.610918045043945
1299 10.714258193969727
1399 10.10548210144043
1499 9.692106246948242
1599 9.411375045776367
1699 9.220745086669922
1799 9.091285705566406
1899 9.003361701965332
1999 8.943639755249023
Result: y = -5.423830273798558e-09 + -2.208526849746704 * P3(1.3320399228078372e-09 + 0.2554861009120941 x)


In [40]:
# -*- coding: utf-8 -*-
import torch
import math


# The nn package defines a set of Modules, roughly equivalent to neural-network
# layers. A Module takes input tensors and computes output tensors, and may
# also hold internal state such as tensors of learnable parameters.
# The nn package also provides loss functions commonly used for training.

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Feature matrix (x, x^2, x^3): x.unsqueeze(-1) has shape (2000, 1) and
# broadcasting it against the exponents of shape (3,) yields shape (2000, 3)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# nn.Sequential is an ordered container: its modules run in the order they are
# passed to the constructor (an ordered dict of modules is accepted as well).
# Linear(in_features, out_features, bias=True) computes y = x A^T + b and keeps
# the weight and bias as internal tensors. For Linear(3, 1) the weight has
# shape (1, 3) and is applied transposed, so its output here is (2000, 1).
# Flatten(0, 1) then collapses dims 0..1 so the prediction is 1-D like y.
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1),
)

# Mean-squared-error loss; reduction may be 'sum' or 'mean'. 'sum' is used
# here, so the loss is the summed squared error.
loss_fn = torch.nn.MSELoss(reduction = 'sum')

learning_rate = 1e-6

# For reference, the usual optimizer-based training step looks like:
#   preds = model(inputs)              # inference
#   loss = criterion(preds, targets)   # compute the loss
#   optimizer.zero_grad()              # reset gradients
#   loss.backward()                    # backpropagate to get gradients
#   optimizer.step()                   # update the weights
# This cell performs the last step by hand instead of using an optimizer.

for t in range(2000):
    # Forward pass and loss
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Reset the gradients before backpropagating
    model.zero_grad()

    # Backward pass: gradients for every learnable parameter of the model
    loss.backward()

    # Manual gradient-descent step, outside of autograd tracking
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

# The first module of the Sequential is the Linear layer holding the fit
linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 133.08131408691406
199 91.0268783569336
299 63.20665740966797
399 44.80228042602539
499 32.62653732299805
599 24.571210861206055
699 19.241649627685547
799 15.715407371520996
899 13.382174491882324
999 11.838301658630371
1099 10.816669464111328
1199 10.140596389770508
1299 9.69316577911377
1399 9.397064208984375
1499 9.201065063476562
1599 9.071333885192871
1699 8.985456466674805
1799 8.928606986999512
1899 8.890969276428223
1999 8.866048812866211
Result: y = -0.0011633113026618958 + 0.8500290513038635 x + 0.000200691181817092 x^2 + -0.09237569570541382 x^3


In [42]:
import torch
import math

# The optim package provides implementations of commonly used optimization
# algorithms, so the weight update no longer has to be written by hand.

x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Feature matrix (x, x^2, x^3) of shape (2000, 3)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# Linear layer over the three features, flattened to a 1-D prediction
model = torch.nn.Sequential(
    torch.nn.Linear(3,1),
    torch.nn.Flatten(0,1),
)

# Summed squared error
loss_fn = torch.nn.MSELoss(reduction = 'sum')

# RMSprop decides how each parameter of the model is updated
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr = learning_rate)

for t in range(2000):
    # Forward pass and loss
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)

    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Reset gradients, backpropagate, then let the optimizer apply the update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')


99 3007.56982421875
199 1284.267578125
299 1019.1074829101562
399 868.1571044921875
499 714.6934814453125
599 569.8775024414062
699 441.0660705566406
799 330.3569641113281
899 237.97438049316406
999 163.3916778564453
1099 105.56173706054688
1199 63.303375244140625
1299 35.096439361572266
1399 18.800708770751953
1499 11.385660171508789
1599 9.182021141052246
1699 8.882740020751953
1799 8.926872253417969
1899 8.942255973815918
1999 8.914430618286133
Result: y = -0.0005012111505493522 + 0.8571780323982239 x + -0.0005012317560613155 x^2 + -0.09284719079732895 x^3


In [53]:
# -*- coding: utf-8 -*-
import torch
import math

# Custom nn model.
# When something more complex than a sequence of existing modules is needed,
# subclass nn.Module and define forward(), which receives the input tensor and
# returns the output tensor.

class Polynomial3(torch.nn.Module):
    """Cubic polynomial y = a + b x + c x^2 + d x^3 with learnable coefficients."""

    def __init__(self):
        """
        Create the four coefficients and assign them as member parameters.
        """
        super().__init__()
        self.a = self._random_scalar()
        self.b = self._random_scalar()
        self.c = self._random_scalar()
        self.d = self._random_scalar()

    @staticmethod
    def _random_scalar():
        """Return a new 0-dim Parameter initialised from torch.randn."""
        return torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """Evaluate the polynomial element-wise on ``x``."""
        linear = self.b * x
        quadratic = self.c * x ** 2
        cubic = self.d * x ** 3
        return self.a + linear + quadratic + cubic

    def string(self):
        """Return the polynomial, with its current coefficients, as text."""
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
        

# Training data: 2000 evenly spaced points on [-pi, pi] and their sines
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# The custom module defined above
model = Polynomial3()

# Summed squared error, minimised with plain SGD over the model's parameters
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(),lr= 1e-6)

for t in range(2000):
    # Forward pass and loss
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report the loss on every 100th step
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Reset gradients, backpropagate, update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

99 1376.782958984375
199 916.1348876953125
299 610.727294921875
399 408.20770263671875
499 273.8888854980469
599 184.78555297851562
699 125.66439056396484
799 86.42768859863281
899 60.38158416748047
999 43.087039947509766
1099 31.600505828857422
1199 23.96929168701172
1299 18.897789001464844
1399 15.52632999420166
1499 13.284313201904297
1599 11.792794227600098
1699 10.800185203552246
1799 10.13931655883789
1899 9.699146270751953
1999 9.40582275390625
Result: y = 0.009646207094192505 + 0.8348827958106995 x + -0.001664131530560553 x^2 + -0.09022127091884613 x^3


In [55]:
import torch
import random
import math

# Dynamic graphs and weight sharing

class DynamicNet(torch.nn.Module):
    """
    Cubic polynomial plus a randomly chosen number of higher-order terms.

    Every forward pass adds either no extra term, an x^4 term, or both an x^4
    and an x^5 term, and all of these extra terms reuse the single parameter
    ``e``.
    """

    def __init__(self):
        """Create the five learnable scalar parameters a, b, c, d and e."""
        super().__init__()

        self.a = self._random_scalar()
        self.b = self._random_scalar()
        self.c = self._random_scalar()
        self.d = self._random_scalar()
        self.e = self._random_scalar()

    @staticmethod
    def _random_scalar():
        """Return a new 0-dim Parameter initialised from torch.randn."""
        return torch.nn.Parameter(torch.randn(()))

    def forward(self, x):
        """
        Evaluate a + b x + c x^2 + d x^3 on ``x`` and then add e * x^k for
        each k in range(4, n), where n is drawn anew from {4, 5, 6} on every
        call.
        """
        linear = self.b * x
        quadratic = self.c * x ** 2
        cubic = self.d * x ** 3
        y = self.a + linear + quadratic + cubic

        # randint is inclusive on both ends, so the range below is empty,
        # (4,) or (4, 5)
        stop = random.randint(4, 6)
        for exp in range(4, stop):
            y += self.e * x ** exp
        return y

    def string(self):
        """
        Return the model as text; the '?' marks the x^4 and x^5 terms, which
        are only present on some forward passes.
        """
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
    
# Training data: 2000 evenly spaced points on [-pi, pi] and their sines
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# The dynamic model defined above
model = DynamicNet()

# Summed squared error
criterion = torch.nn.MSELoss(reduction='sum')

# Signature reminder:
# torch.optim.SGD(params, lr=, momentum=0, dampening=0, weight_decay=0, nesterov=False)
# Here SGD is run with momentum and a very small learning rate.
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-8, momentum = 0.9)

for t in range(30000):
    # Forward pass and loss
    y_pred = model(x)
    loss = criterion(y_pred, y)

    # Report the loss on every 2000th step
    if (t + 1) % 2000 == 0:
        print(t, loss.item())

    # Reset gradients, backpropagate, update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f'Result: {model.string()}')

1999 525.9119873046875
3999 251.20989990234375
5999 127.77450561523438
7999 66.87577819824219
9999 36.3028678894043
11999 23.155181884765625
13999 15.880390167236328
15999 12.707060813903809
17999 10.614063262939453
19999 9.727209091186523
21999 9.292576789855957
23999 9.08722972869873
25999 8.962778091430664
27999 8.662066459655762
29999 8.909375190734863
Result: y = 0.006035753525793552 + 0.8568557500839233 x + -0.0016227232990786433 x^2 + -0.0937441810965538 x^3 + 0.00012499438889790326 x^4 ? + 0.00012499438889790326 x^5 ?
