In [19]:
import math 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  

In [20]:
class Value:
    """A scalar that remembers how it was computed so gradients can be back-propagated.

    Every arithmetic operation returns a new ``Value`` whose ``_prev`` holds the
    operands and whose ``_backward`` closure adds the local derivative (times the
    result's ``grad``, i.e. the chain rule) into each operand's ``grad``.

    Attributes:
        data: The wrapped number.
        grad: Accumulated gradient; starts at 0.0 and is filled in by ``backward()``.
        _prev: The operand ``Value`` objects this value was computed from.
        _op: Short label of the operation that produced this value ('' for leaves).
        _backward: Zero-argument callable that pushes ``grad`` to the operands.
    """

    def __init__(self, data, _children=(), _op=''):
        self.data = data
        self._prev = _children
        self._op = _op
        self.grad = 0.0
        self._backward = lambda: None  # leaves have nothing to propagate

    def __repr__(self):
        return f"Value(data={self.data})"

    # ---- addition / subtraction -------------------------------------------

    def __add__(self, other):  # self + other
        """Return ``self + other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __radd__(self, other):  # other + self
        return self + other

    def __neg__(self):  # -self
        return self * -1

    def __sub__(self, other):  # self - other
        return self + (-other)

    def __rsub__(self, other):  # other - self
        return other + (-self)

    # ---- multiplication / division ----------------------------------------

    def __mul__(self, other):  # self * other
        """Return ``self * other``; plain numbers are wrapped in a ``Value``."""
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a * b)/da = b and d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def __rmul__(self, other):  # other * self
        # Multiplication is commutative, so delegate to __mul__ (the previous
        # version delegated to addition, which made `2 * v` compute `v + 2`).
        return self * other

    def __truediv__(self, other):  # self / other
        return self * other**-1

    def __rtruediv__(self, other):  # other / self
        return other * self**-1

    # ---- power / activation -----------------------------------------------

    def __pow__(self, other):
        """Return ``self ** other`` for a constant int/float exponent."""
        assert isinstance(other,(int, float)),'Only supporting int/float powers for now'
        out = Value(self.data ** other, (self,), f'**{other}')

        def _backward():
            # Power rule combined with the chain rule: d(x**n)/dx = n * x**(n-1)
            self.grad += other * self.data ** (other - 1) * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        """Return ``tanh(self)`` as a new ``Value``."""
        # math.tanh saturates to +/-1.0 for large |x|; the explicit
        # (e^{2x} - 1) / (e^{2x} + 1) form raises OverflowError there.
        t = math.tanh(self.data)
        out = Value(t, (self,), 'tanh')

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)**2
            self.grad += (1 - t**2) * out.grad
        out._backward = _backward
        return out

    # ---- back-propagation -------------------------------------------------

    def backward(self):
        """Back-propagate from this value through every value it depends on.

        Builds a topological ordering of the graph reachable through ``_prev``,
        sets this value's ``grad`` to 1.0, and then calls each node's
        ``_backward`` in reverse topological order so that a node's ``grad`` is
        complete before it is pushed on to its operands. Gradients are
        accumulated with ``+=``; reset ``grad`` to 0.0 on reused values before
        calling this again.
        """
        topo = []
        visited = set()

        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)  # appended only after all of its operands

        build_topo(self)
        self.grad = 1.0
        for node in reversed(topo):
            node._backward()
        

In [21]:
import torch
import random
# Python floats are 64-bit, so the tensors are created as float64
# (torch.double is the same dtype) to keep the numbers comparable.
# Every input is a leaf tensor that tracks gradients.
x1, x2, w1, w2, b = (
    torch.tensor([_v], dtype=torch.float64, requires_grad=True)
    for _v in (2.0, 0.0, -3.0, 1.0, 6.881373580195432)
)

# Forward pass of a single two-input neuron with a tanh activation.
n = x1 * w1 + x2 * w2 + b
o = n.tanh()

# Show the output, then back-propagate from it.
print(o.item())
o.backward()

# Gradients of the output with respect to a few of the leaves.
print('---')
for _label, _leaf in (('x2', x2), ('w2', w2), ('x1', x1)):
    print(_label, _leaf.grad.item())
        

0.707106777774492
---
x2 0.5000000048253751
w2 0.0
x1 -1.5000000144761252


In [22]:
class Neuron:
    """One tanh unit: computes tanh(sum(w_i * x_i) + b)."""

    def __init__(self, nin):
        """Create ``nin`` weights and one bias, each drawn uniformly from [-1, 1]."""
        self.w = []
        for _ in range(nin):
            self.w.append(Value(random.uniform(-1,1)))
        self.b = Value(random.uniform(-1,1))

    def __call__(self, x):
        """Return the activation for the inputs ``x`` (paired with the weights by zip)."""
        # Start from the bias and add one weighted input at a time.
        total = self.b
        for weight, value in zip(self.w, x):
            total = total + weight * value
        return total.tanh()

    def parameters(self):
        """Return the weights followed by the bias as a new list."""
        return [*self.w, self.b]
    
class Layer:
    """A group of neurons that all receive the same input."""

    def __init__(self, nin, nout):
        """Build ``nout`` neurons, each taking ``nin`` inputs."""
        self.neurons = []
        for _ in range(nout):
            self.neurons.append(Neuron(nin))

    def __call__(self, x):
        """Evaluate every neuron on ``x``.

        Returns the single output itself when the layer has exactly one
        neuron, otherwise the list of outputs.
        """
        results = list(map(lambda neuron: neuron(x), self.neurons))
        if len(results) == 1:
            return results[0]
        return results

    def parameters(self):
        """Return the parameters of all neurons as one flat list, in neuron order."""
        return [p for neuron in self.neurons for p in neuron.parameters()]
    
class MLP:
    """A multi-layer perceptron: a chain of ``Layer`` objects applied in order."""

    def __init__(self, nin, nouts):
        """Build the layers.

        Args:
            nin: Number of input units.
            nouts: Sizes of every layer after the input layer, in order.
                Any iterable of ints is accepted (list, tuple, range, ...).
        """
        # Materialise first: `[nin] + nouts` only works when nouts is a list.
        sizes = [nin] + list(nouts)
        # Consecutive size pairs are the (inputs, outputs) of each layer.
        self.layers = [Layer(n_in, n_out) for n_in, n_out in zip(sizes, sizes[1:])]

    def __call__(self, x):
        """Feed ``x`` (the input units, as a list) through every layer in order."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return the parameters of all layers as one flat list, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]
    

In [23]:
# Network: 3 inputs -> 4 -> 4 -> 1 units.
n = MLP(3, [4, 4, 1])

# Four training examples with three features each.
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]

# One target per row of xs.
# NOTE(review): every Neuron ends in tanh, so predictions stay within [-1, 1];
# the targets 6.0 and -5.0 cannot be reached, and a squared-error loss on this
# data cannot drop below (6 - 1)**2 + (-5 + 1)**2 = 41. Confirm these targets
# are intentional.
ys = [6.0, -5.0, -1.0, 1.0]

# Predictions of the untrained network, displayed as the cell output.
ypred = list(map(n, xs))
ypred

[Value(data=0.09141464019448592),
 Value(data=0.7777402278960779),
 Value(data=0.4752302694998408),
 Value(data=0.2473189789887785)]

In [24]:
# Inspect the gradient stored on one specific weight: the first weight of the
# first neuron in the first layer.
n.layers[0].neurons[0].w[0].grad    # if this is positive, increasing the weight increases the loss
# Value.grad starts at 0.0 and is only filled in by backward(), so this reads
# 0.0 until a backward pass has been run on a loss built from this network.

0.0

据此沿梯度的反方向对每个参数（权重 w 和偏置 b）进行微调，以减小 loss。

In [33]:
for k in range(50):
    # Forward pass: predict every example, then sum the squared errors.
    ypred = list(map(n, xs))
    loss = sum((pred - target)**2 for target, pred in zip(ys, ypred))

    # Backward pass: gradients accumulate with +=, so clear them first.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Update: move each parameter a small step against its gradient
    # (step size 0.08) so that the loss decreases.
    for p in n.parameters():
        p.data -= 0.08 * p.grad

    # Report the loss computed before this step's update.
    print(k,loss.data)

0 41.00309014984604
1 41.00307899752153
2 41.00306792760993
3 41.003056939192035
4 41.003046031362295
5 41.00303520322866
6 41.00302445391219
7 41.00301378254689
8 41.003003188279486
9 41.00299267026917
10 41.00298222768734
11 41.00297185971746
12 41.00296156555476
13 41.002951344406036
14 41.00294119548947
15 41.00293111803448
16 41.00292111128137
17 41.002911174481255
18 41.00290130689587
19 41.002891507797294
20 41.002881776467866
21 41.00287211219995
22 41.002862514295764
23 41.002852982067246
24 41.00284351483582
25 41.00283411193227
26 41.002824772696634
27 41.002815496477915
28 41.002806282634054
29 41.0027971305317
30 41.0027880395461
31 41.002779009060916
32 41.00277003846814
33 41.002761127167915
34 41.00275227456838
35 41.00274348008559
36 41.00273474314336
37 41.00272606317309
38 41.002717439613726
39 41.00270887191155
40 41.00270035952017
41 41.00269190190024
42 41.00268349851952
43 41.0026751488526
44 41.00266685238093
45 41.00265860859262
46 41.00265041698233
47 41.00264

In [30]:
len(n.parameters())  # the network n has 41 parameters in total: (3*4 + 4) + (4*4 + 4) + (4*1 + 1)

41

In [34]:
# Display ypred as last assigned; inside the training loop it holds the forward
# pass of the most recent iteration, computed before that iteration's update.
ypred

[Value(data=0.9999515266754302),
 Value(data=-0.9997323439824924),
 Value(data=-0.9996973439917927),
 Value(data=0.9999426659318282)]