In [36]:
import numpy as np

## Variable Like a Tag

> 变量像个标签

In [37]:
class Variable:
    """Container that holds a value together with the gradient assigned to it."""

    def __init__(self, data):
        """
        Wrap ``data`` and start without a gradient.

        :param data: the value carried by this variable
        """
        # ``grad`` stays None until some backward step assigns it from outside.
        self.data, self.grad = data, None

In [38]:
class Function:
    """
    Base class for an operation applied to a Variable.

    Subclasses supply ``forward`` and ``backward``; calling the instance runs
    ``forward`` on the input's data and wraps the result in a new Variable.
    """

    def __call__(self, input_variable: Variable):
        """
        Apply the function to a variable.

        :param input_variable: variable whose ``data`` is passed to ``forward``
        :return: a new Variable wrapping the value returned by ``forward``
        """
        result = Variable(self.forward(input_variable.data))
        # Keep a reference to the input so ``backward`` can read its data later.
        self.input: Variable = input_variable
        return result

    def forward(self, input_data):
        """
        Compute the output value from raw input data.

        :param input_data: the ``data`` of the input variable
        :return: the computed output data
        :raises NotImplementedError: always, until a subclass overrides it
        """
        raise NotImplementedError()

    def backward(self, output_data):
        """
        Propagate a gradient from the output side to the input side.

        :param output_data: despite the name, this is the gradient flowing in
            from the output side, not the output value itself
        :return: the gradient for the input
        :raises NotImplementedError: always, until a subclass overrides it
        """
        raise NotImplementedError()

In [39]:
class Square(Function):
    """Function ``y = x ** 2``."""

    def forward(self, input_data):
        """
        Raise the input to the second power.

        :param input_data: the input value ``x``
        :return: ``x ** 2``, i.e. ``y``
        """
        return input_data ** 2

    def backward(self, output_data):
        """
        Chain rule for ``y = x ** 2``.

        :param output_data: incoming gradient ``gy``
        :return: ``gx = 2 * x * gy``, with ``x`` read from the stored input
        """
        x = self.input.data
        return 2 * x * output_data

In [40]:
class Exp(Function):
    """Function ``y = exp(x)`` computed with ``np.exp``."""

    def forward(self, input_data):
        """
        Apply the exponential to the input.

        :param input_data: the input value ``x``
        :return: ``np.exp(x)``
        """
        result = np.exp(input_data)
        return result

    def backward(self, output_data):
        """
        Chain rule for ``y = exp(x)``.

        :param output_data: incoming gradient ``gy``
        :return: ``gx = exp(x) * gy``, with ``x`` read from the stored input
        """
        x = self.input.data
        # The derivative of exp is exp itself, so it is re-evaluated at x.
        return np.exp(x) * output_data

In [41]:
# The three stages of the composite y = (exp(x ** 2)) ** 2.
A, B, C = Square(), Exp(), Square()

# Forward pass: every call also stores its input variable on the function
# object, which the backward methods read later.
x = Variable(np.array(0.5))
a = A(x)
b = B(a)
y = C(b)

In [42]:
# Seed the backward pass with dy/dy = 1, then walk the chain C -> B -> A,
# giving each function the gradient of the variable it produced.
y.grad = np.array(1.0)
b.grad = C.backward(y.grad)
a.grad = B.backward(b.grad)
x.grad = A.backward(a.grad)

# Show the gradients in the order they were computed, one per line.
print(y.grad, b.grad, a.grad, x.grad, sep="\n")

1.0
2.568050833375483
3.297442541400256
3.297442541400256


In [43]:
def numerical_diff(f, x: Variable, eps=1e-6):
    """
    Approximate the derivative of ``f`` at ``x`` with a central difference.

    :param f: callable that takes a Variable and returns an object with ``.data``
    :param x: variable marking the point of differentiation
    :param eps: half-width of the interval around ``x.data``
    :return: ``(f(x + eps).data - f(x - eps).data) / (2 * eps)``
    """
    below, above = Variable(x.data - eps), Variable(x.data + eps)
    y_below, y_above = f(below), f(above)
    # The two evaluation points are 2 * eps apart.
    return (y_above.data - y_below.data) / (2 * eps)

In [44]:
# Check numerical_diff on y = x ** 2 at x = 2.0.
x_nd = Variable(np.array(2.0))
f = Square()
dy = numerical_diff(f, x_nd)
print(dy)

4.000000000115023


In [45]:
def f(x):
    """
    Composite function: Square, then Exp, then Square.

    :param x: input Variable
    :return: the Variable produced by the last stage
    """
    # Fresh function objects on every call, all created before the first one runs.
    stages = (Square(), Exp(), Square())
    out = x
    for stage in stages:
        out = stage(out)
    return out

In [46]:
# Differentiate the composite f numerically at 0.5, the same point at which
# the manual backward pass was evaluated, so the two results can be compared.
x_nd2 = Variable(np.array(0.5))
dy = numerical_diff(f, x_nd2)
print(dy)

3.297442541350115


In [47]:
# Compare the magnitude of the gap between the numerical estimate and the
# backprop gradient. A signed difference would also report True whenever the
# numerical estimate is smaller than the backprop gradient, however far off.
abs(dy - x.grad) < 1e-6

True

> 数值微分结果与反向传播结果十分接近