In [1]:
import numpy as np

## A Variable Is Like a Tag

> 变量像个标签

In [2]:
class Variable:
    """A value in the computational graph together with its gradient.

    Instance fields:
      data    -- the wrapped value (the notebook passes NumPy arrays).
      grad    -- gradient stored for this variable; None until it is set.
      creator -- the function object that produced this variable, or None
                 when the variable was created directly.
    """

    def __init__(self, data):
        self.data = data
        self.grad = None
        self.creator = None

    def set_creator(self, func):
        """Remember `func` as the function that produced this variable."""
        self.creator = func

    def backward(self):
        """Propagate gradients from this variable back along its creator chain.

        For every function met on the way, the function's `backward` is called
        with the gradient of the variable it produced, and the result is stored
        in the `grad` of that function's `input`.

        If `self.grad` has not been set, it is seeded with ones shaped like
        `self.data` (the derivative of the output with respect to itself), so
        callers no longer have to assign it by hand. An explicitly assigned
        `grad` is used unchanged.
        """
        if self.grad is None:
            # Without a seed the first function's backward would receive None.
            self.grad = np.ones_like(self.data)

        # Walk the chain with a loop instead of recursion so that long chains
        # are not limited by the interpreter's recursion depth.
        var = self
        while var.creator is not None:
            func = var.creator  # 1. Get the function that produced `var`
            x = func.input  # 2. Get that function's input
            x.grad = func.backward(var.grad)  # 3. Call the function's backward
            var = x  # 4. Continue from the input variable

In [3]:
class Function:
    """Base class for an operation that maps one Variable to one Variable.

    Subclasses implement `forward` (the computation on raw data) and
    `backward` (the gradient of that computation). Calling an instance runs
    `forward`, wraps the result in a new Variable, and links the two so the
    output knows which function created it.
    """

    def __call__(self, input_variable: Variable):
        """Apply the function to `input_variable` and return the output Variable.

        The input and output variables are kept on the instance as
        `self.input` and `self.output`, and this function is registered as the
        output's creator.
        """
        output_data = self.forward(input_variable.data)
        if np.isscalar(output_data):
            # Operations on 0-d arrays return NumPy scalars; wrap them so that
            # Variable.data keeps being an ndarray throughout the graph.
            output_data = np.array(output_data)

        output_variable = Variable(output_data)
        output_variable.set_creator(self)  # Set parent (function)
        self.input: Variable = input_variable
        self.output: Variable = output_variable  # Set output
        return output_variable

    def forward(self, input_data):
        """Compute the output data from `input_data`.

        :param input_data: raw data taken from the input Variable, or `x`
        :return: raw output data, or `y`
        """
        raise NotImplementedError()

    def backward(self, output_data):
        """Compute the gradient with respect to this function's input.

        :param output_data: gradient passed back from the output side, or `gy`
        :return: gradient with respect to the input data, or `gx`
        """
        raise NotImplementedError()

In [4]:
class Square(Function):
    """Function computing y = x ** 2."""

    def forward(self, input_data):
        """Return the square of `input_data`.

        :param input_data: input value, or `x`
        :return: `x ** 2`, or `y`
        """
        return input_data ** 2

    def backward(self, output_data):
        """Return the gradient with respect to the input.

        :param output_data: gradient passed back from the output side, or `gy`
        :return: `2 * x * gy`, or `gx`
        """
        x = self.input.data
        local_slope = 2 * x  # derivative of x ** 2
        return local_slope * output_data

In [5]:
class Exp(Function):
    """Function computing y = exp(x) via `np.exp`."""

    def forward(self, input_data):
        """Return `np.exp(input_data)`."""
        result = np.exp(input_data)
        return result

    def backward(self, output_data):
        """Return `exp(x) * gy`, where `x` is the stored input data and `gy` is `output_data`."""
        x = self.input.data
        return np.exp(x) * output_data

In [6]:
# Three function instances: square -> exp -> square
A, B, C = Square(), Exp(), Square()

# Forward pass, one function at a time; each call links its output to its creator
x = Variable(np.array(0.5))
a = A(x)
b = B(a)
y = C(b)

In [7]:
# Seed the output gradient with 1.0, then propagate it back through C, B and A
y.grad = np.array(1.0)
y.backward()
print(x.grad)  # gradient stored on the input variable x

3.297442541400256


In [8]:
def numerical_diff(f, x: Variable, eps=1e-6):
    """Approximate the derivative of `f` at `x` with a central difference.

    :param f: callable that takes a Variable and returns a Variable
    :param x: Variable holding the point at which to differentiate
    :param eps: step taken on each side of `x.data`
    :return: `(f(x + eps).data - f(x - eps).data) / (2 * eps)`
    """
    below, above = Variable(x.data - eps), Variable(x.data + eps)
    y_below = f(below)
    y_above = f(above)
    rise = y_above.data - y_below.data
    return rise / (2 * eps)

In [9]:
# Numerical derivative of x ** 2 at x = 2.0 (the analytic value is 2 * x = 4)
f = Square()
x_nd = Variable(np.array(2.0))
dy = numerical_diff(f, x_nd)
print(dy)

4.000000000115023


In [10]:
def f(x):
    """Apply Square, then Exp, then Square to `x` and return the final Variable."""
    # Fresh function instances on every call, created before any is applied
    stages = (Square(), Exp(), Square())
    out = x
    for stage in stages:
        out = stage(out)
    return out

In [11]:
# Numerical derivative of the composite function f at x = 0.5
x_nd2 = Variable(np.array(0.5))
dy = numerical_diff(f, x_nd2)
print(dy)

3.297442541350115


In [12]:
# Compare the magnitude of the difference: without abs() the check would also
# pass whenever the numerical result is smaller than the backprop result.
abs(dy - x.grad) < 1e-6

True

> 数值微分结果与反向传播结果十分接近