In [1]:
import toydl
import math

from toydl.core.scalar import Scalar
from toydl.core.scalar.bp import topological_sort
from toydl.network.mlp import MLPConfig, MLPBinaryClassifyNetFactory, ActivationType

In [2]:
# Display the installed toydl version so readers know which release
# produced the outputs recorded in this notebook.
toydl.__version__

'0.2.0'

## Simple function backward

$$ y = f(x_1, x_2) = 3x_1 + 5x_2 $$

$$ \frac{\partial f}{\partial x_1} = 3 $$
$$ \frac{\partial f}{\partial x_2} = 5 $$

In [15]:
# Leaf inputs of y = 3*x1 + 5*x2. requires_grad_() is called on each input
# right after it is created, before it is used in any expression.
x1 = Scalar(1.0, name="x1")
x1.requires_grad_()
x2 = Scalar(2.0, name="x2")
x2.requires_grad_()

# Build the two products one at a time, left to right, so the intermediate
# Scalars are created in the same order as in a single combined expression.
term_x1 = Scalar(3, name="x1_coef") * x1
term_x2 = Scalar(5, name="x2_coef") * x2
y = term_x1 + term_x2
y.name = "y"

# Last expression of the cell: show the resulting Scalar.
y

Scalar(name=y, unique_id=74, data=13.0000, derivative=None)

In [16]:
# Show the Scalars that topological_sort returns for y, i.e. the nodes of the
# computation graph that was built when y was computed.
topological_sort(y)

[Scalar(name=y, unique_id=74, data=13.0000, derivative=None),
 Scalar(name=73, unique_id=73, data=10.0000, derivative=None),
 Scalar(name=x2, unique_id=69, data=2.0000, derivative=None),
 Scalar(name=71, unique_id=71, data=3.0000, derivative=None),
 Scalar(name=x1, unique_id=68, data=1.0000, derivative=None)]

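Note that, besides the `Scalar`s we created explicitly, the graph also contains `Scalar`s that store intermediate results. They were never given a name, so they are shown under their `unique_id` (here `71` for $3x_1$ and `73` for $5x_2$).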

In [17]:
# Back-propagate from y, seeding the output derivative with 1.0.
y.backward(d_output=1.0)

# The derivatives stored on the inputs must match the analytic values:
# dy/dx1 = 3 and dy/dx2 = 5.
for leaf, expected in ((x1, 3.0), (x2, 5.0)):
    assert math.isclose(leaf.derivative, expected)

In [18]:
# Inspect x1 after backward(): its derivative field now holds dy/dx1.
x1

Scalar(name=x1, unique_id=68, data=1.0000, derivative=3.0)

In [19]:
# Inspect x2 after backward(): its derivative field now holds dy/dx2.
x2

Scalar(name=x2, unique_id=69, data=2.0000, derivative=5.0)

## MLP backward

In [21]:
# Network settings: 2 inputs, a single hidden layer of 3 sigmoid units,
# and 1 output.
# NOTE(review): the initial parameter values look randomly drawn; if
# reproducible outputs are needed, seed whatever RNG toydl uses — TODO confirm.
mlp_settings = {
    "in_size": 2,
    "out_size": 1,
    "hidden_layer_size": 3,
    "hidden_layer_num": 1,
    "hidden_activation": ActivationType.SIGMOID,
}
mlp_config = MLPConfig(**mlp_settings)

# Build the binary-classification MLP from the config.
mlp = MLPBinaryClassifyNetFactory(mlp_config)

In [22]:
# List every (name, parameter) pair of the network before any backward pass.
for param_name, param in mlp.named_parameters():
    print((param_name, param))

('layer_input_hidden_0.weight_0_0', Scalar(name=weight_0_0, unique_id=75, data=0.6376, derivative=None))
('layer_input_hidden_0.weight_0_1', Scalar(name=weight_0_1, unique_id=76, data=0.1573, derivative=None))
('layer_input_hidden_0.weight_0_2', Scalar(name=weight_0_2, unique_id=77, data=-0.7400, derivative=None))
('layer_input_hidden_0.weight_1_0', Scalar(name=weight_1_0, unique_id=78, data=0.4457, derivative=None))
('layer_input_hidden_0.weight_1_1', Scalar(name=weight_1_1, unique_id=79, data=0.8250, derivative=None))
('layer_input_hidden_0.weight_1_2', Scalar(name=weight_1_2, unique_id=80, data=0.7804, derivative=None))
('layer_input_hidden_0.bias_0', Scalar(name=bias_0, unique_id=81, data=0.4777, derivative=None))
('layer_input_hidden_0.bias_1', Scalar(name=bias_1, unique_id=82, data=0.2580, derivative=None))
('layer_input_hidden_0.bias_2', Scalar(name=bias_2, unique_id=83, data=0.1386, derivative=None))
('layer_hidden_output.weight_0_0', Scalar(name=weight_0_0, unique_id=84, data=

In [23]:
# Fresh input Scalars for the network. Unlike the first example,
# requires_grad_() is NOT called on them.
xs = (Scalar(1.0, name="x1"), Scalar(2.0, name="x2"))
x1, x2 = xs

# Forward pass through the MLP; label the output node for readability.
y = mlp.forward(xs)
y.name = "y"

# Last expression of the cell: show the network output.
y

Scalar(name=y, unique_id=124, data=0.9191, derivative=None)

In [24]:
# Back-propagate from the network output, seeding the output derivative
# with 1.0.
y.backward(d_output=1.0)

In [25]:
# List every (name, parameter) pair again to inspect the parameters'
# derivative fields after the backward pass.
for param_name, param in mlp.named_parameters():
    print((param_name, param))

('layer_input_hidden_0.weight_0_0', Scalar(name=weight_0_0, unique_id=75, data=0.6376, derivative=-0.0006))
('layer_input_hidden_0.weight_0_1', Scalar(name=weight_0_1, unique_id=76, data=0.1573, derivative=0.0031))
('layer_input_hidden_0.weight_0_2', Scalar(name=weight_0_2, unique_id=77, data=-0.7400, derivative=0.002))
('layer_input_hidden_0.weight_1_0', Scalar(name=weight_1_0, unique_id=78, data=0.4457, derivative=-0.0012))
('layer_input_hidden_0.weight_1_1', Scalar(name=weight_1_1, unique_id=79, data=0.8250, derivative=0.0062))
('layer_input_hidden_0.weight_1_2', Scalar(name=weight_1_2, unique_id=80, data=0.7804, derivative=0.0039))
('layer_input_hidden_0.bias_0', Scalar(name=bias_0, unique_id=81, data=0.4777, derivative=-0.0012))
('layer_input_hidden_0.bias_1', Scalar(name=bias_1, unique_id=82, data=0.2580, derivative=0.0062))
('layer_input_hidden_0.bias_2', Scalar(name=bias_2, unique_id=83, data=0.1386, derivative=0.0039))
('layer_hidden_output.weight_0_0', Scalar(name=weight_0_0,

In [26]:
# Inspect the input x1 after the MLP backward pass. requires_grad_() was
# never called on this Scalar, and its derivative is displayed as None.
x1

Scalar(name=x1, unique_id=88, data=1.0000, derivative=None)

In [27]:
# Inspect the input x2 after the MLP backward pass. requires_grad_() was
# never called on this Scalar, and its derivative is displayed as None.
x2

Scalar(name=x2, unique_id=89, data=2.0000, derivative=None)