In [13]:
import toydl
import math

from toydl.core.scalar import Scalar
from toydl.core.scalar.bp import topological_sort
from toydl.network.mlp import MLPConfig, MLPBinaryClassifyNetFactory, ActivationType

In [2]:
# Display the installed toydl version for reference.
toydl.__version__

'0.2.0'

## Scalar derivative

$$ y = f(x_1, x_2) = 3x_1 + 5x_2 $$

$$ \frac{\partial f}{\partial x_1} = 3 $$
$$ \frac{\partial f}{\partial x_2} = 5 $$

In [27]:
# Leaf inputs of the expression; created first so they are the earliest Scalars in this cell.
x1, x2 = Scalar(1.0, name="x1"), Scalar(2.0, name="x2")

# y = 3 * x1 + 5 * x2, with the coefficients wrapped as named Scalars so they
# are identifiable when the computation graph is listed.
y = (
    Scalar(3, name="x1_coef") * x1
    + Scalar(5, name="x2_coef") * x2
)
y.name = "y"

y

Scalar(name=y, unique_id=134, data=13.0000, derivative=None)

In [28]:
# List the Scalars that make up y's computation graph; the displayed list
# starts with y itself and also contains the intermediate products and leaves.
topological_sort(y)

[Scalar(name=y, unique_id=134, data=13.0000, derivative=None),
 Scalar(name=133, unique_id=133, data=10.0000, derivative=None),
 Scalar(name=x2, unique_id=129, data=2.0000, derivative=None),
 Scalar(name=x2_coef, unique_id=132, data=5.0000, derivative=None),
 Scalar(name=131, unique_id=131, data=3.0000, derivative=None),
 Scalar(name=x1, unique_id=128, data=1.0000, derivative=None),
 Scalar(name=x1_coef, unique_id=130, data=3.0000, derivative=None)]

In [22]:
# Backpropagate from y with a seed gradient of 1.0 so that each input's
# `derivative` attribute is populated.
y.backward(d_output=1.0)

# y = 3 * x1 + 5 * x2, so the expected gradients are the two coefficients.
# The messages make a failing check report the value that was actually computed.
assert math.isclose(x1.derivative, 3.0), f"dy/dx1: expected 3.0, got {x1.derivative}"
assert math.isclose(x2.derivative, 5.0), f"dy/dx2: expected 5.0, got {x2.derivative}"

In [23]:
# Inspect x1 after the backward pass; its `derivative` field is now set.
x1

Scalar(name=x1, unique_id=121, data=1.0000, derivative=3.0)

In [24]:
# Inspect x2 after the backward pass; its `derivative` field is now set.
x2

Scalar(name=x2, unique_id=122, data=2.0000, derivative=5.0)

[Scalar(name=y, unique_id=127, data=13.0000, derivative=None),
 Scalar(name=126, unique_id=126, data=10.0000, derivative=None),
 Scalar(name=x2, unique_id=122, data=2.0000, derivative=5.0),
 Scalar(name=125, unique_id=125, data=5.0000, derivative=2.0),
 Scalar(name=124, unique_id=124, data=3.0000, derivative=None),
 Scalar(name=x1, unique_id=121, data=1.0000, derivative=3.0),
 Scalar(name=123, unique_id=123, data=3.0000, derivative=1.0)]

## MLP

In [15]:
# Network description, listed in data-flow order: input -> hidden -> output.
# NOTE(review): the parameter values printed below look randomly initialised;
# if so, seed the RNG toydl uses before this cell for reproducible outputs — confirm.
mlp_config = MLPConfig(
    in_size=2,
    hidden_layer_size=3,
    hidden_layer_num=1,
    hidden_activation=ActivationType.SIGMOID,
    out_size=1,
)

# Build the binary-classification network from the configuration above.
mlp = MLPBinaryClassifyNetFactory(mlp_config)

In [16]:
# Print every (qualified name, Scalar) pair of the freshly built network.
for param_name, param in mlp.named_parameters():
    print((param_name, param))

('layer_input_hidden_0.weight_0_0', Scalar(name=weight_0_0, unique_id=71, data=-0.3297, derivative=None))
('layer_input_hidden_0.weight_0_1', Scalar(name=weight_0_1, unique_id=72, data=-0.2846, derivative=None))
('layer_input_hidden_0.weight_0_2', Scalar(name=weight_0_2, unique_id=73, data=0.4919, derivative=None))
('layer_input_hidden_0.weight_1_0', Scalar(name=weight_1_0, unique_id=74, data=-0.5544, derivative=None))
('layer_input_hidden_0.weight_1_1', Scalar(name=weight_1_1, unique_id=75, data=-0.4568, derivative=None))
('layer_input_hidden_0.weight_1_2', Scalar(name=weight_1_2, unique_id=76, data=0.6295, derivative=None))
('layer_input_hidden_0.bias_0', Scalar(name=bias_0, unique_id=77, data=0.9395, derivative=None))
('layer_input_hidden_0.bias_1', Scalar(name=bias_1, unique_id=78, data=0.5333, derivative=None))
('layer_input_hidden_0.bias_2', Scalar(name=bias_2, unique_id=79, data=-0.6797, derivative=None))
('layer_hidden_output.weight_0_0', Scalar(name=weight_0_0, unique_id=80, d

In [17]:
# Fresh input Scalars for the network (these rebind x1/x2/y from the section above).
x1, x2 = Scalar(1.0, name="x1"), Scalar(2.0, name="x2")
xs = (x1, x2)

# Forward pass; name the output so it is easy to spot when displayed.
y = mlp.forward(xs)
y.name = "y"

y

Scalar(name=y, unique_id=120, data=0.5772, derivative=None)

In [18]:
# Backpropagate from the network output with a seed gradient of 1.0; afterwards
# the parameters' `derivative` fields are populated (see the next cell's output).
y.backward(d_output=1.0)

In [19]:
# Print every (qualified name, Scalar) pair again to inspect the derivatives
# filled in by the backward pass.
for param_name, param in mlp.named_parameters():
    print((param_name, param))

('layer_input_hidden_0.weight_0_0', Scalar(name=weight_0_0, unique_id=71, data=-0.3297, derivative=0.023))
('layer_input_hidden_0.weight_0_1', Scalar(name=weight_0_1, unique_id=72, data=-0.2846, derivative=-0.0304))
('layer_input_hidden_0.weight_0_2', Scalar(name=weight_0_2, unique_id=73, data=0.4919, derivative=-0.0133))
('layer_input_hidden_0.weight_1_0', Scalar(name=weight_1_0, unique_id=74, data=-0.5544, derivative=0.0459))
('layer_input_hidden_0.weight_1_1', Scalar(name=weight_1_1, unique_id=75, data=-0.4568, derivative=-0.0608))
('layer_input_hidden_0.weight_1_2', Scalar(name=weight_1_2, unique_id=76, data=0.6295, derivative=-0.0266))
('layer_input_hidden_0.bias_0', Scalar(name=bias_0, unique_id=77, data=0.9395, derivative=0.0459))
('layer_input_hidden_0.bias_1', Scalar(name=bias_1, unique_id=78, data=0.5333, derivative=-0.0608))
('layer_input_hidden_0.bias_2', Scalar(name=bias_2, unique_id=79, data=-0.6797, derivative=-0.0266))
('layer_hidden_output.weight_0_0', Scalar(name=weig

In [20]:
# Inspect the input x1 after backpropagating through the MLP; its `derivative`
# field now holds a value as well.
x1

Scalar(name=x1, unique_id=84, data=1.0000, derivative=-0.0055)