# Derivatives, basic NN

In [1]:
import numpy as np

from lib.value import Value
from lib.linear_algebra import Vector, Matrix
from lib.nn import Linear, Sigmoid, NN
from lib.metrics.losses import mean_squared_error
from lib.gd_data_loaders import BatchDataLoader, StochasticDataLoader, MiniBatchDataLoader
# Seed NumPy's global RNG so the np.random draws made later in the notebook
# (e.g. the uniform sample used to build X) are repeatable across runs.
np.random.seed(1)

### Derivatives

In [2]:
# Scalars for the toy model Z = X*W + b evaluated in the next cell:
# X is the input, W and b are the parameters, y is the value Z is compared to.
X, W, b, y = 5, 2, 0, 12

In [3]:
# Forward pass: one linear unit followed by a squared-error loss.
Z = X * W + b
L = (Z - y) ** 2

# Backward pass by hand. Local derivatives first ...
dL_dZ = 2 * (Z - y)  # d/dZ of (Z - y)**2
dZ_dW = X            # d/dW of X*W + b
dZ_db = 1            # d/db of X*W + b

# ... then the chain rule to reach each parameter.
dL_dW = dL_dZ * dZ_dW
dL_db = dL_dZ * dZ_db

# Display the loss followed by (value, gradient-of-L) pairs for Z, W and b.
L, (Z, dL_dZ), (W, dL_dW), (b, dL_db)

(4, (10, -4), (2, -20), (0, -4))

In [4]:
# Same computation as the hand-derived cell, rebuilt from Value nodes so the
# gradients come from backward() instead of being written out manually.
X, W, b = Value(5), Value(2), Value(0)
Z = X * W + b
y = Value(12)
L = (Z - y) ** 2

# Seed the output gradient (dL/dL = 1) and propagate it back through the graph.
L.grad = 1
L.backward()

# NOTE(review): each Value appears to display as {id, data, grad}; the grads
# shown for Z, W and b should match the manual results (-4, -20, -4).
L, Z, W, b

({5d2f70d4, 4, 1}, {b927f800, 10, -4}, {2e7f1810, 2, -20}, {f5588b85, 0, -4})

### Linear operations

In [5]:
# A 2x3 matrix times a 3x4 matrix, plus a length-4 vector.
m1 = Matrix([[1, 4, 1], [2, 3, 0]])
m2 = Matrix([[1, -2, 4, 4], [3, 4, 6, 1], [5, 6, 2, 1]])
v1 = Vector([2, 3, 4, 5])

# The product is 2x4; adding v1 offsets every row by the same vector.
product = m1.matmul(m2)
print(product + v1)

Matrix([
[{db289696, 20, 0}, {920f3112, 23, 0}, {162b6eff, 34, 0}, {2f6676da, 14, 0}],
[{0504310f, 13, 0}, {ddd06c4c, 11, 0}, {a58751fd, 30, 0}, {5a81b726, 16, 0}]
])


# NN

In [6]:
# Problem sizes, named once so the data and the layer shapes stay in sync.
N_SAMPLES, N_FEATURES, N_HIDDEN = 10, 5, 3

# Uniform random inputs; every target is the constant 1.
X = Matrix(np.random.uniform(size=(N_SAMPLES, N_FEATURES)))
y = Matrix([[1] for _ in range(N_SAMPLES)])

# Two linear layers with a sigmoid in between, ending in a single output.
layers = [
    Linear(N_FEATURES, N_HIDDEN),
    Sigmoid(),
    Linear(N_HIDDEN, 1),
]
nn = NN(layers)

In [7]:
LEARNING_RATE = 0.1  # step size of the plain gradient-descent update
N_STEPS = 30         # number of batches drawn from each loader


def _param_values(model):
    """Yield every Value exposed by model.params(), in iteration order."""
    for param in model.params():
        yield from param.all_values()


data_loaders = [
    BatchDataLoader(X, y),
    StochasticDataLoader(X, y),
    MiniBatchDataLoader(X, y, 2),
]

# NOTE(review): `nn` is not re-created per loader, so each loader resumes from
# the weights the previous one left behind; the printed losses are therefore
# not an independent comparison of the three strategies.
for data_loader in data_loaders:
    print(data_loader.__class__)
    for i in range(N_STEPS):
        # Forward pass on the next batch and report its loss.
        X_b, y_b = data_loader.get_batch()
        out = nn(X_b)
        loss = mean_squared_error(y_b, out)
        print(f"{i} {loss.data:.2f}")

        # Clear stale parameter gradients, then backpropagate from the loss.
        for value in _param_values(nn):
            value.zero_grad()
        loss.grad = 1
        loss.backward()

        # Gradient-descent step on every parameter value.
        for value in _param_values(nn):
            value.data -= LEARNING_RATE * value.grad

    # Loss over the full data set once this loader's steps are done.
    out = nn(X)
    loss = mean_squared_error(y, out)
    print(f"loss: {loss.data:.2f}")

<class 'lib.gd_data_loaders.BatchDataLoader'>
0 0.29
1 0.17
2 0.12
3 0.11
4 0.10
5 0.10
6 0.10
7 0.09
8 0.09
9 0.09
10 0.09
11 0.09
12 0.08
13 0.08
14 0.08
15 0.08
16 0.08
17 0.08
18 0.07
19 0.07
20 0.07
21 0.07
22 0.07
23 0.07
24 0.07
25 0.06
26 0.06
27 0.06
28 0.06
29 0.06
loss: 0.06
<class 'lib.gd_data_loaders.StochasticDataLoader'>
0 0.03
1 0.03
2 0.02
3 0.02
4 0.02
5 0.06
6 0.36
7 0.06
8 0.18
9 0.00
10 0.03
11 0.02
12 0.02
13 0.01
14 0.02
15 0.06
16 0.30
17 0.05
18 0.14
19 0.00
20 0.03
21 0.01
22 0.01
23 0.01
24 0.01
25 0.06
26 0.25
27 0.04
28 0.11
29 0.00
loss: 0.03
<class 'lib.gd_data_loaders.MiniBatchDataLoader'>
0 0.05
1 0.01
2 0.10
3 0.04
4 0.02
5 0.06
6 0.06
7 0.01
8 0.05
9 0.01
10 0.01
11 0.00
12 0.01
13 0.07
14 0.06
15 0.04
16 0.01
17 0.03
18 0.02
19 0.03
20 0.00
21 0.01
22 0.01
23 0.08
24 0.03
25 0.04
26 0.01
27 0.00
28 0.03
29 0.04
loss: 0.02


In [8]:
# Show the trained network's outputs on the full input next to the targets.
predictions = nn(X)
predictions, y

(Matrix([
 [{0d02806b, 0.85, 0}],
 [{c484c8f5, 0.99, 0}],
 [{5892bda6, 0.91, 0}],
 [{af9a2b36, 0.98, 0}],
 [{0877aa81, 0.9, 0}],
 [{66d21a84, 0.73, 0}],
 [{8aa1d8da, 1.23, 0}],
 [{37518e9f, 1.16, 0}],
 [{0e969480, 0.85, 0}],
 [{8dc1bbc4, 0.97, 0}]
 ]),
 Matrix([
 [{1ec51a15, 1, 2.93}],
 [{b12f9b2f, 1, -2.22}],
 [{f5839b3b, 1, 2.09}],
 [{cd80a52f, 1, -1.7}],
 [{03286ad1, 1, 2.28}],
 [{c03da865, 1, 4.84}],
 [{b94a72d7, 1, -8.49}],
 [{bd7659a8, 1, -5.47}],
 [{b1d3c5a7, 1, 4.83}],
 [{99dc7b40, 1, 0.13}]
 ]))