# Derivatives and a basic neural network (NN)

In [1]:
import numpy as np

from lib.value import Value
from lib.linear_algebra import Vector, Matrix
from lib.nn import Linear, Sigmoid, NN
from lib.metrics.losses import mean_squared_error
from lib.gd_data_loaders import BatchDataLoader, StochasticDataLoader, MiniBatchDataLoader
from lib.optimizers import SgdOptimizer, SgdWithMomentumOptimizer, AdaGradOptimizer, RmsPropOptimizer, AdamOptimizer
np.random.seed(1)

### Derivatives

In [2]:
# Scalars for the hand-derived example in the next cell, where
# Z = X*W + b is compared against y through a squared error.
X, W, b, y = 5, 2, 0, 12

In [3]:
# Forward pass: affine output followed by a squared-error loss.
Z = X * W + b
L = (Z - y) ** 2

# Local derivative of each step.
# Naming note: a name such as dZ_dL holds dL/dZ, i.e. the gradient of L
# with respect to Z (and dW_dZ holds dZ/dW, db_dZ holds dZ/db).
dZ_dL = 2 * (Z - y)
dW_dZ = X
db_dZ = 1

# Chain rule: upstream gradient times local derivative.
dW_dL = dZ_dL * dW_dZ
db_dL = dZ_dL * db_dZ

# Show the loss, then each (value, gradient) pair.
L, (Z, dZ_dL), (W, dW_dL), (b, db_dL)

(4, (10, -4), (2, -20), (0, -4))

In [4]:
# Same computation as the hand-derived cell, now built from Value nodes so
# the gradients come from backward() instead of manual differentiation.
X, W, b = Value(5), Value(2), Value(0)

Z = X * W + b

y = Value(12)

L = (Z - y) ** 2

# Seed the output node with gradient 1 (dL/dL) before propagating backwards.
L.grad = 1
L.backward()

# Each Value is displayed as three fields; the last two are its data and its
# gradient (the first looks like a per-object id — TODO confirm in lib.value).
L, Z, W, b

({909c388e, 4, 1}, {ae858423, 10, -4}, {4dd5dd1f, 2, -20}, {5ca0192f, 0, -4})

### Linear operations

In [5]:
# 2x3 left operand.
m1 = Matrix([[1, 4, 1], [2, 3, 0]])

# 3x4 right operand.
m2 = Matrix([[1, -2, 4, 4], [3, 4, 6, 1], [5, 6, 2, 1]])

# One entry per column of the 2x4 product; the printed result shows it
# being added to every row.
v1 = Vector([2, 3, 4, 5])

product = m1.matmul(m2)
print(product + v1)

Matrix([
[{668bbe71, 20, 0}, {e0e5c082, 23, 0}, {f47df9a2, 34, 0}, {5960a435, 14, 0}],
[{9b2af12d, 13, 0}, {2796d039, 11, 0}, {60052978, 30, 0}, {e84a91ba, 16, 0}]
])


# Neural network (NN)

In [6]:
# 10 samples with 5 features each, drawn uniformly from [0, 1).
X = Matrix(np.random.uniform(low=0.0, high=1.0, size=(10, 5)))
# One target per sample, all equal to 1 (a 10x1 column).
y = Matrix([[1] for _row in range(10)])

def init_nn():
    """Build a new network: Linear(5, 3) -> Sigmoid -> Linear(3, 1).

    A new NN object is constructed on every call, so each training run can
    start from its own network instead of continuing a previous one.
    """
    layers = [
        Linear(5, 3),
        Sigmoid(),
        Linear(3, 1),
    ]
    return NN(layers)

In [7]:
# One loader per gradient-descent flavour, all built over the same (X, y).
# The trailing 2 passed to MiniBatchDataLoader is presumably the batch
# size — TODO confirm against lib.gd_data_loaders.
data_loaders = [
    BatchDataLoader(X, y),
    StochasticDataLoader(X, y),
    MiniBatchDataLoader(X, y, 2),
]

# Factories rather than instances: each is called with the network it should
# optimise, so every run gets its own optimizer object.
# The numeric arguments are presumably the learning rate followed by the
# optimizer-specific decay/momentum terms — TODO confirm in lib.optimizers.
optimizer_creators = [
    lambda nn: SgdOptimizer(nn, 0.01),
    lambda nn: SgdWithMomentumOptimizer(nn, 0.01, 0.9),
    lambda nn: AdaGradOptimizer(nn, 0.01),
    lambda nn: RmsPropOptimizer(nn, 0.01, 0.9),
    lambda nn: AdamOptimizer(nn, 0.01, 0.95, 0.95),
]

# Every (loader, optimizer factory) combination in loader-major order, i.e.
# all optimizers are tried with the first loader before moving to the next.
runs = [
    (data_loader, optimizer_creator)
    for data_loader in data_loaders
    for optimizer_creator in optimizer_creators
]

for data_loader, optimizer_creator in runs:
    # Start every combination from a newly constructed network.
    nn = init_nn()
    optimizer = optimizer_creator(nn)
    print(f"gradient descent: {data_loader.__class__} | optimizer: {optimizer.__class__}")

    # 30 update steps, logging the batch loss before each update.
    for i in range(30):
        X_b, y_b = data_loader.get_batch()
        out = nn(X_b)
        loss = mean_squared_error(y_b, out)
        print(f"{i} {loss.data:.2f}")

        optimizer.step(loss)

    # Loss over the full data set once the run is finished.
    out = nn(X)
    loss = mean_squared_error(y, out)
    print(f"loss: {loss.data:.2f}")

gradient descent: <class 'lib.gd_data_loaders.BatchDataLoader'> | optimizer: <class 'lib.optimizers.SgdOptimizer'>
0 0.29
1 0.27
2 0.26
3 0.25
4 0.24
5 0.22
6 0.21
7 0.21
8 0.20
9 0.19
10 0.18
11 0.18
12 0.17
13 0.16
14 0.16
15 0.16
16 0.15
17 0.15
18 0.14
19 0.14
20 0.14
21 0.13
22 0.13
23 0.13
24 0.13
25 0.12
26 0.12
27 0.12
28 0.12
29 0.12
loss: 0.12
gradient descent: <class 'lib.gd_data_loaders.BatchDataLoader'> | optimizer: <class 'lib.optimizers.SgdWithMomentumOptimizer'>
0 0.19
1 0.19
2 0.18
3 0.18
4 0.18
5 0.17
6 0.16
7 0.16
8 0.15
9 0.14
10 0.14
11 0.13
12 0.12
13 0.12
14 0.11
15 0.10
16 0.10
17 0.09
18 0.09
19 0.08
20 0.08
21 0.07
22 0.07
23 0.06
24 0.06
25 0.06
26 0.06
27 0.05
28 0.05
29 0.05
loss: 0.05
gradient descent: <class 'lib.gd_data_loaders.BatchDataLoader'> | optimizer: <class 'lib.optimizers.AdaGradOptimizer'>
0 0.35
1 0.32
2 0.29
3 0.28
4 0.26
5 0.25
6 0.24
7 0.23
8 0.22
9 0.21
10 0.20
11 0.19
12 0.19
13 0.18
14 0.17
15 0.17
16 0.16
17 0.16
18 0.15
19 0.15
20 0.14

In [8]:
# Predictions of the last network trained above, shown next to the targets.
# NOTE(review): the targets are displayed with a non-zero gradient field,
# presumably accumulated on y during training — confirm whether that is intended.
predictions = nn(X)
predictions, y

(Matrix([
 [{c4c98eb2, 0.88, 0}],
 [{be860ea0, 1.28, 0}],
 [{2a43abf3, 0.82, 0}],
 [{11b7f92f, 1.01, 0}],
 [{cbc91cac, 1.35, 0}],
 [{bd4f93fc, 1.71, 0}],
 [{a4f023f9, 1.23, 0}],
 [{6b432c87, 1.19, 0}],
 [{d875bdca, 0.95, 0}],
 [{72b52140, 0.76, 0}]
 ]),
 Matrix([
 [{3fdde4d9, 1, 33.52}],
 [{bceb379e, 1, 28.53}],
 [{ea7a9b40, 1, 25.91}],
 [{cd734ad7, 1, 31.81}],
 [{d1b516a2, 1, 25.62}],
 [{be61bca7, 1, 38.21}],
 [{85d8b57d, 1, 21.08}],
 [{b7485d8b, 1, 28.84}],
 [{088a738f, 1, 27.47}],
 [{42a03a4b, 1, 27.19}]
 ]))