In [2]:
from nn import MLP
import random as r

In [3]:
# creating a tiny dataset
size = 8192
x = [[r.uniform(-1,1) for _ in range(4)] for _ in range(size)]
y = []
for features in x:
    # Create a pattern: y = 2*x1 - 0.5*x2 + 3*x3 - x4 + noise
    y_value = (
        2 * features[0]
        - 0.5 * features[1]
        + 3 * features[2]
        - features[3]
        + r.uniform(-0.1, 0.1)  # Add some noise
    )
    y.append(y_value)

In [4]:
# initialize an MLP with 4 input neurons, hidden_dim of 32, and 1 output neuron
model = MLP(len(x[0]), len(x[0])*8, 1)
model.parameters()

[Value(data=0.591, grad=0.000),
 Value(data=0.686, grad=0.000),
 Value(data=0.750, grad=0.000),
 Value(data=0.171, grad=0.000),
 Value(data=0.000, grad=0.000),
 Value(data=-0.970, grad=0.000),
 Value(data=0.814, grad=0.000),
 Value(data=0.006, grad=0.000),
 Value(data=0.429, grad=0.000),
 Value(data=0.000, grad=0.000),
 Value(data=-0.732, grad=0.000),
 Value(data=-0.847, grad=0.000),
 Value(data=-0.311, grad=0.000),
 Value(data=0.203, grad=0.000),
 Value(data=0.000, grad=0.000),
 Value(data=-0.978, grad=0.000),
 Value(data=0.366, grad=0.000),
 Value(data=-0.787, grad=0.000),
 Value(data=-0.731, grad=0.000),
 Value(data=0.000, grad=0.000),
 Value(data=-0.823, grad=0.000),
 Value(data=0.980, grad=0.000),
 Value(data=-0.180, grad=0.000),
 Value(data=0.590, grad=0.000),
 Value(data=0.000, grad=0.000),
 Value(data=0.297, grad=0.000),
 Value(data=0.679, grad=0.000),
 Value(data=0.606, grad=0.000),
 Value(data=-0.383, grad=0.000),
 Value(data=0.000, grad=0.000),
 Value(data=0.924, grad=0.000)

In [5]:
# initial loss on test set, which will just be the final batch in the dataset
batch_size = 16
x_test = x[-batch_size:]
y_test = y[-batch_size:]

# running the model over the dataset
ypred = []
for j in range(batch_size):
    ypred.append(model(x_test[j]))

# MSE loss function
loss_batch = [(yhat - ytrue)**2 for ytrue, yhat in zip(y_test, ypred)]
loss = 0.0
for k in loss_batch: # sum() doesn't work for some reason so we've gotta do it manually
    loss = loss + k
    
print(f'initial test set loss: {loss}')

initial test set loss: Value(data=174.123, grad=0.000)


In [6]:
# training loop
eta = 0.0001
for i in range(1, (size // batch_size) - batch_size): # 1 bc we already did one step. -batch_size for test set
    x_batch = x[i*batch_size:i*batch_size + batch_size]
    y_batch = y[i*batch_size:i*batch_size + batch_size]

    ## forward pass
    # running the model over the dataset
    ypred = []
    for j in range(batch_size):
        ypred.append(model(x_batch[j]))
    
    # MSE loss function
    loss_batch = [(yhat - ytrue)**2 for ytrue, yhat in zip(y_batch, ypred)]
    loss = 0.0
    for k in loss_batch: # sum() doesn't work for some reason so we've gotta do it manually
        loss = loss + k
    print(f'step {i} loss: {loss}')

    ## backward pass
    #set params to 0
    for p in model.parameters():
        p.grad = 0.0
    # clac gradients
    loss.backward()
    # performing a step of SGD
    for p in model.parameters():
        p.data += -eta * p.grad

step 1 loss: Value(data=230.156, grad=0.000)
step 2 loss: Value(data=278.105, grad=0.000)
step 3 loss: Value(data=191.795, grad=0.000)
step 4 loss: Value(data=138.799, grad=0.000)
step 5 loss: Value(data=77.813, grad=0.000)
step 6 loss: Value(data=93.909, grad=0.000)
step 7 loss: Value(data=172.336, grad=0.000)
step 8 loss: Value(data=182.626, grad=0.000)
step 9 loss: Value(data=147.849, grad=0.000)
step 10 loss: Value(data=137.614, grad=0.000)
step 11 loss: Value(data=88.470, grad=0.000)
step 12 loss: Value(data=194.983, grad=0.000)
step 13 loss: Value(data=152.301, grad=0.000)
step 14 loss: Value(data=161.486, grad=0.000)
step 15 loss: Value(data=108.765, grad=0.000)
step 16 loss: Value(data=145.232, grad=0.000)
step 17 loss: Value(data=100.724, grad=0.000)
step 18 loss: Value(data=71.252, grad=0.000)
step 19 loss: Value(data=171.571, grad=0.000)
step 20 loss: Value(data=155.197, grad=0.000)
step 21 loss: Value(data=162.979, grad=0.000)
step 22 loss: Value(data=142.261, grad=0.000)
s

In [7]:
# final loss on test set
x_test = x[-batch_size:]
y_test = y[-batch_size:]

# running the model over the dataset
ypred = []
for j in range(batch_size):
    ypred.append(model(x_test[j]))

# MSE loss function
loss_batch = [(yhat - ytrue)**2 for ytrue, yhat in zip(y_test, ypred)]
loss = 0.0
for k in loss_batch: # sum() doesn't work for some reason so we've gotta do it manually
    loss = loss + k
print(f'final test loss: {loss}')

final test loss: Value(data=2.415, grad=0.000)


In [14]:
model.parameters()

[Value(data=0.591, grad=0.000),
 Value(data=0.686, grad=0.000),
 Value(data=0.750, grad=0.000),
 Value(data=0.171, grad=0.000),
 Value(data=0.000, grad=0.000),
 Value(data=-0.970, grad=0.000),
 Value(data=0.788, grad=-0.015),
 Value(data=-0.152, grad=1.462),
 Value(data=0.447, grad=0.997),
 Value(data=0.032, grad=-0.629),
 Value(data=-0.732, grad=0.000),
 Value(data=-0.745, grad=0.187),
 Value(data=-0.423, grad=-0.193),
 Value(data=0.248, grad=0.654),
 Value(data=0.054, grad=-0.757),
 Value(data=-0.978, grad=0.000),
 Value(data=0.389, grad=-0.468),
 Value(data=-0.881, grad=1.622),
 Value(data=-0.689, grad=0.716),
 Value(data=0.040, grad=-1.345),
 Value(data=-0.823, grad=0.000),
 Value(data=0.966, grad=-0.254),
 Value(data=-0.265, grad=0.944),
 Value(data=0.596, grad=0.754),
 Value(data=0.025, grad=-1.029),
 Value(data=0.297, grad=0.000),
 Value(data=0.707, grad=0.670),
 Value(data=0.839, grad=-3.749),
 Value(data=-0.472, grad=-1.008),
 Value(data=0.228, grad=-0.403),
 Value(data=0.924,