In [4]:
from nn import MLP
import random as r

ModuleNotFoundError: No module named 'micrograd'

In [3]:
# creating a tiny dataset
size = 8192
x = [[r.uniform(-1,1) for _ in range(4)] for _ in range(size)]
y = []
for features in x:
    # Create a pattern: y = 2*x1 - 0.5*x2 + 3*x3 - x4 + noise
    y_value = (
        2 * features[0]
        - 0.5 * features[1]
        + 3 * features[2]
        - features[3]
        + r.uniform(-0.1, 0.1)  # Add some noise
    )
    y.append(y_value)

In [4]:
# initialize an MLP with 4 input neurons, hidden_dim of 32, and 1 output neuron
model = MLP(len(x[0]), len(x[0])*8, 1)
model.parameters()

[Value(data=-0.985702240884134, grad=0.0),
 Value(data=0.9055957120168918, grad=0.0),
 Value(data=0.365524002542593, grad=0.0),
 Value(data=0.17442747462656394, grad=0.0),
 Value(data=0.0, grad=0.0),
 Value(data=-0.22010930200012502, grad=0.0),
 Value(data=0.8238840604535325, grad=0.0),
 Value(data=0.5740417420229842, grad=0.0),
 Value(data=0.6591262029582623, grad=0.0),
 Value(data=0.0, grad=0.0),
 Value(data=0.8458949266933498, grad=0.0),
 Value(data=-0.8380457030889297, grad=0.0),
 Value(data=0.15417534878301242, grad=0.0),
 Value(data=-0.8042162152019321, grad=0.0),
 Value(data=0.0, grad=0.0),
 Value(data=-0.8190995978058142, grad=0.0),
 Value(data=0.4405593387434992, grad=0.0),
 Value(data=0.0886371625445701, grad=0.0),
 Value(data=-0.7966443911791146, grad=0.0),
 Value(data=0.0, grad=0.0),
 Value(data=-0.18020173213800605, grad=0.0),
 Value(data=-0.1752621671020056, grad=0.0),
 Value(data=-0.8664593399070553, grad=0.0),
 Value(data=-0.5908780140845074, grad=0.0),
 Value(data=0.0,

In [5]:
# initial loss on test set, which will just be the final batch in the dataset
batch_size = 16
x_test = x[-batch_size:]
y_test = y[-batch_size:]

# running the model over the dataset
ypred = []
for j in range(batch_size):
    ypred.append(model(x_test[j]))

# MSE loss function
loss_batch = [(yhat - ytrue)**2 for ytrue, yhat in zip(y_test, ypred)]
loss = 0.0
for k in loss_batch: # sum() doesn't work for some reason so we've gotta do it manually
    loss = loss + k
    
print(f'initial test set loss: {loss}')

initial test set loss: Value(data=114.89460601008064, grad=0.0)


In [10]:
# training loop
eta = 0.0001
for i in range(1, (size // batch_size) - batch_size): # 1 bc we already did one step. -batch_size for test set
    x_batch = x[i*batch_size:i*batch_size + batch_size]
    y_batch = y[i*batch_size:i*batch_size + batch_size]

    ## forward pass
    # running the model over the dataset
    ypred = []
    for j in range(batch_size):
        ypred.append(model(x_batch[j]))
    
    # MSE loss function
    loss_batch = [(yhat - ytrue)**2 for ytrue, yhat in zip(y_batch, ypred)]
    loss = 0.0
    for k in loss_batch: # sum() doesn't work for some reason so we've gotta do it manually
        loss = loss + k
    print(f'step {i} loss: {loss}')

    ## backward pass
    #set params to 0
    for p in model.parameters():
        p.grad = 0.0
    # clac gradients
    loss.backward()
    # performing a step of SGD
    for p in model.parameters():
        p.data += -eta * p.grad

step 1 loss: Value(data=102.4861454754203, grad=0.0)
step 2 loss: Value(data=82.82057175308601, grad=0.0)
step 3 loss: Value(data=100.00733407315165, grad=0.0)
step 4 loss: Value(data=133.98432390108005, grad=0.0)
step 5 loss: Value(data=86.78229654540696, grad=0.0)
step 6 loss: Value(data=106.63538063123342, grad=0.0)
step 7 loss: Value(data=50.13581100489319, grad=0.0)
step 8 loss: Value(data=122.55818099627948, grad=0.0)
step 9 loss: Value(data=122.40644285430507, grad=0.0)
step 10 loss: Value(data=118.04609630374587, grad=0.0)
step 11 loss: Value(data=80.10721409578956, grad=0.0)
step 12 loss: Value(data=82.60633635421839, grad=0.0)
step 13 loss: Value(data=100.33185025800138, grad=0.0)
step 14 loss: Value(data=74.7824293345868, grad=0.0)
step 15 loss: Value(data=118.98993479444492, grad=0.0)
step 16 loss: Value(data=75.42351405569543, grad=0.0)
step 17 loss: Value(data=118.8394766275158, grad=0.0)
step 18 loss: Value(data=81.97656808131947, grad=0.0)
step 19 loss: Value(data=63.33

In [15]:
# final loss on test set
x_test = x[-batch_size:]
y_test = y[-batch_size:]

# running the model over the dataset
ypred = []
for j in range(batch_size):
    ypred.append(model(x_test[j]))

# MSE loss function
loss_batch = [(yhat - ytrue)**2 for ytrue, yhat in zip(y_test, ypred)]
loss = 0.0
for k in loss_batch: # sum() doesn't work for some reason so we've gotta do it manually
    loss = loss + k
print(f'final test loss: {loss}')

final test loss: Value(data=1.5825152545418568, grad=0.0)


In [18]:
model.parameters()

[Value(data=-0.9885137112017245, grad=0.056606532122643854),
 Value(data=0.9060762535548338, grad=-0.007508241012355071),
 Value(data=0.34734582642768086, grad=-0.002182098847789725),
 Value(data=0.17376199073753593, grad=0.019098846266687886),
 Value(data=-0.002900332621623437, grad=-0.07883886984272068),
 Value(data=-0.27918577942137945, grad=0.8476605020118341),
 Value(data=0.8273703587609359, grad=-1.1393260866048127),
 Value(data=0.2326768247480475, grad=1.088807727898387),
 Value(data=0.6373067818913496, grad=-0.05424270996255681),
 Value(data=-0.120899731533268, grad=-2.226895948158038),
 Value(data=0.8961667416211861, grad=-0.5691772495341849),
 Value(data=-0.8429762381978134, grad=-0.8561417915863436),
 Value(data=0.27366276877647955, grad=0.6513478670605626),
 Value(data=-0.8162255919269171, grad=-1.2154538194462228),
 Value(data=0.10192898544386289, grad=1.9181267133571152),
 Value(data=-0.7855258172395352, grad=-1.355718031831505),
 Value(data=0.433061340004016, grad=-0.461