In [1]:
# https://www.wrighters.io/using-autoreload-to-speed-up-ipython-and-jupyter-work/
%load_ext autoreload
%autoreload 2
# micograd_from_scratch_mlp_1, but by applying the modularized Classes from micrograd.nn
from micrograd.tracegraph import draw_dot
from micrograd.topo import build, findLeafNodes
from micrograd.nn import Neuron, Layer, MLP
from micrograd.engine import Value

In [2]:
# Training set: 4 examples (rows), each made of 3 input values (columns).
# Every neuron of Layer 0 receives all 3 input values of an example,
# one input value per weight (w0, w1, w2).
xs = [
    [2.0, 3.0, -1.0],  # Example #0
    [3.0, -1.0, 0.5],  # Example #1
    [0.5, 1.0, 1.0],   # Example #2
    [1.0, 1.0, -1.0],  # Example #3
]

# Ground truths (gt0 .. gt3): the desired target for each example.
# Together they describe a simple binary classifier with the targets +1.0 and -1.0.
ygts = [
    Value(target, _label=f'gt{example_no}')
    for example_no, target in enumerate((1.0, -1.0, -1.0, 1.0))
]

# An MLP with 3 input values, followed by 3 layers of 4, 4 and 1 neurons.
mlp_nn1 = MLP(3, [4, 4, 1])

# WANT
#   Example #0 -> MLP output  1.0
#   Example #1 -> MLP output -1.0
#   Example #2 -> MLP output -1.0
#   Example #3 -> MLP output  1.0
#
# Forward pass: the current (untrained) prediction for each example.
yspred = list(map(mlp_nn1, xs))

# The Values labeled L2|N0|o are the predictions, one per example.
yspred

[Value(data=0.8024582077087925, grad=0, op=tanh, label=L2|N0|o),
 Value(data=0.5413612523015063, grad=0, op=tanh, label=L2|N0|o),
 Value(data=0.35568112667755925, grad=0, op=tanh, label=L2|N0|o),
 Value(data=0.7577300512482477, grad=0, op=tanh, label=L2|N0|o)]

In [3]:
# WANT
# A way to tune the weights so that the predictions move towards the desired targets.
# For that we need one single number that measures the total performance of the
# neural net: the loss.
#
# Per example, the loss is the squared error (prediction - ground truth)**2.
# Squaring makes every term non-negative, so errors cannot cancel each other out.
# NOTE: the per-example terms are summed, not averaged, so the total below is a
# sum of squared errors rather than a mean squared error.
losss = []
for idx, (ygt, ypred) in enumerate(zip(ygts, yspred)):
    loss = (ypred - ygt)**2
    # label each loss term with the index of its example
    loss._label = f'loss{idx}'
    losss.append(loss)

# The overall loss over all examples
losssum = sum(losss)
losssum._label = 'losssum'
print(f'loss for all ground truths: {losssum}')

loss for all ground truths: Value(data=4.311383315096003, grad=0, op=+, label=losssum)


In [4]:
# draw_dot of losssum before calling backward -> see doc/micograd_from_scratch_mlp_3_withlosssum_beforebackward.svg
# draw_dot(losssum)

In [5]:
# Run the backward pass starting at the total loss.
# Before this call the Values printed above all report grad=0; afterwards the
# parameters of the MLP carry non-zero grads (see the outputs of the following cells).
# NOTE(review): if backward() accumulates into .grad instead of overwriting it,
# re-running this cell without resetting the grads would inflate them - confirm
# against micrograd.engine.
losssum.backward()

In [6]:
# draw_dot of losssum after calling backward -> see doc/micograd_from_scratch_mlp_3_withlosssum_afterbackward.svg
# draw_dot(losssum)

In [7]:
# Inspect a single parameter: the first weight (w0) of the first neuron (N0)
# of the first layer (L0).
# After losssum.backward() its grad tells how losssum reacts when this weight is
# nudged upwards: a positive grad means the loss goes up, a negative grad means the
# loss goes down. The sign depends on the randomly initialised weights, so read it
# from the printed value rather than assuming it.
first_weight = mlp_nn1.layers[0].neurons[0].w[0]
print(f'If we look at the Weight of some Neuron of some Layer: {first_weight}')
print(f'... at the grad, we see that it\'s influence is ? positive or negative: {first_weight.grad}')

If we look at the Weight of some Neuron of some Layer: Value(data=-0.959487738115776, grad=-0.24558073915563292, op=prim, label=L0|N0|w0)
... at the grad, we see that it's influence is ? positive or negative: -0.24558073915563292


In [28]:
# How to read the sign of a gradient:
#   grad > 0 : the weight pushes the loss up, so decreasing this weight
#              would make the loss go down.
#   grad < 0 : the weight pushes the loss down, so increasing this weight
#              would make the loss go down.
#
# WANT
# A convenience method that gathers all parameters (weights and biases) of the
# MLP, so that we can change them.
from io import StringIO

output = StringIO()
# Collect the parameters as text: one parameter per line, closed by a trailing newline.
output.write("\n".join(map(str, mlp_nn1.parameters())) + "\n")
print(f'Parameters of the MLP:\n{output.getvalue()}')
print(f'Number of Parameters of the MLP: {len(mlp_nn1.parameters())}')

Parameters of the MLP:
Value(data=-0.959487738115776, grad=-0.24558073915563292, op=prim, label=L0|N0|w0)
Value(data=0.375987038868441, grad=-0.47746037458081275, op=prim, label=L0|N0|w1)
Value(data=-0.41034046594409146, grad=-0.4603471973615333, op=prim, label=L0|N0|w2)
Value(data=-0.5310559751256543, grad=-0.47132442367378147, op=prim, label=L0|N0|b)
Value(data=-0.2587547667948096, grad=-11.328698606799321, op=prim, label=L0|N1|w0)
Value(data=0.0403237811106445, grad=2.849281943171405, op=prim, label=L0|N1|w1)
Value(data=0.5833304446497909, grad=-3.553646227793297, op=prim, label=L0|N1|w2)
Value(data=0.4031334249509986, grad=-4.752577074474798, op=prim, label=L0|N1|b)
Value(data=-0.08045905136012599, grad=0.5985595195990767, op=prim, label=L0|N2|w0)
Value(data=0.8449829371178346, grad=-0.18795489475913127, op=prim, label=L0|N2|w1)
Value(data=0.08749341362998475, grad=0.10041199251109707, op=prim, label=L0|N2|w2)
Value(data=0.3596692071246599, grad=0.20629667644175093, op=prim, label=