## Optimization: manual steps

In [1]:
import random
import numpy as np
import pprint

from mycrograd_debug.engine_debug import Value
from mycrograd_debug.nn_debug import MLP
from mycrograd_debug.drawviz_debug import draw_dot, draw_nn, print_all_values

# Seed both random number generators so that a Restart & Run All is reproducible.
np.random.seed(1337)
random.seed(1337)

# Number of passes executed by the optimization loop.
number_of_iterations = 10

# Shared pretty-printer used for all parameter dumps.
pp = pprint.PrettyPrinter(indent=4)

# Most recent network output; assigned by act().
# A module-level `global` statement is a no-op and does not create the name,
# so it is initialised explicitly here instead.
activation = None


#### Multi-Layer Perceptron 1-2-1

In [2]:
# loss function single MLP
def loss_single(target, output):
    """Return the squared error ``(output - target) * (output - target)``.

    The result is tagged with ``type = "l"`` before it is returned.
    The difference is deliberately evaluated twice (not cached and not
    written as a power) so the same operations are performed as before.
    """
    squared_error = (output - target) * (output - target)
    squared_error.type = "l"
    return squared_error

In [3]:
# Initialize a model and its input value(s).
# Statement order matters: the counter is reset first, then the MLP is built,
# then the inputs are created.
nin = 1  # number of inputs
nout = 1  # number of outputs
# Reset the class-level counter on Value before any Value is created in this cell.
# NOTE(review): presumably this is what makes the generated names start at v001 - confirm in engine_debug.
Value.value_counter = 0

# MLP with `nin` inputs and layer sizes [2, nout] (the cell output reports structure [1, 2, 1]).
# NOTE(review): the meaning of weightsinit=2, lastReLU=False and debug_bw=False is defined in nn_debug - confirm there.
model = MLP(nin, [2, nout], weightsinit=2, lastReLU=False, debug_bw=False)
# Input numbers are the consecutive integers starting at 4; with nin = 1 this is [4].
xinumbers = list(range(4, 4 + nin))
# Wrap each input number in a Value tagged with type "i".
xinput = [Value(x, type="i") for x in xinumbers]
# Only the first input is printed.
print("inputs= ", xinput[0].data)
print("parameters")
pp.pprint(model.parameters())

Module nn MLP: structure [1, 2, 1]
inputs=  4
parameters
[   Value(name=v001,layernumber=L1,neuronnumber=N1,weightnumber=,type=w1,data=0.5, grad=0),
    Value(name=v002,layernumber=L1,neuronnumber=N1,weightnumber=,type=b,data=0, grad=0),
    Value(name=v003,layernumber=L1,neuronnumber=N2,weightnumber=,type=w1,data=0.5, grad=0),
    Value(name=v004,layernumber=L1,neuronnumber=N2,weightnumber=,type=b,data=0, grad=0),
    Value(name=v005,layernumber=L2,neuronnumber=N1,weightnumber=,type=w1,data=0.6000000000000001, grad=0),
    Value(name=v006,layernumber=L2,neuronnumber=N1,weightnumber=,type=w2,data=1.2000000000000002, grad=0),
    Value(name=v007,layernumber=L2,neuronnumber=N1,weightnumber=,type=b,data=0, grad=0)]


In [4]:
def act():
    """Run the forward pass of ``model`` on ``xinput``.

    The result is stored in the module-level ``activation`` (which other
    cells read) and is also returned, so that callers written as
    ``value = act()`` receive the output instead of ``None``.

    Returns:
        The object returned by ``model(xinput)``.
    """
    global activation  # required: this function rebinds the module-level name
    activation = model(xinput)
    return activation

def zeroGrad():
    """Reset all gradients to zero and print the model parameters.

    Calls ``model.zero_grad()`` and additionally sets ``grad`` to 0 on
    every entry of ``xinput``.
    """
    model.zero_grad()
    # The input values are cleared explicitly, in addition to the model's own reset.
    for input_value in xinput:
        input_value.grad = 0
    print("zero'd gradients")
    pp.pprint(model.parameters())

def back():
    """Run the backward pass from the current ``activation`` and print the parameters.

    ``activation`` is the module-level value set by ``act()``; it is only
    read here, so no ``global`` declaration is needed.
    """
    activation.backward()
    print("parameters after backpass")
    pp.pprint(model.parameters())

def upd(learning_rate=0.1):
    """Apply one gradient-descent step to every model parameter, then print them.

    Each parameter's ``data`` is moved against its ``grad``:
    ``data += -learning_rate * grad``.

    Args:
        learning_rate: Step size of the update. Defaults to 0.1, the value
            that was previously hard-coded, so ``upd()`` behaves as before.
    """
    for p in model.parameters():
        p.data += -learning_rate * p.grad
    print("updated parameters")
    pp.pprint(model.parameters())

In [5]:
# optimization
makeimg = False  # set to True to render an SVG of the graph after every stage


def _show_stage(step, label, image_suffix):
    """Print all values reachable from `activation`; optionally render the graph as SVG."""
    print(f"step {step} {label}")
    print_all_values(activation)
    if makeimg:
        dot = draw_dot(activation)
        dot.render(f"images/opt_01_step{step}_{image_suffix}", format="svg", view=True)


for k in range(number_of_iterations):
    print(f"start step {k}")

    # forward: act() stores the network output in the global `activation`.
    # Its result is not bound to a local name here; everything below reads `activation`.
    act()
    _show_stage(k, "loss calc", "1loss")

    # backward: clear the gradients of the parameters and of the inputs,
    # show the zeroed state, then backpropagate from the output.
    model.zero_grad()
    for i in xinput:
        i.grad = 0
    _show_stage(k, "zero grad", "2zero")
    activation.backward()
    _show_stage(k, "backward", "3back")

    # update: no parameter update is applied in this loop, so the parameters
    # are not changed between iterations.
    # NOTE(review): the update step was commented out in this cell - confirm
    # that this is deliberate. To enable it:
    #     for p in model.parameters():
    #         p.data += -0.1 * p.grad

    # The value reported as "loss" is the network output itself;
    # loss_single() is not called in this loop.
    print(f"step {k} loss {activation.data}")


start step 0
step 0 loss calc
 name ty   data   grad
 v001 w1   0.50   0.00
 v002  b   0.00   0.00
 v003 w1   0.50   0.00
 v004  b   0.00   0.00
 v005 w1   0.60   0.00
 v006 w2   1.20   0.00
 v007  b   0.00   0.00
 v008  i   4.00   0.00
 v009      2.00   0.00
 v010  a   2.00   0.00
 v011      2.00   0.00
 v012  a   2.00   0.00
 v013      1.20   0.00
 v014      1.20   0.00
 v015      2.40   0.00
 v016  a   3.60   0.00
step 0 zero grad
 name ty   data   grad
 v001 w1   0.50   0.00
 v002  b   0.00   0.00
 v003 w1   0.50   0.00
 v004  b   0.00   0.00
 v005 w1   0.60   0.00
 v006 w2   1.20   0.00
 v007  b   0.00   0.00
 v008  i   4.00   0.00
 v009      2.00   0.00
 v010  a   2.00   0.00
 v011      2.00   0.00
 v012  a   2.00   0.00
 v013      1.20   0.00
 v014      1.20   0.00
 v015      2.40   0.00
 v016  a   3.60   0.00
step 0 backward
 name ty   data   grad
 v001 w1   0.50   2.40
 v002  b   0.00   0.60
 v003 w1   0.50   4.80
 v004  b   0.00   1.20
 v005 w1   0.60   2.00
 v006 w2   1.20  

In [6]:
# # Optimization Step
# def opt_step():
#     loss=act()
#     zeroGrad()
#     back()
#     upd()
#     return loss