In [24]:
# More drills for making Bengio 2003 feed forward language model
#

# Backprop. using only numpy # (Thanks python-engineer!)

import numpy as np

# f = w * x
# f = 2*x
x = np.array([1,2,3,4], dtype=np.float32)
y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

# 1 - forward
def forward(x):
    return w*x

def loss(y_hat,y):
    return ((y_hat - y)**2).mean()   # MSE

# 2 - backward

# MSE = loss = 1/N * (w*x - y)^2       d(u^2)/du * d(u)/dx
# dL/dw = d(1/N*(w*x - y)^2)          2*u * d(w*x - y)/dw
# dL/dw = (1/N)*2*(w*x - y)* x      2*(w*x - y) * x


def gradients(x,y,y_pred):
    return np.dot(2*x,(y_pred - y)).mean()
  

# Print prediction before training

print(f"pred. f(x) before training: {forward(5):.3f}")

epochs = 10
lr = 0.01

# Training loop

for i in range(epochs):

    #forward
    y_pred = forward(x)

    #loss
    l = loss(y_pred, y)

    #gradient/backward
    dldw = gradients(x,y,y_pred)

    #update weights
    w = w - lr*dldw

    if (i+1) % 1 == 0:
        print(f'epoch {i+1} : w = {w:.3f}, loss = {l:.10f}')

print(f"pred f(5) after training: {forward(5):.3f}")
  



pred. f(x) before training: 0.000
epoch 1 : w = 1.200, loss = 30.0000000000
epoch 2 : w = 1.680, loss = 4.7999992371
epoch 3 : w = 1.872, loss = 0.7680001855
epoch 4 : w = 1.949, loss = 0.1228799969
epoch 5 : w = 1.980, loss = 0.0196608342
epoch 6 : w = 1.992, loss = 0.0031456952
epoch 7 : w = 1.997, loss = 0.0005033241
epoch 8 : w = 1.999, loss = 0.0000805319
epoch 9 : w = 1.999, loss = 0.0000128844
epoch 10 : w = 2.000, loss = 0.0000020614
pred f(5) after training: 9.999


In [22]:
# Backprop. using TORCH!

import torch

# f = w * x
# f = 2*x
x = torch.tensor([1,2,3,4], dtype=torch.float32)
y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# 1 - forward
def forward(x):
    return w*x

def loss(y_hat,y):
    return ((y_hat - y)**2).mean()   # MSE


# Print prediction before training

print(f"pred. f(x) before training: {forward(5):.3f}")

epochs = 100
lr = 0.01

# Training loop

for i in range(epochs):

    #forward
    y_pred = forward(x)

    #loss
    l = loss(y_pred, y)

    #gradient/backward
    l.backward()  # dl/dw

    #update weights
    with torch.no_grad():
        w -= lr*w.grad

    w.grad.zero_()   # zero out gradients


    if (i+1) % 10 == 0:
        print(f'epoch {i+1} : w = {w:.3f}, loss = {l:.10f}')

print(f"pred f(5) after training: {forward(5):.3f}")

pred. f(x) before training: 0.000
epoch 10 : w = 1.606, loss = 1.6093916893
epoch 20 : w = 1.922, loss = 0.0623791441
epoch 30 : w = 1.985, loss = 0.0024177833
epoch 40 : w = 1.997, loss = 0.0000937108
epoch 50 : w = 1.999, loss = 0.0000036323
epoch 60 : w = 2.000, loss = 0.0000001407
epoch 70 : w = 2.000, loss = 0.0000000055
epoch 80 : w = 2.000, loss = 0.0000000002
epoch 90 : w = 2.000, loss = 0.0000000000
epoch 100 : w = 2.000, loss = 0.0000000000
pred f(5) after training: 10.000
