Learning = Data + Model + Loss + Optimizer

# Manual Fitting

In [1]:
import numpy as np

from micrograd.engine import Value

In [2]:
# Two training points (x, y), each coordinate wrapped as a Value:
# (0, 2) and (1, 5).
x1, y1 = Value(0), Value(2)
x2, y2 = Value(1), Value(5)

In [3]:
# Fit the line y = w*x + b to the two points by hand, starting from
# w = 1, b = -2 and taking ten fixed-size gradient-descent steps.
w = Value(1)
b = Value(-2)

lr = 0.1  # learning rate: scale applied to each gradient step

for i in range(10):
    print("##### iteration " + str(i) + "#####")

    # Forward: residual at each point, then the mean of the squared residuals.
    print("==forward pass")
    err1 = x1*w + b - y1
    err2 = x2*w + b - y2
    L = (err1**2 + err2**2) / 2
    print("L=" + str(L))

    # Backward: reset the grad on every Value that is reused across
    # iterations, then backpropagate from the loss.
    print("==backward pass")
    for leaf in (w, b, x1, y1, x2, y2):
        leaf.grad = 0
    L.backward()
    print(w)
    print(b)

    # Update: move each parameter against its gradient.
    print("==gradient descent")
    w.data -= lr * w.grad
    b.data -= lr * b.grad
    print(w)
    print(b)

##### iteration 0#####
==forward pass
L=Value(data=26.0, grad=0)
==backward pass
Value(data=1, grad=-6.0)
Value(data=-2, grad=-10.0)
==gradient descent
Value(data=1.6, grad=-6.0)
Value(data=-1.0, grad=-10.0)
##### iteration 1#####
==forward pass
L=Value(data=14.180000000000001, grad=0)
==backward pass
Value(data=1.6, grad=-4.4)
Value(data=-1.0, grad=-7.4)
==gradient descent
Value(data=2.04, grad=-4.4)
Value(data=-0.2599999999999999, grad=-7.4)
##### iteration 2#####
==forward pass
L=Value(data=7.737999999999998, grad=0)
==backward pass
Value(data=2.04, grad=-3.2199999999999998)
Value(data=-0.2599999999999999, grad=-5.4799999999999995)
==gradient descent
Value(data=2.362, grad=-3.2199999999999998)
Value(data=0.28800000000000003, grad=-5.4799999999999995)
##### iteration 3#####
==forward pass
L=Value(data=4.226721999999999, grad=0)
==backward pass
Value(data=2.362, grad=-2.3499999999999996)
Value(data=0.28800000000000003, grad=-4.061999999999999)
==gradient descent
Value(data=2.597, grad=-2.3499999999999996)

# Replicate in PyTorch

In [4]:
import torch

In [5]:
# The same two training points as column tensors: one row per sample,
# one column per feature/target.
x = torch.tensor(
    [
        [0.0],
        [1.0],
    ]
)
y = torch.tensor(
    [
        [2.0],
        [5.0],
    ]
)

In [6]:
# One-input, one-output linear layer: its weight and bias stand in for
# the hand-written w and b.
model = torch.nn.Linear(1, 1)

# Overwrite the layer's initial parameters with the same starting point
# as the manual run (w = 1, b = -2).
with torch.no_grad():
    model.weight.copy_(torch.tensor([[1.0]]))
    model.bias.copy_(torch.tensor([-2.0]))

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss = torch.nn.MSELoss()

for i in range(10):
    print("##### iteration " + str(i) + "#####")

    print("==forward pass")
    pred = model(x)
    L = loss(pred, y)
    print("L=" + str(L))

    print("==backward pass")
    model.zero_grad()
    L.backward()
    # NOTE: printing a Parameter shows its values only; the gradients
    # computed by backward() are held in .grad and are not displayed here.
    for param in (model.weight, model.bias):
        print(param)

    print("==gradient descent")
    optimizer.step()
    for param in (model.weight, model.bias):
        print(param)
    

##### iteration 0#####
==forward pass
L=tensor(26., grad_fn=<MseLossBackward0>)
==backward pass
Parameter containing:
tensor([[1.]], requires_grad=True)
Parameter containing:
tensor([-2.], requires_grad=True)
==gradient descent
Parameter containing:
tensor([[1.6000]], requires_grad=True)
Parameter containing:
tensor([-1.], requires_grad=True)
##### iteration 1#####
==forward pass
L=tensor(14.1800, grad_fn=<MseLossBackward0>)
==backward pass
Parameter containing:
tensor([[1.6000]], requires_grad=True)
Parameter containing:
tensor([-1.], requires_grad=True)
==gradient descent
Parameter containing:
tensor([[2.0400]], requires_grad=True)
Parameter containing:
tensor([-0.2600], requires_grad=True)
##### iteration 2#####
==forward pass
L=tensor(7.7380, grad_fn=<MseLossBackward0>)
==backward pass
Parameter containing:
tensor([[2.0400]], requires_grad=True)
Parameter containing:
tensor([-0.2600], requires_grad=True)
==gradient descent
Parameter containing:
tensor([[2.3620]], requires_grad=True