### PyTorch Tutorial 05 - Gradient Descent with Autograd and Backpropagation

https://www.youtube.com/watch?v=E-I2DNVzQLg

In [None]:
import numpy as np

# Model to fit: f(x) = w * x
# The data below follows f(x) = 2 * x, so training should move w towards 2

# Training inputs and their targets, stored as float32 arrays
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

# Starting value of the single trainable weight
w = 0.0

# model prediction
def forward(x):
    """Return the model output ``w * x``, reading the module-level weight ``w``."""
    prediction = w * x
    return prediction

# loss = mse
def loss(y, y_predicted):
    """Return the mean squared error between ``y`` and ``y_predicted``."""
    residual = y - y_predicted
    return np.mean(residual ** 2)


# gradient of the loss with respect to w
# MSE   = (1/N) * sum((w*x - y)**2)
# dJ/dw = (1/N) * sum(2 * x * (w*x - y))

def gradient(x, y, y_predicted):
    """Return dJ/dw, the derivative of the mean squared error w.r.t. ``w``.

    Args:
        x: Input samples.
        y: Target values for the same samples.
        y_predicted: Model outputs ``w * x`` for the same samples.

    Returns:
        The per-sample terms ``2 * x * (y_predicted - y)`` averaged over all
        samples.
    """
    # np.dot would already sum over the samples and return a scalar, so a
    # trailing .mean() would be a no-op and the 1/N factor would be lost.
    # Averaging the element-wise product keeps the gradient consistent with
    # the mean taken in loss().
    return np.mean(2 * x * (y_predicted - y))

    

In [None]:
# Evaluate the model at x = 5 before the training loop has run
initial_prediction = forward(5)
print(f'Prediction before training : f(5) {initial_prediction:.3f}')

In [None]:
# Training: plain gradient descent on the single weight w
learning_rate = 0.01
n_iters = 10


for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss (computed with the weight as it was before this epoch's update)
    l = loss(Y, y_pred)

    # gradients
    dw = gradient(X, Y, y_pred)

    # update weights: step against the gradient, scaled by the learning rate
    w = w - learning_rate * dw

    # print info (a modulus of 1 logs every epoch; raise it to log less often)
    if epoch % 1 == 0:
        # The 'f' presentation type must sit inside the braces; written after
        # them it is printed as a literal character (e.g. "w = 0.3f").
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

In [None]:
# Evaluate the model at x = 5 again; this cell follows the training loop,
# so the label reads "after training"
print(f'Prediction after training : f(5) {forward(5):.3f}')

In [17]:
## Let's try doing it using PyTorch - autograd replaces the manual gradient calculation
import torch

# Training inputs and targets as float32 tensors
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)

# Trainable weight; requires_grad=True lets backward() populate w.grad
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


# model prediction
def forward(x):
    """Return the model output ``w * x`` for the module-level weight tensor ``w``."""
    prediction = w * x
    return prediction

# loss = mse
def loss(y, y_predicted):
    """Return the mean of the squared differences between ``y`` and ``y_predicted``."""
    residual = y - y_predicted
    squared = residual ** 2
    return squared.mean()

learning_rate = 0.01
n_iters = 10

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss (computed with the weight as it was before this epoch's update)
    l = loss(Y, y_pred)

    # gradients = backward pass: autograd computes dl/dw and stores it in
    # w.grad, replacing the hand-written gradient() call
    l.backward()

    # update weights with autograd tracking disabled, so the step itself is
    # not recorded as part of the computation graph
    with torch.no_grad():
        # The update must be in-place. `w = w - learning_rate * w.grad` would
        # rebind w to a new tensor that does not require grad, leaving no
        # w.grad to use on the next iteration.
        w -= learning_rate * w.grad

    # zero gradients: backward() adds into w.grad, so reset it to avoid
    # accumulating this epoch's gradient into the next pass
    w.grad.zero_()

    # print info (a modulus of 1 logs every epoch; raise it to log less often)
    if epoch % 1 == 0:
        # The 'f' presentation type must sit inside the braces; written after
        # them it is printed as a literal character (e.g. "w = 0.3f").
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')
        

epoch 1: w = 0.3f, loss = 30.0
epoch 2: w = 0.555f, loss = 21.674999
epoch 3: w = 0.772f, loss = 15.660188
epoch 4: w = 0.956f, loss = 11.314487
epoch 5: w = 1.11f, loss = 8.1747169
epoch 6: w = 1.25f, loss = 5.9062324
epoch 7: w = 1.36f, loss = 4.2672529
epoch 8: w = 1.46f, loss = 3.0830898
epoch 9: w = 1.54f, loss = 2.2275321
epoch 10: w = 1.61f, loss = 1.6093917
