### PyTorch Tutorial 05 - Gradient Descent with Autograd and Backpropagation

https://www.youtube.com/watch?v=E-I2DNVzQLg

In [24]:
import numpy as np

# f = w * x
# f = 2 * x

X = np.array([1,2,3,4], np.float32)
Y = np.array([2,4,6,8], np.float32)

w = 0.0

# model prediction
def forward(x):
    return w * x

# loss = mse
def loss(y, y_predicted):
    return np.average((y - y_predicted)**2)


# gradient
# MSE = (1.0/N) (w*x - y)**2
# dJ/dw = (1.0/N) * 2 * (w*x - y) * x
#       = (1.0/N) * 2x * (w*x - y)

def gradient(x,y, y_predicted):
    return (np.dot(2 * x, y_predicted - y)).mean()

    

In [25]:
## Prediction before training f(5) 
print(f'Prediction before training : f(5) {forward(5):.3f}')

Prediction before training : f(5) 0.000


In [26]:
# Training
learning_rate = 0.01
n_iters = 10


for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss( Y, y_pred)
    
    #gradients
    dw = gradient(X,Y, y_pred)
    
    # update weights
    w = w - learning_rate * dw
    
    # print info
    if epoch %1 == 0: 
        print(f'epoch {epoch + 1}: w = {w:.3}f, loss = {l:.8}')

epoch 1: w = 1.2f, loss = 30.0
epoch 2: w = 1.68f, loss = 4.7999992
epoch 3: w = 1.87f, loss = 0.76800019
epoch 4: w = 1.95f, loss = 0.12288
epoch 5: w = 1.98f, loss = 0.019660834
epoch 6: w = 1.99f, loss = 0.0031457357
epoch 7: w = 2.0f, loss = 0.00050330802
epoch 8: w = 2.0f, loss = 8.0531863e-05
epoch 9: w = 2.0f, loss = 1.2884395e-05
epoch 10: w = 2.0f, loss = 2.0613531e-06


In [27]:
print(f'Prediction before training : f(5) {forward(5):.3f}')

Prediction before training : f(5) 9.999


In [29]:
## Lets try doing it using pytoch - we replace gradient calculations
import torch

X = torch.tensor([1,2,3,4], dtype = torch.float32)
Y = torch.tensor([2,4,6,8], dtype = torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad = True)


# model prediction
def forward(x):
    return w * x

# loss = mse
def loss(y, y_predicted):
    return ((y - y_predicted)**2).mean()

learning_rate = 0.01
n_iters = 50

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y, y_pred)
    
    #gradients = backward pass
    #dw = gradient(X,Y, y_pred)
    l.backward()  # dl/dw
    
    # update weights
    #w = w - learning_rate * dw
    with torch.no_grad():  
        # torch.no_grad(): We use this as we want this operation not to be part of 
        # gradient tracking step
        # dw = w.grad
        # Note that w =  w - learning_rate * w.grad doesnt work
        w -=  learning_rate * w.grad
        
    # zero gradients : to avoid accumulation of gradient to next pass
    w.grad.zero_()
    
    # print info
    if epoch %2 == 0: 
        print(f'epoch {epoch + 1}: w = {w:.3}f, loss = {l:.8}')
        

epoch 1: w = 0.3f, loss = 30.0
epoch 3: w = 0.772f, loss = 15.660188
epoch 5: w = 1.11f, loss = 8.1747169
epoch 7: w = 1.36f, loss = 4.2672529
epoch 9: w = 1.54f, loss = 2.2275321
epoch 11: w = 1.67f, loss = 1.1627856
epoch 13: w = 1.76f, loss = 0.60698116
epoch 15: w = 1.83f, loss = 0.3168478
epoch 17: w = 1.87f, loss = 0.16539653
epoch 19: w = 1.91f, loss = 0.086338058
epoch 21: w = 1.93f, loss = 0.045068897
epoch 23: w = 1.95f, loss = 0.023526315
epoch 25: w = 1.97f, loss = 0.012280837
epoch 27: w = 1.98f, loss = 0.0064106593
epoch 29: w = 1.98f, loss = 0.0033464201
epoch 31: w = 1.99f, loss = 0.0017468547
epoch 33: w = 1.99f, loss = 0.00091188005
epoch 35: w = 1.99f, loss = 0.00047600627
epoch 37: w = 2.0f, loss = 0.000248478
epoch 39: w = 2.0f, loss = 0.0001297064
epoch 41: w = 2.0f, loss = 6.7704947e-05
epoch 43: w = 2.0f, loss = 3.5343608e-05
epoch 45: w = 2.0f, loss = 1.8447146e-05
epoch 47: w = 2.0f, loss = 9.6315316e-06
epoch 49: w = 2.0f, loss = 5.0273838e-06
