# Gradient Descent using Autograd

We will see how to optimize a model using automatic gradient computation using Pytorch's autograd package. We will see how to implement linear regression from scratch, implement the equations for model prediction and loss function, numerical computation of the gradients and implement the formulae, and implement gradient descent for parameter optimization. We can then see how to replace manually calculated gradients using autograd, and replace the manual loss and optimizer steps, as well as the model with Pytorch's implementations.

In [10]:
# Imports
import numpy as np
import torch

In [15]:
# MANUAL IMPLEMENTATION
# f = w * x (a linear combination of weights and inputs as any function)
# Let's go with f = 2 * x, so weight is 2. Ignore bias.
X = np.array([1, 2, 3, 4], dtype=np.float32)  # 1, 2, 3, 4 are training samples
Y = np.array([2, 4, 6, 8], dtype=np.float32)  # 2, 4, 6, 8 are corresponding outputs

w = 0.0  # initial weight

# model prediction
def forward_pass(x):
    return w * x

# loss
def loss_function(y, y_predicted):
    return ((y_predicted - y) ** 2).mean() # mean squared error

# gradient
# mean squared error = 1/N * (w * x - y) ** 2
# dJ/dw = 1/N 2 * (w * x - y)
def gradient(x, y, y_predicted):
    return np.dot(2 * x, (y_predicted - y)).mean()

print(f'Prediction before training: f(17) = {forward_pass(17):.3f}')

# Parameters
learning_rate = 0.01
num_epochs = 20

# Training loop
for epoch in range(num_epochs):
    # prediction = forward pass
    y_pred = forward_pass(X)
    # loss
    l = loss_function(Y, y_pred)
    # gradients
    dw = gradient(X, Y, y_pred)

    # update weights
    # We go in the negative direction of training of gradient
    w -= learning_rate * dw

    if epoch % 2 == 0:
        print(f'Epoch {epoch + 1}: weight = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(17) = {forward_pass(17):.3f}')


Prediction before training: f(17) = 0.000
Epoch 1: weight = 1.200, loss = 30.00000000
Epoch 3: weight = 1.872, loss = 0.76800019
Epoch 5: weight = 1.980, loss = 0.01966083
Epoch 7: weight = 1.997, loss = 0.00050331
Epoch 9: weight = 1.999, loss = 0.00001288
Epoch 11: weight = 2.000, loss = 0.00000033
Epoch 13: weight = 2.000, loss = 0.00000001
Epoch 15: weight = 2.000, loss = 0.00000000
Epoch 17: weight = 2.000, loss = 0.00000000
Epoch 19: weight = 2.000, loss = 0.00000000
Prediction after training: f(17) = 34.000


In [17]:
# TORCH IMPLEMENTATION
# We still do loss manually without optimization
# f = w * x (a linear combination of weights and inputs as any function)
# Let's go with f = 2 * x, so weight is 2. Ignore bias.
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)  # 1, 2, 3, 4 are training samples
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)  # 2, 4, 6, 8 are corresponding outputs

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)  # initial weight

# model prediction
def forward_pass(x):
    return w * x

# loss
def loss_function(y, y_predicted):
    return ((y_predicted - y) ** 2).mean() # mean squared error

print(f'Prediction before training: f(17) = {forward_pass(17):.3f}')

# Parameters
learning_rate = 0.01
num_epochs = 100  # Numerical gradient computation is more accurate
# Backpropagation less so, but more suited for complex tasks
# We'll just take more epochs for this

# Training loop
for epoch in range(num_epochs):
    # prediction = forward pass
    y_pred = forward_pass(X)
    # loss
    l = loss_function(Y, y_pred)
    # gradients
    l.backward()  # Pytorch's backward pass dl/dw

    # update weights
    # We go in the negative direction of training of gradient
    # In this case it should not be part of the gradient computation graph
    with torch.no_grad():
        w -= learning_rate * w.grad

    # Additionally reset gradients to zero
    w.grad.zero_()

    if epoch % 10 == 0:
        print(f'Epoch {epoch + 1}: weight = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(17) = {forward_pass(17):.3f}')


Prediction before training: f(17) = 0.000
Epoch 1: weight = 0.300, loss = 30.00000000
Epoch 11: weight = 1.665, loss = 1.16278565
Epoch 21: weight = 1.934, loss = 0.04506890
Epoch 31: weight = 1.987, loss = 0.00174685
Epoch 41: weight = 1.997, loss = 0.00006770
Epoch 51: weight = 1.999, loss = 0.00000262
Epoch 61: weight = 2.000, loss = 0.00000010
Epoch 71: weight = 2.000, loss = 0.00000000
Epoch 81: weight = 2.000, loss = 0.00000000
Epoch 91: weight = 2.000, loss = 0.00000000
Prediction after training: f(17) = 34.000
