In [None]:
# Gradient descent and training demonstration by programming from scratch

import numpy as np

# Model output function
# f(x) = w * x 
# where w = 3

# Training inputs: the sample points x = 1..5, stored as float32.
X = np.arange(1, 6, dtype=np.float32)

# Reference outputs, generated by the true model f(x) = 3 * x.
Y = 3 * X

# Starting weight; training should move it towards 3.
w = 0.0

# forward pass
def forward(x):
    """Evaluate the bias-free linear model f(x) = w * x.

    ``w`` is read from the global scope at call time, so the result always
    reflects the current value of the weight.

    Args:
        x: Scalar or array of inputs; anything that supports ``w * x``.

    Returns:
        The product ``w * x``.
    """
    prediction = w * x
    return prediction

# Loss function: mean squared error - MSE
def loss(y, y_predicted):
    """Mean squared error (MSE) between predictions and targets.

    Args:
        y: Array of target values.
        y_predicted: Array of model outputs to compare against ``y``.

    Returns:
        The mean of the squared element-wise differences.
    """
    residual = y_predicted - y
    squared_error = residual ** 2
    return squared_error.mean()

# Gradient calculation
# d(loss)/dw = 1/N * sum(2 * x * (w*x - y))
# Obtained by applying the chain rule to the MSE loss.
def gradient(x, y, y_predicted):
    """Analytic gradient of the MSE loss with respect to the weight ``w``.

    For f(x) = w * x and loss = mean((w*x - y)**2), the chain rule gives
    d(loss)/dw = mean(2 * x * (w*x - y)).

    Args:
        x: Array of inputs.
        y: Array of target outputs, same shape as ``x``.
        y_predicted: Array of model outputs for ``x``, same shape as ``x``.

    Returns:
        Scalar gradient d(loss)/dw, averaged over the samples.
    """
    # Average the per-sample terms element-wise. Reducing with np.dot first
    # would already collapse them into a single sum, so a following .mean()
    # would be a no-op and the 1/N factor would be lost.
    return (2 * x * (y_predicted - y)).mean()

# Baseline: what the untrained model predicts for x = 10.
print(f'Prediction before training for f(10): {forward(10):.3f}')

# Hyperparameters for gradient descent.
learning_rate = 0.005
n_iters = 20

# Each iteration performs three steps:
#   1. forward pass  -> predictions and their loss
#   2. backward pass -> gradient of the loss with respect to w
#   3. update        -> move w against the gradient
for epoch in range(n_iters):
    # Forward pass: predictions for all training inputs with the current w.
    predicted_y = forward(X)

    # Mean squared error of those predictions (measured before the update).
    l = loss(Y, predicted_y)

    # Backward pass: analytic d(loss)/dw.
    dw = gradient(X, Y, predicted_y)

    # Gradient-descent step, scaled by the learning rate.
    w = w - dw * learning_rate

    # Report only every second epoch; l is the loss from before this update.
    if epoch % 2 != 0:
        continue
    print(f'epoch {epoch}: w = {w:.3f}, loss = {l:.8f}')

# Same query as before training, now with the learned weight.
print(f'Prediction after training for f(10): {forward(10):.3f}')

In [None]:
# Gradient descent demonstration with PyTorch autograd

import torch

# Model output function
# f(x) = w * x 
# where w = 3

# Training inputs: the sample points x = 1..5 as a float32 tensor.
X = torch.arange(1, 6, dtype=torch.float32)

# Reference outputs, generated by the true model f(x) = 3 * x.
Y = 3 * X

# Starting weight; requires_grad=True lets autograd track operations on w
# so that backward() can fill in w.grad.
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)

# forward pass
def forward(x):
    """Evaluate the bias-free linear model f(x) = w * x.

    ``w`` is read from the global scope at call time, so the result always
    reflects the current value of the weight.

    Args:
        x: Scalar or tensor of inputs; anything that supports ``w * x``.

    Returns:
        The product ``w * x``.
    """
    prediction = w * x
    return prediction

# Loss function: mean squared error - MSE
def loss(y, y_predicted):
    """Mean squared error (MSE) between predictions and targets.

    Args:
        y: Tensor of target values.
        y_predicted: Tensor of model outputs to compare against ``y``.

    Returns:
        The mean of the squared element-wise differences.
    """
    residual = y_predicted - y
    squared_error = residual ** 2
    return squared_error.mean()

# No gradient function is needed here because autograd computes the gradient

# Baseline: what the untrained model predicts for x = 10.
print(f'Prediction before training for f(10): {forward(10):.3f}')

# Hyperparameters for gradient descent.
learning_rate = 0.005
n_iters = 100

# Each iteration performs three steps:
#   1. forward pass  -> predictions and their loss
#   2. backward pass -> autograd computes dl/dw
#   3. update        -> move w against the gradient
for epoch in range(n_iters):
    # Forward pass: predictions for all training inputs with the current w.
    predicted_y = forward(X)

    # Mean squared error of those predictions (measured before the update).
    l = loss(Y, predicted_y)

    # Backward pass: autograd stores dl/dw in w.grad.
    l.backward()

    # The weight update is bookkeeping on the parameter, not part of the
    # model's computation, so it is done under no_grad to keep it out of the
    # computational graph.
    with torch.no_grad():
        w -= learning_rate * w.grad

    # Reset the stored gradient so it does not accumulate across iterations.
    w.grad.zero_()

    # Report only every tenth epoch; l is the loss from before this update.
    if epoch % 10 != 0:
        continue
    print(f'epoch {epoch}: w = {w:.3f}, loss = {l:.8f}')

# Same query as before training, now with the learned weight.
print(f'Prediction after training for f(10): {forward(10):.3f}')

In [None]:
# Gradient descent demonstration with autograd & optimizer

import torch
import torch.nn as nn

# Model output function
# f(x) = w * x 
# where w = 3

# Training inputs: the sample points x = 1..5 as a float32 tensor.
X = torch.arange(1, 6, dtype=torch.float32)

# Reference outputs, generated by the true model f(x) = 3 * x.
Y = 3 * X

# Starting weight; requires_grad=True lets autograd track operations on w
# so that backward() can fill in its gradient.
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)

# forward pass
def forward(x):
    """Evaluate the bias-free linear model f(x) = w * x.

    ``w`` is read from the global scope at call time, so the result always
    reflects the current value of the weight.

    Args:
        x: Scalar or tensor of inputs; anything that supports ``w * x``.

    Returns:
        The product ``w * x``.
    """
    prediction = w * x
    return prediction

# Baseline: what the untrained model predicts for x = 10.
print(f'Prediction before training for f(10): {forward(10):.3f}')

# Hyperparameters for gradient descent.
learning_rate = 0.005
n_iters = 100

# Mean squared error (MSE) from torch.nn replaces a hand-written loss function.
loss = nn.MSELoss()

# The SGD optimizer applies the update rule to w, so no manual update is needed.
optimizer = torch.optim.SGD([w], lr=learning_rate)

# Each iteration performs three steps:
#   1. forward pass  -> predictions and their loss
#   2. backward pass -> autograd computes dl/dw
#   3. update        -> the optimizer moves w against the gradient
for epoch in range(n_iters):
    # Forward pass: predictions for all training inputs with the current w.
    predicted_y = forward(X)

    # nn.MSELoss is called as loss(input, target): the prediction comes first,
    # the reference output second. MSE is symmetric, but keeping the documented
    # order means swapping in an asymmetric loss later stays correct.
    l = loss(predicted_y, Y)

    # Backward pass: autograd computes dl/dw for the optimizer to use.
    l.backward()

    # Let the optimizer apply the gradient-descent step to w.
    optimizer.step()

    # Clear the gradients held by the optimizer's parameters so they do not
    # accumulate across iterations.
    optimizer.zero_grad()

    # Report only every tenth epoch; l is the loss from before this update.
    if epoch % 10 == 0:
        print(f'epoch {epoch}: w = {w:.3f}, loss = {l:.8f}')

# Same query as before training, now with the learned weight.
print(f'Prediction after training for f(10): {forward(10):.3f}')