In [30]:
import numpy as np

# f = w * x

# f = 2 * x
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

In [31]:
# model prediction
def forward(x):
    return w * x

# Loss = MSE = 1/N * (w*x - y)**2
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()

# Gradient
def gradient(x, y, y_pred):
    return np.dot(2*x, y_pred-y).mean() # dJ/dw = 1/N * 2x (w*x - y) [Derivative]

In [32]:
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iterations = 12

for epoch in range(n_iterations):
    # prediction = forward pass
    y_pred = forward(X)

    # Loss
    l = loss(Y, y_pred)

    # Gradients
    dw = gradient(X, Y, y_pred)

    # Update weights (Go in the negative direction)
    w -= learning_rate * dw

    if epoch % 1 == 0: # PS: change the value to '2' to print only every other epoch
        print(f'Epoch: {epoch+1}: w = {w:.3f}, loss = {l:.8f}')


print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
Epoch: 1: w = 1.200, loss = 30.00000000
Epoch: 2: w = 1.680, loss = 4.79999924
Epoch: 3: w = 1.872, loss = 0.76800019
Epoch: 4: w = 1.949, loss = 0.12288000
Epoch: 5: w = 1.980, loss = 0.01966083
Epoch: 6: w = 1.992, loss = 0.00314570
Epoch: 7: w = 1.997, loss = 0.00050332
Epoch: 8: w = 1.999, loss = 0.00008053
Epoch: 9: w = 1.999, loss = 0.00001288
Epoch: 10: w = 2.000, loss = 0.00000206
Epoch: 11: w = 2.000, loss = 0.00000033
Epoch: 12: w = 2.000, loss = 0.00000005
Prediction after training: f(5) = 10.000


In [33]:
# f = w * x (So 'w' should be 2 for us to get the right formula, which is 'f = 2 * x')

In [34]:
# NOW LET'S START USING PYTORCH AND ELIMINATE THE MANUAL GRADIENT FUNCTION
# Prediction: Manual
# Gradients Computation: Autograd
# Loss Computation: Manual
# Parameter Updates: Manual 

In [35]:
import torch

# f = w * x

# f = 2 * x
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [36]:
# Model and Loss function are still the same (no need for the 'gradient' function anymore though)
# model prediction
def forward(x):
    return w * x

# Loss = MSE = 1/N * (w*x - y)**2
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()

In [37]:
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iterations = 40

for epoch in range(n_iterations):
    # prediction = forward pass
    y_pred = forward(X)

    # Loss
    l = loss(Y, y_pred)

    # Gradients = backward pass
    l.backward() # dl/dw

    # Update weights
    with torch.no_grad(): # Use this as the weights should not be part of the computation graph
        w -= learning_rate * w.grad

    # We have to empty the gradients
    w.grad.zero_()

    if epoch % 3 == 0: # Print every 3 steps
        print(f'Epoch: {epoch+1}: w = {w:.3f}, loss = {l:.8f}')


print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training: f(5) = 0.000
Epoch: 1: w = 0.300, loss = 30.00000000
Epoch: 4: w = 0.956, loss = 11.31448650
Epoch: 7: w = 1.359, loss = 4.26725292
Epoch: 10: w = 1.606, loss = 1.60939169
Epoch: 13: w = 1.758, loss = 0.60698116
Epoch: 16: w = 1.851, loss = 0.22892261
Epoch: 19: w = 1.909, loss = 0.08633806
Epoch: 22: w = 1.944, loss = 0.03256231
Epoch: 25: w = 1.966, loss = 0.01228084
Epoch: 28: w = 1.979, loss = 0.00463169
Epoch: 31: w = 1.987, loss = 0.00174685
Epoch: 34: w = 1.992, loss = 0.00065882
Epoch: 37: w = 1.995, loss = 0.00024848
Epoch: 40: w = 1.997, loss = 0.00009371
Prediction after training: f(5) = 9.985


In [38]:
# PS: It uses a different backward propagation.
# That's why, in this case, it took longer to get closer to the right answer.