In [None]:
import torch

w = torch.tensor(10.0, requires_grad=True)

learning_rate = 0.1

for epoch in range(15):
    loss = w ** 2
    loss.backward()

    with torch.no_grad():
        w -= learning_rate * w.grad
        
    
    if epoch % 1 == 0:
        print(f'Epoch {epoch+1}: w={w.item():.7f}, loss={loss.item():.7f}, grad={w.grad.item():.7f}')
    w.grad.zero_()


    # In this code, we simply set the weight ** 2 as loss
    # So the backpropagation will try to reduce the weight to 0
    # The learning rate controls how big a step we take on each iteration
    # The w.grad.zero_() is important because PyTorch accumulates gradients by default
    # So we need to zero it out before the next iteration

Epoch 1: w=8.0000000, loss=100.0000000, grad=20.0000000
Epoch 2: w=6.4000001, loss=64.0000000, grad=16.0000000
Epoch 3: w=5.1199999, loss=40.9600029, grad=12.8000002
Epoch 4: w=4.0959997, loss=26.2143993, grad=10.2399998
Epoch 5: w=3.2767997, loss=16.7772141, grad=8.1919994
Epoch 6: w=2.6214397, loss=10.7374163, grad=6.5535994
Epoch 7: w=2.0971518, loss=6.8719459, grad=5.2428794
Epoch 8: w=1.6777214, loss=4.3980455, grad=4.1943035
Epoch 9: w=1.3421772, loss=2.8147490, grad=3.3554428
Epoch 10: w=1.0737417, loss=1.8014395, grad=2.6843543
Epoch 11: w=0.8589934, loss=1.1529212, grad=2.1474833
Epoch 12: w=0.6871947, loss=0.7378696, grad=1.7179867
Epoch 13: w=0.5497558, loss=0.4722366, grad=1.3743894
Epoch 14: w=0.4398046, loss=0.3022314, grad=1.0995115
Epoch 15: w=0.3518437, loss=0.1934281, grad=0.8796092


***Gradient Descent***
It is an Optimization algorithm that uses the gradients to update the models parameters and tries to minimize the loss
- new_weight = old_weight - learning_rate * gradient
>
***Backpropagation***
It is an algorithm that calculates the gradient of the loss function with respect to each weight and bias in the network
- It uses chain rule from calculus, working backwardfrom the final loss through each layer of the network.It OUTPUS is a gradient value for every parameter in the model.

### Linear Regression without Libraries

In [None]:
import numpy as np

# f = w * x

# f = 2 * x
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0
 
# Model prediction
def forward(x):
    return w * x

# loss = Mean Squared Error (MSE)
def loss(y, y_pred):
    return ((y_pred - y)**2).mean() 

# gradient

# We need to take differentiation for the loss function which is MSE
# MSE = (1/N) * (w*x - y)^2
# After Differentiation - we take w*x-y as inner function and 1/N * u^2 as outer function
# Its the chain rule take differentiation of both and multiply those results

# differentiation of outer function = 2/N * u
# differentiation of inner function = x 

# gradient = 2/N * (w*x - y) * x  or 1/N * 2x * (wx - y)

def gradient(X, Y, Y_pred):
    return np.dot(2*X, Y_pred-Y).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # Prediction = Forward pass
    Y_pred = forward(X)

    # loss
    l = loss(Y, Y_pred)

    # gradient
    dw = gradient(X, Y, Y_pred)

    # update weights
    w -= (learning_rate * dw)

    if epoch % 1 == 0:
        print(f'Epoch {epoch+1}: w={w:.3f}, loss={l:.8f}, grad={dw:.3f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')
    

# New Prediction 
n = input("Enter a number to make a prediction: ")
n = float(n)
print(w*n)

Prediction before training: f(5) = 0.000
Epoch 1: w=1.200, loss=30.00000000, grad=-120.000
Epoch 2: w=1.680, loss=4.80000067, grad=-48.000
Epoch 3: w=1.872, loss=0.76800019, grad=-19.200
Epoch 4: w=1.949, loss=0.12288000, grad=-7.680
Epoch 5: w=1.980, loss=0.01966083, grad=-3.072
Epoch 6: w=1.992, loss=0.00314574, grad=-1.229
Epoch 7: w=1.997, loss=0.00050332, grad=-0.492
Epoch 8: w=1.999, loss=0.00008053, grad=-0.197
Epoch 9: w=1.999, loss=0.00001288, grad=-0.079
Epoch 10: w=2.000, loss=0.00000206, grad=-0.031
Epoch 11: w=2.000, loss=0.00000033, grad=-0.013
Epoch 12: w=2.000, loss=0.00000005, grad=-0.005
Epoch 13: w=2.000, loss=0.00000001, grad=-0.002
Epoch 14: w=2.000, loss=0.00000000, grad=-0.001
Epoch 15: w=2.000, loss=0.00000000, grad=-0.000
Epoch 16: w=2.000, loss=0.00000000, grad=-0.000
Epoch 17: w=2.000, loss=0.00000000, grad=-0.000
Epoch 18: w=2.000, loss=0.00000000, grad=-0.000
Epoch 19: w=2.000, loss=0.00000000, grad=-0.000
Epoch 20: w=2.000, loss=0.00000000, grad=0.000
Pred

### Linear Regression (Gradients using Pytorch)

In [20]:
import torch

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# Model prediction
def forward(x):
    return w*x

# loss = Mean Squared Error (MSE)
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # Prediction = Forward pass
    Y_pred = forward(X)

    # loss
    l = loss(Y, Y_pred)

    # gradient = backward pass
    l.backward() # dl/dw

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}: w={w:.3f}, loss={l:.8f}, grad={w.grad:.3f}')
    w.grad.zero_()

print(f'Prediction after training: f(5) = {forward(5):.3f}')


Prediction before training: f(5) = 0.000
Epoch 1: w=0.300, loss=30.00000000, grad=-30.000
Epoch 11: w=1.665, loss=1.16278565, grad=-5.906
Epoch 21: w=1.934, loss=0.04506890, grad=-1.163
Epoch 31: w=1.987, loss=0.00174685, grad=-0.229
Epoch 41: w=1.997, loss=0.00006770, grad=-0.045
Epoch 51: w=1.999, loss=0.00000262, grad=-0.009
Epoch 61: w=2.000, loss=0.00000010, grad=-0.002
Epoch 71: w=2.000, loss=0.00000000, grad=-0.000
Epoch 81: w=2.000, loss=0.00000000, grad=-0.000
Epoch 91: w=2.000, loss=0.00000000, grad=-0.000
Prediction after training: f(5) = 10.000


### Linear Regression Fully on Pytorch

In [27]:
# General Pipeline
# 1. Design Model(input and output size, forward pass)
# 2. Construct Loss and Optimizer
# 3. Training Loop
#    - Forward Pass: Compute Prediction and Loss
#    - Backward Pass: Compute Gradients
#    - Update Weights


import torch
import torch.nn as nn

X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([5,  10, 15, 20], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# Model prediction
def forward(x):
    return w*x 

# loss = Mean Squared Error (MSE)
def loss(y, y_pred):
    return ((y - y_pred)**2).mean()

# Gradient 

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # Prediction 
    Y_pred = forward(X)
     
    l = loss(Y, Y_pred)

    l.backward()

    with torch.no_grad():
        w -= learning_rate * w.grad
        
    if epoch % 1 == 0:
        print(f'Epoch {epoch+1}: w={w:.3f}, loss={l:.8f}, grad={w.grad:.3f}')
    
    w.grad.zero_()

print(f'Prediction after training: f(5) = {forward(5):.3f}')


Epoch 1: w=0.750, loss=187.50000000, grad=-75.000
Epoch 2: w=1.388, loss=135.46875000, grad=-63.750
Epoch 3: w=1.929, loss=97.87616730, grad=-54.188
Epoch 4: w=2.390, loss=70.71553040, grad=-46.059
Epoch 5: w=2.781, loss=51.09196854, grad=-39.150
Epoch 6: w=3.114, loss=36.91394043, grad=-33.278
Epoch 7: w=3.397, loss=26.67032433, grad=-28.286
Epoch 8: w=3.638, loss=19.26930809, grad=-24.043
Epoch 9: w=3.842, loss=13.92207718, grad=-20.437
Epoch 10: w=4.016, loss=10.05869961, grad=-17.371
Epoch 11: w=4.163, loss=7.26741409, grad=-14.766
Epoch 12: w=4.289, loss=5.25070572, grad=-12.551
Epoch 13: w=4.395, loss=3.79363537, grad=-10.668
Epoch 14: w=4.486, loss=2.74090099, grad=-9.068
Epoch 15: w=4.563, loss=1.98030066, grad=-7.708
Epoch 16: w=4.629, loss=1.43076599, grad=-6.552
Epoch 17: w=4.684, loss=1.03372908, grad=-5.569
Epoch 18: w=4.732, loss=0.74686909, grad=-4.734
Epoch 19: w=4.772, loss=0.53961229, grad=-4.023
Epoch 20: w=4.806, loss=0.38987094, grad=-3.420
Epoch 21: w=4.835, loss=