Gradient Descent - Autograd

## GENERAL Step by step <p>
1) Design model ( input, output size, forward pass )<p>
2) Construct loss and optimizer<p>
3) Training loop<p>

- forward pass : compute prediction
- backward pass : gradients
- update weights

# Add Autograd

In [1]:
import torch
import numpy as np

np.random.seed(42)

# f = w * x
# f = 2 * x

X = torch.tensor([1,2,3,4],dtype = torch.float32)
Y = torch.tensor([2,4,6,8],dtype = torch.float32)

#initialize the weight
w = torch.tensor(0,dtype = torch.float32, requires_grad=True) 
print(w)

# model prediction
def forward(x):
  return w * x

# loss = MSE
def loss(y, y_hat):
  return ((y_hat - y)**2).mean()

print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = forward(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw
  print(w.grad)

  # update weights
  with torch.no_grad(): #ensure the gradient is not calculated
    w.sub_(learning_rate*w.grad)

  # zero the gradients to ensure it's not accumulated
  w.grad.zero_() #reset zero

  if epoch % 10==0:
    print(f'epoch: {epoch+1}, weight : {w:.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {forward(5):.3f}')

tensor(0., requires_grad=True)
Prediction before training : f(5) = 0.000
tensor(-30.)
epoch: 1, weight : 0.300, loss =30.00000000
tensor(-25.5000)
tensor(-21.6750)
tensor(-18.4238)
tensor(-15.6602)
tensor(-13.3112)
tensor(-11.3145)
tensor(-9.6173)
tensor(-8.1747)
tensor(-6.9485)
tensor(-5.9062)
epoch: 11, weight : 1.665, loss =1.16278565
tensor(-5.0203)
tensor(-4.2673)
tensor(-3.6272)
tensor(-3.0831)
tensor(-2.6206)
tensor(-2.2275)
tensor(-1.8934)
tensor(-1.6094)
tensor(-1.3680)
tensor(-1.1628)
epoch: 21, weight : 1.934, loss =0.04506890
tensor(-0.9884)
tensor(-0.8401)
tensor(-0.7141)
tensor(-0.6070)
tensor(-0.5159)
tensor(-0.4385)
tensor(-0.3728)
tensor(-0.3168)
tensor(-0.2693)
tensor(-0.2289)
epoch: 31, weight : 1.987, loss =0.00174685
tensor(-0.1946)
tensor(-0.1654)
tensor(-0.1406)
tensor(-0.1195)
tensor(-0.1016)
tensor(-0.0863)
tensor(-0.0734)
tensor(-0.0624)
tensor(-0.0530)
tensor(-0.0451)
epoch: 41, weight : 1.997, loss =0.00006770
tensor(-0.0383)
tensor(-0.0326)
tensor(-0.0277)


# Add optimizer,nn.Linear

In [2]:
import torch
import torch.nn as nn 

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

# test sample
X_test = torch.tensor([5],dtype = torch.float32)

n_samples,n_features = X.shape
print(n_samples, n_features)

# 1) Design Model, the model has to implement the forward pass!
# Here we can use a built-in model from PyTorch

input_size = n_features
output_size = n_features

model = nn.Linear(input_size,output_size)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

# Training
learning_rate = 0.01
n_iters = 100

# Loss
loss = nn.MSELoss() 

# optimzie the weights
# bias all included
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw
  print(w.grad)

  # update weights
  optimizer.step()
  
  # zero gradients
  optimizer.zero_grad()

  # zero the gradients to ensure it's not accumulated
  w.grad.zero_() #reset zero

  if epoch % 10==0:
    [w,b] = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w[0][0].item():.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

4 1
Prediction before training : f(5) = -5.005
tensor(0.)
epoch: 1, weight : -0.407, loss =71.58859253
tensor([[-38.5890]])
tensor([[-32.1492]])
tensor([[-26.7849]])
tensor([[-22.3165]])
tensor([[-18.5943]])
tensor([[-15.4937]])
tensor([[-12.9109]])
tensor([[-10.7594]])
tensor([[-8.9673]])
tensor([[-7.4744]])
epoch: 11, weight : 1.533, loss =1.86503279
tensor([[-6.2308]])
tensor([[-5.1949]])
tensor([[-4.3320]])
tensor([[-3.6132]])
tensor([[-3.0144]])
tensor([[-2.5156]])
tensor([[-2.1001]])
tensor([[-1.7540]])
tensor([[-1.4656]])
tensor([[-1.2255]])
epoch: 21, weight : 1.848, loss =0.06036882
tensor([[-1.0254]])
tensor([[-0.8587]])
tensor([[-0.7198]])
tensor([[-0.6041]])
tensor([[-0.5077]])
tensor([[-0.4274]])
tensor([[-0.3605]])
tensor([[-0.3048]])
tensor([[-0.2583]])
tensor([[-0.2196]])
epoch: 31, weight : 1.901, loss =0.01297265
tensor([[-0.1874]])
tensor([[-0.1605]])
tensor([[-0.1381]])
tensor([[-0.1194]])
tensor([[-0.1038]])
tensor([[-0.0909]])
tensor([[-0.0800]])
tensor([[-0.0710]

# Add nn.MSELoss()

In [3]:
#1) Design model ( input, output size, forward pass )
#2) Construct loss and optimizer
#3) Training loop
#forward pass : compute prediction
#backward pass : gradients
#update weights

import torch
import torch.nn as nn

#np.random.seed(42)

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

X_test = torch.tensor([[5]],dtype = torch.float32)
n_samples,n_features = X.shape
print(X.shape)

input_size = n_features
output_size = n_features

model = nn.Linear(input_size,output_size)

#initialize the weight
w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True) 

# model prediction
def forward(x):
  return w * x

# Training
learning_rate = 0.01
n_iters = 100

# loss = MSE
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights
  #with torch.no_grad(): #ensure the gradient is not calculated
  optimizer.step()

  # zero the gradients to ensure it's not accumulated
  optimizer.zero_grad() #reset zero

  if epoch % 10==0:
    w,b = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w.item():.3f}, loss ={l.item():.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

torch.Size([4, 1])
Prediction before training : f(5) = -1.751
epoch: 1, weight : 0.151, loss =44.74452209
epoch: 11, weight : 1.684, loss =1.15837872
epoch: 21, weight : 1.931, loss =0.03065797
epoch: 31, weight : 1.971, loss =0.00144119
epoch: 41, weight : 1.978, loss =0.00064757
epoch: 51, weight : 1.980, loss =0.00059152
epoch: 61, weight : 1.980, loss =0.00055661
epoch: 71, weight : 1.981, loss =0.00052421
epoch: 81, weight : 1.982, loss =0.00049369
epoch: 91, weight : 1.982, loss =0.00046496
Prediction after training : f(5) = 9.964


# Add Class

In [4]:
#1) Design model ( input, output size, forward pass )
#2) Construct loss and optimizer
#3) Training loop
#forward pass : compute prediction
#backward pass : gradients
#update weights

import torch
import torch.nn as nn

#np.random.seed(42)

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

X_test = torch.tensor([[5]],dtype = torch.float32)
n_samples,n_features = X.shape
print(X.shape)

input_size = n_features
output_size = n_features

#model = nn.Linear(input_size,output_size)

#initialize the weight
w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True) 

# model prediction
def forward(x):
  return w * x

class LinearRegression(nn.Module):
  
  def __init__(self,input_dim,output_dim):
    super(LinearRegression,self).__init__()
    # define layers
    self.lin = nn.Linear(input_dim,output_dim)
  
  def forward(self,x):
    return self.lin(x)

model = LinearRegression(input_size,output_size)


# Training
learning_rate = 0.01
n_iters = 100

# loss = MSE
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights
  #with torch.no_grad(): #ensure the gradient is not calculated
  optimizer.step()

  # zero the gradients to ensure it's not accumulated
  optimizer.zero_grad() #reset zero

  if epoch % 10==0:
    w,b = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w.item():.3f}, loss ={l.item():.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

torch.Size([4, 1])
Prediction before training : f(5) = -0.745
epoch: 1, weight : 0.229, loss =35.72889709
epoch: 11, weight : 1.601, loss =0.95077527
epoch: 21, weight : 1.825, loss =0.04944678
epoch: 31, weight : 1.864, loss =0.02468096
epoch: 41, weight : 1.874, loss =0.02267811
epoch: 51, weight : 1.879, loss =0.02134349
epoch: 61, weight : 1.882, loss =0.02010081
epoch: 71, weight : 1.886, loss =0.01893084
epoch: 81, weight : 1.889, loss =0.01782896
epoch: 91, weight : 1.892, loss =0.01679120
Prediction after training : f(5) = 9.784
