Gradient Descent - Autograd

## GENERAL Step by step <p>
1) Design model ( input, output size, forward pass )<p>
2) Construct loss and optimizer<p>
3) Training loop<p>

- forward pass : compute prediction
- backward pass : gradients
- update weights

# Add Autograd

In [1]:
import torch
import numpy as np

np.random.seed(42)

# f = w * x
# f = 2 * x

X = torch.tensor([1,2,3,4],dtype = torch.float32)
Y = torch.tensor([2,4,6,8],dtype = torch.float32)

#initialize the weight
w = torch.tensor(0,dtype = torch.float32, requires_grad=True) 
print(w)

# model prediction
def forward(x):
  return w * x

# loss = MSE
def loss(y, y_hat):
  return ((y_hat - y)**2).mean()

print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = forward(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw
  print(w.grad)

  # update weights
  with torch.no_grad(): #ensure the gradient is not calculated
    w.sub_(learning_rate*w.grad)

  # zero the gradients to ensure it's not accumulated
  w.grad.zero_() #reset zero

  if epoch % 10==0:
    print(f'epoch: {epoch+1}, weight : {w:.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {forward(5):.3f}')

tensor(0., requires_grad=True)
Prediction before training : f(5) = 0.000
tensor(-30.)
epoch: 1, weight : 0.300, loss =30.00000000
tensor(-25.5000)
tensor(-21.6750)
tensor(-18.4238)
tensor(-15.6602)
tensor(-13.3112)
tensor(-11.3145)
tensor(-9.6173)
tensor(-8.1747)
tensor(-6.9485)
tensor(-5.9062)
epoch: 11, weight : 1.665, loss =1.16278565
tensor(-5.0203)
tensor(-4.2673)
tensor(-3.6272)
tensor(-3.0831)
tensor(-2.6206)
tensor(-2.2275)
tensor(-1.8934)
tensor(-1.6094)
tensor(-1.3680)
tensor(-1.1628)
epoch: 21, weight : 1.934, loss =0.04506890
tensor(-0.9884)
tensor(-0.8401)
tensor(-0.7141)
tensor(-0.6070)
tensor(-0.5159)
tensor(-0.4385)
tensor(-0.3728)
tensor(-0.3168)
tensor(-0.2693)
tensor(-0.2289)
epoch: 31, weight : 1.987, loss =0.00174685
tensor(-0.1946)
tensor(-0.1654)
tensor(-0.1406)
tensor(-0.1195)
tensor(-0.1016)
tensor(-0.0863)
tensor(-0.0734)
tensor(-0.0624)
tensor(-0.0530)
tensor(-0.0451)
epoch: 41, weight : 1.997, loss =0.00006770
tensor(-0.0383)
tensor(-0.0326)
tensor(-0.0277)


# Add optimizer,nn.Linear

In [2]:
import torch
import torch.nn as nn 

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

# test sample
X_test = torch.tensor([5],dtype = torch.float32)

n_samples,n_features = X.shape
print(n_samples, n_features)

# 1) Design Model, the model has to implement the forward pass!
# Here we can use a built-in model from PyTorch

input_size = n_features
output_size = n_features

model = nn.Linear(input_size,output_size)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

# Training
learning_rate = 0.01
n_iters = 100

# Loss
loss = nn.MSELoss() 

# optimzie the weights
# bias all included
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw
  print(w.grad)

  # update weights
  optimizer.step()
  
  # zero gradients
  optimizer.zero_grad()

  # zero the gradients to ensure it's not accumulated
  w.grad.zero_() #reset zero

  if epoch % 10==0:
    [w,b] = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w[0][0].item():.3f}, loss ={l:.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

4 1
Prediction before training : f(5) = 0.081
tensor(0.)
epoch: 1, weight : 0.265, loss =28.65767860
tensor([[-24.4345]])
tensor([[-20.3674]])
tensor([[-16.9795]])
tensor([[-14.1573]])
tensor([[-11.8064]])
tensor([[-9.8481]])
tensor([[-8.2168]])
tensor([[-6.8579]])
tensor([[-5.7259]])
tensor([[-4.7829]])
epoch: 11, weight : 1.497, loss =0.84330875
tensor([[-3.9973]])
tensor([[-3.3429]])
tensor([[-2.7978]])
tensor([[-2.3437]])
tensor([[-1.9653]])
tensor([[-1.6501]])
tensor([[-1.3875]])
tensor([[-1.1688]])
tensor([[-0.9865]])
tensor([[-0.8346]])
epoch: 21, weight : 1.702, loss =0.11775488
tensor([[-0.7081]])
tensor([[-0.6026]])
tensor([[-0.5147]])
tensor([[-0.4415]])
tensor([[-0.3804]])
tensor([[-0.3295]])
tensor([[-0.2871]])
tensor([[-0.2517]])
tensor([[-0.2222]])
tensor([[-0.1976]])
epoch: 31, weight : 1.741, loss =0.09339909
tensor([[-0.1771]])
tensor([[-0.1599]])
tensor([[-0.1456]])
tensor([[-0.1336]])
tensor([[-0.1236]])
tensor([[-0.1152]])
tensor([[-0.1082]])
tensor([[-0.1023]])
te

# Add nn.MSELoss()

In [3]:
#1) Design model ( input, output size, forward pass )
#2) Construct loss and optimizer
#3) Training loop
#forward pass : compute prediction
#backward pass : gradients
#update weights

import torch
import torch.nn as nn

#np.random.seed(42)

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

X_test = torch.tensor([[5]],dtype = torch.float32)
n_samples,n_features = X.shape
print(X.shape)

input_size = n_features
output_size = n_features

model = nn.Linear(input_size,output_size)

#initialize the weight
w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True) 

# model prediction
def forward(x):
  return w * x

# Training
learning_rate = 0.01
n_iters = 100

# loss = MSE
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights
  #with torch.no_grad(): #ensure the gradient is not calculated
  optimizer.step()

  # zero the gradients to ensure it's not accumulated
  optimizer.zero_grad() #reset zero

  if epoch % 10==0:
    w,b = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w.item():.3f}, loss ={l.item():.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

torch.Size([4, 1])
Prediction before training : f(5) = -4.077
epoch: 1, weight : -0.570, loss =55.11408615
epoch: 11, weight : 1.140, loss =1.83126807
epoch: 21, weight : 1.428, loss =0.42912209
epoch: 31, weight : 1.487, loss =0.37062573
epoch: 41, weight : 1.509, loss =0.34818617
epoch: 51, weight : 1.525, loss =0.32789737
epoch: 61, weight : 1.539, loss =0.30881146
epoch: 71, weight : 1.553, loss =0.29083693
epoch: 81, weight : 1.566, loss =0.27390882
epoch: 91, weight : 1.579, loss =0.25796592
Prediction after training : f(5) = 9.155


In [0]:
Add Class

In [4]:
#1) Design model ( input, output size, forward pass )
#2) Construct loss and optimizer
#3) Training loop
#forward pass : compute prediction
#backward pass : gradients
#update weights

import torch
import torch.nn as nn

#np.random.seed(42)

# f = w * x
# f = 2 * x

# 0) Training samples, watch the shape!
# bias are taken care of
X = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

X_test = torch.tensor([[5]],dtype = torch.float32)
n_samples,n_features = X.shape
print(X.shape)

input_size = n_features
output_size = n_features

#model = nn.Linear(input_size,output_size)

#initialize the weight
w = torch.tensor(0.0,dtype = torch.float32, requires_grad=True) 

# model prediction
def forward(x):
  return w * x

class LinearRegression(nn.Module):
  
  def __init__(self,input_dim,output_dim):
    super(LinearRegression,self).__init__()
    # define layers
    self.lin = nn.Linear(input_dim,output_dim)
  
  def forward(self,x):
    return self.lin(x)

model = LinearRegression(input_size,output_size)


# Training
learning_rate = 0.01
n_iters = 100

# loss = MSE
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=learning_rate)

print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

for epoch in range(n_iters):
  # prediction = forward pass
  y_hat = model(X)
  
  # loss
  l=loss(Y,y_hat)

  # gradients = backward pass
  l.backward() # dl/dw

  # update weights
  #with torch.no_grad(): #ensure the gradient is not calculated
  optimizer.step()

  # zero the gradients to ensure it's not accumulated
  optimizer.zero_grad() #reset zero

  if epoch % 10==0:
    w,b = model.parameters()
    print(f'epoch: {epoch+1}, weight : {w.item():.3f}, loss ={l.item():.8f}')

print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

torch.Size([4, 1])
Prediction before training : f(5) = 3.155
epoch: 1, weight : 0.668, loss =12.13250446
epoch: 11, weight : 1.470, loss =0.51648831
epoch: 21, weight : 1.608, loss =0.20416233
epoch: 31, weight : 1.639, loss =0.18497618
epoch: 41, weight : 1.653, loss =0.17402071
epoch: 51, weight : 1.664, loss =0.16388685
epoch: 61, weight : 1.674, loss =0.15434767
epoch: 71, weight : 1.684, loss =0.14536379
epoch: 81, weight : 1.693, loss =0.13690287
epoch: 91, weight : 1.702, loss =0.12893438
Prediction after training : f(5) = 9.403
