<a href="https://colab.research.google.com/github/dongminkim0220/pytorch_tutorial/blob/master/Backprop_Loss_Optimizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Backprop-Loss-Optimizer

PyTorch Tutorial 04 - Backpropagation - Theory With Example

https://www.youtube.com/watch?v=3Kb0QS6z7WA&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=4

PyTorch Tutorial 05 - Gradient Descent with Autograd and Backpropagation

https://www.youtube.com/watch?v=E-I2DNVzQLg&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=5

PyTorch Tutorial 06 - Training Pipeline: Model, Loss, and Optimizer

https://www.youtube.com/watch?v=VVDHU_TWwUg&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=6

In [44]:
import torch

In [45]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad = True)

forward pass and compute the loss

In [46]:
y_hat = w * x

In [47]:
loss = (y_hat - y)**2

In [48]:
loss

tensor(1., grad_fn=<PowBackward0>)

backward pass

In [49]:
loss.backward()

In [50]:
w.grad

tensor(-2.)

## gradient with numpy

In [51]:
import numpy as np

In [52]:
X = np.array([1, 2, 3, 4], dtype = np.float32)

Y = np.array([2, 4, 6, 8], dtype = np.float32)

w = 0.0

model prediction

In [53]:
def forward(x):
  return w * x

loss

In [54]:
def loss(y, y_predicted):
  return ((y_predicted-y)**2).mean()

gradient
MSE = 1/N * (w *x - y) ** 2
dJ / dw = 1 / N * 2(w*x-y)

In [55]:
def gradient(x, y, y_predicted):
  return np.dot(2 * x, y_predicted - y).mean()

before training

In [56]:
forward(5)

0.0

Training

In [57]:
learning_rate = 0.01
n_iters = 10

In [58]:
for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = forward(X)

  # loss
  l = loss(Y, y_pred)

  # gradients
  dw = gradient(X, Y, y_pred)

  # update weights
  w -= learning_rate * dw

  if epoch % 1 == 0:
    print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

epoch 1: w = 1.200, loss = 30.00000000
epoch 2: w = 1.680, loss = 4.79999924
epoch 3: w = 1.872, loss = 0.76800019
epoch 4: w = 1.949, loss = 0.12288000
epoch 5: w = 1.980, loss = 0.01966083
epoch 6: w = 1.992, loss = 0.00314574
epoch 7: w = 1.997, loss = 0.00050331
epoch 8: w = 1.999, loss = 0.00008053
epoch 9: w = 1.999, loss = 0.00001288
epoch 10: w = 2.000, loss = 0.00000206


after training

In [59]:
forward(5)

9.998951268196105

## gradient with torch

In [60]:
X = torch.tensor([1, 2, 3, 4], dtype = torch.float32)

Y = torch.tensor([2, 4, 6, 8], dtype = torch.float32)

w = torch.tensor(0.0, dtype = torch.float32, requires_grad=True)

In [61]:
learning_rate = 0.01
n_iters = 100

In [62]:
for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = forward(X)

  # loss
  l = loss(Y, y_pred)

  # gradients
  l.backward() # dl/dw 

  # update weights
  # weight update should not be part of grad
  with torch.no_grad():
    w -= learning_rate * w.grad

  # zero grad
  # w.grad is accumulated, so set it to zero
  w.grad.zero_()

  if epoch % 10 == 0:
    print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000


## steps
1) design models (input, output size, forward pass)

2) loss, optimizer

3) training loop

- forward: compute prediction
- backward: gradients
- update weights

In [97]:
import torch
import torch.nn as nn

In [105]:
X = torch.tensor([[1], [2], [3], [4]], dtype = torch.float32)

Y = torch.tensor([[2], [4], [6], [8]], dtype = torch.float32)

n_samples, n_features = X.shape

In [99]:
X_test = torch.tensor([5], dtype = torch.float32)

In [100]:
n_samples, n_features

(4, 1)

In [101]:
input_size = n_features
output_size = n_features

In [102]:
learning_rate = 0.01
n_iters = 100
loss = nn.MSELoss()

In [103]:
model = nn.Linear(input_size, output_size)
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [104]:
for epoch in range(n_iters):
  # prediction = forward pass
  y_pred = model(X)

  # loss
  l = loss(Y, y_pred)

  # gradients
  l.backward() # dl/dw 

  # update weights
  # weight update should not be part of grad
  optimizer.step()

  # zero grad
  # w.grad is accumulated, so set it to zero
  optimizer.zero_grad()

  if epoch % 10 == 0:
    [w, b] = model.parameters()
    print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

epoch 1: w = 1.155, loss = 10.25678253
epoch 11: w = 1.887, loss = 0.26645815
epoch 21: w = 2.004, loss = 0.00792098
epoch 31: w = 2.023, loss = 0.00117224
epoch 41: w = 2.025, loss = 0.00094132
epoch 51: w = 2.025, loss = 0.00088232
epoch 61: w = 2.024, loss = 0.00083086
epoch 71: w = 2.023, loss = 0.00078249
epoch 81: w = 2.023, loss = 0.00073695
epoch 91: w = 2.022, loss = 0.00069405


## with class def

In [126]:
X = torch.tensor([[1], [2], [3], [4]], dtype = torch.float32)

Y = torch.tensor([[2], [4], [6], [8]], dtype = torch.float32)

In [127]:
class LinearRegression(nn.Module):
  def __init__(self, input_dim, output_dim):
    super(LinearRegression, self).__init__()
    # define layers
    self.lin = nn.Linear(input_dim, output_dim)

  def forward(self, x):
    return self.lin(x)

model = LinearRegression(input_size, output_size)

In [128]:
n_samples, n_features = X.shape

In [129]:
n_samples, n_features

(4, 1)

In [130]:
iter = 100
learning_rate = 0.01

In [131]:
loss = nn.MSELoss()
optimizer = torch.optim.SGD(params = model.parameters(),lr = learning_rate)

In [132]:
for epoch in range(iter):
  y_pred = model(X)
  l = loss(Y, y_pred)
  l.backward()

  optimizer.step()
  optimizer.zero_grad()

  if epoch % 10 == 0:
    print(f"epoch: {epoch}, w: {w[0][0].item()}, loss: {l}")
  

epoch: 0, w: 2.021278142929077, loss: 37.851158142089844
epoch: 10, w: 2.021278142929077, loss: 0.994083046913147
epoch: 20, w: 2.021278142929077, loss: 0.03964261710643768
epoch: 30, w: 2.021278142929077, loss: 0.01413857564330101
epoch: 40, w: 2.021278142929077, loss: 0.012715504504740238
epoch: 50, w: 2.021278142929077, loss: 0.011959866620600224
epoch: 60, w: 2.021278142929077, loss: 0.011263318359851837
epoch: 70, w: 2.021278142929077, loss: 0.010607723146677017
epoch: 80, w: 2.021278142929077, loss: 0.009990283288061619
epoch: 90, w: 2.021278142929077, loss: 0.009408797137439251
