## PyTorch Tutorial 06 - Training Pipeline: Model, Loss, and Optimizer


In [None]:
# 1) Design model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training Loop 

#- forward pass: compute prediction
#- backward pass: gradients
#- update weights

#- We iterate above till done!

In [2]:
import torch
import torch.nn as nn  # we want to use functions from here


learning_rate = 0.01
n_iters = 50


X = torch.tensor([1,2,3,4], dtype = torch.float32)
Y = torch.tensor([2,4,6,8], dtype = torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad = True)


# model prediction
def forward(x):
    return w * x

# loss = mse
#def loss(y, y_predicted):
#    return ((y - y_predicted)**2).mean()

#We do not define manual as above but use inbuilt loss. This is callable function
loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # loss: is a callable function
    l = loss(Y, y_pred)
    
    #gradients = backward pass
    #dw = gradient(X,Y, y_pred)
    l.backward()  # dl/dw
    
    # update weights
    #with torch.no_grad():  
    #    w -=  learning_rate * w.grad
    # We do not need update weights manually - but directly using below
    optimizer.step()
        
    # zero gradients : to avoid accumulation of gradient to next pass
    #w.grad.zero_()
    optimizer.zero_grad()
    
    # print info
    if epoch %2 == 0: 
        print(f'epoch {epoch + 1}: w = {w:.3}f, loss = {l:.8}')
        
        
        

epoch 1: w = 0.3f, loss = 30.0
epoch 3: w = 0.772f, loss = 15.660188
epoch 5: w = 1.11f, loss = 8.1747169
epoch 7: w = 1.36f, loss = 4.2672529
epoch 9: w = 1.54f, loss = 2.2275321
epoch 11: w = 1.67f, loss = 1.1627856
epoch 13: w = 1.76f, loss = 0.60698116
epoch 15: w = 1.83f, loss = 0.3168478
epoch 17: w = 1.87f, loss = 0.16539653
epoch 19: w = 1.91f, loss = 0.086338058
epoch 21: w = 1.93f, loss = 0.045068897
epoch 23: w = 1.95f, loss = 0.023526315
epoch 25: w = 1.97f, loss = 0.012280837
epoch 27: w = 1.98f, loss = 0.0064106593
epoch 29: w = 1.98f, loss = 0.0033464201
epoch 31: w = 1.99f, loss = 0.0017468547
epoch 33: w = 1.99f, loss = 0.00091188005
epoch 35: w = 1.99f, loss = 0.00047600627
epoch 37: w = 2.0f, loss = 0.000248478
epoch 39: w = 2.0f, loss = 0.0001297064
epoch 41: w = 2.0f, loss = 6.7704947e-05
epoch 43: w = 2.0f, loss = 3.5343608e-05
epoch 45: w = 2.0f, loss = 1.8447146e-05
epoch 47: w = 2.0f, loss = 9.6315316e-06
epoch 49: w = 2.0f, loss = 5.0273838e-06


So it still works - when we made many inbuilt functions of nn.

Lets use other unbuilt functions for **forward** etc. SO WE HAVE THIS **TRAINING PIPELINE**

In [4]:
import torch
import torch.nn as nn  # we want to use functions from here


learning_rate = 0.01
n_iters = 50


# Rows is number of data, number of columns is number of feature. so we change the input
X = torch.tensor([[1],[2],[3],[4]], dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype = torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad = True)

X_test = torch.tensor([5], dtype = torch.float32)
n_samples, n_features = X.shape
print("n_samples:{} n_features:{}".format(n_samples, n_features))

# model prediction
#def forward(x):
#    return w * x
input_size = n_features
output_size = n_features
model = nn.Linear(input_size, output_size)


# loss = mse
#def loss(y, y_predicted):
#    return ((y - y_predicted)**2).mean()

#We do not define manual as above but use inbuilt loss. This is callable function
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    #y_pred = forward(X)
    y_pred = model(X)
    
    # loss: is a callable function
    l = loss(Y, y_pred)
    
    #gradients = backward pass
    #dw = gradient(X,Y, y_pred)
    l.backward()  # dl/dw
    
    # update weights
    #with torch.no_grad():  
    #    w -=  learning_rate * w.grad
    # We do not need update weights manually - but directly using below
    optimizer.step()
        
    # zero gradients : to avoid accumulation of gradient to next pass
    #w.grad.zero_()
    optimizer.zero_grad()
    
    # print info
    if epoch %2 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3}f, loss = {l:.8}')
        
print(f'Prediction after training f(5) = {model(X_test).item():.3f}')

n_samples:4 n_features:1
epoch 1: w = 0.0332f, loss = 31.665836
epoch 3: w = 0.504f, loss = 15.387104
epoch 5: w = 0.831f, loss = 7.5477362
epoch 7: w = 1.06f, loss = 3.7716708
epoch 9: w = 1.22f, loss = 1.9519744
epoch 11: w = 1.33f, loss = 1.0742285
epoch 13: w = 1.41f, loss = 0.65001935
epoch 15: w = 1.46f, loss = 0.44419229
epoch 17: w = 1.5f, loss = 0.34352911
epoch 19: w = 1.53f, loss = 0.29351708
epoch 21: w = 1.55f, loss = 0.26791045
epoch 23: w = 1.56f, loss = 0.25407252
epoch 25: w = 1.57f, loss = 0.24591872
epoch 27: w = 1.58f, loss = 0.24051957
epoch 29: w = 1.59f, loss = 0.2364641
epoch 31: w = 1.59f, loss = 0.23307304
epoch 33: w = 1.6f, loss = 0.23001896
epoch 35: w = 1.6f, loss = 0.22714424
epoch 37: w = 1.6f, loss = 0.22437237
epoch 39: w = 1.61f, loss = 0.22166666
epoch 41: w = 1.61f, loss = 0.2190091
epoch 43: w = 1.61f, loss = 0.21639097
epoch 45: w = 1.62f, loss = 0.21380782
epoch 47: w = 1.62f, loss = 0.21125706
epoch 49: w = 1.62f, loss = 0.20873772
Prediction af

Dummy example to show another way

In [7]:
class MyLinearRegression(nn.Module):
    def __init__(self, input_size, output_size):
        super(MyLinearRegression, self).__init__()
        # define layers
        self.lin = nn.Linear(input_size, output_size)
    
    def forward(self, x):
        return self.lin(x)

In [8]:
import torch
import torch.nn as nn  # we want to use functions from here


learning_rate = 0.01
n_iters = 50


# Rows is number of data, number of columns is number of feature. so we change the input
X = torch.tensor([[1],[2],[3],[4]], dtype = torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype = torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad = True)

X_test = torch.tensor([5], dtype = torch.float32)
n_samples, n_features = X.shape
print("n_samples:{} n_features:{}".format(n_samples, n_features))

# model prediction
#def forward(x):
#    return w * x
input_size = n_features
output_size = n_features
#model = nn.Linear(input_size, output_size)
model = MyLinearRegression(input_size, output_size)

# loss = mse
#def loss(y, y_predicted):
#    return ((y - y_predicted)**2).mean()

#We do not define manual as above but use inbuilt loss. This is callable function
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    #y_pred = forward(X)
    y_pred = model(X)
    
    # loss: is a callable function
    l = loss(Y, y_pred)
    
    #gradients = backward pass
    #dw = gradient(X,Y, y_pred)
    l.backward()  # dl/dw
    
    # update weights
    #with torch.no_grad():  
    #    w -=  learning_rate * w.grad
    # We do not need update weights manually - but directly using below
    optimizer.step()
        
    # zero gradients : to avoid accumulation of gradient to next pass
    #w.grad.zero_()
    optimizer.zero_grad()
    
    # print info
    if epoch %2 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3}f, loss = {l:.8}')
        
print(f'Prediction after training f(5) = {model(X_test).item():.3f}')

n_samples:4 n_features:1
epoch 1: w = -0.131f, loss = 49.147724
epoch 3: w = 0.455f, loss = 23.698429
epoch 5: w = 0.863f, loss = 11.444988
epoch 7: w = 1.15f, loss = 5.5449347
epoch 9: w = 1.34f, loss = 2.7038374
epoch 11: w = 1.48f, loss = 1.3355322
epoch 13: w = 1.58f, loss = 0.67633599
epoch 15: w = 1.64f, loss = 0.3585569
epoch 17: w = 1.69f, loss = 0.20516354
epoch 19: w = 1.72f, loss = 0.13092169
epoch 21: w = 1.74f, loss = 0.094793618
epoch 23: w = 1.76f, loss = 0.07702031
epoch 25: w = 1.77f, loss = 0.068089075
epoch 27: w = 1.78f, loss = 0.063419238
epoch 29: w = 1.79f, loss = 0.060805649
epoch 31: w = 1.79f, loss = 0.059186339
epoch 33: w = 1.8f, loss = 0.058050163
epoch 35: w = 1.8f, loss = 0.057150789
epoch 37: w = 1.8f, loss = 0.056369573
epoch 39: w = 1.8f, loss = 0.055649478
epoch 41: w = 1.8f, loss = 0.054962926
epoch 43: w = 1.81f, loss = 0.054296512
epoch 45: w = 1.81f, loss = 0.053643811
epoch 47: w = 1.81f, loss = 0.053001717
epoch 49: w = 1.81f, loss = 0.052368611