In [2]:
import torch
import torch.nn as nn
import numpy as np
from IPython.core.debugger import set_trace

1. Design the model (input size, output size, forward pass)
2. Construct loss and optimizer
3. Training loop
    - forward pass: compute prediction
    - backward pass: gradients
    - update weights

In [None]:
# Training data sampled from the target function f(x) = 2 * x.
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

# Single trainable weight, initialised to zero; requires_grad lets autograd track it.
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


def forward(x):
    """Return the model prediction ``w * x`` using the current global weight ``w``."""
    return w * x


learning_rate = 0.01
n_iters = 60

# Mean-squared-error criterion.
loss = nn.MSELoss()

# SGD over the single parameter `w`.
optimizer = torch.optim.SGD([w], lr=learning_rate)

for epoch in range(n_iters):
    # forward pass: compute prediction
    y_pred = forward(X)

    # loss: nn.MSELoss is called as (input, target), so the prediction goes first
    l = loss(y_pred, Y)

    # backward pass: gradient dl/dw is stored on w.grad
    l.backward()

    # update weight from the stored gradient
    optimizer.step()

    # clear the gradient so the next backward() does not accumulate onto it
    optimizer.zero_grad()

    # .item() extracts plain Python floats for formatting
    print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {l.item():.8f}')

# Inference only: no autograd graph is needed for the final prediction.
with torch.no_grad():
    print(f'Prediction after training f(5) = {forward(5).item():.3f}')

## Optimize multiple parameters in a custom model

In [17]:
# Training data sampled from the target function f(x) = w * x + b with w = 2 and b = 0.
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

# Two trainable scalars (slope and intercept), both initialised to zero.
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)
b = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


def forward(x):
    """Return the model prediction ``w * x + b`` using the current global ``w`` and ``b``."""
    return w * x + b


learning_rate = 0.01
n_iters = 500

# Mean-squared-error criterion.
loss = nn.MSELoss()

# SGD over both parameters.
optimizer = torch.optim.SGD([w, b], lr=learning_rate)

for epoch in range(n_iters):
    # forward pass: compute prediction
    y_pred = forward(X)

    # loss: nn.MSELoss is called as (input, target), so the prediction goes first
    l = loss(y_pred, Y)

    # backward pass: gradients dl/dw and dl/db
    l.backward()

    # update both parameters from their stored gradients
    optimizer.step()

    # clear the gradients so the next backward() does not accumulate onto them
    optimizer.zero_grad()

    # report every 50th epoch, labelled with its 1-based epoch number
    if (epoch + 1) % 50 == 0:
        print(f'epoch {epoch+1}: w = {w.item():.3f}, b = {b.item():.3f}, loss = {l.item():.8f}')

# Inference only: no autograd graph is needed for the final prediction.
with torch.no_grad():
    print(f'Prediction after training f(5) = {forward(5).item():.3f}')

epoch 50: w = 1.821, b = 0.525, loss = 0.04627926
epoch 100: w = 1.846, b = 0.452, loss = 0.03428968
epoch 150: w = 1.868, b = 0.389, loss = 0.02540650
epoch 200: w = 1.886, b = 0.335, loss = 0.01882461
epoch 250: w = 1.902, b = 0.288, loss = 0.01394782
epoch 300: w = 1.916, b = 0.248, loss = 0.01033446
epoch 350: w = 1.927, b = 0.213, loss = 0.00765719
epoch 400: w = 1.937, b = 0.184, loss = 0.00567349
epoch 450: w = 1.946, b = 0.158, loss = 0.00420371
epoch 500: w = 1.954, b = 0.136, loss = 0.00311468
Prediction after training f(5) = 9.905


## Use a linear layer (`nn.Linear`) as the model instead of a custom function

In [None]:
# Training data sampled from f(x) = 2 * x, stored as 2-D tensors:
# one row per sample, one column per feature.
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

# Held-out input used to probe the model before and after training.
X_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = X.shape
print(n_samples, n_features)

# Derive the layer sizes from the data instead of hard-coding them.
input_size = n_features
output_size = Y.shape[1]
ouput_size = output_size  # misspelled legacy name kept so existing references still resolve

# The linear layer owns the weight and bias and provides the forward pass.
# NOTE: nn.Linear initialises its parameters randomly, so printed values vary between runs.
model = nn.Linear(input_size, output_size)

# Handles to the layer's weight and bias, fetched once instead of on every logged epoch.
# The optimizer updates these tensors in place, so the handles stay current.
w, b = model.parameters()

# Prediction test
print(f'Prediction before training forward(5): {model(X_test).item():.3f}')

learning_rate = 0.01
n_iters = 200

# Mean-squared-error criterion.
loss = nn.MSELoss()

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(n_iters):
    # forward pass: compute prediction
    y_pred = model(X)

    # loss: nn.MSELoss is called as (input, target), so the prediction goes first
    l = loss(y_pred, Y)

    # backward pass: gradients for weight and bias
    l.backward()

    # update weights
    optimizer.step()

    # clear the gradients so the next backward() does not accumulate onto them
    optimizer.zero_grad()

    # report every 10th epoch, labelled with its 1-based number, so the final epoch is included
    if (epoch + 1) % 10 == 0:
        print(f'epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l.item():.8f}')

# Inference only: no autograd graph is needed for the final prediction.
with torch.no_grad():
    print(f'Prediction after training f(5) = {model(X_test).item():.3f}')