In [116]:
### General training pipeline

# 1) Design model (input / output size, forward pass (layers))
# 2) Construct loss and optimizer
# 3) Training loop
#  - forward pass: compute prediction
#  - backward pass: compute gradients
#  - update weights

In [117]:
# imports
import torch
import torch.nn as nn

In [118]:
# Toy problem: a simple linear function without bias,
#   f(x) = w * x
# where the true weight is w = 2.

# data: one sample per row, one feature per column
X = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)
Y = torch.tensor([[2.0], [4.0], [6.0], [8.0]], dtype=torch.float32)

# single input used to probe the model before and after training
X_test = torch.tensor([5.0], dtype=torch.float32)

n_samples, n_features = X.size()
n_samples, n_features

(4, 1)

In [119]:
# model
# Layer sizes are read from the data: one input per feature column of X and
# one output per target column of Y (both are 1 for this toy problem).
# Deriving output_size from Y, rather than from n_features, keeps the
# prediction shape equal to the target shape even if X gains more features.
input_size = n_features
output_size = Y.shape[1]

### in our case 1 linear layer is enough,
### so we can use it straight as a model
# model = nn.Linear(input_size, output_size)

# but in case our model is complex, we can build a class:
class LinearRegression(nn.Module):
    """Linear regression model wrapping a single ``nn.Linear`` layer.

    Args:
        input_dim: size of each input sample (``in_features`` of the layer).
        output_dim: size of each output sample (``out_features`` of the layer).
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # the only layer of the model
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.lin(x)
        return prediction

# build the model from the input/output sizes defined in this cell
model = LinearRegression(input_dim=input_size, output_dim=output_size)

In [120]:
# training params
learning_rate = 0.1

# loss (MSE)
loss = nn.MSELoss()

# optimizer (for weights' update): SGD over all parameters of the model
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [121]:
# Inference only: no_grad() avoids recording an autograd graph for this call.
with torch.no_grad():
    print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

Prediction before training: f(5) = -1.664


In [122]:
# Training
n_iters = 200

for epoch in range(n_iters):
    # prediction step = forward pass
    y_pred = model(X)

    # loss computation; the criterion is called as (input, target),
    # i.e. the model prediction first and the ground truth second
    l = loss(y_pred, Y)

    # gradients = backward pass
    l.backward() # dl/dw

    # update weights
    optimizer.step()

    # zeroing gradients so they do not accumulate into the next iteration
    optimizer.zero_grad()

    if epoch % 10 == 0:
        # NOTE: w is read after optimizer.step(), while l was computed before
        # it, so the logged weight is one update ahead of the logged loss.
        w, b = model.parameters()
        print(f'epoch {epoch}: w = {w[0][0].item():.3f}, loss = {l.item():.8f}')

epoch 0: w = 3.564, loss = 45.75347137
epoch 10: w = 2.086, loss = 0.02052142
epoch 20: w = 2.044, loss = 0.00288765
epoch 30: w = 2.032, loss = 0.00156950
epoch 40: w = 2.024, loss = 0.00085456
epoch 50: w = 2.017, loss = 0.00046529
epoch 60: w = 2.013, loss = 0.00025334
epoch 70: w = 2.009, loss = 0.00013794
epoch 80: w = 2.007, loss = 0.00007511
epoch 90: w = 2.005, loss = 0.00004089
epoch 100: w = 2.004, loss = 0.00002227
epoch 110: w = 2.003, loss = 0.00001212
epoch 120: w = 2.002, loss = 0.00000660
epoch 130: w = 2.002, loss = 0.00000359
epoch 140: w = 2.001, loss = 0.00000196
epoch 150: w = 2.001, loss = 0.00000107
epoch 160: w = 2.001, loss = 0.00000058
epoch 170: w = 2.000, loss = 0.00000032
epoch 180: w = 2.000, loss = 0.00000017
epoch 190: w = 2.000, loss = 0.00000009


In [123]:
# Inference only: no_grad() avoids recording an autograd graph for this call.
with torch.no_grad():
    print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')

Prediction after training: f(5) = 10.000
