# Linear Regression using PyTorch

In [97]:
# import jovian
# jovian.commit()

In [98]:
import numpy as np
import torch

Randomly intializing weight and bias

In [99]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [100]:
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

Convert numpy array to pytorch's tensors

In [101]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [102]:
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


### Linear Regression from scratch

###### Y_pred = w * x *+b

In [103]:
w = torch.randn(2,3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
# Weights and biases
print(w)
print(b)

tensor([[ 0.0899, -0.7145,  1.0041],
        [ 0.5587,  0.4228,  0.4967]], requires_grad=True)
tensor([-1.2347,  0.4867], requires_grad=True)


In [104]:
def model(x):
    return x @ w.t() + b

In [105]:
preds = model(inputs)
preds 

tensor([[  0.6362,  90.9555],
        [  8.3371, 120.3210],
        [-30.9151, 134.5561],
        [ 14.3679,  94.0288],
        [  6.6671, 114.3931]], grad_fn=<AddBackward0>)

In [106]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


Model predicts very inaccurate results because weight and bias are randomly initialized and we haven't used loss function yet..

### Using Loss Function

In [107]:
def MSE(t1, t2):
    diff = t1-t2
    return torch.sum(diff**2)/diff.numel()

In [108]:
loss = MSE(preds, targets)
print(loss)

tensor(4424.6206, grad_fn=<DivBackward0>)


### Compute gradients

In [109]:
loss.backward()

In [110]:
print(w)
print(w.grad)

tensor([[ 0.0899, -0.7145,  1.0041],
        [ 0.5587,  0.4228,  0.4967]], requires_grad=True)
tensor([[-6224.3857, -7953.6924, -4550.3662],
        [ 1784.4811,  1064.5515,   803.0931]])


In [111]:
w.grad.zero_()
b.grad.zero_()

print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


### Adjust weights and biases using gradient descent
We'll reduce the loss and improve our model using the gradient descent optimization algorithm, which has the following steps:

1.Generate predictions

2.Calculate the loss

3.Compute gradients w.r.t the weights and biases

4.Adjust the weights by subtracting a small quantity proportional to the gradient

5.Reset the gradients to zero

In [112]:
lr = 10^-5
with torch.no_grad():
    w -= w.grad * lr 
    b -= b.grad * lr 
    w.grad.zero_()
    b.grad.zero_()

1. We use torch.no_grad to indicate to PyTorch that we shouldn't track, calculate or modify gradients while updating the weights and biases. 

2. We multiply the gradients with a really small number (10^-5 in this case), to ensure that we don't modify the weights by a really large amount, since we only want to take a small step in the downhill direction of the gradient. This number is called the learning rate of the algorithm. 

3. After we have updated the weights, we reset the gradients back to zero, to avoid affecting any future computations.

In [113]:
print(w)
print(b)

tensor([[ 0.0899, -0.7145,  1.0041],
        [ 0.5587,  0.4228,  0.4967]], requires_grad=True)
tensor([-1.2347,  0.4867], requires_grad=True)


In [114]:
preds = model(inputs)
loss = MSE(preds, targets)
print(loss)

tensor(4424.6206, grad_fn=<DivBackward0>)


### Train for multiple epochs

In [115]:
epochs = 100
# Train for 100 epochs
for i in range(epochs):
    preds = model(inputs)
    loss = MSE(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [116]:
# Calculate loss
preds = model(inputs)
loss = MSE(preds, targets)
print(loss)

tensor(268.7842, grad_fn=<DivBackward0>)


In [117]:
preds

tensor([[ 62.0854,  74.5694],
        [ 91.2895, 100.9455],
        [ 90.1225, 125.4848],
        [ 51.4586,  61.0444],
        [ 99.7508, 105.6897]], grad_fn=<AddBackward0>)

In [119]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

# Linear regression using PyTorch built-ins

In [120]:
import torch.nn as nn

In [122]:
inputs = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], 
                   [102, 43, 37], [69, 96, 70], [73, 67, 43], 
                   [91, 88, 64], [87, 134, 58], [102, 43, 37], 
                   [69, 96, 70], [73, 67, 43], [91, 88, 64], 
                   [87, 134, 58], [102, 43, 37], [69, 96, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133], 
                    [22, 37], [103, 119], [56, 70], 
                    [81, 101], [119, 133], [22, 37], 
                    [103, 119], [56, 70], [81, 101], 
                    [119, 133], [22, 37], [103, 119]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

### Dataset and DataLoader
We'll create a TensorDataset, which allows access to rows from inputs and targets as tuples, and provides standard APIs for working with many different types of datasets in PyTorch.

In [123]:
from torch.utils.data import TensorDataset

In [133]:
train_ds = TensorDataset(inputs, targets)
train_ds[0:4]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.]]))

We'll also create a DataLoader, which can split the data into batches of a predefined size while training. It also provides other utilities like shuffling and random sampling of the data.

In [127]:
from torch.utils.data import DataLoader

In [129]:
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)
train_dl

<torch.utils.data.dataloader.DataLoader at 0x2037333ac48>

In [132]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [ 73.,  67.,  43.],
        [ 73.,  67.,  43.],
        [ 69.,  96.,  70.]])
tensor([[ 81., 101.],
        [119., 133.],
        [ 56.,  70.],
        [ 56.,  70.],
        [103., 119.]])


In each iteration, the data loader returns one batch of data, with the given batch size. If shuffle is set to True, it shuffles the training data before creating batches. Shuffling helps randomize the input to the optimization algorithm, which can lead to faster reduction in the loss.

### nn.Linear
Instead of initializing the weights & biases manually, we can define the model using the nn.Linear class from PyTorch, which does it automatically.

In [134]:
model = nn.Linear(3,2)
#(3,2) 3 features x1,x2,x3 and 2 targets y1,y2
model

Linear(in_features=3, out_features=2, bias=True)

In [138]:
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.1302, -0.3647,  0.3985],
        [ 0.1753,  0.4848, -0.5514]], requires_grad=True)
Parameter containing:
tensor([-0.2597, -0.4253], requires_grad=True)


In [140]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.1302, -0.3647,  0.3985],
         [ 0.1753,  0.4848, -0.5514]], requires_grad=True),
 Parameter containing:
 tensor([-0.2597, -0.4253], requires_grad=True)]

In [145]:
preds = model(inputs)
preds[:4]

tensor([[-17.0617,  21.1429],
        [-18.6953,  22.9000],
        [-37.3418,  47.8070],
        [-14.4741,  17.8999]], grad_fn=<SliceBackward>)

### Loss Function
Instead of defining a loss function manually, we can use the built-in loss function mse_loss.

In [147]:
import torch.nn.functional as F

In [148]:
loss_fn = F.mse_loss

In [150]:
loss = loss_fn(model(inputs),targets)
loss

tensor(8128.3765, grad_fn=<MseLossBackward>)

### Optimizer

In [151]:
opt = torch.optim.SGD(model.parameters(),lr=1e-5)

### Train the model
We are now ready to train the model. We'll follow the exact same process to implement gradient descent:

1. Generate predictions

2. Calculate the loss

3. Compute gradients w.r.t the weights and biases

4. Adjust the weights by subtracting a small quantity proportional to the gradient

5. Reset the gradients to zero

6. The only change is that we'll work batches of data, instead of processing the entire training data in every iteration. Let's define a utility function fit which trains the model for a given number of epochs.

In [159]:
def fit(epochs, model, loss_fn, opt, train_dl):
    
    for epoch in range(epochs):
        for xb, yb in train_dl:
            
            # 1. Generate predictions
            preds = model(xb)
            
            # 2. Calculate loss
            loss = loss_fn(preds, yb)
            
            # 3. Compute gradients
            loss.backward()
            
            # 4. update parameters using gradients
            opt.step()
            
            # 5. opt.zero_grad()
            
        if (epoch+1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))

In [166]:
fit(150, model, loss_fn, opt, train_dl)

Epoch [10/150], Loss: 469904.5000
Epoch [20/150], Loss: 518876.6562
Epoch [30/150], Loss: 200058.7344
Epoch [40/150], Loss: 66294.2031
Epoch [50/150], Loss: 3741.4695
Epoch [60/150], Loss: 64108.9883
Epoch [70/150], Loss: 63698.7695
Epoch [80/150], Loss: 112016.0469
Epoch [90/150], Loss: 265977.4375
Epoch [100/150], Loss: 478172.4062
Epoch [110/150], Loss: 539256.7500
Epoch [120/150], Loss: 476573.2500
Epoch [130/150], Loss: 428784.3438
Epoch [140/150], Loss: 336397.2188
Epoch [150/150], Loss: 99178.1953


In [167]:
preds = model(inputs)
preds

tensor([[398.2281, 430.0754],
        [467.4411, 503.3766],
        [186.5905, 262.6119],
        [839.1767, 860.5686],
        [185.6542, 220.4144],
        [398.2281, 430.0754],
        [467.4411, 503.3766],
        [186.5905, 262.6119],
        [839.1767, 860.5686],
        [185.6542, 220.4144],
        [398.2281, 430.0754],
        [467.4411, 503.3766],
        [186.5905, 262.6119],
        [839.1767, 860.5686],
        [185.6542, 220.4144]], grad_fn=<AddmmBackward>)

In [169]:
# Compare with targets
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [170]:
import jovian

<IPython.core.display.Javascript object>

In [171]:
jovian.commit()

<IPython.core.display.Javascript object>

[jovian] Attempting to save notebook..
[jovian] Please enter your API key ( from https://jovian.ml/ ):
API KEY: ········
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/harsht24/linear-regression-with-pytorch


'https://jovian.ml/harsht24/linear-regression-with-pytorch'