## Linear Regression from scratch with PyTorch

Credits: \
https://jovian.ai/aakashns/02-linear-regression

In [2]:
import numpy as np
import torch

In [4]:
# Input features: one row per sample, one column per feature.
X = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

# Show the matrix followed by its (rows, columns) shape.
print(X, X.shape, sep='\n')

[[ 73.  67.  43.]
 [ 91.  88.  64.]
 [ 87. 134.  58.]
 [102.  43.  37.]
 [ 69.  96.  70.]]
(5, 3)


In [5]:
# Regression targets: one row per sample, one column per target variable.
y = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

# Show the matrix followed by its (rows, columns) shape.
print(y, y.shape, sep='\n')

[[ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]]
(5, 2)


In [7]:
# Convert features and target to tensors.
# torch.as_tensor handles a NumPy array the same way torch.from_numpy does
# (same dtype, memory shared with the array), but it also accepts an existing
# tensor. X and y are rebound here from ndarray to Tensor, so with
# torch.from_numpy a second run of this cell raised a TypeError; with
# torch.as_tensor the cell can be re-run safely.
X = torch.as_tensor(X)
y = torch.as_tensor(y)
print('X:')
print(X)

print('y:')
print(y)

X:
tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
y:
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


### Linear regression model from scratch

In [8]:
# Problem dimensions are read off the data: rows of X are samples, columns are features.
n_samples, n_features = X.shape
print('n_samples:', n_samples, 'n_features:', n_features)

# One target variable per column of y.
n_targets = y.shape[1]
print('n_targets:', n_targets)

# Weights and biases, drawn from a standard normal distribution.
# w holds one row per target and one column per feature; b holds one entry per target.
# requires_grad=True makes autograd track them so loss.backward() can fill .grad.
w = torch.randn(n_targets, n_features, requires_grad=True)
b = torch.randn(n_targets, requires_grad=True)

print('w:', w, sep='\n')
print('b:', b, sep='\n')

n_samples: 5 n_features: 3
n_targets: 2
w:
tensor([[-0.1482,  1.3129,  2.6688],
        [ 1.0401, -0.9681,  0.6123]], requires_grad=True)
b:
tensor([-0.0477,  0.4939], requires_grad=True)


In [9]:
def model(x):
    """Linear model: return ``x @ w.T + b`` for a batch of inputs ``x``.

    Reads the notebook-level weight matrix ``w`` and bias vector ``b``.
    """
    # torch.matmul is the function form of the `@` operator; .t() transposes w.
    weighted_inputs = torch.matmul(x, w.t())
    return weighted_inputs + b

In [10]:
# Forward pass with the freshly initialised (random) weights and biases.
y_pred = model(X)
print(y_pred)

tensor([[191.8617,  37.8871],
        [272.8117,  49.1364],
        [317.7859,  -3.2318],
        [140.0414,  87.6130],
        [302.5880,  22.1816]], grad_fn=<AddBackward0>)


### Loss function

In [11]:
# MSE loss
def mse(t1, t2):
    """Mean squared error between two tensors.

    Squares the element-wise difference ``t1 - t2``, sums the squares and
    divides by the number of elements in the difference.
    """
    residual = t1 - t2
    squared_error_total = (residual * residual).sum()
    return squared_error_total / residual.numel()  ## .numel() is the element count

In [13]:
# Compute the MSE loss between the current predictions and the targets
loss = mse(y_pred, y)
print(loss)

tensor(18275.0742, grad_fn=<DivBackward0>)


### Compute gradients

In [14]:
# Compute gradients: backpropagate from the loss so that w.grad and b.grad
# are populated (both tensors were created with requires_grad=True)
loss.backward()

In [15]:
# Weights, followed by the gradient of the loss with respect to each weight
# (w.grad has the same shape as w)
print(w)
print(w.grad)

tensor([[-0.1482,  1.3129,  2.6688],
        [ 1.0401, -0.9681,  0.6123]], requires_grad=True)
tensor([[14095.7881, 15371.1406,  9597.2559],
        [-4086.7883, -6417.7656, -3501.2354]])


In [16]:
## PyTorch accumulates gradients: each call to .backward() adds the new values
## to whatever is already stored in .grad. Reset both gradients to zero here so
## the next backward pass starts from a clean slate instead of producing
## unexpected sums.

w.grad.zero_()
b.grad.zero_()

## Both gradients should now be all zeros.
print(w.grad, b.grad, sep='\n')

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


### Adjust weights and biases using gradient descent

In [22]:
lr = 1e-5  ## learning rate: step size of each gradient descent update

## Run 100 gradient descent steps on the full data set.
for iloop in range(100):
    ## Forward pass: predictions for the current weights, then their MSE loss.
    y_pred = model(X)
    loss = mse(y_pred, y)

    ## Backward pass: fills w.grad and b.grad.
    loss.backward()

    ## Parameter update. torch.no_grad() tells PyTorch not to track these
    ## in-place modifications of w and b for gradient computation.
    with torch.no_grad():
        ## Step against the gradient, then clear it: gradients accumulate
        ## across .backward() calls, so they must be zeroed before the next pass.
        w.sub_(lr * w.grad)
        w.grad.zero_()

        b.sub_(lr * b.grad)
        b.grad.zero_()

In [23]:
# Predictions from the last loop iteration (computed before that iteration's weight update)
y_pred

tensor([[ 55.5011,  75.1806],
        [ 90.6904, 104.7555],
        [102.0740, 115.8369],
        [ 12.4339,  64.9343],
        [121.7139, 110.0489]], grad_fn=<AddBackward0>)

In [24]:
# Target values, shown for comparison with the predictions
y

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])