In [1]:
import numpy as np
import torch

#### Define Inputs & Targets

In [2]:
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype = 'float32')

In [3]:
inputs.shape

(5, 3)

In [4]:
targets = np.array([[56, 70],
                   [81, 101],
                   [119, 133],
                   [22 , 37],
                   [103, 119]], dtype = 'float32')

In [5]:
targets.shape

(5, 2)

#### Convert to Tensors

In [6]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


#### Initilize / Define Weights & Biases

In [7]:
w = torch.randn(2 , 3 , requires_grad=True)
b = torch.randn(2 , requires_grad=True)
print(w)
print(b)

tensor([[ 2.4076, -1.1652,  1.1493],
        [-0.5530, -0.0310,  0.7046]], requires_grad=True)
tensor([1.9075, 0.4725], requires_grad=True)


#### Define the model 

In [8]:
def model(x):
    return x @ w.t() + b

#### generate predictions

In [9]:
preds = model(inputs)
preds

tensor([[149.0158, -11.6742],
        [192.0196,  -7.4824],
        [121.8938, -10.9221],
        [239.9050, -31.1954],
        [136.6268,   8.6633]], grad_fn=<ThAddBackward>)

#### MSE loss

In [10]:
def mse(t1 , t2):
    diff = (t1 - t2)
    return torch.sum(diff * diff) / diff.numel()

#### Compute Loss

In [11]:
loss = mse(preds , targets)
loss

tensor(12557.6514, grad_fn=<DivBackward0>)

#### Compute Gradients

In [12]:
loss.backward()

In [13]:
# check the gradients
w.grad , b.grad

(tensor([[ 8338.2510,  5797.5273,  4337.8267],
         [-8584.9014, -9565.7832, -5809.8296]]),
 tensor([  91.6922, -102.5222]))

#### Pytorch accumulates gradients so we need to set them to zero

In [14]:
w.grad.zero_()

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [15]:
w.grad.zero_()

tensor([[0., 0., 0.],
        [0., 0., 0.]])

#### Gradient Descent
1. Generate Predictions
2. Calculate loss
3. Update Weights/Bias
4. Clear out gradients

In [16]:
preds = model(inputs)

In [17]:
preds.shape , targets.shape

(torch.Size([5, 2]), torch.Size([5, 2]))

In [18]:
loss = mse(preds , targets)
loss

tensor(12557.6514, grad_fn=<DivBackward0>)

In [19]:
loss.backward()

In [20]:
# Adjust weights/Biases & reset gradients
with torch.no_grad(): # not to add these operations in the computation graph
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [21]:
preds = model(inputs)
loss = mse(preds,targets)

In [22]:
loss

tensor(9631.4922, grad_fn=<DivBackward0>)

#### Running through multiple epochs and checking the loss

In [23]:
for i in range(10):
    preds = model(inputs)
    loss = mse(preds , targets)
    print(f'Epochs : {i} , Loss : {loss.item()}')
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

Epochs : 0 , Loss : 9631.4921875
Epochs : 1 , Loss : 7645.03759765625
Epochs : 2 , Loss : 6291.8701171875
Epochs : 3 , Loss : 5365.6474609375
Epochs : 4 , Loss : 4727.3193359375
Epochs : 5 , Loss : 4283.177734375
Epochs : 6 , Loss : 3970.0703125
Epochs : 7 , Loss : 3745.442138671875
Epochs : 8 , Loss : 3580.608154296875
Epochs : 9 , Loss : 3456.239013671875


In [24]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [25]:
preds

tensor([[ 91.2726,  61.1941],
        [116.9977,  88.3527],
        [ 40.4137, 103.1320],
        [174.4621,  40.4954],
        [ 69.2815, 101.0230]], grad_fn=<ThAddBackward>)