In [305]:
import numpy as np
import torch

In [306]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [307]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [308]:
inputs=torch.from_numpy(inputs)
targets=torch.from_numpy(targets)
print("Inputs:\n",inputs)
print("Targets:\n",targets)

Inputs:
 tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
Targets:
 tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [309]:
w=torch.randn(2,3,requires_grad=True)
b=torch.randn(2,requires_grad=True)
print(w)
print(b)

tensor([[-1.4336, -0.9474,  1.0756],
        [ 0.5355,  1.0091, -0.1944]], requires_grad=True)
tensor([0.7795, 0.3055], requires_grad=True)


In [310]:
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

In [311]:
inputs @ w.t() + b

tensor([[-121.0988,   98.6517],
        [-144.2113,  125.4008],
        [-188.5125,  170.8431],
        [-146.3887,   91.1303],
        [-113.7978,  120.5254]], grad_fn=<AddBackward0>)

In [312]:
def model(x):
    return x @ w.t() +b

In [313]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [314]:
preds=model(inputs)
print(preds)

tensor([[-121.0988,   98.6517],
        [-144.2113,  125.4008],
        [-188.5125,  170.8431],
        [-146.3887,   91.1303],
        [-113.7978,  120.5254]], grad_fn=<AddBackward0>)


In [315]:
def mse(t1,t2):
    diff = t1-t2
    return torch.sum(diff * diff)/ diff.numel()

In [316]:
loss=mse(preds,targets)
print(loss)

tensor(25778.4961, grad_fn=<DivBackward0>)


In [317]:
loss.backward()
print(w)
print(w.grad)

tensor([[-1.4336, -0.9474,  1.0756],
        [ 0.5355,  1.0091, -0.1944]], requires_grad=True)
tensor([[-18462.1445, -20188.8398, -12254.1455],
        [  2646.1892,   2322.3914,   1419.6353]])


In [318]:
print(b)
print(b.grad)

tensor([0.7795, 0.3055], requires_grad=True)
tensor([-219.0018,   29.3103])


In [319]:
print(w)
print(w.grad)

tensor([[-1.4336, -0.9474,  1.0756],
        [ 0.5355,  1.0091, -0.1944]], requires_grad=True)
tensor([[-18462.1445, -20188.8398, -12254.1455],
        [  2646.1892,   2322.3914,   1419.6353]])


In [320]:
with torch.no_grad():
    w -=w.grad*1e-5
    b -=b.grad*1e-5

In [321]:
w,b

(tensor([[-1.2490, -0.7455,  1.1982],
         [ 0.5091,  0.9859, -0.2086]], requires_grad=True),
 tensor([0.7817, 0.3052], requires_grad=True))

In [322]:
loss=mse(preds,targets)
print(loss)

tensor(25778.4961, grad_fn=<DivBackward0>)


In [323]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


### Train the model using gradient descent
As seen above, we reduce the loss and improve our model using the gradient descent optimization algorithm. Thus, we can train the model using the following steps:

1.Generate predictions

2.Calculate the loss

3.Compute gradients w.r.t the weights and biases

4.Adjust the weights by subtracting a small quantity proportional to the gradient

5.Reset the gradients to zero

Let's implement the above step by step

In [324]:
pred_f=model(inputs)
loss_f=mse(pred_f,targets)
loss_f.backward()

with torch.no_grad():
    w.grad -= w.grad*1e-5
    b.grad -= b.grad*1e-5
    w.grad.zero_()
    b.grad.zero_()


In [325]:
print(w)
print(b)

tensor([[-1.2490, -0.7455,  1.1982],
        [ 0.5091,  0.9859, -0.2086]], requires_grad=True)
tensor([0.7817, 0.3052], requires_grad=True)


In [326]:
pred_f=model(inputs)
loss_f=mse(pred_f,targets)
print(loss_f)

tensor(17465.2949, grad_fn=<DivBackward0>)


In [335]:
for i in range(0,1000):
    pred_f=model(inputs)
    loss_f=mse(pred_f,targets)
    loss_f.backward()

    with torch.no_grad():
        w -= w.grad*1e-5
        b -= b.grad*1e-5
        w.grad.zero_()
        b.grad.zero_()

In [336]:
print(w.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [337]:
print(w)
print(b)

tensor([[-0.4659,  0.7500,  0.9314],
        [-0.2431,  0.8956,  0.6517]], requires_grad=True)
tensor([0.7854, 0.3047], requires_grad=True)


In [338]:
pred_f=model(inputs)
loss_f=mse(pred_f,targets)
print(loss_f)

tensor(7.7754, grad_fn=<DivBackward0>)


In [339]:
pred_f

tensor([[ 57.0737,  70.5900],
        [ 83.9967,  98.7088],
        [114.7721, 136.9685],
        [ 19.9734,  38.1356],
        [105.8355, 115.1319]], grad_fn=<AddBackward0>)

In [340]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])