Linear regression model : 

In [1]:
import numpy as np
import torch

In [2]:
inputs = np.array([[73, 67, 43],
[91, 88, 64],
[87, 134, 58],
[102, 43, 37],
[69, 96, 70]], dtype='float32')

In [3]:
targets = np.array([[56],
[81],
[119],
[22],
[103]], dtype='float32')

In [4]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [5]:
weight = torch.randn(2, 3, requires_grad=True)
bias = torch.randn(2, requires_grad=True)

In [6]:
inputs @ weight.t() + bias

tensor([[-29.3858, -31.7851],
        [-52.3963, -40.2111],
        [-49.8094, -29.9758],
        [ -1.8888, -51.1429],
        [-74.5067, -27.3179]], grad_fn=<AddBackward0>)

In [7]:
def model(inputs):
    return inputs @ weight.t() + bias

In [8]:
prediction = model(inputs)
print(prediction)

tensor([[-29.3858, -31.7851],
        [-52.3963, -40.2111],
        [-49.8094, -29.9758],
        [ -1.8888, -51.1429],
        [-74.5067, -27.3179]], grad_fn=<AddBackward0>)


Loss Function : 

In [9]:
diff0 = prediction - targets
torch.sum(diff0 * diff0) / diff0.numel()

tensor(15258.5986, grad_fn=<DivBackward0>)

In [10]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [11]:
loss = mse(prediction, targets)
print(loss)

tensor(15258.5986, grad_fn=<DivBackward0>)


Compute Gradients : 

In [12]:
loss.backward()
print(weight.grad)
print(bias.grad)

tensor([[ -9548.6504, -11629.6074,  -7061.8496],
        [ -9370.3857, -10433.3213,  -6400.2822]])
tensor([-117.7974, -112.2866])


Train the model using gradient descent : 

In [13]:
with torch.no_grad():
    weight -= weight.grad * 1e-5
    bias -= bias.grad * 1e-5
    weight.grad.zero_()
    bias.grad.zero_()

In [14]:
prediction = model(inputs)
loss = mse(prediction, targets)
print(loss)

tensor(10578.5254, grad_fn=<DivBackward0>)


In [15]:
for i in range(100):
    prediction = model(inputs)
    loss = mse(prediction, targets)
    loss.backward()
    with torch.no_grad():
        weight -= weight.grad * 1e-5
        bias -= bias.grad * 1e-5
        weight.grad.zero_()
        bias.grad.zero_()

In [16]:
prediction = model(inputs)
loss = mse(prediction, targets)
print(loss)

tensor(353.8745, grad_fn=<DivBackward0>)


In [17]:
prediction,targets

(tensor([[ 64.6806,  59.3969],
         [ 75.5647,  80.7154],
         [121.7154, 118.5022],
         [ 64.1421,  32.2821],
         [ 65.1230,  93.0872]], grad_fn=<AddBackward0>), tensor([[ 56.],
         [ 81.],
         [119.],
         [ 22.],
         [103.]]))