In [1]:
import numpy as np
import torch

In [2]:
# Feature matrix: one row per sample, columns are
# (temperature, rainfall, humidity). Stored as float32.
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

In [3]:
# Target matrix: one row per sample, columns are
# (apple yield, orange yield). Stored as float32.
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

In [4]:
# Convert inputs and targets to tensors.
# torch.as_tensor wraps a NumPy array without copying (as from_numpy does)
# and passes an existing tensor straight through, so re-running this cell
# after `inputs`/`targets` have already been converted does not raise.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [5]:
# Example of how weights and biases will work

# Seed the global RNG so the random initial parameters (and every number
# derived from them further down) are identical on each Restart & Run All.
torch.manual_seed(42)

# Weights and biases
# w: one row per target variable, one column per input feature -> (2, 3)
# b: one entry per target variable -> (2,)
# requires_grad=True asks autograd to track operations on them so that
# gradients can be computed later with backward().
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-0.2511,  1.1549,  0.6977],
        [-1.5681, -0.5041,  2.4710]], requires_grad=True)
tensor([ 0.3753, -0.2572], requires_grad=True)


In [6]:
def model(x):
    """Linear model: multiply `x` by the transposed weights and add the bias.

    Reads the notebook-level tensors `w` and `b`.
    """
    weighted_sum = x.matmul(w.t())
    return weighted_sum + b

`@` represents matrix multiplication in PyTorch, and the `.t()` method returns the transpose of a tensor, so `model` computes `x @ w.t() + b`.
The matrix obtained by passing the input data into the model is a set of predictions for the target variables.

In [7]:
# Generate predictions: run the inputs through the model with the
# randomly initialised weights and biases
preds = model(inputs)
print(preds)

tensor([[ 89.4253, -42.2495],
        [123.8108, -29.1700],
        [173.7565, -60.9148],
        [ 50.2370, -90.4501],
        [142.7619,  16.1205]], grad_fn=<AddBackward0>)


In [8]:
# Compare with targets: print the true values to set against the
# predictions shown above
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [9]:
# Mean Squared Error (MSE), worked out one step at a time:
# element-wise difference -> square -> sum -> divide by element count

diff = targets - preds
diff_sqr = diff * diff
diff_sqr.sum() / diff.numel()

tensor(10230.1465, grad_fn=<DivBackward0>)

In [10]:
# MSE Function

def mse(t1, t2):
    """Return the mean squared error between tensors `t1` and `t2`.

    The element-wise differences are squared, summed, and divided by the
    number of elements.
    """
    delta = t1 - t2
    squared_error = delta * delta
    return squared_error.sum() / delta.numel()

In [11]:
# Compute loss: same quantity as the step-by-step calculation above,
# now obtained through the mse() helper
loss = mse(preds, targets)
print(loss)

tensor(10230.1465, grad_fn=<DivBackward0>)


In [12]:
# Compute gradients: backpropagate from the loss; the results are stored
# in the .grad attribute of w and b (both created with requires_grad=True)
loss.backward()

In [13]:
# Gradients for weights: w.grad has the same shape as w, one gradient
# entry per weight
print(w)
print(w.grad)

tensor([[-0.2511,  1.1549,  0.6977],
        [-1.5681, -0.5041,  2.4710]], requires_grad=True)
tensor([[  3344.6785,   3675.1096,   2236.2314],
        [-11401.7734, -12063.4102,  -7264.3770]])


In [14]:
# Gradients for biases: b.grad has one entry per bias
print(b)
print(b.grad)

tensor([ 0.3753, -0.2572], requires_grad=True)
tensor([  39.7983, -133.3328])


In [15]:
# Reset the stored gradients to zero in place before the next backward
# pass, then show that both are cleared
b.grad.zero_()
w.grad.zero_()
print(w.grad, b.grad, sep='\n')

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [16]:
# Generate predictions (step 1 of a gradient-descent iteration); w and b
# have not been changed yet
preds = model(inputs)
print(preds)

tensor([[ 89.4253, -42.2495],
        [123.8108, -29.1700],
        [173.7565, -60.9148],
        [ 50.2370, -90.4501],
        [142.7619,  16.1205]], grad_fn=<AddBackward0>)


In [17]:
# Calculate the loss (step 2 of a gradient-descent iteration)
loss = mse(preds, targets)
print(loss)

tensor(10230.1465, grad_fn=<DivBackward0>)


In [18]:
# Compute gradients (step 3): backpropagate, then show the gradient of the
# loss with respect to the weights and the biases
loss.backward()
print(w.grad, b.grad, sep='\n')

tensor([[  3344.6785,   3675.1096,   2236.2314],
        [-11401.7734, -12063.4102,  -7264.3770]])
tensor([  39.7983, -133.3328])


In [19]:
# Adjust weights & reset gradients (steps 4 and 5).
# Each parameter moves a small step (learning rate 1e-5) against its
# gradient. no_grad keeps these in-place updates out of the autograd graph.
with torch.no_grad():
    w.sub_(w.grad * 1e-5)
    b.sub_(b.grad * 1e-5)
    w.grad.zero_()
    b.grad.zero_()

In [20]:
# Weights and biases after one gradient-descent update
print(w)
print(b)

tensor([[-0.2846,  1.1182,  0.6754],
        [-1.4540, -0.3835,  2.5437]], requires_grad=True)
tensor([ 0.3749, -0.2558], requires_grad=True)


In [21]:
# Calculate loss: recompute predictions with the updated w and b to see
# how the loss changed after a single update
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(6970.5703, grad_fn=<DivBackward0>)


# Train for multiple epochs
To reduce the loss further, we can repeat the process of adjusting the weights and biases using the gradients multiple times. Each iteration is called an epoch. Let's train the model for 100 epochs.

In [22]:
# Train for 100 epochs: each pass runs predict -> loss -> backward ->
# update -> reset on the full dataset
for epoch in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    # Parameter updates must not be tracked by autograd
    with torch.no_grad():
        w.sub_(w.grad * 1e-5)
        b.sub_(b.grad * 1e-5)
        w.grad.zero_()
        b.grad.zero_()

In [23]:
# Calculate loss: evaluate the model once more after the 100 training epochs
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(186.0139, grad_fn=<DivBackward0>)


In [24]:
# Predictions after training (bare expression so the notebook displays it)
preds

tensor([[ 56.9637,  67.7335],
        [ 81.0730, 113.3618],
        [121.5969, 108.2334],
        [ 19.4477,  23.7435],
        [100.9748, 148.9128]], grad_fn=<AddBackward0>)

In [25]:
# Targets, shown for comparison with the predictions above
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [26]:
import torch.nn as nn

In [27]:
# Larger dataset of 15 samples for the torch.nn version of the model.
# Feature columns: (temp, rainfall, humidity)
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
        [74, 66, 43],
        [91, 87, 65],
        [88, 134, 59],
        [101, 44, 37],
        [68, 96, 71],
        [73, 66, 44],
        [92, 87, 64],
        [87, 135, 57],
        [103, 43, 36],
        [68, 97, 70],
    ],
    dtype=np.float32,
)

# Target columns: (apples, oranges), one row per row of `inputs`
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
        [57, 69],
        [80, 102],
        [118, 132],
        [21, 38],
        [104, 118],
        [57, 69],
        [82, 100],
        [118, 134],
        [20, 38],
        [102, 120],
    ],
    dtype=np.float32,
)

# Rebind both names to torch tensors backed by the arrays above
inputs, targets = torch.from_numpy(inputs), torch.from_numpy(targets)

In [28]:
from torch.utils.data import TensorDataset

In [29]:
# Define dataset: TensorDataset pairs each row of `inputs` with the row of
# `targets` at the same index. Slicing it returns a tuple of
# (input rows, target rows), shown here for the first three samples.
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [30]:
from torch.utils.data import DataLoader

In [31]:
# Define data loader: serves the dataset in batches of 5 samples,
# drawn in shuffled order
batch_size = 5
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)

In [32]:
# Peek at one batch: print its inputs and targets, then stop iterating
for xb, yb in train_dl:
    print(xb, yb, sep='\n')
    break

tensor([[ 92.,  87.,  64.],
        [ 68.,  97.,  70.],
        [ 73.,  67.,  43.],
        [ 87., 135.,  57.],
        [ 91.,  88.,  64.]])
tensor([[ 82., 100.],
        [102., 120.],
        [ 56.,  70.],
        [118., 134.],
        [ 81., 101.]])


In [33]:
# Define model: a linear layer with 3 input features and 2 outputs, which
# creates and initialises its own weight matrix and bias vector.
# NOTE: this rebinds the name `model`, replacing the hand-written model()
# function defined earlier in the notebook.
model = nn.Linear(3, 2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.2971,  0.0325,  0.0083],
        [ 0.0371,  0.4147,  0.5142]], requires_grad=True)
Parameter containing:
tensor([0.4278, 0.1335], requires_grad=True)


In [34]:
# Parameters: the layer's weight matrix and bias vector, collected in a list
list(model.parameters())

[Parameter containing:
 tensor([[-0.2971,  0.0325,  0.0083],
         [ 0.0371,  0.4147,  0.5142]], requires_grad=True),
 Parameter containing:
 tensor([0.4278, 0.1335], requires_grad=True)]

In [35]:
# Generate predictions: the layer is called like a function on the whole
# dataset (it has not been trained at this point)
preds = model(inputs)
preds

tensor([[-18.7223,  52.7385],
        [-23.2121,  72.9139],
        [-20.5776,  88.7562],
        [-28.1682,  40.7759],
        [-16.3661,  78.5010],
        [-19.0519,  52.3609],
        [-23.2363,  73.0135],
        [-20.8664,  89.3075],
        [-27.8385,  41.1535],
        [-16.0607,  78.9781],
        [-18.7465,  52.8380],
        [-23.5417,  72.5363],
        [-20.5534,  88.6566],
        [-28.4736,  40.2987],
        [-16.0365,  78.8786]], grad_fn=<AddmmBackward>)

In [36]:
# Import nn.functional
import torch.nn.functional as F

In [37]:
# Define loss function: use the built-in mean squared error from
# torch.nn.functional in place of the hand-written mse() above
loss_fn = F.mse_loss

In [38]:
# Loss of the freshly initialised linear layer on the full dataset,
# before any training
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(5717.0229, grad_fn=<MseLossBackward>)


In [39]:
# Define optimizer: stochastic gradient descent over the model's parameters
# with learning rate 1e-5 (the same step size as the manual updates earlier)
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

In [40]:
# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl, log_every=10):
    """Train `model` by mini-batch gradient descent.

    Args:
        num_epochs: Number of passes over `train_dl`.
        model: Callable that maps a batch of inputs to predictions; its
            parameters are expected to be the ones managed by `opt`.
        loss_fn: Callable `(pred, target)` returning a scalar tensor that
            supports `.backward()` and `.item()`.
        opt: Optimizer object providing `step()` and `zero_grad()`.
        train_dl: Iterable yielding `(inputs, targets)` batches.
        log_every: Print progress every `log_every` epochs. Defaults to 10,
            the interval that was previously hard-coded. Pass 0 or None to
            turn progress printing off.

    Returns:
        None. Parameters are updated in place through `opt`.

    Note:
        The reported loss is that of the last batch processed in the epoch,
        not an average over the epoch, so it may fluctuate between reports.
    """
    # Discard gradients left over from backward() calls made before fit();
    # otherwise they would be added to the first batch's gradients and
    # distort the first update.
    opt.zero_grad()

    # Repeat for given number of epochs
    for epoch in range(num_epochs):

        # Stays None when train_dl yields no batches, so the progress line
        # below is skipped instead of failing on an undefined `loss`.
        loss = None

        # Train with batches of data
        for xb, yb in train_dl:

            # 1. Generate predictions
            pred = model(xb)

            # 2. Calculate loss
            loss = loss_fn(pred, yb)

            # 3. Compute gradients
            loss.backward()

            # 4. Update parameters using gradients
            opt.step()

            # 5. Reset the gradients to zero
            opt.zero_grad()

        # Print the progress
        if loss is not None and log_every and (epoch + 1) % log_every == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [41]:
# Train the linear layer for 100 epochs on the batched data
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 152.3046
Epoch [20/100], Loss: 114.9523
Epoch [30/100], Loss: 57.2235
Epoch [40/100], Loss: 42.5415
Epoch [50/100], Loss: 41.7527
Epoch [60/100], Loss: 31.8557
Epoch [70/100], Loss: 15.5527
Epoch [80/100], Loss: 14.9745
Epoch [90/100], Loss: 20.5760
Epoch [100/100], Loss: 21.3885
