In [4]:
## Linear Regression and Gradient Descent from scratch in PyTorch

## Problem : We’ll create a model that predicts crop yields for apples and oranges (target variables) by looking at the 
## average temperature, rainfall and humidity (input variables or features) in a region.

# yield_apple  = w11 * temp + w12 * rainfall + w13 * humidity + b1
# yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2 

# The learning part of linear regression is to figure out a set of weights w11, w12,... w23, b1 & b2 by looking at the 
# training data, to make accurate predictions for new data (i.e. to predict the yields for apples and oranges in a new 
# region using the average temperature, rainfall and humidity). 
# This is done by adjusting the weights slightly many times to make better predictions, using an optimization technique 
# called gradient descent.

# 1. Load data, normalise/pre-process, split data into train and test sets
# 2. Build model
# 3. Define loss function
# 4. Train model using data
# 5. Test model using data


In [2]:
!pip install torch torchvision

Collecting torch
  Obtaining dependency information for torch from https://files.pythonhosted.org/packages/e4/ae/2ad8820045b6631965750435f28583e80905b8273d57cf026163b51323ee/torch-2.1.2-cp311-cp311-win_amd64.whl.metadata
  Downloading torch-2.1.2-cp311-cp311-win_amd64.whl.metadata (26 kB)
Collecting torchvision
  Obtaining dependency information for torchvision from https://files.pythonhosted.org/packages/f9/e6/3c821e7417acd82df89e39f09156ce80d58817b5b4b1ac5453b522bc5dd4/torchvision-0.16.2-cp311-cp311-win_amd64.whl.metadata
  Downloading torchvision-0.16.2-cp311-cp311-win_amd64.whl.metadata (6.6 kB)
Downloading torch-2.1.2-cp311-cp311-win_amd64.whl (192.3 MB)
   ---------------------------------------- 0.0/192.3 MB ? eta -:--:--
   ---------------------------------------- 0.4/192.3 MB 7.6 MB/s eta 0:00:26
   ---------------------------------------- 0.8/192.3 MB 8.3 MB/s eta 0:00:24
   ---------------------------------------- 1.4/192.3 MB 8.8 MB/s eta 0:00:22
   ------------------------

In [6]:
import torch

# Smoke test for the installation: build a 5x3 tensor of random numbers and show it.
x = torch.rand((5, 3))
print(x)

# Final expression of the cell, so the notebook displays whether CUDA is usable.
torch.cuda.is_available()

tensor([[0.5242, 0.2806, 0.8229],
        [0.5670, 0.2719, 0.8882],
        [0.9350, 0.3254, 0.9793],
        [0.5371, 0.0568, 0.8435],
        [0.0927, 0.2770, 0.7519]])


False

In [7]:
import torch
import numpy as np

# Feature matrix: one row per region. Columns follow the order used in the model
# equations at the top of the notebook: temperature, rainfall, humidity.
# The NumPy array is converted straight to a PyTorch tensor so that `inputs`
# only ever names a tensor.
inputs = torch.from_numpy(
    np.array(
        [
            [73, 67, 43],
            [91, 88, 64],
            [87, 134, 58],
            [102, 43, 37],
            [69, 96, 70],
        ],
        dtype=np.float32,
    )
)

# Target matrix: crop yield per region, columns are [apples, oranges].
outputs = torch.from_numpy(
    np.array(
        [
            [56, 70],
            [81, 101],
            [119, 133],
            [22, 37],
            [103, 119],
        ],
        dtype=np.float32,
    )
)

print("Inputs ", inputs)
print("Outputs ", outputs)

# Model parameters: a 2x3 weight matrix (one row per target, one column per feature)
# and a bias with one entry per target. torch.randn draws each element from a normal
# distribution with mean 0 and standard deviation 1; requires_grad=True lets autograd
# compute gradients of the loss with respect to them.
# NOTE: no random seed is set, so w and b (and every result derived from them)
# differ from run to run.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)

print("w : ", w)
print("b : ", b)



Inputs  tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
Outputs  tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])
w :  tensor([[ 0.3456,  2.1329,  0.9594],
        [ 1.9714, -0.0978,  0.4064]], requires_grad=True)
b :  tensor([-1.4280, -0.2347], requires_grad=True)


In [8]:
# Linear model. torch.matmul is the function form of the @ (matrix multiplication)
# operator, and .t() returns the transpose of a tensor.
def model(x):
    """Return x multiplied by the transpose of the notebook-level weights `w`, plus the bias `b`."""
    weighted = torch.matmul(x, w.t())
    return weighted + b

In [11]:
# Run the model on the training inputs using the current parameters.
preds = model(inputs)

# Print the predictions first and the targets second, one after the other,
# so the two tensors can be compared by eye.
print(preds, outputs, sep="\n")

tensor([[207.9603, 154.6000],
        [279.1192, 196.5659],
        [370.0934, 181.7422],
        [161.0386, 211.6798],
        [294.3342, 154.8510]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [13]:
# Mean squared error (MSE) loss.
def mse(t1, t2):
    """Return the mean of the element-wise squared differences between t1 and t2.

    The squared differences are summed over every element (.sum) and divided by
    the number of elements (.numel).
    """
    residual = t1 - t2
    squared = residual * residual
    return squared.sum() / squared.numel()

In [19]:
# Training hyperparameters, named once so both parameter updates share the same values.
NUM_EPOCHS = 100
# The gradients are multiplied by this small number so that each update only takes a
# small step in the downhill direction of the gradient instead of changing the weights
# by a large amount. This number is called the learning rate of the algorithm.
LEARNING_RATE = 1e-5
# Progress is reported on the first epoch and then every LOG_EVERY epochs, rather than
# dumping the tensors on every single step.
LOG_EVERY = 10

for epoch in range(1, NUM_EPOCHS + 1):
    # Forward pass: predictions and their loss for the current parameters.
    preds = model(inputs)
    loss = mse(preds, outputs)

    # With PyTorch, we can automatically compute the gradient or derivative of the loss
    # with respect to the weights and biases, because they have requires_grad set to True.
    # The gradients are stored in the .grad property of the respective tensor.
    loss.backward()

    if epoch == 1 or epoch % LOG_EVERY == 0:
        print(f"epoch {epoch:3d}  loss {loss.item():.4f}")
        print(w)
        print(w.grad)

    # torch.no_grad tells PyTorch not to track the parameter updates themselves in the
    # autograd graph. After the update the gradients are reset to zero, because
    # backward() accumulates into .grad and stale values would affect the next epoch.
    with torch.no_grad():
        w -= w.grad * LEARNING_RATE
        b -= b.grad * LEARNING_RATE
        w.grad.zero_()
        b.grad.zero_()

In [24]:
# Recompute the predictions and the MSE loss with the current (updated) parameters
preds = model(inputs)
loss = mse(preds, outputs)
print(loss)

# Leave the predictions as the cell's final expression so the notebook displays them
preds

tensor(507.3384, grad_fn=<DivBackward0>)


tensor([[ 55.9347,  80.4089],
        [ 79.0063, 104.2362],
        [127.9459, 108.6103],
        [ 15.0913,  95.6274],
        [ 99.5641,  91.0895]], grad_fn=<AddBackward0>)

In [25]:
# Targets, displayed for comparison with the predictions shown by the previous cell
outputs

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])