Sources:
https://donaldpinckney.com/books/pytorch/book/ch2-linreg/2018-03-21-multi-variable.html
https://www.kaggle.com/code/joseguzman/multiple-regression-explained-with-pytorch

##### Importing basic libraries

In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
import random
random.seed(123)


##### Importing PyTorch libraries

In [11]:
import torch
import torch.optim as optim

##### Importing the data

In [12]:
ad_data = pd.read_csv("../../DATA/Advertising.csv")
ad_data = ad_data.drop('CompID', axis = 1)
ad_data.head()


Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [13]:

ad_data = ad_data.values
ad_train, ad_test = train_test_split(ad_data, test_size= 0.2)

In [14]:
D_train = torch.tensor(ad_train, dtype=torch.float)
D_test = torch.tensor(ad_test, dtype=torch.float)

In [15]:
x_train = D_train[:,0:2].t()
y_train = D_train[:,3].t()

x_test = D_test[:,0:2].t()
y_test = D_test[:,3].t()


In [16]:
n = 2

In [17]:
A = torch.randn((1, n), requires_grad=True)
b = torch.randn(1, requires_grad=True)

In [18]:
def model(x_input):
    return A.mm(x_input) + b

In [19]:
length = len(ad_data)
predictors = 3

def loss(y_predicted, y_target):
    return ((y_predicted - y_target)**2).sum()


In [20]:
optimizer = optim.Adam([A, b], lr=0.1)

In [21]:
for t in range(30):
    # Set the gradients to 0.
    optimizer.zero_grad()
    # Compute the current predicted y's from x_dataset
    y_train_predicted = model(x_train)
    # See how far off the prediction is
    current_loss = loss(y_train_predicted, y_train)
    RSE_loss = math.sqrt(current_loss/(length - predictors - 1))
    # Compute the gradient of the loss with respect to A and b.
    current_loss.backward()
    # Update A and b accordingly.
    optimizer.step()
    print(f"t = {t}, loss = {RSE_loss}, A = {A.detach().numpy()}, b = {b.item()}")

t = 0, loss = 298.6878988802526, A = [[-1.707698   -0.08027472]], b = 0.5740096569061279
t = 1, loss = 281.0637716782439, A = [[-1.6079022   0.01950449]], b = 0.6737990975379944
t = 2, loss = 263.48268444055293, A = [[-1.508466    0.11889311]], b = 0.7732170224189758
t = 3, loss = 245.97370286272474, A = [[-1.4095608   0.21770275]], b = 0.8720857501029968
t = 4, loss = 228.56901993708595, A = [[-1.3113742   0.31572512]], b = 0.9702097177505493
t = 5, loss = 211.30428296653147, A = [[-1.2141112   0.41273096]], b = 1.0673748254776
t = 6, loss = 194.2188379927395, A = [[-1.1179947  0.5084691]], b = 1.1633474826812744
t = 7, loss = 177.35609263216756, A = [[-1.0232655   0.60266584]], b = 1.257874608039856
t = 8, loss = 160.76394768417452, A = [[-0.93018216  0.6950251 ]], b = 1.350683569908142
t = 9, loss = 144.4951407595598, A = [[-0.83902043  0.7852287 ]], b = 1.4414830207824707
t = 10, loss = 128.60812875261811, A = [[-0.7500715   0.87293845]], b = 1.5299639701843262
t = 11, loss = 113.1

In [22]:
y_pred_test = model(x_test)
y_test

tensor([10.4000, 11.0000, 16.0000, 11.7000, 10.9000, 19.4000,  4.8000,  9.9000,
        17.3000, 11.4000, 14.7000, 22.4000, 15.9000,  5.7000, 10.5000, 20.2000,
        12.5000, 18.0000, 13.2000, 15.2000, 10.1000,  9.6000,  5.6000, 14.7000,
         8.7000, 24.7000, 21.8000, 16.7000,  3.2000, 19.7000, 14.1000,  9.7000,
         9.6000, 11.3000, 12.2000, 10.6000, 11.8000,  8.8000, 21.4000, 13.2000])