<a href="https://colab.research.google.com/github/danielpy108/MachineLearningAlgorithms/blob/master/00-LinearRegression-Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Regression problem

In [0]:
from sklearn.datasets import load_diabetes

In [3]:
training_data = load_diabetes()

X = training_data['data']
y = training_data['target']
features = training_data['feature_names']
description = training_data['DESCR']

print(f'Data samples: {X.shape[0]}')
print(f'Input features (dimenssion): {X.shape[1]}')
print(f"Features: {features}")
print(description)

Data samples: 442
Input features (dimenssion): 10
Features: ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - Age
      - Sex
      - Body mass index
      - Average blood pressure
      - S1
      - S2
      - S3
      - S4
      - S5
      - S6

Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of 

In [4]:
# Turn the data into a pandas DataFrame
df = pd.DataFrame(np.concatenate((X, y.reshape(-1, 1)), axis=1), columns=features+['Output'])
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,Output
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [0]:
# We need to separate the dataset into training and test samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, train_size=0.2)

In [0]:
# Now, the data will be turned into a pytorch tensor for the linear
# regression model
X_training_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_training_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

## Creating the linear model

In [0]:
class LinearRegression():
    def __init__(self, nsamples, nfeatures):
        self.W = torch.randn(nfeatures, requires_grad=True)
        self.b = torch.randn(1, requires_grad=True)
    
    def pred(self, x):
        return x@self.W + self.b
    
    def loss(self, y, y_pred):
        squared_error = (y - y_pred).pow(2)
        N = torch.numel(squared_error)
        return 1/(2*N) * torch.sum(squared_error)
    
    def fit(self, X, y, lr, epochs):
        Loss = []
        for e in range(epochs):
            y_pred = self.pred(X)
            loss = self.loss(y, y_pred)
            Loss.append(loss.item())
            loss.backward()
            with torch.no_grad():
                self.W -=  lr*self.W.grad
                self.b -= lr*self.b.grad
                self.W.grad.zero_()
                self.b.grad.zero_()
            print(f"Epoch: {e} - loss: {loss}")
        return Loss

In [0]:
torch.manual_seed(1234)

nsamples = X_training_tensor.shape[0]
nfeatures = X_training_tensor.shape[1]

lr = LinearRegression(nsamples, nfeatures)

In [22]:
Loss = lr.fit(X_training_tensor, y_training_tensor, lr=0.1, epochs=1000)

Epoch: 0 - loss: 2166.2802734375
Epoch: 1 - loss: 2165.0078125
Epoch: 2 - loss: 2163.7373046875
Epoch: 3 - loss: 2162.468017578125
Epoch: 4 - loss: 2161.201171875
Epoch: 5 - loss: 2159.935546875
Epoch: 6 - loss: 2158.672119140625
Epoch: 7 - loss: 2157.41015625
Epoch: 8 - loss: 2156.15087890625
Epoch: 9 - loss: 2154.893798828125
Epoch: 10 - loss: 2153.6376953125
Epoch: 11 - loss: 2152.384033203125
Epoch: 12 - loss: 2151.132080078125
Epoch: 13 - loss: 2149.88232421875
Epoch: 14 - loss: 2148.634033203125
Epoch: 15 - loss: 2147.3876953125
Epoch: 16 - loss: 2146.14306640625
Epoch: 17 - loss: 2144.900390625
Epoch: 18 - loss: 2143.659912109375
Epoch: 19 - loss: 2142.421142578125
Epoch: 20 - loss: 2141.184326171875
Epoch: 21 - loss: 2139.948974609375
Epoch: 22 - loss: 2138.715576171875
Epoch: 23 - loss: 2137.484375
Epoch: 24 - loss: 2136.25439453125
Epoch: 25 - loss: 2135.026611328125
Epoch: 26 - loss: 2133.80126953125
Epoch: 27 - loss: 2132.577392578125
Epoch: 28 - loss: 2131.35546875
Epoch: 

In [23]:
# Plotting the loss

figure = go.Figure(
    data = go.Scatter(
        x=Loss
    )
)

figure.show()

In [27]:
y_pred_test_tensor = lr.pred(X_test_tensor)
Loss_pred = lr.loss(y_test_tensor, y_pred_tensor)
Loss_pred
# figure = go.Figure(
#     data = go.Scatter(
#         x=Loss_pred
#     )
# )

# figure.show()

tensor(1869.9482, grad_fn=<MulBackward0>)

# Classification problem