In [1]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression

In [3]:
# Fetch the diabetes regression data as a (feature matrix, target vector) pair.
X, y = load_diabetes(return_X_y=True)

In [4]:
# Sanity-check the dimensions of the features and the target, one per line.
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [33]:
# N is the number of training rows; the gradient-descent updates below average over it.
print("X_train shape: ", X_train.shape)
print("N =  ", len(X_train))

X_train shape:  (353, 10)
N =   353


In [7]:
# Fit scikit-learn's LinearRegression on the training split as the reference model.
linreg = LinearRegression()
linreg.fit(X=X_train, y=y_train)

## Using Sklearn

In [8]:
# Show the reference model's learned parameters, separated by a blank line.
print("Coefficients: ", linreg.coef_, end="\n\n")
print("Intercepts: ", linreg.intercept_)

Coefficients:  [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]

Intercepts:  151.88331005254167


In [9]:
# R^2 of the scikit-learn model on the held-out split.
y_pred = linreg.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.4399338661568968

## Batch Gradient Descent

In [26]:
"""
1. Initial values: beta_0 = 0 and beta_1, beta_2, ..., beta_n = 1
   Here, beta_0 = intercept and beta_1, beta_2, ..., beta_n = coefficients.
   Fix the number of epochs and the learning rate.
2. Update the intercept and the coefficients on every epoch:
   example: (beta_0)new = (beta_0)old - learning_rate * (loss_slope)
"""
class BatchGradientDescentRegressor:
  """Linear regression fitted by full-batch gradient descent on the mean squared error.

  Every epoch uses all training rows to compute the gradient of
  mean((y - y_hat)^2) with respect to the intercept and the coefficients,
  then moves both one step of size `learning_rate` against that gradient.

  Parameters
  ----------
  learning_rate : float, default 0.1
      Step size applied to each gradient.
  epochs : int, default 1000
      Number of gradient steps (full passes over the training data).
  verbose : bool, default True
      When true, `fit` prints the learned intercept and coefficients.

  Attributes
  ----------
  coefficient : numpy.ndarray of shape (n_features,), or None before `fit`
  intercept : float (int 0 if `epochs` is 0), or None before `fit`
  """

  def __init__(self, learning_rate = 0.1, epochs = 1000, verbose = True):
    self.coefficient = None
    self.intercept = None
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.verbose = verbose

  def fit(self, X_train, y_train):
    """Learn the intercept and coefficients from the training data.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
    y_train : array-like of shape (n_samples,) or (n_samples, 1)

    Raises
    ------
    ValueError
        If `X_train` is not 2-D, `y_train` is not a vector, the inputs are
        empty, or their lengths disagree.
    """
    features = np.asarray(X_train, dtype=float)
    target = np.asarray(y_train, dtype=float)

    if features.ndim != 2:
      raise ValueError(
        "X_train must be 2-dimensional (n_samples, n_features); got %d dimension(s)" % features.ndim
      )
    # A column-vector target (n, 1) would broadcast against the (n,) predictions
    # into an (n, n) residual matrix and silently corrupt every gradient.
    if target.ndim == 2 and target.shape[1] == 1:
      target = target.ravel()
    if target.ndim != 1:
      raise ValueError("y_train must be a vector of shape (n_samples,)")

    n_samples = features.shape[0]
    if n_samples == 0:
      raise ValueError("cannot fit on an empty training set")
    if target.shape[0] != n_samples:
      raise ValueError(
        "X_train has %d rows but y_train has %d values" % (n_samples, target.shape[0])
      )

    # Step 1: start from intercept = 0 and every coefficient = 1.
    self.intercept = 0
    self.coefficient = np.ones(features.shape[1])

    for _ in range(self.epochs):
      # Both gradients use the residual of the parameters from the previous
      # epoch, so the intercept and the coefficients are updated simultaneously.
      residual = target - (np.dot(features, self.coefficient) + self.intercept)

      # Step 2.1: d(MSE)/d(intercept) = -2 * mean(residual)
      intercept_gradient = -2 * np.mean(residual)
      # Step 2.2: d(MSE)/d(coefficients) = -2 * (residual . X) / n_samples
      coefficient_gradient = -2 * np.dot(residual, features) / n_samples

      self.intercept = self.intercept - self.learning_rate * intercept_gradient
      self.coefficient = self.coefficient - self.learning_rate * coefficient_gradient

    if self.verbose:
      print("Intercept: ", self.intercept)
      print("Coefficient: ", self.coefficient)

  def predict(self, X_test):
    """Return `X_test . coefficient + intercept` for each row of `X_test`.

    Raises
    ------
    RuntimeError
        If called before `fit`.
    """
    if self.coefficient is None or self.intercept is None:
      raise RuntimeError("BatchGradientDescentRegressor must be fitted before calling predict")
    return np.dot(np.asarray(X_test, dtype=float), self.coefficient) + self.intercept

In [27]:
# Train the hand-written regressor with its default learning rate and epoch count.
batchgd = BatchGradientDescentRegressor()
batchgd.fit(X_train=X_train, y_train=y_train)

Intercept:  151.94042847773682
Coefficient:  [  62.27835432  -24.14017912  262.40285385  192.20751489   39.48809013
   10.26886323 -142.50597903  124.33312557  244.33510843  119.34350233]


In [28]:
# R^2 of the gradient-descent model on the held-out split (note: rebinds y_pred).
y_pred = batchgd.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.3971698388048742