In [1]:
from sklearn.datasets import load_diabetes

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Fetch the diabetes data already split into a feature matrix and a target vector.
X, y = load_diabetes(return_X_y=True)

# Show the feature-matrix shape and the target shape, one per line.
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [3]:
# Keep 20% of the rows aside for evaluation; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [4]:
# Reference model: scikit-learn's LinearRegression fitted on the training split,
# used as the target that the gradient-descent estimates are compared against.
reg = LinearRegression()
reg.fit(X_train, y_train)

In [5]:
# Reference coefficients followed by the reference intercept, one per line.
print(reg.coef_, reg.intercept_, sep="\n")

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


In [22]:
class batch_gradient_descent:
  """Linear regression fitted with full-batch gradient descent on mean squared error.

  Every epoch uses the whole training set to compute a single gradient, so
  each update moves consistently towards the minimum of the loss curve. The
  trade-off is that the complete feature matrix must be held in memory to
  compute the predictions, which may not be feasible for very large datasets.

  Parameters
  ----------
  learning_rate : float, default=0.01
    Step size applied to the gradient on every update.
  epochs : int, default=100
    Number of passes over the training set (one parameter update per pass).

  Attributes
  ----------
  coeff_ : numpy.ndarray of shape (n_features,) or None
    Learned feature weights; ``None`` until ``fit`` has been called.
  intercept_ : float or None
    Learned bias term; ``None`` until ``fit`` has been called.
  """

  def __init__(self,learning_rate= 0.01,epochs=100):
    self.coeff_ = None
    self.intercept_ = None
    self.learning_rate = learning_rate
    self.epochs = epochs

  def fit(self, X_train, y_train):
    """Estimate the intercept and coefficients by batch gradient descent.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
      Training features. Anything ``numpy.asarray`` can convert is accepted.
    y_train : array-like of shape (n_samples,) or (n_samples, 1)
      Training targets. A single-column 2-D target is flattened.

    Returns
    -------
    batch_gradient_descent
      The fitted instance itself, so calls can be chained.

    Raises
    ------
    ValueError
      If ``X_train`` is not 2-D, or ``y_train`` does not provide exactly one
      target value per row of ``X_train``.
    """
    X = np.asarray(X_train, dtype=float)
    y = np.asarray(y_train, dtype=float)

    if X.ndim != 2:
      raise ValueError(
        "X_train must be 2-D (n_samples, n_features); got shape %s" % (X.shape,)
      )

    # A column-vector target would broadcast against the 1-D predictions into
    # an (n_samples, n_samples) matrix and silently corrupt the gradients.
    if y.ndim == 2 and y.shape[1] == 1:
      y = y.ravel()
    if y.ndim != 1 or y.shape[0] != X.shape[0]:
      raise ValueError(
        "y_train must hold one target per row of X_train; got X %s and y %s"
        % (X.shape, y.shape)
      )

    n_samples = X.shape[0]

    # Starting point of the descent: zero intercept, all-ones coefficients.
    self.intercept_ = 0
    self.coeff_ = np.ones(X.shape[1])

    for _ in range(self.epochs):
      # Both gradients are evaluated at the same (pre-update) parameters.
      y_hat = np.dot(X, self.coeff_) + self.intercept_
      residual = y - y_hat

      # Partial derivatives of the mean squared error; the dot product
      # vectorises the per-feature sums over all samples.
      intercept_der = -2 * np.mean(residual)
      coeff_der = -2 * np.dot(residual, X) / n_samples

      self.intercept_ = self.intercept_ - self.learning_rate * intercept_der
      self.coeff_ = self.coeff_ - self.learning_rate * coeff_der

    return self

  def predict(self,X_test):
    """Return the model's predictions for ``X_test``.

    Parameters
    ----------
    X_test : array-like of shape (n_samples, n_features)
      Samples to predict for.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
      ``X_test @ coeff_ + intercept_``.

    Raises
    ------
    RuntimeError
      If called before ``fit``.
    """
    if self.coeff_ is None or self.intercept_ is None:
      raise RuntimeError("batch_gradient_descent is not fitted yet; call fit() first")
    return np.dot(np.asarray(X_test, dtype=float), self.coeff_) + self.intercept_



In [23]:
# Fit the gradient-descent model with its default hyperparameters.
bgdr = batch_gradient_descent()
bgdr.fit(X_train, y_train)

# Learned coefficients and intercept, for comparison with the reference model.
print(bgdr.coeff_, bgdr.intercept_)

[ 2.63772241  1.19060622  5.06046267  4.31375004  2.44715577  1.99246132
 -1.48838841  3.60218122  5.24193975  3.51394813] 130.52896930987225


In [44]:
# Experiment: 20 epochs with a learning rate of 0.1.
bgdr = batch_gradient_descent(learning_rate=0.1, epochs=20)
bgdr.fit(X_train, y_train)

# R^2 on the held-out split (displayed as the cell result).
y_pred = bgdr.predict(X_test)
r2_score(y_test, y_pred)

0.009992790420697673

In [48]:
# Experiment: same 20 epochs, but with a larger learning rate of 0.5.
bgdr = batch_gradient_descent(learning_rate=0.5, epochs=20)
bgdr.fit(X_train, y_train)

# R^2 on the held-out split (displayed as the cell result).
y_pred = bgdr.predict(X_test)
r2_score(y_test, y_pred)

0.10403699428543622

In [40]:
# Experiment: a long run of 1000 epochs at learning rate 0.5.
bgdr = batch_gradient_descent(learning_rate=0.5, epochs=1000)
bgdr.fit(X_train, y_train)

# Learned coefficients and intercept after the long run.
print(bgdr.coeff_, bgdr.intercept_)

[  14.38990585 -173.7235727   491.54898524  323.91524824  -39.32648042
 -116.01061213 -194.04077415  103.38135565  451.63448787   97.57218278] 152.01351687661833


In [41]:
# Score the most recently fitted gradient-descent model on the held-out split.
y_pred = bgdr.predict(X_test)
r2_score(y_true=y_test, y_pred=y_pred)

0.4534503034722803