In [1]:
## Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_diabetes

In [2]:
## Load the diabetes dataset; return_X_y=True returns the feature matrix X and the target Y as two separate arrays
X,Y=load_diabetes(return_X_y=True)

In [3]:
## Check dimensions: X is (rows, features), Y holds one target value per row
print(X.shape)
print(Y.shape)

(442, 10)
(442,)


In [4]:
## Train Test Split: hold out 20% of the rows for evaluation.
## random_state=2 fixes the shuffle so the same split is produced on every run.
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2, random_state=2)

In [5]:
## Baseline: ordinary least squares Linear Regression from scikit-learn.
## Its coefficients, intercept and R2 score are the reference the gradient descent models below are compared with.
from sklearn.linear_model import LinearRegression
LR=LinearRegression()

## Fit on the training split only
LR.fit(X_train,Y_train)

## Print coefficients (one per feature) and the intercept
print("Coefficients=",LR.coef_)
print("Intercept=",LR.intercept_)

## Prediction on the held-out test rows
Y_pred=LR.predict(X_test)

## Metrics: MAE and MSE on the test set; RMSE is the square root of MSE
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
mae=mean_absolute_error(Y_test,Y_pred)
mse=mean_squared_error(Y_test,Y_pred)
rmse=np.sqrt(mse)
print("Mean Absolute Error=",mae)
print("Mean Squared Error=",mse)
print("Root Mean Squared Error=",rmse)

## R2 score on the test set
score=r2_score(Y_test,Y_pred)
print("R2 Score=",score)


Coefficients= [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
Intercept= 151.88331005254167
Mean Absolute Error= 45.21303419046903
Mean Squared Error= 3094.4566715660626
Root Mean Squared Error= 55.627840795469155
R2 Score= 0.4399338661568968


Helper snippets: small demonstrations of the building blocks used when writing Mini-Batch Gradient Descent from scratch.

In [6]:
import random
## random.sample draws 10 distinct integers (no repeats) from range(1,100), i.e. from 1 to 99.
## No seed is set, so the values differ on every run.
random.sample(range(1,100),10)

[71, 55, 97, 65, 40, 4, 44, 76, 9, 18]

In [7]:
## Indexing X_train with a list of 10 row positions returns those 10 rows (all columns) as one mini-batch
X_train[[73, 68, 69, 51, 41, 47, 83, 86, 34, 27]]

array([[-0.02730979,  0.05068012, -0.02345095, -0.01599898,  0.01356652,
         0.0127778 ,  0.02655027, -0.00259226, -0.01090325, -0.02178823],
       [ 0.01264814,  0.05068012, -0.07195249, -0.04698463, -0.05110326,
        -0.09713731,  0.11859122, -0.0763945 , -0.02029232, -0.03835666],
       [ 0.04897352,  0.05068012, -0.03099563, -0.04929134,  0.0493413 ,
        -0.00413221,  0.13331777, -0.05351581,  0.02131129,  0.01963284],
       [ 0.04897352,  0.05068012,  0.12313149,  0.0838437 , -0.10476542,
        -0.10089509, -0.06917231, -0.00259226,  0.03664373, -0.03007245],
       [-0.00188202,  0.05068012, -0.02452876,  0.05285804,  0.02732605,
         0.03000097,  0.03023191, -0.00259226, -0.02139531,  0.03620126],
       [-0.08906294, -0.04464164, -0.01159501, -0.03665608,  0.01219057,
         0.02499059, -0.03603757,  0.03430886,  0.02268774, -0.00936191],
       [ 0.00175052,  0.05068012, -0.00512814, -0.01255612, -0.01532849,
        -0.01383982,  0.00814208, -0.03949338

In [8]:
## Predictions for the 10-row batch using all-ones coefficients and a zero intercept,
## the same starting values MBGDRegressor.fit initialises with
demo_Y_hat=np.dot(X_train[[73, 68, 69, 51, 41, 47, 83, 86, 34, 27]],np.ones(X_train.shape[1]))+0
demo_Y_hat

array([ 0.00153126, -0.22030171,  0.18532183,  0.03577504,  0.17690001,
       -0.13317739, -0.09920573, -0.08737999,  0.42273183, -0.55162796])

In [9]:
## Residuals (actual - predicted) for the same 10 rows; the gradient formulas are built from this quantity
Y_train[[73, 68, 69, 51, 41, 47, 83, 86, 34, 27]]-demo_Y_hat

array([ 70.99846874,  77.22030171, 101.81467817, 280.96422496,
        64.82309999, 206.13317739,  92.09920573,  94.08737999,
       185.57726817,  55.55162796])

In [10]:
## Mini Batch Gradient Descent 
class MBGDRegressor:
    """Linear regression trained with mini-batch gradient descent.

    Parameters
    ----------
    batch_size : int
        Number of rows sampled (without replacement) for every update.
        Values larger than the number of training rows are clamped to the
        number of rows, which turns each update into a full-batch step.
    epochs : int, default=10
        Number of outer passes. Each pass performs
        ``n_samples // batch_size`` updates on independently sampled batches.
    learning_rate : float, default=0.01
        Step size applied to both the intercept and the coefficients.
    random_state : int or None, default=None
        Seed for the batch sampler. ``None`` keeps the historical behaviour
        of drawing from the global ``random`` module state.

    Attributes
    ----------
    coefficient_ : numpy.ndarray or None
        Learned weights, one per feature (``None`` before ``fit``).
    intercept_ : float or None
        Learned bias term (``None`` before ``fit``).
    """

    ## Constructor
    def __init__(self,batch_size,epochs=10,learning_rate=0.01,random_state=None):
        self.coefficient_=None
        self.intercept_=None
        self.lr=learning_rate
        self.epochs=epochs
        self.batch_size=batch_size
        self.random_state=random_state

    ## Fit Method
    def fit(self,X_train,Y_train):
        """Learn ``coefficient_`` and ``intercept_`` from the training data.

        Prints the learned coefficients and intercept when training ends.

        Raises
        ------
        ValueError
            If the training data is empty, if X_train and Y_train have a
            different number of rows, or if ``batch_size`` is smaller than 1.
        """
        ## Imported here so the class does not rely on another cell having imported it
        import random

        ## Work on plain arrays so positional row indexing also works for lists / DataFrames
        X_train=np.asarray(X_train)
        Y_train=np.asarray(Y_train)
        n_samples=X_train.shape[0]

        if n_samples==0:
            raise ValueError("X_train must contain at least one row")
        if Y_train.shape[0]!=n_samples:
            raise ValueError(
                "X_train and Y_train must have the same number of rows, got %d and %d"
                % (n_samples,Y_train.shape[0])
            )
        if self.batch_size<1:
            raise ValueError("batch_size must be a positive integer, got %r" % (self.batch_size,))

        ## An oversized batch would give n_samples // batch_size == 0 updates and silently
        ## leave the model at its initial values, so clamp it to the data size instead
        batch_size=min(self.batch_size,n_samples)

        ## Seeded private generator when requested, otherwise the global random module
        if self.random_state is None:
            sampler=random
        else:
            sampler=random.Random(self.random_state)

        ## Initialize the coefficients and intercept
        self.intercept_=0
        self.coefficient_=np.ones(X_train.shape[1])

        ## Per epoch, n_samples // batch_size updates are performed
        for _ in range(self.epochs):
            for _ in range(n_samples//batch_size):

                ## Pick batch_size distinct rows at random for this update
                idx=sampler.sample(range(0,n_samples),batch_size)
                X_batch=X_train[idx]

                ## Residuals of the current model on the batch only
                y_hat=np.dot(X_batch,self.coefficient_)+self.intercept_
                residual=Y_train[idx]-y_hat

                ## Intercept derivative (averaged over the batch) and update
                intercept_der=-2 * np.mean(residual)
                self.intercept_=self.intercept_- (self.lr * intercept_der)

                ## Coefficient derivative and update.
                ## NOTE(review): this gradient is summed over the batch while the intercept
                ## gradient is averaged, so the effective coefficient step is batch_size times
                ## larger. Kept as-is so existing hyperparameters give the same results.
                coef_der=-2 * np.dot(residual,X_batch)
                self.coefficient_=self.coefficient_ - (self.lr * coef_der)

        print("Coefficients=",self.coefficient_)
        print("Intercept=",self.intercept_)

    def predict(self,X_test):
        """Return predictions ``X_test @ coefficient_ + intercept_`` for every row of X_test."""
        return (np.dot(X_test,self.coefficient_)+self.intercept_)


In [11]:
## Instantiate the regressor: each batch holds one tenth of the training rows (floor division)
mgbd=MBGDRegressor(
    batch_size=X_train.shape[0]//10,
    epochs=100,
    learning_rate=0.01,
)

In [12]:
## Fit on the training split; fit() prints the learned coefficients and intercept when it finishes
mgbd.fit(X_train,Y_train)

Coefficients= [  28.35517684 -139.44037413  447.45171237  293.73738985  -18.4971381
  -86.25996493 -192.05144869  114.89094033  413.11106199  110.99457108]
Intercept= 150.9176885862555


In [13]:
## Predict on the held-out test rows.
## Note: this rebinds Y_pred, replacing the LinearRegression predictions stored earlier.
Y_pred=mgbd.predict(X_test)

In [14]:
## R2 score of the from-scratch mini-batch model on the test set
print("Score=",r2_score(Y_test,Y_pred))

Score= 0.45315830378080424


Compare these coefficients and the intercept with the LinearRegression outputs above.

Using Scikit Learn 

In [27]:
## scikit-learn's SGDRegressor with a constant learning rate: the step size stays at eta0=0.3 for every update
from sklearn.linear_model import SGDRegressor
sgd=SGDRegressor(learning_rate='constant',eta0=0.3)

In [28]:
## SGDRegressor has no built-in mini-batch option, so mini-batches are emulated by
## calling partial_fit on a random subset of rows
batch_size=35

## 100 partial_fit calls, each on one freshly sampled batch of 35 rows.
## This is 100 batch updates in total, not 100 full passes over X_train.
for i in range(100):
    idx=random.sample(range(X_train.shape[0]),batch_size)

    ## partial_fit trains incrementally on just the rows passed in.
    ## NOTE(review): per the scikit-learn docs it runs one epoch of per-sample SGD over
    ## those rows, not a single averaged batch step; confirm this is the intended comparison.
    sgd.partial_fit(X_train[idx],Y_train[idx])

In [29]:
## Coefficients learned by SGDRegressor (one per feature)
sgd.coef_

array([   2.92059056, -172.25133457,  507.28875905,  296.73674006,
        -66.8030559 , -116.34930118, -234.42659927,  122.1965356 ,
        414.75680434,   70.01568833])

In [30]:
## Intercept learned by SGDRegressor (returned as a one-element array)
sgd.intercept_

array([165.22649579])

In [31]:
## Prediction on the held-out test rows (rebinds Y_pred again, now with the SGDRegressor output)
Y_pred=sgd.predict(X_test)

In [32]:
## R2 score of the SGDRegressor model on the test set
print("Score=",r2_score(Y_test,Y_pred))

Score= 0.4312663909897759
