In [23]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import random

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [2]:
# Load the diabetes regression dataset; return_X_y=True yields the
# (features, target) pair directly instead of a Bunch container.
X,y = load_diabetes(return_X_y=True)

In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=40,
)

In [5]:
# Sanity check: (rows, features) of the training features.
X_train.shape

(353, 10)

In [6]:
# Sanity check: (rows, features) of the held-out features.
X_test.shape

(89, 10)

In [7]:
# Sanity check: the training target length should match X_train's row count.
y_train.shape

(353,)

In [8]:
# Sanity check: the held-out target length should match X_test's row count.
y_test.shape

(89,)

## Training a linear regression model with the scikit-learn library first

In [9]:
# Baseline: scikit-learn's LinearRegression fitted on the training split,
# used as the reference score for the hand-written optimiser below.
lgr = LinearRegression()
lgr.fit(X=X_train, y=y_train)

In [10]:
# Predictions of the scikit-learn baseline on the held-out rows.
y_pred = lgr.predict(X_test)

In [12]:
# R^2 of the scikit-learn baseline on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.35116056151211594

## Training the same model with my own mini-batch gradient descent implementation

In [68]:
class MiniBatchGD:
    """Linear regression fitted with mini-batch gradient descent.

    Parameters
    ----------
    epochs : int
        Number of passes. Each pass performs ``n_samples // batch_size``
        parameter updates.
    learning_rate : float
        Step size used for both the intercept and the coefficient updates.
    batch_size : int
        Number of rows drawn (without replacement) for every update. A value
        larger than the training set is clamped to the training-set size when
        ``fit`` runs, so training never silently degenerates to zero updates.
    random_state : int or None, default None
        Seed for a private ``random.Random`` used to draw the batches, making
        ``fit`` reproducible. ``None`` draws from the global ``random`` module,
        exactly as before (seed it with ``random.seed`` if desired).

    Attributes
    ----------
    coefficient : numpy.ndarray or None
        One weight per feature; ``None`` until ``fit`` has been called.
    intercept : float or None
        Bias term; ``None`` until ``fit`` has been called.

    Notes
    -----
    The intercept gradient is the batch *mean* of the residuals, whereas the
    coefficient gradient is the batch *sum* of ``residual * x`` (it is not
    divided by the batch size). The effective coefficient step is therefore
    ``batch_size`` times larger than for a mean-squared-error loss. This
    scaling is deliberately left unchanged so that learning rates tuned
    against this class keep producing the same results.
    """

    def __init__(self, epochs, learning_rate, batch_size, random_state=None):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.random_state = random_state
        self.coefficient = None
        self.intercept = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from the training data.

        Parameters
        ----------
        X_train : array-like, 2-D
            Feature matrix, one row per sample.
        y_train : array-like, 1-D
            Target values, one per row of ``X_train``.

        Returns
        -------
        tuple
            ``(intercept, coefficient)`` after the final update.

        Raises
        ------
        ValueError
            If ``batch_size`` is smaller than 1, the training set is empty,
            or ``X_train`` and ``y_train`` have different numbers of rows.
        """
        # Positional indexing below requires arrays; this also lets callers
        # pass lists, DataFrames, or Series with a non-default index.
        features = np.asarray(X_train)
        targets = np.asarray(y_train)
        n_samples = features.shape[0]

        if self.batch_size < 1:
            raise ValueError("batch_size must be at least 1")
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if targets.shape[0] != n_samples:
            raise ValueError("X_train and y_train must have the same number of rows")

        self.intercept = 0
        self.coefficient = np.ones(features.shape[1])

        # Clamp so an oversized batch still yields one update per epoch
        # instead of zero.
        batch_size = min(self.batch_size, n_samples)
        updates_per_epoch = n_samples // batch_size

        # A private generator keeps seeded runs independent of global state;
        # without a seed fall back to the module-level generator.
        sampler = random if self.random_state is None else random.Random(self.random_state)

        for _ in range(self.epochs):
            for _ in range(updates_per_epoch):
                # Draw batch_size distinct row indices from the training set.
                rows = sampler.sample(range(n_samples), batch_size)
                batch = features[rows]

                y_hat = np.dot(batch, self.coefficient) + self.intercept
                residual = targets[rows] - y_hat

                # Both derivatives are evaluated at the same (pre-update)
                # parameters before either parameter is moved.
                intercept_derivative = (-2) * np.mean(residual)
                coefficient_derivative = (-2) * np.dot(residual, batch)

                self.intercept = self.intercept - self.learning_rate * intercept_derivative
                self.coefficient = self.coefficient - self.learning_rate * coefficient_derivative

        return self.intercept, self.coefficient

    def predict(self, X_test):
        """Return ``X_test @ coefficient + intercept`` for the fitted model."""
        return np.dot(X_test, self.coefficient) + self.intercept
                      

In [69]:
# Hyperparameters for the hand-written optimiser, spelled out by name.
own_mbgd = MiniBatchGD(
    epochs=100,
    learning_rate=0.02,
    batch_size=35,
)

In [70]:
# MiniBatchGD draws its batches from the stdlib `random` module; seed it here
# so re-running this cell reproduces the same fitted parameters and score.
random.seed(40)
own_mbgd.fit(X_train, y_train)

(151.26763965017986,
 array([  28.27017074, -217.93121881,  530.74807915,  305.73900801,
         -88.00627067, -161.06170995, -225.78807609,  119.75245094,
         477.94750494,   57.61677469]))

In [71]:
# Predictions of the mini-batch model on the held-out rows.
# NOTE: this rebinds y_pred, replacing the scikit-learn baseline predictions.
y_pred = own_mbgd.predict(X_test)

In [72]:
# Shape of the true targets, for comparison with the predictions below.
y_test.shape

(89,)

In [73]:
# The predictions must have the same shape as y_test for r2_score.
y_pred.shape

(89,)

In [74]:
# R^2 of the mini-batch gradient descent model on the test split.
r2_score(y_true=y_test, y_pred=y_pred)

0.3716424940332407