In [1]:
import numpy as np
from sklearn.datasets import load_diabetes 

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score



In [3]:
X,y = load_diabetes(return_X_y = True)

In [4]:
X_train, X_test ,y_train , y_test = train_test_split(X,y, test_size=0.2 ,random_state=42)

In [5]:
lr = LinearRegression()

In [8]:
lr.fit(X_train,y_train)

In [9]:
y_pred = lr.predict(X_test)

In [10]:
r2_score(y_test, y_pred)

0.4526027629719197

In [11]:
lr.coef_

array([  37.90402135, -241.96436231,  542.42875852,  347.70384391,
       -931.48884588,  518.06227698,  163.41998299,  275.31790158,
        736.1988589 ,   48.67065743])

In [12]:
lr.intercept_

151.34560453985995

## Creating class for MiniBatch GD

In [28]:
import random
class MBGDRegression:
    def __init__(self,batch_size, learning_rate=0.1, epochs=100):
        self.coef_ = None
        self.intercept_ = None
        self.epochs = epochs
        self.l_rate = learning_rate
        self.batch_size = batch_size

    def fit(self, X,y):
        #initilizing the b,m
        self.intercept_ = 0  # b0 || b
        self.coef_ = np.ones(X_train.shape[1])  # b1,b2,b2... || m1,m2,m3...

        for i in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)):   #this loop will run for batch_size times
                idx = random.sample(range(X_train.shape[0]),10)  #it will create values from 0-num and there will be 10 values 
                #this methode is known as vectorization
                y_hat = np.dot(X_train[idx], self.coef_) + self.intercept_
                #calculating intercept
                intercept_derivative = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept_  = self.intercept_ - (self.l_rate * intercept_derivative)
                #calculating coeficent
                coef_deriv = -2 * np.dot((y_train[idx] - y_hat), X_train[idx])
                self.coef_ = self.coef_ - (self.l_rate * coef_deriv)
            

        print(self.intercept_, self.coef_)

    def predict(self,X):
        return np.dot(X,self.coef_) + self.intercept_  # y_pred = mx + b


In [37]:
mbgdr = MBGDRegression(batch_size = int(X_train.shape[0]/10), learning_rate=0.1, epochs=100)

In [38]:
mbgdr.fit(X_train,y_train)

146.40309852407142 [  34.37206791 -207.97450074  574.85670353  334.36809272  -82.62765897
 -116.93096652 -198.43620301  164.17946902  434.53801019  102.03211833]


In [39]:
y_pred = mbgdr.predict(X_test)

In [40]:
r2_score(y_test,y_pred)

0.44523716983583694

### BatchGD using StochasticGD sklearn


In [41]:
from sklearn.linear_model import SGDRegressor

In [65]:
sgdr = SGDRegressor(max_iter=100, learning_rate='constant', eta0=1)

In [80]:
import random

mini_batch = 20
for i in range(300):
    idx = random.sample(range(X_train.shape[0]),mini_batch)

    sgdr.partial_fit(X_train[idx],y_train[idx])

In [81]:
y_pred =sgdr.predict(X_test)

In [82]:
r2_score(y_test,y_pred)

0.4357910476763729