# Batch->Group Of Rows

## Mini Batch Gradient Descent is Combination of Batch Gardient descent and Stochastic Gradient Descent

In [1]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [2]:
X,y=load_diabetes(return_X_y=True)

In [3]:
print(X.shape)
print(y.shape)

(442, 10)
(442,)


In [4]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=2)

In [5]:
reg=LinearRegression()
reg.fit(X_train,y_train)

In [6]:
print(reg.coef_)
print(reg.intercept_)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


In [7]:
y_predict=reg.predict(X_test)

In [8]:
r2_score(y_test,y_predict)

0.4399338661568969

# <hr><br>## Coding From Scratch

In [12]:
import random
class MBGDregresor:
    
    def __init__(self,batch_size,learning_rate=0.1,epochs=100):
        
        self.coef_=None
        self.intercept_=None
        self.lr=learning_rate
        self.epochs=epochs
        self.batch_size=batch_size
        
    def fit(self,X_train,y_train):
        
        self.intercept_=0
        self.coef_=np.ones(X_train.shape[1])
        
        for i in range(self.epochs):
            for j in range(int(X_train.shape[0]/self.batch_size)):
                
                #choosing a random group of Rows 
                idx=random.sample(range(X_train.shape[0]),self.batch_size)
                
                y_hat=np.dot(X_train[idx],self.coef_)+self.intercept_
                
                intercept_der = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - (self.lr * intercept_der)

                coef_der = -2 * np.dot((y_train[idx] - y_hat),X_train[idx])
                self.coef_ = self.coef_ - (self.lr * coef_der)
                
        print(self.coef_,self.intercept_)
        
        
    def predict(self,X_test):
        return np.dot(X_test,self.coef_) + self.intercept_

In [15]:
mbgd=MBGDregresor(batch_size=int(X_train.shape[0]/50),learning_rate=0.01,epochs=100)

In [16]:
mbgd.fit(X_train,y_train)

[  27.83856733 -140.21798865  451.49556353  303.81151184  -22.66111184
  -94.07019483 -186.66425796  106.11032629  410.31379626  107.58080843] 152.65030700686881


In [17]:
y_pred=mbgd.predict(X_test)

In [18]:
r2_score(y_test,y_pred)

0.453045165131023

## From the above you can see that we can acheive the required accuracy just by tuning the parameters and changing the batch size for algorithm , Our created class is working same as Sklearn class , and it is possible to acheive more accuracy.

# <hr>

# Implementation Using Sklearn

In [20]:
from sklearn.linear_model import SGDRegressor

In [22]:
sgd=SGDRegressor(learning_rate='constant',eta0=0.3)

In [23]:
batch_size=50

for i in range (100):
    
    idx=random.sample(range(X_train.shape[0]),batch_size)
    sgd.partial_fit(X_train[idx],y_train[idx])

## Since The Sklearn Doesn't have any Class for Mini batch Gradient Descent we implemented it in SGD regressor class of Sklearn using partial_fit() method of SGD regressor

In [24]:
sgd.coef_

array([  14.51426742, -180.0942287 ,  534.59781413,  321.43038045,
        -47.152408  , -137.09756666, -175.62364841,   70.54887472,
        485.04869284,   89.44855399])

In [25]:
sgd.intercept_

array([171.60227459])

In [26]:
y_pred=sgd.predict(X_test)

In [27]:
r2_score(y_test,y_predict)

0.4399338661568969

<hr>

# Graph

![mini_batch_contour_plot.gif](attachment:mini_batch_contour_plot.gif)

### From the above graph you can notice that behaviour of Mini batch graph is somewhere lies between the behaviours of Stochastic Gradient Descent and Batch Gradient Descent , it is not completely in consistent or disorted neither to constant

## To gain stability at the Global Minima (solution) we can use the concept of Learning_Schedule to change the learning rate of mini batch with respect to Epochs such that learning rate will reduce as it will reach closer towards the solution

# Conclude