# Mini - Batch Gradient Descent =>

In [15]:
from sklearn.datasets import load_diabetes

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [17]:
X , y = load_diabetes(return_X_y = True)

In [19]:
print(X.shape)
print(y.shape)

(442, 10)
(442,)


In [23]:
X_train , X_test , y_train ,y_test = train_test_split(X , y , test_size = 0.2 , random_state = 2)

In [25]:
print(X_train.shape)
print(y_train.shape)

(353, 10)
(353,)


In [27]:
reg = LinearRegression()

In [29]:
reg.fit(X_train , y_train)

In [31]:
print(reg.coef_)
print(reg.intercept_)

[  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
151.88331005254167


In [33]:
y_pred = reg.predict(X_test)
y_pred

array([154.1213881 , 204.81835118, 124.93755353, 106.08950893,
       258.5348576 , 256.3310074 , 118.75087616, 119.52440696,
       101.50816735, 190.54048661, 141.70656811, 172.51883961,
       174.33861649, 134.80942706, 294.13994537,  94.11798038,
       211.97059795, 156.49579378, 134.21000428, 119.62664644,
       148.87842251, 165.00873409, 151.10021038, 176.04063756,
       133.27769647, 221.29555392, 197.17324941,  96.1577688 ,
        50.26012711, 230.48580317, 242.06073866, 114.11129218,
        67.07532417,  94.52943825, 201.21415375, 167.05136201,
       159.881268  , 192.78746659, 114.49551325, 233.48234551,
       140.82563045, 121.0680409 , 192.27480772, 191.12738845,
       179.16865788, 148.34935601, 163.47414622, 276.81647884,
       100.17926432, 164.10555298, 255.80762189, 136.9466204 ,
       152.37503699, 107.92237882, 194.21924678,  77.34670792,
       118.50482479,  68.38335763, 154.29258529, 162.48840259,
       168.36788326, 156.87790322,  97.14191797, 238.16

In [35]:
r2_score(y_test , y_pred)

0.4399338661568968

In [39]:
import random

random.sample(range(1,100),10)

[8, 78, 38, 45, 55, 92, 84, 75, 34, 14]

In [41]:
X_train[[5, 51, 18, 52, 48, 91, 97, 79, 90, 64]]

array([[ 0.03807591,  0.05068012, -0.00943939,  0.00235142,  0.00118295,
         0.03751653, -0.05444576,  0.05017634, -0.02595311,  0.10661708],
       [ 0.04897352,  0.05068012,  0.12313149,  0.0838437 , -0.10476542,
        -0.10089509, -0.06917231, -0.00259226,  0.03664373, -0.03007245],
       [ 0.02354575,  0.05068012, -0.0191397 ,  0.04941519, -0.06348684,
        -0.06112523,  0.00446045, -0.03949338, -0.02595311, -0.01350402],
       [-0.02367725, -0.04464164,  0.03043966, -0.00567042,  0.08236416,
         0.09200436, -0.01762938,  0.07120998,  0.03304307,  0.00306441],
       [-0.06000263,  0.05068012, -0.0105172 , -0.01486283, -0.04972731,
        -0.02354742, -0.0581274 ,  0.0158583 , -0.00991877, -0.03421455],
       [-0.01277963,  0.05068012, -0.05578531, -0.00222757, -0.02771206,
        -0.02918409,  0.019187  , -0.03949338, -0.01705628,  0.04448548],
       [ 0.04897352,  0.05068012,  0.07462995,  0.06662945, -0.00982468,
        -0.00225332, -0.04340085,  0.03430886

In [43]:
X_train[[5, 51, 18, 52, 48, 91, 97, 79, 90, 64]].shape

(10, 10)

In [45]:
class MBGDRegressor:

    def __init__(self , batch_size , learning_rate = 0.01 , epochs =100):

        self.coef_ = None 
        self.intercept_ = None 
        self.lr = learning_rate 
        self.epochs = epochs 
        self.batch_size = batch_size 

    def fit(self , X_train , y_train):
        # initialize your coefficients -------->
        self.intercept_ = 0
        self.coef_ = np.ones(X_train.shape[1])

        for i in range(self.epochs):
            
            for j in range(int(X_train.shape[0]/self.batch_size)):
                
                idx = random.sample(range(X_train.shape[0]),self.batch_size)
                y_hat = np.dot(X_train[idx] , self.coef_) + self.intercept_
                
                intercept_deriv = -2 * np.mean(y_train[idx] - y_hat)
                self.intercept_ = self.intercept_ - (self.lr * intercept_deriv)
                
                coef_deriv = -2 * np.dot((y_train[idx] - y_hat) , X_train[idx]) 
                self.coef_ = self.coef_ - (self.lr * coef_deriv)

        print(self.intercept_)
        print(self.coef_)     

    def predict(self , X_test):
        return np.dot(X_test , self.coef_) + self.intercept_

In [49]:
mbr = MBGDRegressor(batch_size = int(X_train.shape[0]/10), learning_rate = 0.01 , epochs = 50)

In [51]:
mbr.fit(X_train , y_train)

151.69209419974214
[  54.42550538  -71.15462267  345.92362637  250.27956494   25.55987436
  -25.16746047 -167.44544588  125.42947787  326.34058635  128.67075734]


In [55]:
y_pred = mbr.predict(X_test)

In [57]:
y_test

array([ 73., 233.,  97., 111., 277., 341.,  64.,  68.,  65., 178., 142.,
        77., 244., 115., 258.,  87., 220.,  86.,  74., 132., 136., 220.,
        91., 235., 148., 317., 131.,  84.,  65., 217., 306.,  79., 158.,
        54., 123., 174., 237., 212., 179., 281., 187., 200.,  68., 163.,
       141., 202., 178., 242.,  47., 131., 243., 142., 200.,  89., 232.,
        55., 253., 128., 104., 184., 110., 198.,  81., 195., 150.,  63.,
       151., 233., 178.,  84., 237., 109., 131., 252., 200., 160., 200.,
        51., 111.,  77., 201.,  88.,  78., 243., 268.,  55., 270., 288.,
        91.])

In [33]:
r2_score(y_test , y_pred)

0.4310391795499685

In [59]:
from sklearn.linear_model import SGDRegressor

In [83]:
sgd = SGDRegressor(learning_rate = 'constant' , eta0 = 0.01)

In [85]:
batch_size = 35 

for i in range(100):

    idx = random.sample(range(X_train.shape[0]) , batch_size)
    sgd.partial_fit(X_train[idx] , y_train[idx])

In [67]:
idx = random.sample(range(X_train.shape[0]) , batch_size)

idx

[83,
 136,
 205,
 342,
 249,
 71,
 331,
 79,
 5,
 259,
 87,
 170,
 289,
 182,
 119,
 338,
 242,
 208,
 272,
 142,
 290,
 120,
 57,
 138,
 296,
 164,
 160,
 137,
 213,
 308,
 176,
 7,
 251,
 207,
 27]

In [87]:
sgd.coef_

array([ 24.6102299 ,   1.71540273,  71.18465598,  51.96456835,
        20.27409836,  12.20177459, -41.87486168,  41.32334541,
        69.83302402,  39.05111856])

In [89]:
sgd.intercept_

array([151.65118854])

In [91]:
y_pred1 = sgd.predict(X_test)

In [97]:
r2_score(y_test , y_pred)

0.4320574093740355