In [1]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the diabetes dataset as a (features, target) pair instead of a Bunch object.
X, y = load_diabetes(return_X_y=True)

In [3]:
# Feature-matrix shape first, then target-vector shape, one per line.
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [4]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
  X, y, test_size=0.2, random_state=2
)
# Training-set row count, then feature count, one per line.
print(*X_train.shape[:2], sep="\n")

353
10


In [5]:
# Baseline model: LinearRegression fitted on the training split
# (fit() returns the estimator itself, so construction and fitting chain).
linreg = LinearRegression().fit(X_train, y_train)
# end="\n\n" leaves the same blank line between the two reports.
print("Coefficients: ", linreg.coef_, end="\n\n")
print("Intercept: ", linreg.intercept_)

Coefficients:  [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]

Intercept:  151.88331005254167


In [6]:
# Evaluate the baseline on the held-out rows.
y_pred = linreg.predict(X_test)
print("R2 Score: ", r2_score(y_true=y_test, y_pred=y_pred))

R2 Score:  0.4399338661568968


# Custom Mini-Batch Gradient Descent Regressor

In [7]:
import random

# Seed the global generator once, right after importing it, so every later
# random.sample() call in this notebook (including the mini-batch draws) is
# repeatable under Restart Kernel -> Run All. The value matches the
# random_state used for the train/test split.
RANDOM_SEED = 2
random.seed(RANDOM_SEED)

# Demo: random.sample draws k distinct items, which is how batches of row
# indices are drawn further down.
random.sample(range(1, 100), 10)

[24, 50, 94, 33, 46, 81, 74, 72, 79, 97]

In [8]:
# Number of batches per epoch = number of training rows // batch size
class MiniBatchGradientRegression:
  """Linear regression trained with mini-batch gradient descent.

  Every epoch performs ``n_samples // batch_size`` updates. Each update draws
  ``batch_size`` distinct row indices with ``random.sample``; batches are
  drawn independently, so rows may repeat across batches within an epoch.

  Parameters
  ----------
  learning_rate : float
      Step size applied to both the intercept and the coefficient gradients.
  epochs : int
      Number of passes, each made of ``n_samples // batch_size`` updates.
  batch_size : int
      Rows per update. Values larger than the training set are clamped to
      the training-set size (full-batch descent) instead of silently
      skipping training.
  random_state : int or None, default None
      Seed for a private ``random.Random`` used to draw batches. With
      ``None`` the global ``random`` module is used, so ``random.seed(...)``
      still controls the draws.

  Attributes
  ----------
  coefficient : numpy.ndarray or None
      One weight per feature; ``None`` until ``fit`` is called.
  intercept : float or None
      Bias term; ``None`` until ``fit`` is called.
  """

  def __init__(self, learning_rate, epochs, batch_size, random_state=None):
    self.coefficient = None
    self.intercept = None
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.batch_size = batch_size
    self.random_state = random_state

  def fit(self, X_train, y_train):
    """Estimate ``intercept`` and ``coefficient`` from the training data.

    Prints the fitted intercept and coefficients when done; returns None.

    Raises
    ------
    ValueError
        If ``X_train`` is not 2-D or has no rows, if ``y_train`` has a
        different number of rows, or if ``batch_size`` is smaller than 1.
    """
    # Coerce to arrays so list / DataFrame inputs support row indexing by a
    # list of positions.
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float).ravel()
    if X_train.ndim != 2:
      raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
    n_samples, n_features = X_train.shape
    if n_samples == 0:
      raise ValueError("X_train must contain at least one row")
    if y_train.shape[0] != n_samples:
      raise ValueError("X_train and y_train must have the same number of rows")
    if self.batch_size < 1:
      raise ValueError("batch_size must be at least 1")

    # Clamp oversized batches: otherwise n_samples // batch_size would be 0
    # and the model would stay at its initial values.
    batch_size = min(int(self.batch_size), n_samples)
    batches_per_epoch = n_samples // batch_size
    # The random module and a random.Random instance both provide .sample().
    rng = random if self.random_state is None else random.Random(self.random_state)

    self.intercept = 0
    self.coefficient = np.ones(n_features)
    for _ in range(self.epochs):
      for _ in range(batches_per_epoch):
        idx = rng.sample(range(n_samples), batch_size)
        X_batch = X_train[idx]
        # Both gradients use the residual from the parameters as they were
        # before this update.
        residual = y_train[idx] - (np.dot(X_batch, self.coefficient) + self.intercept)
        intercept_gradient = -2 * np.mean(residual)
        # NOTE: this gradient is summed over the batch while the intercept
        # gradient is averaged, so the effective coefficient step grows with
        # batch_size. Left as-is so existing learning rates behave the same.
        coefficient_gradient = -2 * np.dot(residual, X_batch)
        self.intercept = self.intercept - self.learning_rate * intercept_gradient
        self.coefficient = self.coefficient - self.learning_rate * coefficient_gradient
    print("Intercept: ", self.intercept)
    print("Coefficients: ", self.coefficient)

  def predict(self, X_test):
    """Return ``X_test @ coefficient + intercept`` for every row of ``X_test``."""
    y_pred = np.dot(np.asarray(X_test, dtype=float), self.coefficient) + self.intercept
    return y_pred

In [9]:
# Batches hold one tenth of the training rows (floor division).
mbgd = MiniBatchGradientRegression(
  learning_rate=0.01,
  epochs=50,
  batch_size=X_train.shape[0] // 10,
)
mbgd.fit(X_train, y_train)

Intercept:  149.30167300160383
Coefficients:  [  59.65032189  -66.4497961   345.56217987  248.87462291   22.60648971
  -25.65345849 -171.76767094  129.01618313  322.48717345  143.91119415]


In [10]:
# Evaluate the custom mini-batch model on the held-out rows.
y_pred = mbgd.predict(X_test)
print("R2 Score: ", r2_score(y_true=y_test, y_pred=y_pred))

R2 Score:  0.43026342977225884


# Mini-Batch Gradient Descent with scikit-learn

In [11]:
from sklearn.linear_model import SGDRegressor

# random_state pins the estimator's internal shuffling so repeated runs can
# match; the value is the same one used for the train/test split.
sgd = SGDRegressor(learning_rate='constant', eta0=0.2, random_state=2)
# One tenth of the training rows per batch (35 for the 353-row split), the
# same batch size given to the custom regressor above.
batch_size = X_train.shape[0] // 10
n_updates = 100
for step in range(n_updates):
  # Row indices come from the global `random` module; seed it beforehand if
  # the batches themselves must be repeatable.
  idx = random.sample(range(X_train.shape[0]), batch_size)
  # partial_fit updates the existing model with this batch only.
  sgd.partial_fit(X_train[idx], y_train[idx])

In [12]:
# Report the SGD-fitted parameters; end="\n\n" leaves a blank line between them.
print("Coefficients: ", sgd.coef_, end="\n\n")
print("Intercept: ", sgd.intercept_)

Coefficients:  [  31.10190442 -135.28678907  462.30519223  298.88472825  -31.90616642
  -94.25544288 -196.91246469  104.09202823  407.89661811  100.33325496]

Intercept:  [130.26026968]


In [13]:
# Evaluate the scikit-learn SGD model on the held-out rows.
y_pred = sgd.predict(X_test)
print("R2 Score: ", r2_score(y_true=y_test, y_pred=y_pred))

R2 Score:  0.36461282787041804
