In [34]:
from sklearn.datasets import load_diabetes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [35]:
# Load the scikit-learn diabetes dataset directly as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

In [36]:
# Report the dimensions of the full dataset before it is split.
print("Shape of X: ", np.shape(X))
print("Shape of y: ", np.shape(y))
print("Number of Rows: ", len(X))
print("Number of Columns: ", X.shape[-1])

Shape of X:  (442, 10)
Shape of y:  (442,)
Number of Rows:  442
Number of Columns:  10


In [37]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [38]:
# Size of the training split: one coefficient will be learned per feature column.
print("Number of Rows: ", len(X_train))
print("Number of Columns or Coefficients: ", X_train.shape[-1])

Number of Rows:  353
Number of Columns or Coefficients:  10


In [39]:
# Baseline: scikit-learn's LinearRegression, used as the reference for the custom SGD model.
linreg = LinearRegression()
linreg.fit(X=X_train, y=y_train)

In [40]:
# Show the fitted baseline parameters, separated by one blank line.
print("Coefficients: ", linreg.coef_, end="\n\n")
print("Intercept: ", linreg.intercept_)

Coefficients:  [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]

Intercept:  151.88331005254167


In [41]:
# Score the baseline model on the held-out split.
y_pred = linreg.predict(X_test)
r2score = r2_score(y_true=y_test, y_pred=y_pred)
print("R2 Score: ", r2score)

R2 Score:  0.4399338661568968


## Custom SGD

In [42]:
class StochasticGradientDescent:
  """Linear regression trained with single-sample stochastic gradient descent.

  The model is ``y_hat = X @ coefficient + intercept``. Every update picks one
  training row uniformly at random (with replacement) and takes a gradient
  step on that row's squared error ``(y - y_hat) ** 2``.

  Parameters
  ----------
  learning_rate : float
    Step size applied to each per-sample gradient.
  epochs : int
    Number of passes; each pass performs ``n_samples`` random-row updates.
  random_state : int or None, default None
    Seed for a private ``np.random.RandomState`` used to pick rows, making
    ``fit`` reproducible. When ``None`` the global ``np.random`` generator is
    used, so results are controlled by ``np.random.seed`` (legacy behaviour).
  verbose : bool, default True
    When true, ``fit`` prints the learned coefficients and intercept.

  Attributes
  ----------
  coefficient : numpy.ndarray or None
    One weight per feature; ``None`` until ``fit`` has been called.
  intercept : float or None
    Bias term; ``None`` until ``fit`` has been called.
  """

  def __init__(self, learning_rate, epochs, random_state=None, verbose=True):
    self.coefficient = None
    self.intercept = None
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.random_state = random_state
    self.verbose = verbose

  def fit(self, X_train, y_train):
    """Learn ``coefficient`` and ``intercept`` from the training data.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
    y_train : array-like with n_samples target values

    Raises
    ------
    ValueError
      If ``X_train`` is not 2-D or its row count differs from the number of
      targets.
    """
    # Coerce to plain arrays so positional indexing below is always valid
    # (lists, or pandas objects whose index was shuffled by a split).
    X_train = np.asarray(X_train, dtype=float)
    y_train = np.asarray(y_train, dtype=float).ravel()
    if X_train.ndim != 2:
      raise ValueError(
        "X_train must be 2-D (n_samples, n_features); got %d-D input" % X_train.ndim
      )
    n_samples, n_features = X_train.shape
    if y_train.shape[0] != n_samples:
      raise ValueError(
        "X_train has %d rows but y_train has %d values" % (n_samples, y_train.shape[0])
      )

    # A private generator when seeded; otherwise fall back to the global one.
    if self.random_state is None:
      rng = np.random
    else:
      rng = np.random.RandomState(self.random_state)

    self.intercept = 0.0
    self.coefficient = np.ones(n_features)
    for _ in range(self.epochs):
      for _ in range(n_samples):
        idx = rng.randint(0, n_samples)
        row = X_train[idx]
        y_hat = np.dot(row, self.coefficient) + self.intercept
        residual = y_train[idx] - y_hat
        # Gradients of (y - y_hat)**2 with respect to the intercept and the weights.
        grad_intercept = -2 * residual
        grad_coefficient = -2 * residual * row
        self.intercept = self.intercept - self.learning_rate * grad_intercept
        self.coefficient = self.coefficient - self.learning_rate * grad_coefficient

    if self.verbose:
      print("Coefficients: ", self.coefficient)
      print("Intercept: ", self.intercept)

  def predict(self, X_test):
    """Return ``X_test @ coefficient + intercept``; call ``fit`` first."""
    return np.dot(X_test, self.coefficient) + self.intercept

In [43]:
# fit() picks training rows with NumPy's global random generator, so seed it here:
# without a seed the learned parameters and the R2 score change on every run.
# The value 42 is arbitrary; any fixed integer gives a repeatable fit.
np.random.seed(42)
sgd = StochasticGradientDescent(learning_rate = 0.01, epochs = 50)
sgd.fit(X_train, y_train)

Coefficients:  [  56.67206841  -59.46964105  341.45506723  249.10031855   16.85084957
  -30.57932126 -171.85416179  129.9680791   321.7658241   132.81126311]
Intercept:  156.07537784857266


In [44]:
# Score the custom SGD model on the same held-out split used for the baseline.
y_pred = sgd.predict(X_test)
r2score = r2_score(y_true=y_test, y_pred=y_pred)
print("R2 Score: ", r2score)

R2 Score:  0.4284768411432237
