In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression

In [119]:
# return_X_y=True returns the (features, target) pair directly as arrays
# instead of the dataset wrapper object.
X, y = fetch_california_housing(return_X_y=True)

In [86]:
X

array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
          37.88      , -122.23      ],
       [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
          37.86      , -122.22      ],
       [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
          37.85      , -122.24      ],
       ...,
       [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
          39.43      , -121.22      ],
       [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
          39.43      , -121.32      ],
       [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
          39.37      , -121.24      ]])

In [87]:
y

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [88]:
X.shape

(20640, 8)

In [89]:
y.shape

(20640,)

In [120]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

X_train.shape, X_test.shape

((16512, 8), (4128, 8))

In [121]:
# Baseline: scikit-learn's LinearRegression fitted on the training split,
# used as the reference score for the gradient-descent implementation.
reg = LinearRegression()
reg.fit(X_train, y_train)

In [122]:
y_pred = reg.predict(X_test)

In [123]:
y_pred.shape

(4128,)

In [124]:
y_pred

array([2.80991017, 1.25276895, 1.44720173, ..., 1.57213375, 1.43077765,
       2.60233972])

In [125]:
r2_score(y_test, y_pred)

0.6013853272055167

When I first ran batch gradient descent directly on the raw dataset, training failed: the gradient values exploded to extremely large numbers (eventually `inf` and `nan`), which showed that the algorithm was diverging rather than converging. The cause was the unscaled data: the features are on very different scales, so a single learning rate is far too large for the features with big values. To fix this, I applied scaling (standardization) to the dataset before training.

In [164]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so no test information leaks into it.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

Implementing Batch Gradient Descent

In [177]:
class BGDRegressor:
  """Linear regression fitted with batch gradient descent on mean squared error.

  Every epoch computes one gradient from the whole training set and takes a
  single step of size ``learning_rate`` for the intercept and the coefficients.

  Parameters
  ----------
  learning_rate : float, default=0.001
      Step size applied to each gradient update.
  epochs : int, default=500
      Number of gradient steps (full passes over the training data).

  Attributes
  ----------
  intercept_ : float or None
      Learned bias term; ``None`` until ``fit`` has been called.
  coef_ : numpy.ndarray of shape (n_features,) or None
      Learned weights; ``None`` until ``fit`` has been called.
  """

  def __init__(self, learning_rate=0.001, epochs=500):
    self.lr = learning_rate
    self.epochs = epochs
    self.intercept_ = None
    self.coef_ = None

  def fit(self, X_train, y_train):
    """Fit the model and print the learned intercept and coefficients.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Training features. Features on very different scales can make the
        updates diverge, so scale them first.
    y_train : array-like of shape (n_samples,) or (n_samples, 1)
        Training targets; flattened to one dimension before use.

    Raises
    ------
    ValueError
        If ``X_train`` and ``y_train`` do not contain the same number of samples.
    """
    X = np.asarray(X_train, dtype=float)
    # Flatten the target: a column vector (n, 1) would otherwise broadcast
    # against the (n,) predictions into an (n, n) residual matrix.
    y = np.asarray(y_train, dtype=float).ravel()
    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
      raise ValueError(
          "X_train and y_train have inconsistent numbers of samples: "
          f"{n_samples} != {y.shape[0]}"
      )

    # Initializing intercept and coefficients
    self.intercept_ = 0.0
    self.coef_ = np.ones(X.shape[1])

    for _ in range(self.epochs):
      # Both gradients are computed from the same predictions, so the
      # intercept and coefficient updates are simultaneous.
      y_hat = np.dot(X, self.coef_) + self.intercept_
      residual = y - y_hat

      # d(MSE)/d(intercept) = -2 * mean(residual)
      intercept_der = -2 * np.mean(residual)
      # d(MSE)/d(coef) = -2 * X^T residual / n_samples
      coef_der = -2 * np.dot(residual, X) / n_samples

      self.intercept_ = self.intercept_ - (self.lr * intercept_der)
      self.coef_ = self.coef_ - (self.lr * coef_der)
    print(self.intercept_, self.coef_)

  def predict(self, X_test):
    """Return predictions ``X_test @ coef_ + intercept_`` for each row of ``X_test``.

    Parameters
    ----------
    X_test : array-like of shape (n_samples, n_features)
        Feature rows to predict for, scaled the same way as the training data.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
    """
    return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [181]:
# learning_rate=0.1 overrides the class default of 0.001; fit() prints the
# learned intercept followed by the coefficient vector.
bgd = BGDRegressor(learning_rate=0.1, epochs=500)
bgd.fit(X_train, y_train)

2.0650925145346517 [ 0.8393851   0.11592215 -0.26684722  0.28471035 -0.00751026 -0.04504052
 -0.86982888 -0.84027834]


In [182]:
y_pred = bgd.predict(X_test)

In [183]:
r2_score(y_test, y_pred)

0.6011097195801407

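The R² scores of the scikit-learn implementation (≈0.6014) and the gradient descent implementation (≈0.6011) are nearly identical, indicating that batch gradient descent, once the features are scaled, converges effectively and efficiently to essentially the same solution as scikit-learn's `LinearRegression`.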