In [2]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [3]:
# Load the diabetes data directly as a (features, target) pair of arrays
X, y = load_diabetes(return_X_y=True)

In [4]:
# Sanity-check the dimensions: features first, then target (one per line)
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

Stochastic Gradient Descent (SGD) is an optimization algorithm widely used in machine learning to minimize loss functions, especially when working with large datasets. Unlike batch gradient descent, which updates the model parameters only after computing the gradient over the entire dataset, SGD updates the parameters using a single randomly selected data point at each step. This makes each update much cheaper and allows the algorithm to handle massive datasets efficiently. The trade-off is that the updates are noisier, so the path toward the minimum is more erratic. On non-convex loss surfaces this randomness can help the algorithm escape local minima and potentially find better solutions; for linear regression with squared error, as implemented below, the loss is convex, so the noise only affects how smoothly the parameters approach the single global minimum. SGD is particularly useful for online learning and real-time applications, where data arrives continuously and models need frequent updates.

In [9]:
# Custom Linear Regression trained with Stochastic Gradient Descent
class SGDRegressor:
    """Linear regression fitted by stochastic gradient descent on squared error.

    At every step one training row is drawn uniformly at random (with
    replacement) and the intercept and coefficients are moved against the
    gradient of that single row's squared error.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every gradient update.
    epochs : int, default=100
        Number of passes; each pass performs ``n_samples`` single-row updates.
    random_state : int or None, default=None
        Seed for the row sampling. ``None`` keeps the previous behaviour of
        drawing from NumPy's global random state (so ``np.random.seed`` still
        applies); an integer makes ``fit`` reproducible on its own.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Learned feature weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, epochs=100, random_state=None):
        self.coef_ = None                 # feature weights, set by fit()
        self.intercept_ = None            # bias term, set by fit()
        self.lr = learning_rate           # gradient step size
        self.epochs = epochs              # number of passes over the data
        self.random_state = random_state  # seed for row sampling (None -> global state)

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features (NumPy array, DataFrame, nested list, ...).
        y_train : array-like of shape (n_samples,)
            Training targets.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D or its row count differs from ``y_train``.
        """
        # Convert once to plain arrays so that row access is positional.
        # Indexing a DataFrame/Series with an integer is a *label* lookup,
        # which fails or picks the wrong row once the index has been shuffled.
        features = np.asarray(X_train, dtype=float)
        targets = np.asarray(y_train, dtype=float).ravel()

        if features.ndim != 2:
            raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
        n_samples, n_features = features.shape
        if targets.shape[0] != n_samples:
            raise ValueError(
                "X_train and y_train have different numbers of samples: "
                f"{n_samples} != {targets.shape[0]}"
            )

        # A dedicated generator when seeded; otherwise the global NumPy state.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # Start from a zero intercept and unit weights for every feature.
        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                row = features[idx]

                # Both gradients use the prediction made *before* either update.
                residual = targets[idx] - (np.dot(row, self.coef_) + self.intercept_)

                intercept_der = -2 * residual
                coef_der = -2 * (residual * row)

                self.intercept_ = self.intercept_ - (self.lr * intercept_der)
                self.coef_ = self.coef_ - (self.lr * coef_der)

    def predict(self, X_test):
        """Return ``X_test @ coef_ + intercept_`` for the given rows."""
        return np.dot(X_test, self.coef_) + self.intercept_

In [10]:
# Instantiate the custom regressor: step size 0.01, 100 passes over the training rows
gdr = SGDRegressor(learning_rate=0.01, epochs=100)

In [11]:
# Train on the training split; fit() sets gdr.coef_ and gdr.intercept_.
# Rows are sampled at random during training, so the learned values can
# differ between runs unless NumPy's random state is seeded beforehand.
gdr.fit(X_train, y_train)

In [12]:
# Predict targets for the held-out test split from the fitted weights and intercept
y_pred = gdr.predict(X_test)