In [14]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [15]:
# Load the diabetes dataset as a (features, target) pair of arrays
X, y = load_diabetes(return_X_y=True)

In [16]:
# Show the shapes of the feature matrix and the target vector, one per line
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [17]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

Batch Gradient Descent
Batch Gradient Descent is an optimization algorithm used to minimize the error (or loss) of machine learning models, particularly in regression problems. It works by repeatedly moving the model parameters (the weights and the bias) a small step in the direction opposite to the gradient of the loss function — the direction in which the loss decreases fastest — with the step size controlled by the learning rate.


Concept:
In batch gradient descent, the algorithm uses the entire training dataset to compute the gradients before each parameter update. This differs from stochastic gradient descent, which updates the parameters after every single sample, and from mini-batch gradient descent, which updates them after each small subset of the data.

In [33]:
# Creating a custom class for Linear Regression using Gradient Descent
class GDRegressor:
    """Linear regression trained with batch gradient descent on mean squared error.

    Each epoch computes the gradient of the MSE loss over the whole training
    set and moves the parameters one step of size ``learning_rate`` against it.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to the gradient at every update.
    epochs : int, default=100
        Number of parameter updates (one per full pass over the data).

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Feature weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Bias term; ``None`` until ``fit`` has been called.
    lr : float
        The learning rate passed to the constructor.
    epochs : int
        The epoch count passed to the constructor.
    """

    def __init__(self, learning_rate=0.01, epochs=100):
        self.coef_ = None        # feature weights, set by fit
        self.intercept_ = None   # bias term, set by fit
        self.lr = learning_rate
        self.epochs = epochs

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features. Lists and other array-likes are converted
            to a float ndarray.
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets. A single-column 2-D target is flattened.

        Returns
        -------
        self : GDRegressor
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or ``y_train`` does not
            provide exactly one target per row of ``X_train``.
        """
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_train, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                f"X_train must be 2-dimensional, got {X.ndim} dimension(s)"
            )

        # A column vector would broadcast (n, 1) - (n,) into an (n, n) matrix
        # and silently corrupt the gradient, so flatten it up front.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()

        n_samples, n_features = X.shape
        if y.ndim != 1 or y.shape[0] != n_samples:
            raise ValueError(
                f"y_train with shape {y.shape} does not match "
                f"{n_samples} training samples"
            )
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")

        # Starting point: zero bias, unit weight for every feature
        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            # Both gradients are taken at the same (pre-update) parameters
            y_hat = np.dot(X, self.coef_) + self.intercept_
            residual = y - y_hat

            # dL/d(intercept) = -2 * mean(residual)
            intercept_derivative = -2 * np.mean(residual)
            # dL/d(coef) = -2 * X^T residual / n_samples
            coef_derivative = -2 * np.dot(residual, X) / n_samples

            self.intercept_ = self.intercept_ - (self.lr * intercept_derivative)
            self.coef_ = self.coef_ - (self.lr * coef_derivative)

        return self

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Samples to predict; converted to a float ndarray.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Predicted target values.
        """
        X = np.asarray(X_test, dtype=float)
        return np.dot(X, self.coef_) + self.intercept_

In [29]:
# Build the regressor with an explicit step size and number of epochs
gdr = GDRegressor(learning_rate=0.01, epochs=100)

In [30]:
# Train on the training split; the trailing semicolon keeps the cell output
# empty regardless of what fit returns
gdr.fit(X_train, y_train);

In [31]:
# Predict targets for the held-out test features
y_pred = gdr.predict(X_test)