In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits import mplot3d
from torch.utils.data import Dataset, DataLoader

Linear regression with various optimization techniques: Batch Gradient Descent, Stochastic Gradient Descent, and Mini-Batch Gradient Descent. Demonstrates how to create a simple linear regression model, define loss functions, and train the model using different optimization methods.

In [None]:
# Define the class plot_error_surfaces
class plot_error_surfaces(object):
    """
    Visualize the data space and the (w, b) parameter space while training the
    linear model ``yhat = w * x + b`` with a mean squared error loss.

    The constructor evaluates the loss on a regular grid of (w, b) values.
    A training loop records the parameters it visits with ``set_para_loss``
    and draws them with ``plot_ps`` or ``final_plot``.

    Args:
        w_range (float): The grid covers ``[-w_range, w_range]`` for w.
        b_range (float): The grid covers ``[-b_range, b_range]`` for b.
        X (torch.Tensor): Input data.
        Y (torch.Tensor): Target data.
        n_samples (int, optional): Number of grid points per axis. Default is 30.
        go (bool, optional): Whether to draw the loss surface during initialization. Default is True.
    """

    # Constructor
    def __init__(self, w_range, b_range, X, Y, n_samples=30, go=True):
        """
        Build the (w, b) grid, evaluate the MSE loss on it and optionally plot it.

        Args:
            w_range (float): The grid covers ``[-w_range, w_range]`` for w.
            b_range (float): The grid covers ``[-b_range, b_range]`` for b.
            X (torch.Tensor): Input data.
            Y (torch.Tensor): Target data.
            n_samples (int, optional): Number of grid points per axis. Default is 30.
            go (bool, optional): Whether to draw the loss surface immediately. Default is True.
        """
        # Create the parameter grid; w varies along columns, b along rows
        W = np.linspace(-w_range, w_range, n_samples)
        B = np.linspace(-b_range, b_range, n_samples)
        w, b = np.meshgrid(W, B)

        # Keep the data as NumPy arrays (original shape) for the data-space plot
        self.y = Y.detach().cpu().numpy()
        self.x = X.detach().cpu().numpy()

        # Evaluate the loss at every grid point at once: the samples are put on
        # a leading axis so they broadcast against the (n_samples, n_samples)
        # grid, then averaged away. The residual is y - (w * x + b), matching
        # the model used by the training loops.
        x_col = self.x.reshape(-1, 1, 1)
        y_col = self.y.reshape(-1, 1, 1)
        residual = y_col - (w * x_col + b)
        Z = np.mean(residual ** 2, axis=0)

        # Store variables for visualization
        self.Z = Z
        self.w = w
        self.b = b
        self.W = []      # visited values of w
        self.B = []      # visited values of b
        self.LOSS = []   # loss recorded at each visited (w, b)
        self.n = 0       # number of recorded steps

        # Generate 3D and contour plots if 'go' is True
        if go:
            plt.figure(figsize=(7.5, 5))
            plt.axes(projection='3d').plot_surface(self.w, self.b, self.Z, rstride=1, cstride=1, cmap='viridis', edgecolor='none')
            plt.title('Loss Surface')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.show()
            plt.figure()
            plt.title('Loss Surface Contour')
            plt.xlabel('w')
            plt.ylabel('b')
            plt.contour(self.w, self.b, self.Z)
            plt.show()

    # Setter
    def set_para_loss(self, W, B, loss):
        """
        Store parameter values and loss for plotting purposes.

        Args:
            W (float): Value of parameter w.
            B (float): Value of parameter b.
            loss (float): Loss value.
        """
        self.n = self.n + 1
        self.W.append(W)
        self.B.append(B)
        self.LOSS.append(loss)

    # Plot diagram
    def final_plot(self):
        """
        Plot the loss surface (wireframe and contour) with every recorded parameter update.
        """
        ax = plt.axes(projection='3d')
        ax.plot_wireframe(self.w, self.b, self.Z)
        ax.scatter(self.W, self.B, self.LOSS, c='r', marker='x', s=200, alpha=1)
        plt.figure()
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()

    # Plot diagram
    def plot_ps(self):
        """
        Plot the data space with the latest estimated line (left) and the loss
        surface contour with all recorded parameter values (right).

        Requires at least one prior call to ``set_para_loss``.
        """
        plt.subplot(121)
        plt.plot(self.x, self.y, 'ro', label="training points")
        plt.plot(self.x, self.W[-1] * self.x + self.B[-1], label="estimated line")
        plt.xlabel('x')
        plt.ylabel('y')
        plt.ylim((-10, 15))
        plt.title('Data Space Iteration: ' + str(self.n))
        plt.subplot(122)
        plt.contour(self.w, self.b, self.Z)
        plt.scatter(self.W, self.B, c='r', marker='x')
        plt.title('Loss Surface Contour Iteration: ' + str(self.n))
        plt.xlabel('w')
        plt.ylabel('b')
        plt.show()

In [None]:
# Build a noisy sample of the line y = x - 1 and plot it against the noise-free line
torch.manual_seed(1)  # fixed seed so the noise is reproducible
X = torch.arange(-3, 3, 0.1).reshape(-1, 1)  # column vector of inputs
f = X - 1                                    # noise-free targets
Y = f + 0.1 * torch.randn(X.shape)           # targets with Gaussian noise
plt.plot(X.numpy(), Y.numpy(), 'rx', label='y')
plt.plot(X.numpy(), f.numpy(), label='f')
plt.ylabel('y')
plt.xlabel('x')
plt.legend()
plt.show()

In [None]:
# Define the forward function and MSE Loss function
def forward(x):
    """Return the linear prediction for ``x`` using the notebook-level parameters ``w`` and ``b``."""
    prediction = w * x + b
    return prediction

def criterion(yhat, y):
    """Return the mean squared error between the prediction ``yhat`` and the target ``y``."""
    squared_error = (yhat - y) ** 2
    return squared_error.mean()

In [None]:
# Build the loss-surface helper for the generated data; with the default go=True
# it also draws the surface and its contour
get_surface = plot_error_surfaces(w_range=15, b_range=13, X=X, Y=Y, n_samples=30)

# Batch Gradient Descent

In [None]:
# Function for training the model using Batch Gradient Descent
def train_model(iter):
    """
    Train the model using Batch Gradient Descent.

    Every iteration uses the whole dataset (X, Y) for a single parameter
    update. The function reads and updates the notebook-level names ``w``,
    ``b``, ``lr``, ``LOSS_BGD`` and ``get_surface``.

    Args:
        iter (int): Number of iterations for training.
    """
    for _ in range(iter):

        # Make a prediction for the full dataset and compute its MSE loss
        Yhat = forward(X)
        loss = criterion(Yhat, Y)

        # Record the current parameter values and loss for visualization
        get_surface.set_para_loss(w.item(), b.item(), loss.item())
        get_surface.plot_ps()

        # Store the loss value in the list LOSS_BGD
        LOSS_BGD.append(loss.detach().numpy())

        # Compute the gradients of the loss with respect to w and b
        loss.backward()

        # Gradient step. The in-place update runs under no_grad so autograd
        # does not track it; sub_ (rather than -=) avoids rebinding the
        # notebook-level names inside this function.
        with torch.no_grad():
            w.sub_(lr * w.grad)
            b.sub_(lr * b.grad)

        # Zero out the gradients; backward() accumulates into .grad otherwise
        w.grad.zero_()
        b.grad.zero_()


In [None]:
# Step size, loss history and starting point for batch gradient descent
lr = 0.1
LOSS_BGD = []
w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-15.0, requires_grad=True)

In [None]:
# Run 10 iterations of batch gradient descent
train_model(iter=10)

# Stochastic Gradient Descent

In [None]:
def train_model_SGD(iter):
    """
    Train the model using Stochastic Gradient Descent (SGD).

    Each epoch first records the loss over the whole dataset in ``LOSS_SGD``,
    then performs one parameter update per data point. The function reads and
    updates the notebook-level names ``w``, ``b``, ``lr``, ``LOSS_SGD`` and
    ``get_surface``.

    Args:
        iter (int): Number of epochs for training.
    """
    for _ in range(iter):

        # The per-sample loss is only an approximation of the total cost, so
        # track the loss over the whole dataset once per epoch. It is used for
        # reporting only, hence no autograd graph is needed.
        with torch.no_grad():
            LOSS_SGD.append(criterion(forward(X), Y).numpy())

        # Stochastic part: one update per data point
        for x, y in zip(X, Y):

            # Prediction and loss for the current data point
            yhat = forward(x)
            loss = criterion(yhat, y)

            # Record the current parameter values and loss for visualization
            get_surface.set_para_loss(w.item(), b.item(), loss.item())

            # Compute the gradients of the loss with respect to w and b
            loss.backward()

            # Gradient step. The in-place update runs under no_grad so autograd
            # does not track it; sub_ (rather than -=) avoids rebinding the
            # notebook-level names inside this function.
            with torch.no_grad():
                w.sub_(lr * w.grad)
                b.sub_(lr * b.grad)

            # Zero out the gradients; backward() accumulates into .grad otherwise
            w.grad.zero_()
            b.grad.zero_()

        # Plot the surface and data space after each epoch
        get_surface.plot_ps()

In [None]:
# Fresh loss history, starting point and (unplotted) loss surface for the SGD run
LOSS_SGD = []
w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)
get_surface = plot_error_surfaces(w_range=15, b_range=13, X=X, Y=Y, n_samples=30, go=False)

In [None]:
# Run 10 epochs of stochastic gradient descent
train_model_SGD(iter=10)

In [None]:
# Compare the total loss recorded by batch and stochastic gradient descent
plt.plot(LOSS_BGD, label="Batch Gradient Descent")
plt.plot(LOSS_SGD, label="Stochastic Gradient Descent")
plt.ylabel('Cost/total loss')
plt.xlabel('epoch')
plt.legend()
plt.show()

# SGD with Dataset and DataLoader

In [None]:
# Define a custom Dataset class
class Data(Dataset):
    """
    Synthetic dataset of points on the line y = x - 1.

    Attributes are ``x`` (inputs as a column tensor), ``y`` (targets) and
    ``len`` (number of points).
    """

    def __init__(self):
        """Create the inputs and targets and cache the number of points."""
        # Inputs from -3 (inclusive) to 3 (exclusive) in steps of 0.1, as a column
        inputs = torch.arange(-3, 3, 0.1).reshape(-1, 1)
        self.x = inputs

        # Targets on the line y = 1 * x - 1
        self.y = inputs - 1

        # Number of data points
        self.len = len(inputs)

    def __getitem__(self, index):
        """
        Return the data point at ``index``.

        Args:
            index (int): Index of the desired data point.

        Returns:
            tuple: The ``(x, y)`` pair at that index.
        """
        sample = (self.x[index], self.y[index])
        return sample

    def __len__(self):
        """
        Return the number of data points in the dataset.

        Returns:
            int: Number of data points.
        """
        return self.len


In [None]:
# Instantiate the dataset, wrap it in a DataLoader that yields one sample per
# batch, and create a fresh (unplotted) loss surface
dataset = Data()
trainloader = DataLoader(dataset, batch_size=1)
get_surface = plot_error_surfaces(w_range=15, b_range=13, X=X, Y=Y, n_samples=30, go=False)

In [None]:
# Loss history and starting point for the DataLoader-based SGD run
LOSS_Loader = []
w = torch.tensor(-15.0, requires_grad=True)
b = torch.tensor(-10.0, requires_grad=True)

In [None]:
# Function to train the model using Stochastic Gradient Descent (SGD) with DataLoader
def train_model_DataLoader(epochs):
    """
    Train the model using Stochastic Gradient Descent (SGD) with DataLoader.

    Args:
        epochs (int): The number of epochs for training.

    The function reads and updates the notebook-level names ``w``, ``b``,
    ``lr``, ``LOSS_Loader``, ``trainloader`` and ``get_surface``.

    During each epoch:
    - The loss over the whole dataset (X, Y) is computed and stored in LOSS_Loader.
    - For each batch of data (x, y) from the DataLoader:
        - A prediction yhat is made using the forward function.
        - The loss is calculated using the criterion.
        - The plotting parameters are updated using set_para_loss.
        - The backward pass computes gradients of the loss with respect to parameters.
        - The parameters w and b are updated using the gradients and learning rate.
        - Gradients are cleared.
    - The surface and data space are plotted using get_surface.plot_ps().
    """
    for _ in range(epochs):

        # The per-batch loss is only an approximation of the total cost, so
        # track the loss over the whole dataset once per epoch. It is used for
        # reporting only, hence no autograd graph is needed.
        with torch.no_grad():
            LOSS_Loader.append(criterion(forward(X), Y).numpy())

        # Iterate over each batch of data in the DataLoader
        for x, y in trainloader:

            # Prediction and loss for the current batch
            yhat = forward(x)
            loss = criterion(yhat, y)

            # Update plotting parameters using set_para_loss
            get_surface.set_para_loss(w.item(), b.item(), loss.item())

            # Backward pass: compute gradient of the loss with respect to w and b
            loss.backward()

            # Gradient step. The in-place update runs under no_grad so autograd
            # does not track it; sub_ (rather than -=) avoids rebinding the
            # notebook-level names inside this function.
            with torch.no_grad():
                w.sub_(lr * w.grad)
                b.sub_(lr * b.grad)

            # Clear gradients; backward() accumulates into .grad otherwise
            w.grad.zero_()
            b.grad.zero_()

        # Plot surface and data space after each epoch
        get_surface.plot_ps()


In [None]:
# Run 10 epochs of SGD driven by the DataLoader
train_model_DataLoader(epochs=10)

In [None]:
# Compare the total loss recorded by batch gradient descent and DataLoader-based SGD
plt.plot(LOSS_BGD, label="Batch Gradient Descent")
plt.plot(LOSS_Loader, label="Stochastic Gradient Descent with DataLoader")
plt.ylabel('Cost/total loss')
plt.xlabel('epoch')
plt.legend()
plt.show()