#### Batch GD

In [1]:
import numpy as np

In [2]:
# Synthetic linear data: y = 4 + 3*x + standard-normal noise, with x uniform on [0, 2).
# Seeding first makes the draws (and every result below) reproducible.
np.random.seed(42)
X = np.random.rand(100, 1) * 2
y = 3 * X + 4 + np.random.randn(100, 1)

In [3]:
# Prepend a column of ones so theta[0] acts as the intercept.
# The row count is taken from X rather than hard-coded, so this cell keeps
# working if the number of samples generated above is changed.
X_b = np.c_[np.ones((len(X), 1)), X]

In [4]:
# Hyperparameters: gradient step size and number of full-batch updates
learning_rate, num_iterations = 0.1, 1000

In [5]:
# Random starting point for the parameters: a 2x1 column of standard-normal draws
# (one weight for the bias column, one for the feature)
theta = np.random.standard_normal(size=(2, 1))

In [6]:
# Batch Gradient Descent: every update uses the gradient of the mean squared
# error over the whole training set.
for iteration in range(num_iterations):
    # Gradient of the MSE: (2 / n) * X^T (X theta - y).
    # n is read from X_b instead of being hard-coded, so the scaling stays
    # correct if the dataset size changes.
    gradients = 2 / len(X_b) * X_b.T.dot(X_b.dot(theta) - y)

    # Step against the gradient
    theta = theta - learning_rate * gradients

In [7]:
# Report the fitted parameters: row 0 of theta multiplies the column of ones
# (intercept), row 1 multiplies the feature (slope).
for label, value in zip(("Intercept:", "Slope:"), theta):
    print(label, value)

Intercept: [4.21509616]
Slope: [2.77011339]


#### Stochastic GD

In [8]:
# Synthetic linear data: y = 4 + 3*x + standard-normal noise, with x uniform on [0, 2).
# Re-seeding here makes this section reproducible on its own.
np.random.seed(42)
X = np.random.rand(100, 1) * 2
y = 3 * X + 4 + np.random.randn(100, 1)

In [9]:
# Prepend a column of ones so theta[0] acts as the intercept.
# The row count is taken from X rather than hard-coded, so this cell keeps
# working if the number of samples generated above is changed.
X_b = np.c_[np.ones((len(X), 1)), X]

In [10]:
# Hyperparameters: step size and number of passes over the data
learning_rate, num_epochs = 0.1, 100
# Number of training instances (rows of the design matrix)
m = X_b.shape[0]

In [11]:
# Random starting point for the parameters: a 2x1 column of standard-normal draws
# (one weight for the bias column, one for the feature)
theta = np.random.standard_normal(size=(2, 1))

In [12]:
# Stochastic Gradient Descent: one parameter update per training instance
for epoch in range(num_epochs):
    # Visit the instances in a fresh random order on every pass
    for idx in np.random.permutation(m):
        # Slicing (instead of integer indexing) keeps both pieces 2-D
        xi = X_b[idx:idx+1]
        yi = y[idx:idx+1]

        # Squared-error gradient for this single instance
        residual = xi.dot(theta) - yi
        gradients = 2 * xi.T.dot(residual)

        # Step against the gradient
        theta = theta - learning_rate * gradients

In [13]:
# Report the fitted parameters: row 0 of theta multiplies the column of ones
# (intercept), row 1 multiplies the feature (slope).
for label, value in zip(("Intercept:", "Slope:"), theta):
    print(label, value)

Intercept: [4.52274662]
Slope: [2.30464594]


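Stochastic Gradient Descent computes the gradient from a single instance at a time and updates the parameters after each one. This makes every update much cheaper than in Batch Gradient Descent, but the individual gradients are noisy: with a constant learning rate the parameters keep bouncing around the minimum instead of settling on it, which is why the estimates above are less accurate than the Batch GD ones.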

#### Minibatch GD

In [14]:
# Synthetic linear data: y = 4 + 3*x + standard-normal noise, with x uniform on [0, 2).
# Re-seeding here makes this section reproducible on its own.
np.random.seed(42)
X = np.random.rand(100, 1) * 2
y = 3 * X + 4 + np.random.randn(100, 1)

In [15]:
# Prepend a column of ones so theta[0] acts as the intercept.
# The row count is taken from X rather than hard-coded, so this cell keeps
# working if the number of samples generated above is changed.
X_b = np.c_[np.ones((len(X), 1)), X]

In [16]:
# Hyperparameters: step size, passes over the data, and rows per mini-batch
learning_rate, num_epochs, batch_size = 0.1, 100, 10

# Number of training instances (rows of the design matrix)
m = X_b.shape[0]

In [17]:
# Random starting point for the parameters: a 2x1 column of standard-normal draws
# (one weight for the bias column, one for the feature)
theta = np.random.standard_normal(size=(2, 1))

In [18]:
# Mini-Batch Gradient Descent: one parameter update per mini-batch
for epoch in range(num_epochs):
    # Reshuffle on every epoch so the batches differ between passes
    shuffled_indices = np.random.permutation(m)
    X_b_shuffled = X_b[shuffled_indices]
    y_shuffled = y[shuffled_indices]

    for i in range(0, m, batch_size):
        X_batch = X_b_shuffled[i:i+batch_size]
        y_batch = y_shuffled[i:i+batch_size]

        # Mean-squared-error gradient over this mini-batch.
        # Divide by the rows actually present: the final slice is shorter
        # than batch_size whenever m is not a multiple of it, and dividing
        # by batch_size would under-scale that batch's gradient.
        gradients = 2 / len(X_batch) * X_batch.T.dot(X_batch.dot(theta) - y_batch)

        # Step against the gradient
        theta = theta - learning_rate * gradients

In [19]:
# Report the fitted parameters: row 0 of theta multiplies the column of ones
# (intercept), row 1 multiplies the feature (slope).
for label, value in zip(("Intercept:", "Slope:"), theta):
    print(label, value)

Intercept: [4.19593351]
Slope: [2.75141764]
