# Stochastic Gradient Descent

In [1]:
import numpy as np

# Function to perform Stochastic Gradient Descent (SGD)
# Unlike Batch GD, this method updates parameters using one sample at a time

def stochastic_gradient_descent(X, y, learning_rate=0.01, epochs=1000, *,
                                random_state=None, verbose=True):
    """Fit linear-regression weights with stochastic gradient descent.

    Each update draws one training row uniformly at random (with
    replacement) and takes a gradient step on that row's squared error.
    One epoch consists of ``m`` such updates, so within an epoch a given
    row may be visited several times or not at all.

    Args:
        X: 2-D array-like of shape (m, n). No bias column is added here;
            callers include a column of ones if they want an intercept.
        y: Array-like holding m targets. A column vector of shape (m, 1)
            is flattened so the cost does not broadcast to (m, m).
        learning_rate: Step size applied to each single-sample gradient.
        epochs: Number of passes, each made of m random-sample updates.
        random_state: ``None`` draws from NumPy's global random state
            (so ``np.random.seed`` still controls the run), an int seeds
            a private generator, and a ``np.random.RandomState`` instance
            is used as-is.
        verbose: When true, print the current cost every 100 epochs.

    Returns:
        A ``(theta, history)`` tuple. ``theta`` is the weight vector of
        shape (n,). ``history`` is a list with the full-dataset cost
        ``1/(2m) * sum((X @ theta - y) ** 2)`` recorded after every
        update, so its length is ``epochs * m``.

    Raises:
        ValueError: If X is not 2-D, has no rows, or y does not provide
            exactly one target per row of X.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (samples, features), got ndim={X.ndim}")
    m, n = X.shape  # m: number of samples, n: number of features
    if m == 0:
        raise ValueError("X must contain at least one sample")

    # Flatten so that (m, 1) targets behave exactly like (m,) targets.
    y = np.asarray(y, dtype=float).reshape(-1)
    if y.shape[0] != m:
        raise ValueError(f"y has {y.shape[0]} targets but X has {m} rows")

    # Choose the index sampler; the default keeps using the global state.
    if random_state is None:
        draw_index = np.random.randint
    elif isinstance(random_state, np.random.RandomState):
        draw_index = random_state.randint
    else:
        draw_index = np.random.RandomState(random_state).randint

    theta = np.zeros(n)  # Weights start at zero
    history = []  # Full-dataset cost after every single update
    cost_scale = 1 / (2 * m)

    for epoch in range(epochs):
        for _ in range(m):  # m updates per epoch, each on a random row
            idx = draw_index(m)
            xi = X[idx:idx + 1]  # Keep 2-D (1, n) so xi.T is a column
            residual = xi.dot(theta) - y[idx]
            theta -= learning_rate * xi.T.dot(residual)
            # The cost is evaluated on the whole dataset, not just on xi.
            cost = cost_scale * np.sum((X.dot(theta) - y) ** 2)
            history.append(cost)

        if verbose and epoch % 100 == 0:  # Progress report every 100 epochs
            print(f"Epoch {epoch}, Cost: {cost:.6f}")

    return theta, history

# Demo: fit y = 1 + 1*x with SGD on four noise-free points.
# Each row of X is [1, x]; the leading 1 is the bias (intercept) term.
X = np.array([[1, x] for x in range(1, 5)])
y = np.array([x + 1 for x in range(1, 5)])  # Targets: 2, 3, 4, 5

# Train with the default learning rate and epoch count, then show the weights.
theta, history = stochastic_gradient_descent(X, y)
print("Optimized Weights:", theta)


Epoch 0, Cost: 3.538047
Epoch 100, Cost: 0.007964
Epoch 200, Cost: 0.002440
Epoch 300, Cost: 0.000748
Epoch 400, Cost: 0.000220
Epoch 500, Cost: 0.000070
Epoch 600, Cost: 0.000021
Epoch 700, Cost: 0.000007
Epoch 800, Cost: 0.000002
Epoch 900, Cost: 0.000001
Optimized Weights: [0.99864918 1.00046231]
