In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 1. Synthetic dataset drawn from y = 4 + 3x + Gaussian noise
np.random.seed(42)  # fixed seed so every run sees the same samples
X = np.random.rand(100, 1) * 2  # uniform features in [0, 2)
noise = np.random.randn(100, 1)  # standard-normal perturbation of the targets
y = 3 * X + 4 + noise

# 2. Function to perform SGD
def sgd(X, y, learning_rate=0.1, epochs=1000):
    """Fit a linear model by stochastic gradient descent on squared error.

    Every epoch performs ``m`` single-sample updates, where each sample index
    is drawn uniformly at random (with replacement), and then records the
    mean squared error over the whole dataset. The cost is printed every
    100 epochs.

    Args:
        X: Features, shape ``(m,)`` or ``(m, n_features)``.
        y: Targets, shape ``(m,)`` or ``(m, 1)``.
        learning_rate: Constant step size applied to each sample gradient.
        epochs: Number of epochs to run.

    Returns:
        A tuple ``(theta, cost_history)``. ``theta`` has shape
        ``(n_features + 1, 1)`` with the bias term first; ``cost_history``
        is a list holding the full-dataset MSE after each epoch.

    Raises:
        ValueError: If ``X`` and ``y`` contain different numbers of samples.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    # Force a column vector: a 1-D y would broadcast against the (m, 1)
    # predictions into an (m, m) matrix and silently corrupt the cost.
    y = np.asarray(y, dtype=float).reshape(-1, 1)

    m = X.shape[0]  # number of samples
    if y.shape[0] != m:
        raise ValueError(
            f"X and y must have the same number of samples, "
            f"got {m} and {y.shape[0]}"
        )

    # Prepend a column of ones so theta[0] acts as the bias term.
    X_bias = np.c_[np.ones((m, 1)), X]

    # One weight per column of the design matrix: [bias, w_1, ..., w_n].
    theta = np.random.randn(X_bias.shape[1], 1)
    cost_history = []

    for epoch in range(epochs):
        for _ in range(m):
            rand_index = np.random.randint(m)
            # Slice (rather than index) to keep the row two-dimensional.
            xi = X_bias[rand_index:rand_index + 1]
            yi = y[rand_index:rand_index + 1]

            # Gradient of the squared error of this one sample.
            gradient = 2 * xi.T.dot(xi.dot(theta) - yi)
            theta -= learning_rate * gradient

        # Full-dataset MSE after this epoch's updates.
        predictions = X_bias.dot(theta)
        cost = np.mean((predictions - y) ** 2)
        cost_history.append(cost)

        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Cost: {cost:.4f}")

    return theta, cost_history

# 3. Fit the model using sgd's default learning rate and epoch count
theta_final, cost_history = sgd(X, y)

# 4. Learning curve: one cost value per epoch
plt.plot(cost_history)
plt.title('Training Cost')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.show()

# 5. Scatter the data and overlay the fitted line
design_matrix = np.c_[np.ones((len(X), 1)), X]  # same bias column used in training
fitted_values = design_matrix @ theta_final
plt.scatter(X, y, color='blue', label='Data')
plt.plot(X, fitted_values, color='red', label='Line')
plt.title('Linear Fit using SGD')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
