In [3]:
import numpy as np
import matplotlib.pyplot as plt

In [11]:
def batch_gradient_descent(x, y, learning_rate, num_iterations):
    """Fit linear-regression weights with full-batch gradient descent.

    Every iteration uses all rows of ``x`` to evaluate the gradient
    ``x.T @ (x @ theta - y) / n_samples`` and then moves ``theta`` one step
    against it.

    Parameters
    ----------
    x : ndarray of shape (n_samples, n_features)
        Design matrix.
    y : ndarray of shape (n_samples,)
        Target values.
    learning_rate : float
        Step size applied to the gradient.
    num_iterations : int
        Number of gradient steps to take.

    Returns
    -------
    ndarray of shape (n_features,)
        The weights after ``num_iterations`` updates, starting from zeros.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    weights = np.zeros(n_cols)
    for _ in range(num_iterations):
        # Prediction error of the current weights on the whole data set.
        residual = np.dot(x, weights) - y
        mean_gradient = np.dot(x.T, residual) / n_rows
        weights = weights - learning_rate * mean_gradient
    return weights

def stochastic_gradient_descent(x, y, learning_rate, num_iterations):
    """Fit linear-regression weights with stochastic gradient descent.

    Each of the ``num_iterations`` passes visits every row of ``x`` exactly
    once, in a fresh random order drawn from NumPy's global random state, and
    updates the weights after every single row.

    Parameters
    ----------
    x : ndarray of shape (n_samples, n_features)
        Design matrix.
    y : ndarray of shape (n_samples,)
        Target values.
    learning_rate : float
        Step size applied to each per-sample gradient.
    num_iterations : int
        Number of full passes over the data.

    Returns
    -------
    ndarray of shape (n_features,)
        The weights after all passes, starting from zeros.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    weights = np.zeros(n_cols)
    for _ in range(num_iterations):
        visit_order = np.random.permutation(n_rows)
        for row in visit_order:
            sample = x[row]
            # Prediction error for this single observation.
            residual = np.dot(sample, weights) - y[row]
            sample_gradient = np.dot(sample.T, residual)
            weights = weights - learning_rate * sample_gradient
    return weights

def mini_batch_gradient_descent(x, y, learning_rate, num_iterations, batch_size):
    """Fit linear-regression weights with mini-batch gradient descent.

    Each of the ``num_iterations`` passes shuffles the rows (using NumPy's
    global random state), walks through them in consecutive slices of at most
    ``batch_size`` rows, and updates the weights once per slice using the
    gradient averaged over that slice.

    Parameters
    ----------
    x : ndarray of shape (n_samples, n_features)
        Design matrix.
    y : ndarray of shape (n_samples,)
        Target values.
    learning_rate : float
        Step size applied to each mini-batch gradient.
    num_iterations : int
        Number of full passes over the data.
    batch_size : int
        Maximum number of rows per mini-batch; must be at least 1.

    Returns
    -------
    ndarray of shape (n_features,)
        The weights after all passes, starting from zeros.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make the batch loop empty and silently return
        # the all-zero initial weights.
        raise ValueError("batch_size must be at least 1")

    samples = x.shape[0]
    features = x.shape[1]

    theta = np.zeros(features)
    for _ in range(num_iterations):
        indices = np.random.permutation(samples)
        x_shuffled = x[indices]
        y_shuffled = y[indices]
        for start in range(0, samples, batch_size):
            x_batch = x_shuffled[start:start + batch_size]
            y_batch = y_shuffled[start:start + batch_size]
            # Average over the rows actually present: the last slice is shorter
            # than batch_size whenever samples is not a multiple of it.
            rows_in_batch = x_batch.shape[0]
            residual = np.dot(x_batch, theta) - y_batch
            gradient = np.dot(x_batch.T, residual) / rows_in_batch
            theta = theta - learning_rate * gradient
    return theta

In [12]:
# Seed NumPy's global generator so that the synthetic data and the shuffling
# done inside the stochastic / mini-batch solvers (which draw from the same
# global state) are reproducible on Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Synthetic regression problem: 100 samples with 2 features drawn uniformly
# from [0, 10); targets follow the true weights plus Gaussian noise (std 0.5).
TRUE_THETA = np.array([1.5, 2.5])
x = np.random.rand(100, 2) * 10
y = np.dot(x, TRUE_THETA) + np.random.randn(100) * 0.5

# The learning rate and the number of iterations impact the theta values.
# NOTE: the features are not rescaled; in a previously recorded run the two
# largest rates (0.05 and 0.1) diverged to huge values / nan and emitted
# RuntimeWarnings, while the smaller rates recovered roughly [1.5, 2.5].
learning_rates = [0.001, 0.005, 0.01, 0.05, 0.1]
num_iterations = 1000
MINI_BATCH_SIZE = 2

batch_thetas = []
stochastic_gradient_descent_thetas = []
mini_batch_thetas = []

# One row of results per learning rate, for each of the three solvers.
for rate in learning_rates:
    batch_theta = batch_gradient_descent(x, y, rate, num_iterations)
    stochastic_theta = stochastic_gradient_descent(x, y, rate, num_iterations)
    mini_batch_theta = mini_batch_gradient_descent(
        x, y, rate, num_iterations, MINI_BATCH_SIZE
    )
    batch_thetas.append(batch_theta)
    stochastic_gradient_descent_thetas.append(stochastic_theta)
    mini_batch_thetas.append(mini_batch_theta)

# Stack into (len(learning_rates), 2) arrays: row i belongs to learning_rates[i].
batch_thetas = np.array(batch_thetas)
stochastic_gradient_descent_thetas = np.array(stochastic_gradient_descent_thetas)
mini_batch_thetas = np.array(mini_batch_thetas)

print(batch_thetas)
print()
print(stochastic_gradient_descent_thetas)
print()
print(mini_batch_thetas)


  theta = theta - learning_rate * gradient
  theta = theta - learning_rate * gradient
  theta = theta - learning_rate * gradient


[[ 1.48998813e+000  2.49334199e+000]
 [ 1.48958918e+000  2.49376370e+000]
 [ 1.48958918e+000  2.49376370e+000]
 [-1.97061780e+288 -1.86425333e+288]
 [             nan              nan]]

[[1.49002204 2.49532037]
 [1.48661379 2.49162792]
 [1.49460333 2.48871701]
 [       nan        nan]
 [       nan        nan]]

[[1.48697153 2.49277746]
 [1.48990103 2.48779423]
 [1.47222948 2.49023523]
 [       nan        nan]
 [       nan        nan]]
