In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
def batch_gradient_descent(x, y, learning_rate, num_iterations):
    """Estimate linear-model weights with full-batch gradient descent.

    The weights start at zero. Every iteration evaluates the residuals
    ``np.dot(x, weights) - y`` over all rows of ``x``, forms the gradient
    ``np.dot(x.T, residuals) / x.shape[0]`` and steps against it.

    Args:
        x: 2-D array; ``x.shape[0]`` is used as the sample count and
            ``x.shape[1]`` as the number of weights.
        y: Targets subtracted from ``np.dot(x, weights)`` (presumably one
            value per row of ``x`` -- confirm against callers).
        learning_rate: Factor applied to the gradient on every step.
        num_iterations: How many update steps to perform.

    Returns:
        The weights after the last step; all zeros when ``num_iterations``
        is 0.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    weights = np.zeros(n_cols)
    for _ in range(num_iterations):
        residuals = np.dot(x, weights) - y
        mean_gradient = np.dot(x.T, residuals) / n_rows
        step = learning_rate * mean_gradient
        weights = weights - step
    return weights

def stochastic_gradient_descent(x, y, learning_rate, num_iterations):
    """Estimate linear-model weights with single-sample stochastic gradient descent.

    The weights start at zero. Every iteration draws one row index with
    ``np.random.randint`` (global NumPy RNG state), computes that row's
    residual and steps against the resulting un-averaged gradient.

    Args:
        x: 2-D array; ``x.shape[0]`` is the number of rows to draw from and
            ``x.shape[1]`` the number of weights.
        y: Targets indexed with the same row index as ``x``.
        learning_rate: Factor applied to the gradient on every step.
        num_iterations: How many single-row update steps to perform.

    Returns:
        The weights after the last step; all zeros when ``num_iterations``
        is 0.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    weights = np.zeros(n_cols)
    remaining = num_iterations
    while remaining > 0:
        remaining -= 1
        pick = np.random.randint(n_rows)
        row = x[pick]
        residual = np.dot(row, weights) - y[pick]
        # For a 2-D ndarray ``x`` the row is 1-D, so ``.T`` leaves it as is.
        row_gradient = np.dot(row.T, residual)
        weights = weights - learning_rate * row_gradient
    return weights

def mini_batch_gradient_descent(x, y, learning_rate, num_iterations, batch_size):
    """Estimate linear-model weights with mini-batch gradient descent.

    The weights start at zero. Every iteration samples ``batch_size``
    distinct row indices with ``np.random.choice(..., replace=False)``
    (global NumPy RNG state), averages the gradient over those rows and
    steps against it.

    Args:
        x: 2-D array; ``x.shape[0]`` is the number of rows to sample from
            and ``x.shape[1]`` the number of weights.
        y: Targets indexed with the same row indices as ``x``.
        learning_rate: Factor applied to the gradient on every step.
        num_iterations: How many mini-batch update steps to perform.
        batch_size: Rows drawn per step; also the divisor of the gradient.

    Returns:
        The weights after the last step; all zeros when ``num_iterations``
        is 0.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]

    weights = np.zeros(n_cols)
    for _ in range(num_iterations):
        chosen = np.random.choice(n_rows, batch_size, replace=False)
        rows = x[chosen]
        targets = y[chosen]
        residuals = np.dot(rows, weights) - targets
        mean_gradient = np.dot(rows.T, residuals) / batch_size
        weights = weights - learning_rate * mean_gradient
    return weights


In [4]:
# Seed the global NumPy RNG so that both the synthetic data below and the
# random row sampling inside the stochastic / mini-batch solvers (which use
# np.random.randint / np.random.choice) are reproducible on Restart & Run All.
SEED = 42
np.random.seed(SEED)

TRUE_THETA = np.array([1.5, 2.5])  # coefficients used to generate y
BATCH_SIZE = 2  # rows sampled per mini-batch step

# Synthetic regression problem: 100 samples, 2 features drawn uniformly from
# [0, 10), targets = x . TRUE_THETA plus Gaussian noise with std 0.5.
x = np.random.rand(100, 2) * 10
y = np.dot(x, TRUE_THETA) + np.random.randn(100) * 0.5

# The learning rate and the number of iterations impact the resulting theta values.
# In the previously recorded output the two largest rates (0.05 and 0.1)
# diverged for all three methods (huge magnitudes / nan plus runtime warnings).
learning_rates = [0.001, 0.005, 0.01, 0.05, 0.1]
num_iterations = 1000

batch_thetas = []
stochastic_gradient_descent_thetas = []
mini_batch_thetas = []

for rate in learning_rates:
    batch_theta = batch_gradient_descent(x, y, rate, num_iterations)
    stochastic_theta = stochastic_gradient_descent(x, y, rate, num_iterations)
    mini_batch_theta = mini_batch_gradient_descent(x, y, rate, num_iterations, BATCH_SIZE)
    stochastic_gradient_descent_thetas.append(stochastic_theta)
    mini_batch_thetas.append(mini_batch_theta)
    batch_thetas.append(batch_theta)

# One row per learning rate, one column per coefficient.
batch_thetas = np.array(batch_thetas)
stochastic_gradient_descent_thetas = np.array(stochastic_gradient_descent_thetas)
mini_batch_thetas = np.array(mini_batch_thetas)

print(batch_thetas)
print()
print(stochastic_gradient_descent_thetas)
print()
print(mini_batch_thetas)


[[ 1.50158068e+000  2.48951893e+000]
 [ 1.50151872e+000  2.48958592e+000]
 [ 1.50151872e+000  2.48958592e+000]
 [-2.82120345e+225 -2.60895109e+225]
 [             nan              nan]]

[[ 1.49568917e+000  2.49118223e+000]
 [ 1.48835257e+000  2.48551294e+000]
 [ 1.57225424e+000  2.51504416e+000]
 [ 4.56161528e+170 -1.89420741e+169]
 [             nan              nan]]

[[1.49325675e+000 2.48411469e+000]
 [1.51945687e+000 2.50542768e+000]
 [1.50398046e+000 2.50659858e+000]
 [3.95001608e+102 7.93223734e+101]
 [            nan             nan]]


  theta = theta - learning_rate * gradient
  theta = theta - learning_rate * gradient
  theta = theta - learning_rate * gradient
