In [1]:
import numpy as np
import time

In [2]:
def generate_synthetic_data():
    X = np.random.randn(100, 2)
    y = 1 + 2 * X[:, 0] + 3*X[:, 1]
    return X, y

In [3]:
def calculating_mse(predictions, y):
    mse = np.mean((predictions - y)**2)
    return mse

In [4]:
def batch_gradient_decent(X, y, learning_rate, epochs):
    m, n = X.shape
    theta = np.zeros(n)
    start_time = time.time()
    for epochs in range(epochs):
        predictions = X.dot(theta)
        errors = predictions - y
        gradient = (1/m) * X.T.dot(errors)
        theta -= learning_rate * gradient
    end_time = time.time()
    train_time = end_time - start_time
    return theta, train_time

In [5]:
def stochastic_gradient_decent(X, y, learning_rate, epochs):
    m, n = X.shape
    theta = np.zeros(n)
    start_time = time.time()
    for epochs in range(epochs):
        for i in range(m):
            predictions = X[i, :].dot(theta)
            error = predictions - y[i]
            gradient = (1/m) * X[i, :].T.dot(error)
            theta -= learning_rate * gradient
    end_time = time.time()
    train_time = end_time - start_time
    return theta, train_time

In [6]:
def mini_batch_gradient_decent(X, y, learning_rate, epochs, batch_size):
    m, n = X.shape
    theta = np.zeros(n)
    start_time = time.time()
    for epoch in range(epochs):
        for i in range(0, m, batch_size):
            batch_X = X[i:i +batch_size, :]
            batch_y = y[i:i +batch_size]
            predictions = batch_X.dot(theta)
            errors = predictions - batch_y
            gradient = (1/batch_size) * batch_X.T.dot(errors)
            theta -= learning_rate * gradient
    end_time = time.time()
    train_time = end_time - start_time
    return theta, train_time

In [7]:
def main():
    X, y = generate_synthetic_data()
    theta_batch, training_time_batch = batch_gradient_decent(X, y, learning_rate=0.01, epochs=1000)
    mse_batch = calculating_mse(X.dot(theta_batch,), y)
    
    theta_stochastic, training_time_stoch = stochastic_gradient_decent(X, y, learning_rate=0.01, epochs=1000)
    mse_stoch = calculating_mse(X.dot(theta_stochastic,), y)
    
    theta_mini_batch, training_time_mini_batch = mini_batch_gradient_decent(X, y, learning_rate=0.01, epochs=1000, batch_size = 16)
    mse_mini_batch = calculating_mse(X.dot(theta_mini_batch,), y)
    
    print("Gradient Decent Method      | MSE |     Training Time (sec)")
    print("Batch GD                    | {}  |          {}            ".format(mse_batch, training_time_batch))
    print("Stochastic GD                    | {}  |          {}            ".format(mse_stoch, training_time_stoch))
    print("Mini Batch GD                    | {}  |          {}            ".format(mse_mini_batch, training_time_mini_batch))

In [8]:
if __name__ == "__main__":
    main()

Gradient Decent Method      | MSE |     Training Time (sec)
Batch GD                    | 0.9727138615974852  |          0.0050580501556396484            
Stochastic GD                    | 0.9727140423228863  |          0.45127034187316895            
Mini Batch GD                    | 0.9727162457652786  |          0.03677535057067871            


## using SKlearn

In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor

In [2]:
#Synthetic dataset
X, y = make_regression(n_samples = 1000, n_features=5, noise = 0.2, random_state=2)

In [3]:
y = y.reshape(-1)

In [4]:
#Batch GD
batch_gd = SGDRegressor(learning_rate='constant', eta0 = 0.01, max_iter = 100, penalty = None)
batch_gd.fit(X,y)
batch_gd_coef = batch_gd.coef_
batch_dg_intercept = batch_gd.intercept_

In [5]:
#Stochastic GD
sgd_gd = SGDRegressor(learning_rate='constant', eta0 = 0.01, max_iter = 100, penalty = None, shuffle=True)
sgd_gd.fit(X,y)
sgd_gd_coef = sgd_gd.coef_
sgd_dg_intercept = sgd_gd.intercept_

In [6]:
#Mini Batch GD
mini_batch_gd = SGDRegressor(learning_rate='constant', eta0 = 0.01, max_iter = 100, penalty = None, shuffle=True, average=True)
mini_batch_gd.fit(X,y)
mini_batch_gd_coef = mini_batch_gd.coef_
mini_batch_dg_intercept = mini_batch_gd.intercept_

In [7]:
print("Batch GD")
print("Coefficient", batch_gd_coef)
print("Intercept", batch_dg_intercept)

Batch GD
Coefficient [58.43152709 61.03051627 55.24939478 91.7721952  20.97079464]
Intercept [-0.00287888]


In [8]:
print("Stochastic GD")
print("Coefficient", sgd_gd_coef)
print("Intercept", sgd_dg_intercept)

Stochastic GD
Coefficient [58.42854908 61.04310667 55.21799773 91.78369602 20.95509972]
Intercept [-0.01290805]


In [9]:
print("Mini Batch GD")
print("Coefficient", mini_batch_gd_coef)
print("Intercept", mini_batch_dg_intercept)

Mini Batch GD
Coefficient [57.46804705 60.21223582 54.23121625 90.57071855 20.50637689]
Intercept [-0.06860011]
