In [163]:
import numpy as np

In [199]:
def gradient_descent(X, y, weights, learning_rate, n_iterations, batch_size=1, method='batch'):
    """Fit linear-model weights by gradient descent on the mean squared error.

    Parameters
    ----------
    X : 2-D array
        One row per sample, one column per feature.
    y : array
        Target values, indexed by row in step with ``X``.
    weights : array
        Starting weights, one per column of ``X``. The array passed in is
        not modified; updated weights are returned as a new array.
    learning_rate : float
        Step size applied to every gradient update.
    n_iterations : int
        Number of passes of the outer loop. ``'batch'`` performs one update
        per pass; ``'stochastic'`` and ``'mini_batch'`` sweep the rows in
        their stored order, so a single pass performs several updates.
    batch_size : int, default 1
        Rows per update. Only used when ``method='mini_batch'``.
    method : {'batch', 'stochastic', 'mini_batch'}, default 'batch'
        Which update schedule to use.

    Returns
    -------
    weights : array
        The weights after the last update.
    loss_history : list
        Mean squared error over the full data set, recorded once at the end
        of every pass.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names, or if
        ``method='mini_batch'`` and ``batch_size`` is smaller than 1.
    """
    valid_methods = ('batch', 'stochastic', 'mini_batch')
    if method not in valid_methods:
        # Previously an unknown method fell through every branch and the
        # function returned the untouched starting weights without warning.
        raise ValueError(f"method must be one of {valid_methods}, got {method!r}")
    if method == 'mini_batch' and batch_size < 1:
        # A negative step makes range() empty, i.e. no updates at all.
        raise ValueError("batch_size must be >= 1 when method='mini_batch'")

    # Unpacking also rejects inputs that are not 2-D.
    m, _n_features = X.shape
    loss_history = []

    for _ in range(n_iterations):
        if method == 'batch':
            # One update from the gradient over every row.
            weights = weights - learning_rate * _mse_gradient(X, y, weights)

        elif method == 'stochastic':
            # One update per row; slicing keeps the row 2-D so the same
            # matrix expressions work for a single sample.
            for i in range(m):
                grad = _mse_gradient(X[i:i + 1], y[i:i + 1], weights)
                weights = weights - learning_rate * grad

        else:  # 'mini_batch'
            for start in range(0, m, batch_size):
                end = start + batch_size
                # The last slice may hold fewer than batch_size rows; the
                # helper averages over the rows actually present.
                grad = _mse_gradient(X[start:end], y[start:end], weights)
                weights = weights - learning_rate * grad

        # Track progress on the full data set, whatever the update schedule.
        mse = np.mean((X @ weights - y) ** 2)
        loss_history.append(mse)

    return weights, loss_history


def _mse_gradient(X_part, y_part, weights):
    """Return the gradient of the mean squared error over the given rows."""
    residual = X_part @ weights - y_part
    # Normalise by the number of rows in this slice, not a nominal batch size.
    return (2 / X_part.shape[0]) * (X_part.T @ residual)

In [200]:
# Toy regression set: feature x = 1..4 paired with a constant column of ones,
# and targets that follow y = x + 1.
X = np.array([[x, 1] for x in range(1, 5)])
y = X[:, 0] + 1

In [201]:
# Hyperparameters shared by the runs below
batch_size = 2          # rows per update in the mini-batch run
n_iterations = 1_000    # passes of the outer loop
learning_rate = 1e-2    # step size for every update

In [202]:
# Start every run from the all-zero weight vector (one entry per column of X)
weights = np.zeros((X.shape[1],), dtype=np.float64)

In [203]:
# Run 1: full-batch gradient descent (one update per iteration)
final_weights, loss_history = gradient_descent(
    X,
    y,
    weights,
    learning_rate=learning_rate,
    n_iterations=n_iterations,
    method='batch',
)

In [204]:
# Weights from the batch run; the sample data follow y = x + 1, so the target is [1, 1].
final_weights

array([1.01003164, 0.97050576])

In [205]:
# Full-data MSE recorded after each batch iteration (one entry per iteration).
loss_history

[np.float64(9.3849),
 np.float64(6.529414335),
 np.float64(4.5479490328215),
 np.float64(3.172948590866277),
 np.float64(2.2187619031279566),
 np.float64(1.556569591474333),
 np.float64(1.0969866113146056),
 np.float64(0.7779905677679329),
 np.float64(0.5565454296668182),
 np.float64(0.4027894019304045),
 np.float64(0.2960020131778773),
 np.float64(0.22180576315213096),
 np.float64(0.1702244050748014),
 np.float64(0.13433562885539707),
 np.float64(0.10933618647668059),
 np.float64(0.09189320227964906),
 np.float64(0.0796940608952122),
 np.float64(0.07113408400338564),
 np.float64(0.06509981584454479),
 np.float64(0.060818649620830924),
 np.float64(0.057754486437588626),
 np.float64(0.05553533525036772),
 np.float64(0.05390307599987616),
 np.float64(0.05267860131412101),
 np.float64(0.051737629072941195),
 np.float64(0.05099391925817851),
 np.float64(0.05038762848099248),
 np.float64(0.04987722943464386),
 np.float64(0.049433903973633384),
 np.float64(0.04903765258983151),
 np.float64(0

In [206]:
# Run 2: stochastic gradient descent (one update per row, rows visited in order)
final_weights, loss_history = gradient_descent(
    X,
    y,
    weights,
    learning_rate=learning_rate,
    n_iterations=n_iterations,
    method='stochastic',
)

In [207]:
# Weights from the stochastic run; the sample data follow y = x + 1, so the target is [1, 1].
final_weights

array([1.00000058, 0.99999813])

In [208]:
# Full-data MSE recorded after each full sweep over the rows in the stochastic run.
loss_history

[np.float64(3.0073391435022336),
 np.float64(0.721859199975591),
 np.float64(0.21253068446745296),
 np.float64(0.0934554020618747),
 np.float64(0.06277698415275691),
 np.float64(0.05330654341648268),
 np.float64(0.04944350576386489),
 np.float64(0.04728304798761002),
 np.float64(0.045729078271671324),
 np.float64(0.04442752016221751),
 np.float64(0.043247697435767465),
 np.float64(0.042136287662137964),
 np.float64(0.04107002194097512),
 np.float64(0.04003823142063254),
 np.float64(0.039035765315907286),
 np.float64(0.038059947850268935),
 np.float64(0.0371092299271218),
 np.float64(0.036182582381656454),
 np.float64(0.03527922080782551),
 np.float64(0.03439848021524398),
 np.float64(0.033539757735584035),
 np.float64(0.032702486312035),
 np.float64(0.03188612251379361),
 np.float64(0.031090140795957386),
 np.float64(0.030314030703664806),
 np.float64(0.029557295423671765),
 np.float64(0.028819450954906768),
 np.float64(0.02810002556560521),
 np.float64(0.02739855938528224),
 np.float6

In [209]:
# Run 3: mini-batch gradient descent (one update per slice of batch_size rows)
final_weights, loss_history = gradient_descent(
    X,
    y,
    weights,
    learning_rate=learning_rate,
    n_iterations=n_iterations,
    batch_size=batch_size,
    method='mini_batch',
)
final_weights

array([1.0003804 , 0.99883421])

In [210]:
# Full-data MSE recorded after each full sweep over the mini-batches.
loss_history

[np.float64(6.368262934999999),
 np.float64(3.0268837414725356),
 np.float64(1.458835106291171),
 np.float64(0.7212012312210828),
 np.float64(0.3729472600376114),
 np.float64(0.2076212018747166),
 np.float64(0.1284696955368972),
 np.float64(0.09007391062196851),
 np.float64(0.07106181834900215),
 np.float64(0.06134218501235354),
 np.float64(0.056126954740208),
 np.float64(0.053128667535188974),
 np.float64(0.05124404680691862),
 np.float64(0.049934016355742405),
 np.float64(0.04893059994704426),
 np.float64(0.04809760871770513),
 np.float64(0.04736392348485596),
 np.float64(0.04669120869179433),
 np.float64(0.04605806705114178),
 np.float64(0.0454521178460998),
 np.float64(0.0448659428225628),
 np.float64(0.04429495195144629),
 np.float64(0.043736223047576905),
 np.float64(0.0431878486856926),
 np.float64(0.04264855569322255),
 np.float64(0.042117476164565605),
 np.float64(0.04159400569147181),
 np.float64(0.04107771349919976),
 np.float64(0.04056828441250071),
 np.float64(0.0400654808