In [48]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from numpy import linalg as LA
from sklearn.preprocessing import StandardScaler
import time

In [3]:
# Load the Florida county data set (whitespace-separated text file).
# `sep=r"\s+"` is the supported spelling of the deprecated
# `delim_whitespace=True` option.
data = pd.read_csv('https://stat4ds.rwth-aachen.de/data/Florida.dat', sep=r'\s+')
print(data.head())

     County  Crime  Income    HS  Urban
0   ALACHUA    104    22.1  82.7   73.2
1     BAKER     20    25.8  64.1   21.5
2       BAY     64    24.7  74.7   85.0
3  BRADFORD     50    24.6  65.0   23.2
4   BREVARD     64    30.5  82.3   91.9


In [53]:
# Response as an (m, 1) column vector.
y = data['Crime'].values[:, np.newaxis]

# Standardize the two predictors, then prepend a column of ones so the
# first coefficient acts as the intercept (the intercept column itself is
# never scaled).
scaler = StandardScaler()
X = scaler.fit_transform(data[['Income', 'HS']].values)
X = np.hstack([np.ones((len(X), 1)), X])

In [54]:
# Gradient Descent 
def gd(X, y, step_size=0.001, tol=1e-6, max_iter=10000):
    """Fit linear least-squares coefficients by full-batch gradient descent.

    Starting from ``theta = 0``, repeatedly steps against the gradient
    ``(1 / m) * X.T @ (X @ theta - y)`` of the objective
    ``(1 / (2 * m)) * ||X @ theta - y||**2``.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix (include a column of ones if an intercept is wanted).
    y : array-like, shape (m,) or (m, 1)
        Response values; reshaped internally to a column vector.
    step_size : float
        Learning rate multiplying the gradient in each update.
    tol : float
        Stop as soon as the Euclidean norm of the gradient is below this.
    max_iter : int
        Upper bound on the number of parameter updates.

    Returns
    -------
    theta : numpy.ndarray, shape (n, 1)
        Coefficients at the point where the loop stopped.
    n_updates : int
        Number of parameter updates applied (equals ``max_iter`` when the
        tolerance was never reached).

    Raises
    ------
    ValueError
        If ``X`` has no rows or ``y`` does not have one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Force a column vector: with a 1-D y, `X @ theta - y` would broadcast
    # to an (m, m) matrix instead of an (m, 1) residual.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} values but X has {m} rows"
        )

    theta = np.zeros((n, 1))
    n_updates = 0

    for _ in range(max_iter):
        error = X @ theta - y
        gradient = (1 / m) * (X.T @ error)
        # Test before stepping so the returned theta is the point whose
        # gradient satisfied the tolerance, and n_updates counts exactly
        # the updates that were applied.
        if np.linalg.norm(gradient) < tol:
            break
        theta -= step_size * gradient
        n_updates += 1

    return theta, n_updates

# Time one run of batch gradient descent and report the result.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
theta_gd, iters_gd = gd(X, y, step_size=0.001, tol=1e-6, max_iter=10000)
end_time = time.perf_counter()

runtime_gd = end_time - start_time

print("GD Parameters:\n", theta_gd)
print("Iterations:", iters_gd)
print("Runtime :", runtime_gd)

GD Parameters:
 [[52.40061786]
 [ 5.07344906]
 [ 8.98569416]]
Iterations: 10000
Runtime : 0.1248772144317627


In [57]:
def sgd(X, y, step_size=0.001, tol=1e-6, max_iter=10000):
    """Fit linear least-squares coefficients by per-sample gradient descent.

    Starting from ``theta = 0``, each epoch visits the rows of ``X`` in
    their stored order (no shuffling) and, for every row ``x_i``, applies
    ``theta -= step_size * x_i.T @ (x_i @ theta - y_i)``.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix (include a column of ones if an intercept is wanted).
    y : array-like, shape (m,) or (m, 1)
        Response values; reshaped internally to a column vector.
    step_size : float
        Learning rate multiplying each single-sample gradient.
    tol : float
        Stop as soon as one single-sample gradient has Euclidean norm
        below this value.
    max_iter : int
        Upper bound on the number of epochs (passes over the rows).

    Returns
    -------
    theta : numpy.ndarray, shape (n, 1)
        Coefficients after the last update.
    epochs : int
        Number of epochs started, including the one in which the
        tolerance was reached.

    Raises
    ------
    ValueError
        If ``X`` has no rows or ``y`` does not have one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m, n = X.shape
    if m == 0:
        # Without any sample no gradient is ever computed, so there is
        # nothing to test for convergence.
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} values but X has {m} rows"
        )

    theta = np.zeros((n, 1))
    epochs = 0
    converged = False

    for _ in range(max_iter):
        for i in range(m):
            # Slices keep the row as (1, n) and the target as (1, 1).
            xi = X[i:i + 1, :]
            error = xi @ theta - y[i:i + 1, :]
            gradient = xi.T @ error
            theta -= step_size * gradient
            if np.linalg.norm(gradient) < tol:
                converged = True
                break
        epochs += 1
        if converged:
            break

    return theta, epochs

# Time one run of stochastic gradient descent and report the result.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
theta_sgd, iters_sgd = sgd(X, y, step_size=0.001, tol=1e-6, max_iter=10000)
end_time = time.perf_counter()

runtime_sgd = end_time - start_time

print("SGD:\n", theta_sgd)
print("Iterations:", iters_sgd)
print("Runtime :", runtime_sgd)

SGD:
 [[52.30504008]
 [ 4.78878591]
 [ 9.26698609]]
Iterations: 10000
Runtime : 11.29434084892273


In [55]:
def gdm(X, y, step_size=0.001, tol=1e-6, max_iter=10000, beta=0.9):
    """Fit linear least-squares coefficients by gradient descent with momentum.

    Starting from ``theta = 0`` and ``velocity = 0``, each iteration computes
    the full-batch gradient ``g = (1 / m) * X.T @ (X @ theta - y)`` and applies

        velocity = beta * velocity + step_size * g
        theta   -= velocity

    Parameters
    ----------
    X : array-like, shape (m, n)
        Design matrix (include a column of ones if an intercept is wanted).
    y : array-like, shape (m,) or (m, 1)
        Response values; reshaped internally to a column vector.
    step_size : float
        Learning rate multiplying the gradient inside the velocity update.
    tol : float
        Stop as soon as the Euclidean norm of the gradient is below this.
    max_iter : int
        Upper bound on the number of parameter updates.
    beta : float
        Momentum coefficient: the fraction of the previous velocity carried
        into the next step.

    Returns
    -------
    theta : numpy.ndarray, shape (n, 1)
        Coefficients at the point where the loop stopped.
    n_updates : int
        Number of parameter updates applied (equals ``max_iter`` when the
        tolerance was never reached).

    Raises
    ------
    ValueError
        If ``X`` has no rows or ``y`` does not have one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    # Force a column vector: with a 1-D y, `X @ theta - y` would broadcast
    # to an (m, m) matrix instead of an (m, 1) residual.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} values but X has {m} rows"
        )

    theta = np.zeros((n, 1))
    velocity = np.zeros((n, 1))
    n_updates = 0

    for _ in range(max_iter):
        error = X @ theta - y
        gradient = (1 / m) * (X.T @ error)
        # Test before stepping: otherwise the leftover velocity would push
        # theta away from the point that just satisfied the tolerance, and
        # the count would miss that final update.
        if np.linalg.norm(gradient) < tol:
            break
        velocity = beta * velocity + step_size * gradient
        theta -= velocity
        n_updates += 1

    return theta, n_updates

# Time one run of gradient descent with momentum and report the result.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
theta_gdm, iters_gdm = gdm(X, y, step_size=0.001, tol=1e-6, max_iter=10000, beta=0.9)
end_time = time.perf_counter()

runtime_gdm = end_time - start_time

print("GD w Momentum Parameters:\n", theta_gdm)
print("Iterations:", iters_gdm)
print("Runtime :", runtime_gdm)


GD w Momentum Parameters:
 [[52.40298507]
 [ 4.79226272]
 [ 9.26688073]]
Iterations: 6339
Runtime : 0.11550235748291016
