In [52]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.datasets import load_diabetes

from sklearn.metrics import r2_score

In [None]:
# Load diabetes dataset (features X, target y)
X, y = load_diabetes(return_X_y=True)

In [None]:
# Split into train/test for evaluation (80/20 split)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Fit ordinary least squares linear regression (closed-form solution)
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Print evaluation metrics
print("For Linear Regression")
print("The R2 score is: ", r2_score(y_test, y_pred))
print("The cross val score is: ", np.mean(cross_val_score(lr, X, y, cv=10)))

For Linear Regression
The R2 score is:  0.4526027629719197
The cross val score is:  0.4619602420450601


In [None]:
# Show learned coefficients and intercept from closed-form model
print(lr.coef_)
print(lr.intercept_)

[  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]
151.34560453985995


In [None]:
class StochasticGD:
    # Simple stochastic gradient descent implementation
    def __init__(self, learning_rate, epochs):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = learning_rate
        self.epochs = epochs
        
    def fit(self, x_train, y_train):
        # Initialize parameters: bias as 1 and coefficients as ones
        self.intercept_ = 1
        self.coef_ = np.ones(x_train.shape[1])
        
        # Loop over epochs and single-sample updates (stochastic)
        for i in range(self.epochs):
            for j in range(x_train.shape[0]):
                idx = np.random.randint(0, x_train.shape[0])
                
                # Compute prediction for one random sample
                y_hat = self.intercept_ + np.dot(x_train[idx], self.coef_)
                # Gradient of squared error w.r.t. bias and weights
                loss_func = -2 * (y_train[idx] - y_hat)
                step_size = self.learning_rate * loss_func
                self.intercept_ = self.intercept_ - step_size
                
                loss_func_coef = -2 * np.dot((y_train[idx] - y_hat), (x_train[idx]))
                step_size_coef = self.learning_rate * loss_func_coef
                self.coef_ = self.coef_ - step_size_coef
                
        print(self.coef_)
        print(self.intercept_)
        

    def predict(self, x_test):
        # Vectorized prediction using learned parameters
        pred = np.dot(x_test, self.coef_) + self.intercept_
        return pred

In [None]:
# Train stochastic GD model
model = StochasticGD(learning_rate=0.1, epochs=10)
model.fit(x_train, y_train)

[  50.60334341 -154.4785072   446.40174712  332.27553948  -60.94330714
  -97.01255353 -202.62272568  152.47182834  332.65810914  143.01153613]
154.08559009697134


In [None]:
# Re-print closed-form model params for comparison
print(lr.coef_)
print(lr.intercept_)

[  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]
151.34560453985995


In [None]:
# Evaluate stochastic model on test set
pred2 = model.predict(x_test)
print(r2_score(y_test, pred2))

0.4539525079174145


In [None]:
class MiniBatchGD:
    # Mini-batch gradient descent implementation
    def __init__(self, learning_rate, epochs, batch_size):
        self.coef_ = None
        self.intercept_ = None
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        
    def fit(self, x_train, y_train):
        # Initialize parameters to ones/zeros
        self.coef_ = np.ones(x_train.shape[1])
        self.intercept_ = 0
        
        # Loop over epochs and batches
        for i in range(self.epochs):
            for j in range(int(x_train.shape[0] / self.batch_size)):
                # Randomly sample a minibatch (unbiased sampling)
                idx = random.sample(range(x_train.shape[0]), self.batch_size)
            
                # Compute batch predictions and gradient estimates
                yhat = self.intercept_ + np.dot(x_train[idx], self.coef_)
                loss_func = -2 * np.mean(y_train[idx] - yhat)
                step_size = self.learning_rate * loss_func
                self.intercept_ = self.intercept_ - step_size
                
                loss_func_coef = -2 * np.dot((y_train[idx] - yhat), x_train[idx]) 
                step_size_coef = self.learning_rate * loss_func_coef
                self.coef_ = self.coef_ - step_size_coef
                
        # Print parameters after training for quick check
        print(self.intercept_)
        print(self.coef_)
                
    
    def predict(self, x_test):
        # Vectorized prediction for test data
        pred = self.intercept_ + np.dot(x_test, self.coef_)
        return pred
    

In [67]:
model2 = MiniBatchGD(learning_rate=0.01, epochs=100, batch_size=32)
model2.fit(x_train, y_train)

150.895005418002
[  49.10736088 -166.1637157   477.79185832  305.59559249  -47.89716648
  -94.49919422 -206.44714151  151.22152275  346.20352146  126.51327416]


In [68]:
print(lr.coef_)
print(lr.intercept_)

[  37.90402135 -241.96436231  542.42875852  347.70384391 -931.48884588
  518.06227698  163.41998299  275.31790158  736.1988589    48.67065743]
151.34560453985995


In [69]:
pred2 = model2.predict(x_test)
print(r2_score(y_test, pred2))

0.45757544381359205


In [86]:
sgd = SGDRegressor(loss='squared_error', max_iter=120, tol=1e-3, learning_rate='constant', eta0=0.1)

In [87]:
sgd.fit(x_train, y_train)
pred3 = sgd.predict(x_test)

In [88]:
print(r2_score(y_test, pred3))
print(np.mean(cross_val_score(sgd, X, y, cv=10)))

0.4618410369838265
0.44720584575357225
