In [22]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import Lasso as SKLasso, Ridge as SKRidge, ElasticNet as SKEnet
import numpy as np
from numpy import ndarray
from enum import Enum
from matplotlib import pyplot as plt

X = np.genfromtxt('easier_data.csv', delimiter=',')
y = np.genfromtxt('label.csv', delimiter=',')
y = y.reshape(-1, 1)

X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.2, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_rest, y_rest, test_size=0.5, random_state=42)

# use standard scaler to normalize the data
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

X_train = PolynomialFeatures(degree=2, include_bias=True).fit_transform(X_train)
X_val = PolynomialFeatures(degree=2, include_bias=True).fit_transform(X_val)
X_test = PolynomialFeatures(degree=2, include_bias=True).fit_transform(X_test)

def ߜLasso(w: ndarray, Φ: ndarray, y: ndarray, λ: float) -> ndarray:
    """
    calcs the gradient of the Lasso loss function with respect to weights (w).
    """
    n, m = Φ.shape
    return 2/n * Φ.T.dot(Φ.dot(w) - y) + λ * np.sign(w) # if λ = 0, this is the gradient of the MSE loss function

def grad_lasso(Φ: ndarray, y: ndarray, α: float = 0.01, num_iter: int = 10_000, λ: float = 1) -> ndarray:
    """
    Performs gradient descent on Lasso Rregression Objective to find optimal w vector.


    Parameters:
    - Φ: ndarray, The feature matrix (Phi).
    - y: ndarray, The target values.
    - α: float, The learning rate.
    - num_iter: int, The number of training iterations.
    - λ: float, The regularization parameter.

    Returns:
    - ndarray: The optimized weights vector.
    """
    n, m = Φ.shape
    w = np.zeros((m, 1))
    for _ in range(num_iter):
        gradient = ߜLasso(w, Φ, y, λ=λ)  # gradient of lasso with respect to w
        
        #  convergence 
        if np.all(np.abs(gradient) < 1e-5) or np.isnan(gradient).any():
            break
            
       
        if np.isinf(gradient).any(): 
            raise ValueError("Gradient exploded")

        w -= α * gradient
    return w

def predict(Φ: ndarray, w: ndarray) -> ndarray:
    """
    Predicts the target values using the linear model.
    """
    return Φ.dot(w)

def mse(y: ndarray, y_hat: ndarray) -> float:
    """
    Calculates (MSE) between actual and predicted values.
    """
    return np.mean((y - y_hat)**2)


## MY GRAD DESCENT LASSO
w_lasso_gd = grad_lasso(X_train, y_train, λ=1)
pred_train = predict(X_train, w_lasso_gd)
print(f"Lasso Regression Train MSE: {mse(y_train, pred_train)}")
pred_val = predict(X_val, w_lasso_gd)
print(f"Lasso Regression Validation MSE: {mse(y_val, pred_val)}")


## SKLEARN'S LASSO
sk_poly_lasso = SKLasso(alpha=1)
sk_poly_lasso.fit(X_train,y_train.flatten()) # y is 2D, but scikit-learn expects 1D
pred_train = sk_poly_lasso.predict(X_train).reshape(-1,1)
print(f"SKLearn Lasso Train MSE: {mse(y_train, pred_train)}")
pred_val = sk_poly_lasso.predict(X_val).reshape(-1,1)
print(f"SKLearn Lasso Validation MSE: {mse(y_val, pred_val)}")

## USE VALIDATION - PICK BEST LAMBDA FOR LASSO
valid_lambdas = [0, 0.1, 1, 10] # possible lambda
best_lambda_lasso = None 
best_mse_lasso = float('inf') 
for λ in valid_lambdas:
    w_lasso_gd = grad_lasso(X_train, y_train, λ=λ) # train
    pred_val = predict(X_val, w_lasso_gd) # predict 
    mse_ = mse(y_val, pred_val)
    print(f"My Lasso Regression Validation MSE: {mse_} for lambda: {λ}")
    if mse_ < best_mse_lasso:
        best_mse_lasso = mse_
        best_lambda_lasso = λ
print(f"Best lambda: {best_lambda_lasso} with MSE: {best_mse_lasso}")

Lasso Regression Train MSE: 2.122974823830284
Lasso Regression Validation MSE: 2.36652082021916
SKLearn Lasso Train MSE: 6.831045121140056
SKLearn Lasso Validation MSE: 7.629002980184825
My Lasso Regression Validation MSE: 0.09295127686258224 for lambda: 0
My Lasso Regression Validation MSE: 0.09554581350427192 for lambda: 0.1
My Lasso Regression Validation MSE: 2.36652082021916 for lambda: 1
My Lasso Regression Validation MSE: 46.705761270943 for lambda: 10
Best lambda: 0 with MSE: 0.09295127686258224


In [23]:
def ߜRidge(w: ndarray, Φ: ndarray, y: ndarray, λ: float) -> ndarray:
    """
    Calc Gradient of the Ridge loss  with respect to weights (w).
    """
    n, m = Φ.shape
    return 2/n * Φ.T.dot(Φ.dot(w) - y) + 2 * λ * w # if λ = 0, this is the gradient of the MSE loss function

def gradient_descent_ridge(Φ: ndarray, y: ndarray, α: float = 0.01, num_iter: int = 10_000, λ: float = 1) -> ndarray:
    """
    gradient descent on Ridge Rregression Objective to find best w vector.
    """
    n, m = Φ.shape
    w = np.zeros((m, 1))
    for _ in range(num_iter):
        gradient = ߜRidge(w, Φ, y, λ=λ)  # Grad Ridge  with respect to w 
        
        # convergence 
        if np.all(np.abs(gradient) < 1e-5) or np.isnan(gradient).any():
            break
            
        
        if np.isinf(gradient).any(): 
            raise ValueError("Gradient exploded")

        w -= α * gradient
    return w

## MY GRAD DESCENT RIDGE
w_ridge_gd = gradient_descent_ridge(X_train, y_train, λ=1)
pred_train = predict(X_train, w_ridge_gd)
print(f"My Ridge Regression Train MSE: {mse(y_train, pred_train)}")
pred_val = predict(X_val, w_ridge_gd)
print(f"My Ridge Regression Validation MSE: {mse(y_val, pred_val)}")

## SKLEARNS RIDGE REGRESSION 
sk_poly_ridge = SKRidge(alpha=1)
sk_poly_ridge.fit(X_train,y_train.flatten()) # y is 2D, but scikit-learn expects 1D
pred_train = sk_poly_ridge.predict(X_train).reshape(-1,1)
print(f"SKLearn Ridge Train MSE: {mse(y_train, pred_train)}")
pred_val = sk_poly_ridge.predict(X_val).reshape(-1,1)
print(f"SKLearn Ridge Validation MSE: {mse(y_val, pred_val)}")

## PICK BEST LAMBDAS 
valid_lambdas = [0, 0.1, 1, 10] # possible values for lambda 
best_lambda_ridge = None # best lambda value
best_mse_ridge = float('inf') # best mse value
for λ in valid_lambdas:
    w_ridge_gd = gradient_descent_ridge(X_train, y_train, λ=λ) 
    pred_val = predict(X_val, w_ridge_gd) # predict 
    mse_ = mse(y_val, pred_val)
    print(f"My Ridge Regression Validation MSE: {mse_} for lambda: {λ}")
    if mse_ < best_mse_ridge:
        best_mse_ridge = mse_
        best_lambda_ridge = λ
print(f"Best lambda: {best_lambda_ridge} with MSE: {best_mse_ridge}")

My Ridge Regression Train MSE: 12.639576707863059
My Ridge Regression Validation MSE: 16.015452828238868
SKLearn Ridge Train MSE: 0.03284528040883187
SKLearn Ridge Validation MSE: 0.10389999417330299
My Ridge Regression Validation MSE: 0.09295127686258224 for lambda: 0
My Ridge Regression Validation MSE: 1.1402329224528367 for lambda: 0.1
My Ridge Regression Validation MSE: 16.015452828238868 for lambda: 1
My Ridge Regression Validation MSE: 42.66613532773383 for lambda: 10
Best lambda: 0 with MSE: 0.09295127686258224


In [24]:
def elastic_net(w: ndarray, Φ: ndarray, y: ndarray, λ: float, α: float) -> ndarray:
    """
    Calculate Gradient of the Elastic Net loss with respect to weights (w).
    """
    n, m = Φ.shape
    ridge_grad = 2/n * Φ.T.dot(Φ.dot(w) - y) + 2 * λ * w
    lasso_grad = λ * np.sign(w)
    return ridge_grad + α * lasso_grad

def gradient_descent_elastic_net(Φ: ndarray, y: ndarray, α: float = 0.01, num_iter: int = 10_000, λ: float = 1, l1_ratio: float = 0.5) -> ndarray:
    """
    Gradient descent on Elastic Net Regression Objective to find the best w vector.
    """
    n, m = Φ.shape
    w = np.zeros((m, 1))
    for _ in range(num_iter):
        gradient = elastic_net(w, Φ, y, λ, α)  # Gradient of Elastic Net with respect to w
        
        # Convergence
        if np.all(np.abs(gradient) < 1e-5) or np.isnan(gradient).any():
            break
            
        if np.isinf(gradient).any(): 
            raise ValueError("Gradient exploded")

        w -= α * gradient
    return w

# GRAD DESC ELASTIC NET
w_elastic_net_gd = gradient_descent_elastic_net(X_train, y_train, α=0.01, λ=1, l1_ratio=0.5)
pred_train = predict(X_train, w_elastic_net_gd)
print(f"My Elastic Net Regression Train MSE: {mse(y_train, pred_train)}")
pred_val = predict(X_val, w_elastic_net_gd)
print(f"My Elastic Net Regression Validation MSE: {mse(y_val, pred_val)}")

# Create an instance of the ElasticNet model
sk_elastic_net = SKEnet(alpha=1, l1_ratio=0.5)  # Set alpha (λ) and l1_ratio

sk_elastic_net.fit(X_train, y_train.flatten())  # y_train should be 1D

pred_train = sk_elastic_net.predict(X_train).reshape(-1, 1)
print(f"SKLearn Elastic Net Train MSE: {mse(y_train, pred_train)}")
pred_val = sk_elastic_net.predict(X_val).reshape(-1, 1)
print(f"SKLearn Elastic Net Validation MSE: {mse(y_val, pred_val)}")

## PICK BEST LAMBDA/ALPHA
valid_alphas = [0.1, 0.5, 1]  # possible values for alpha (λ)
valid_l1_ratios = [0.1, 0.5, 0.9]  # possible values for l1_ratio

best_alpha_elastic_net = None
best_l1_ratio_elastic_net = None
best_mse_elastic_net = float('inf')

for alpha in valid_alphas:
    for l1_ratio in valid_l1_ratios:
        sk_elastic_net = SKEnet(alpha=alpha, l1_ratio=l1_ratio)
        sk_elastic_net.fit(X_train, y_train.flatten()) 
        pred_val = sk_elastic_net.predict(X_val).reshape(-1, 1) 
        mse_val = mse(y_val, pred_val)
        print(f"Elastic Net Validation MSE: {mse_val} for alpha: {alpha} and l1_ratio: {l1_ratio}")
        
        if mse_val < best_mse_elastic_net:
            best_mse_elastic_net = mse_val
            best_alpha = alpha
            best_l1_ratio = l1_ratio

print(f"Best alpha: {best_alpha_elastic_net}, Best l1_ratio: {best_l1_ratio_elastic_net} with MSE: {best_mse_elastic_net}")


My Elastic Net Regression Train MSE: 12.683717066642759
My Elastic Net Regression Validation MSE: 15.999226190919423
SKLearn Elastic Net Train MSE: 10.59413111704921
SKLearn Elastic Net Validation MSE: 10.827746499666107
Elastic Net Validation MSE: 0.8537073116711 for alpha: 0.1 and l1_ratio: 0.1
Elastic Net Validation MSE: 0.30885498900871583 for alpha: 0.1 and l1_ratio: 0.5
Elastic Net Validation MSE: 0.17011228553396876 for alpha: 0.1 and l1_ratio: 0.9
Elastic Net Validation MSE: 7.216370675118712 for alpha: 0.5 and l1_ratio: 0.1
Elastic Net Validation MSE: 3.9952725054312244 for alpha: 0.5 and l1_ratio: 0.5
Elastic Net Validation MSE: 2.40985903418246 for alpha: 0.5 and l1_ratio: 0.9
Elastic Net Validation MSE: 14.472627966055745 for alpha: 1 and l1_ratio: 0.1
Elastic Net Validation MSE: 10.827746499666107 for alpha: 1 and l1_ratio: 0.5
Elastic Net Validation MSE: 8.447437223690004 for alpha: 1 and l1_ratio: 0.9
Best alpha: None, Best l1_ratio: None with MSE: 0.17011228553396876


In [34]:
mse_dict = {"Elastic Net": best_mse_elastic_net, "Lasso": best_mse_lasso, "Ridge": best_mse_ridge}

sorted_models = sorted(mse_dict.items(), key=lambda x: x[1])

print("Model Rankings based on MSE:")
for rank, (model, mse) in enumerate(sorted_models, start=1):
    print(f"{rank}. {model}: {mse}")

best_model, best_mse = sorted_models[0]

print(f"\nBest Model: {best_model} with MSE: {best_mse}")


Model Rankings based on MSE:
1. Lasso: 0.09295127686258224
2. Ridge: 0.09295127686258224
3. Elastic Net: 0.17011228553396876

Best Model: Lasso with MSE: 0.09295127686258224
