# LASSO with AIC/BIC/LOO-CV - Sparse signal

In [19]:
import numpy as np
from sklearn.linear_model import LassoCV, LassoLarsIC
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import LeaveOneOut
from scipy.stats import norm
import pandas as pd

# Parameters
n = 100
p_values = [10, 25, 50]
rho_values = [0, 0.25, 0.5]

# Function to generate covariance matrix
def generate_covariance_matrix(p, rho):
    return np.array([[rho ** abs(i-j) for j in range(p)] for i in range(p)])

# Function to generate beta_star for sparse signal
def generate_beta_star_sparse(p, n):
    return np.array([2 / np.sqrt(n) if j <= np.sqrt(p) else 0 for j in range(p)])

# Function to calculate sigma squared for R^2 = 0.8
def calculate_sigma_squared(X, beta_star):
    # Theoretical mean squared error when R^2 = 0.8
    mse_theoretical = np.var(np.dot(X, beta_star)) * 0.2
    return mse_theoretical

# Function to generate dataset
def generate_dataset(n, p, rho, beta_star):
    Sigma = generate_covariance_matrix(p, rho)
    X = np.random.multivariate_normal(np.zeros(p), Sigma, n)
    epsilon = np.random.normal(0, np.sqrt(calculate_sigma_squared(X, beta_star)), n)
    Y = np.dot(X, beta_star) + epsilon
    return X, Y

def calculate_beta_star_dense(p, n):
    return np.array([5 / (j * np.sqrt(n)) for j in range(1, p+1)])

In [29]:
import numpy as np
from sklearn.linear_model import LassoCV, LassoLarsIC
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import LeaveOneOut
from scipy.stats import norm

# Parameters
n = 100
p_values = [10, 25, 50]
rho_values = [0, 0.25, 0.5]

# Function to generate covariance matrix
def generate_covariance_matrix(p, rho):
    return np.array([[rho ** abs(i-j) for j in range(p)] for i in range(p)])

# Function to generate beta_star for sparse signal
def generate_beta_star_sparse(p, n):
    return np.array([2 / np.sqrt(n) if j <= np.sqrt(p) else 0 for j in range(p)])

# Function to calculate sigma squared for R^2 = 0.8
def calculate_sigma_squared(X, beta_star):
    # Theoretical mean squared error when R^2 = 0.8
    mse_theoretical = np.var(np.dot(X, beta_star)) * 0.2
    return mse_theoretical

# Function to generate dataset
def generate_dataset(n, p, rho, beta_star):
    Sigma = generate_covariance_matrix(p, rho)
    X = np.random.multivariate_normal(np.zeros(p), Sigma, n)
    epsilon = np.random.normal(0, np.sqrt(calculate_sigma_squared(X, beta_star)), n)
    Y = np.dot(X, beta_star) + epsilon
    return X, Y

# Function to perform Lasso regression with different tuning methods
def lasso_regression(X, Y, method):
    if method == "AIC" or method == "BIC":
        model = LassoLarsIC(criterion=method.lower())
    elif method == "LOO-CV":
        model = LassoCV(cv=LeaveOneOut())
    else:
        raise ValueError("Invalid tuning method")

    model.fit(X, Y)
    return model

# Initializing the results dictionary
results = {}

# Loop over different p and rho values for sparse signal
for p in p_values:
    for rho in rho_values:
        # Generate beta_star for sparse signal
        beta_star = generate_beta_star_sparse(p, n)
        # Initializing MSE for each method
        mse_values = {"AIC": [], "BIC": [], "LOO-CV": []}

        # Generate 1000 datasets and compute MSE for each method
        for _ in range(1000):
            X, Y = generate_dataset(n, p, rho, beta_star)
            for method in mse_values.keys():
                model = lasso_regression(X, Y, method)
                Y_pred = model.predict(X)
                mse = mean_squared_error(Y, Y_pred)
                mse_values[method].append(mse)

        # Compute average MSE
        for method in mse_values:
            avg_mse = np.mean(mse_values[method])
            results[(p, rho, method, "Lasso Sparse")] = avg_mse

results



{(10, 0, 'AIC', 'Lasso Sparse'): 0.028817916288540975,
 (10, 0, 'BIC', 'Lasso Sparse'): 0.030237729934784175,
 (10, 0, 'LOO-CV', 'Lasso Sparse'): 0.028833133926095676,
 (10, 0.25, 'AIC', 'Lasso Sparse'): 0.041783760271269205,
 (10, 0.25, 'BIC', 'Lasso Sparse'): 0.043554568726203446,
 (10, 0.25, 'LOO-CV', 'Lasso Sparse'): 0.04188944544642628,
 (10, 0.5, 'AIC', 'Lasso Sparse'): 0.05990435049153011,
 (10, 0.5, 'BIC', 'Lasso Sparse'): 0.06197699763843538,
 (10, 0.5, 'LOO-CV', 'Lasso Sparse'): 0.0600960649325008,
 (25, 0, 'AIC', 'Lasso Sparse'): 0.03984928283237992,
 (25, 0, 'BIC', 'Lasso Sparse'): 0.045480790585938506,
 (25, 0, 'LOO-CV', 'Lasso Sparse'): 0.04007638803938852,
 (25, 0.25, 'AIC', 'Lasso Sparse'): 0.06079213570872582,
 (25, 0.25, 'BIC', 'Lasso Sparse'): 0.06860896613269651,
 (25, 0.25, 'LOO-CV', 'Lasso Sparse'): 0.061423226630409514,
 (25, 0.5, 'AIC', 'Lasso Sparse'): 0.09428190745943214,
 (25, 0.5, 'BIC', 'Lasso Sparse'): 0.10470471723688196,
 (25, 0.5, 'LOO-CV', 'Lasso Spars

In [31]:
import pandas as pd
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results.items()]

# Create a DataFrame
df = pd.DataFrame(data_list, columns=['p', 'rho', 'Method', 'Estimator&Signal', 'Average MSE'])

df

df.to_csv("lasso_sparse.csv", index=False)

# LASSO with AIC/BIC/LOO-CV - Dense signal

In [3]:
# Function to generate beta_star for dense signal
def calculate_beta_star_dense(p, n):
    return np.array([5 / (j * np.sqrt(n)) for j in range(1, p+1)])

results1 = {}
  # Loop over different p and rho values for sparse signal
for p in p_values:
    for rho in rho_values:
        # Generate beta_star for sparse signal
        beta_star = calculate_beta_star_dense(p, n)
        # Initializing MSE for each method
        mse_values = {"AIC": [], "BIC": [], "LOO-CV": []}

        # Generate 3 datasets and compute MSE for each method
        for _ in range(1000):
            X, Y = generate_dataset(n, p, rho, beta_star)
            for method in mse_values.keys():
                model = lasso_regression(X, Y, method)
                Y_pred = model.predict(X)
                mse = mean_squared_error(Y, Y_pred)
                mse_values[method].append(mse)

        # Compute average MSE
        for method in mse_values:
            avg_mse = np.mean(mse_values[method])
            results1[(p, rho, method, "Lasso Dense")] = avg_mse

results1


{(10, 0, 'AIC', 'Lasso Dense'): 0.06843325489274954,
 (10, 0, 'BIC', 'Lasso Dense'): 0.06924546507220992,
 (10, 0, 'LOO-CV', 'Lasso Dense'): 0.06840818685957752,
 (10, 0.25, 'AIC', 'Lasso Dense'): 0.092734494895623,
 (10, 0.25, 'BIC', 'Lasso Dense'): 0.0939706431152333,
 (10, 0.25, 'LOO-CV', 'Lasso Dense'): 0.09265891400157583,
 (10, 0.5, 'AIC', 'Lasso Dense'): 0.13359343512145738,
 (10, 0.5, 'BIC', 'Lasso Dense'): 0.13536340478899114,
 (10, 0.5, 'LOO-CV', 'Lasso Dense'): 0.13351569288309795,
 (25, 0, 'AIC', 'Lasso Dense'): 0.06250763296010298,
 (25, 0, 'BIC', 'Lasso Dense'): 0.07670720135094365,
 (25, 0, 'LOO-CV', 'Lasso Dense'): 0.06261819374033475,
 (25, 0.25, 'AIC', 'Lasso Dense'): 0.08620318865614146,
 (25, 0.25, 'BIC', 'Lasso Dense'): 0.10296311032328102,
 (25, 0.25, 'LOO-CV', 'Lasso Dense'): 0.08643794070749318,
 (25, 0.5, 'AIC', 'Lasso Dense'): 0.1246597171447522,
 (25, 0.5, 'BIC', 'Lasso Dense'): 0.14236940857278718,
 (25, 0.5, 'LOO-CV', 'Lasso Dense'): 0.12503759292208885,
 (

In [26]:
data_list = [(key[0], key[1], key[2], key[3],value) for key, value in results1.items()]

# Create a DataFrame
df1 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method', 'Estimator&Signal','Average MSE'])

print(df1)

df1.to_csv('lasso_dense1.csv', index=False)

     p   rho  Method Estimator&Signal  Average MSE
0   10  0.00     AIC      Lasso Dense     0.068433
1   10  0.00     BIC      Lasso Dense     0.069245
2   10  0.00  LOO-CV      Lasso Dense     0.068408
3   10  0.25     AIC      Lasso Dense     0.092734
4   10  0.25     BIC      Lasso Dense     0.093971
5   10  0.25  LOO-CV      Lasso Dense     0.092659
6   10  0.50     AIC      Lasso Dense     0.133593
7   10  0.50     BIC      Lasso Dense     0.135363
8   10  0.50  LOO-CV      Lasso Dense     0.133516
9   25  0.00     AIC      Lasso Dense     0.062508
10  25  0.00     BIC      Lasso Dense     0.076707
11  25  0.00  LOO-CV      Lasso Dense     0.062618
12  25  0.25     AIC      Lasso Dense     0.086203
13  25  0.25     BIC      Lasso Dense     0.102963
14  25  0.25  LOO-CV      Lasso Dense     0.086438
15  25  0.50     AIC      Lasso Dense     0.124660
16  25  0.50     BIC      Lasso Dense     0.142369
17  25  0.50  LOO-CV      Lasso Dense     0.125038
18  50  0.00     AIC      Lasso

# Adaptive LASSO with AIC/BIC/LOO-CV - Sparse signal

In [4]:
from sklearn.linear_model import Lasso, LinearRegression

# Function for fitting adaptive Lasso
def adaptive_lasso_regression(X, Y, method):
    # Initial fit using ordinary least squares
    ols = LinearRegression()
    ols.fit(X, Y)
    initial_coef = ols.coef_

    # Weights for the adaptive lasso
    weights = 1 / (np.abs(initial_coef) + 1e-5)  # adding a small constant to avoid division by zero

    # Adaptive Lasso model
    if method == "AIC" or method == "BIC":
        model = LassoLarsIC(criterion=method.lower(), fit_intercept=True)
    elif method == "LOO-CV":
        model = LassoCV(alphas=1 / (n * weights), cv=LeaveOneOut(), fit_intercept=True)
    else:
        raise ValueError("Invalid tuning method")

    model.fit(X, Y)
    return model

# Initializing results for adaptive Lasso
results_adaptive_lasso = {}

# Loop over p and rho values for sparse signal and perform adaptive Lasso
for p in p_values:
    for rho in rho_values:
        # Generate beta_star for sparse signal
        beta_star_sparse = generate_beta_star_sparse(p, n)
        # Initializing MSE for each method
        mse_values_adaptive = {"AIC": [], "BIC": [], "LOO-CV": []}

        # Generate 3 datasets and compute MSE for each method
        for _ in range(1000):
            X, Y = generate_dataset(n, p, rho, beta_star_sparse)
            for method in mse_values_adaptive.keys():
                model_adaptive = adaptive_lasso_regression(X, Y, method)
                Y_pred_adaptive = model_adaptive.predict(X)
                mse_adaptive = mean_squared_error(Y, Y_pred_adaptive)
                mse_values_adaptive[method].append(mse_adaptive)

        # Compute average MSE for adaptive Lasso
        for method in mse_values_adaptive:
            avg_mse_adaptive = np.mean(mse_values_adaptive[method])
            results_adaptive_lasso[(p, rho, method, "Adaptive Lasso Sparse")] = avg_mse_adaptive

results_adaptive_lasso



{(10, 0, 'AIC', 'Adaptive Lasso Sparse'): 0.028903960317075716,
 (10, 0, 'BIC', 'Adaptive Lasso Sparse'): 0.030270764001891336,
 (10, 0, 'LOO-CV', 'Adaptive Lasso Sparse'): 0.028152115967034902,
 (10, 0.25, 'AIC', 'Adaptive Lasso Sparse'): 0.04161193924525888,
 (10, 0.25, 'BIC', 'Adaptive Lasso Sparse'): 0.04324451397727686,
 (10, 0.25, 'LOO-CV', 'Adaptive Lasso Sparse'): 0.04041414840669128,
 (10, 0.5, 'AIC', 'Adaptive Lasso Sparse'): 0.05991049289160909,
 (10, 0.5, 'BIC', 'Adaptive Lasso Sparse'): 0.062093490995061326,
 (10, 0.5, 'LOO-CV', 'Adaptive Lasso Sparse'): 0.05800851480176078,
 (25, 0, 'AIC', 'Adaptive Lasso Sparse'): 0.04008790799873899,
 (25, 0, 'BIC', 'Adaptive Lasso Sparse'): 0.04602742054451062,
 (25, 0, 'LOO-CV', 'Adaptive Lasso Sparse'): 0.03553011671026563,
 (25, 0.25, 'AIC', 'Adaptive Lasso Sparse'): 0.06105624006282663,
 (25, 0.25, 'BIC', 'Adaptive Lasso Sparse'): 0.06913259253757056,
 (25, 0.25, 'LOO-CV', 'Adaptive Lasso Sparse'): 0.0533917023695676,
 (25, 0.5, 'A

In [27]:
data_list = [(key[0], key[1], key[2], key[3],value) for key, value in results_adaptive_lasso.items()]

# Create a DataFrame
df2 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method', 'Estimator&Signal','Average MSE'])

print(df2)

df2.to_csv('adaptive_lasso_sparse.csv', index=False)

     p   rho  Method       Estimator&Signal  Average MSE
0   10  0.00     AIC  Adaptive Lasso Sparse     0.028904
1   10  0.00     BIC  Adaptive Lasso Sparse     0.030271
2   10  0.00  LOO-CV  Adaptive Lasso Sparse     0.028152
3   10  0.25     AIC  Adaptive Lasso Sparse     0.041612
4   10  0.25     BIC  Adaptive Lasso Sparse     0.043245
5   10  0.25  LOO-CV  Adaptive Lasso Sparse     0.040414
6   10  0.50     AIC  Adaptive Lasso Sparse     0.059910
7   10  0.50     BIC  Adaptive Lasso Sparse     0.062093
8   10  0.50  LOO-CV  Adaptive Lasso Sparse     0.058009
9   25  0.00     AIC  Adaptive Lasso Sparse     0.040088
10  25  0.00     BIC  Adaptive Lasso Sparse     0.046027
11  25  0.00  LOO-CV  Adaptive Lasso Sparse     0.035530
12  25  0.25     AIC  Adaptive Lasso Sparse     0.061056
13  25  0.25     BIC  Adaptive Lasso Sparse     0.069133
14  25  0.25  LOO-CV  Adaptive Lasso Sparse     0.053392
15  25  0.50     AIC  Adaptive Lasso Sparse     0.095659
16  25  0.50     BIC  Adaptive 

# Adaptive LASSO with AIC/BIC/LOO-CV - Dense signal

In [6]:
# Initializing the results dictionary for dense adaptive Lasso
results_adaptive_dense = {}

# Loop over p and rho values for sparse signal and perform adaptive Lasso
for p in p_values:
    for rho in rho_values:
        # Generate beta_star for sparse signal
        beta_star_sparse = calculate_beta_star_dense(p, n)
        # Initializing MSE for each method
        mse_values_adaptive = {"AIC": [], "BIC": [], "LOO-CV": []}

        # Generate 3 datasets and compute MSE for each method
        for _ in range(1000):
            X, Y = generate_dataset(n, p, rho, beta_star_sparse)
            for method in mse_values_adaptive.keys():
                model_adaptive = adaptive_lasso_regression(X, Y, method)
                Y_pred_adaptive = model_adaptive.predict(X)
                mse_adaptive = mean_squared_error(Y, Y_pred_adaptive)
                mse_values_adaptive[method].append(mse_adaptive)

        # Compute average MSE for adaptive Lasso
        for method in mse_values_adaptive:
            avg_mse_adaptive = np.mean(mse_values_adaptive[method])
            results_adaptive_dense[(p, rho, method, "Adaptive Lasso Dense")] = avg_mse_adaptive


results_adaptive_dense



{(10, 0, 'AIC', 'Adaptive Lasso Dense'): 0.06876307582765796,
 (10, 0, 'BIC', 'Adaptive Lasso Dense'): 0.06946550638105377,
 (10, 0, 'LOO-CV', 'Adaptive Lasso Dense'): 0.06863576132156572,
 (10, 0.25, 'AIC', 'Adaptive Lasso Dense'): 0.09249413756325811,
 (10, 0.25, 'BIC', 'Adaptive Lasso Dense'): 0.09357687414249127,
 (10, 0.25, 'LOO-CV', 'Adaptive Lasso Dense'): 0.0922574269800926,
 (10, 0.5, 'AIC', 'Adaptive Lasso Dense'): 0.12990195839800323,
 (10, 0.5, 'BIC', 'Adaptive Lasso Dense'): 0.1317102865915802,
 (10, 0.5, 'LOO-CV', 'Adaptive Lasso Dense'): 0.12930201394826327,
 (25, 0, 'AIC', 'Adaptive Lasso Dense'): 0.06224372109014154,
 (25, 0, 'BIC', 'Adaptive Lasso Dense'): 0.07707592366999252,
 (25, 0, 'LOO-CV', 'Adaptive Lasso Dense'): 0.05943629484141189,
 (25, 0.25, 'AIC', 'Adaptive Lasso Dense'): 0.085660237992829,
 (25, 0.25, 'BIC', 'Adaptive Lasso Dense'): 0.10210454171644001,
 (25, 0.25, 'LOO-CV', 'Adaptive Lasso Dense'): 0.08085046861389135,
 (25, 0.5, 'AIC', 'Adaptive Lasso D

In [28]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_adaptive_dense.items()]

# Create a DataFrame
df3 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df3)

df3.to_csv('adaptive_lasso_dense.csv', index=False)

     p   rho  Method      Estimator&Signal  Average MSE
0   10  0.00     AIC  Adaptive Lasso Dense     0.068763
1   10  0.00     BIC  Adaptive Lasso Dense     0.069466
2   10  0.00  LOO-CV  Adaptive Lasso Dense     0.068636
3   10  0.25     AIC  Adaptive Lasso Dense     0.092494
4   10  0.25     BIC  Adaptive Lasso Dense     0.093577
5   10  0.25  LOO-CV  Adaptive Lasso Dense     0.092257
6   10  0.50     AIC  Adaptive Lasso Dense     0.129902
7   10  0.50     BIC  Adaptive Lasso Dense     0.131710
8   10  0.50  LOO-CV  Adaptive Lasso Dense     0.129302
9   25  0.00     AIC  Adaptive Lasso Dense     0.062244
10  25  0.00     BIC  Adaptive Lasso Dense     0.077076
11  25  0.00  LOO-CV  Adaptive Lasso Dense     0.059436
12  25  0.25     AIC  Adaptive Lasso Dense     0.085660
13  25  0.25     BIC  Adaptive Lasso Dense     0.102105
14  25  0.25  LOO-CV  Adaptive Lasso Dense     0.080850
15  25  0.50     AIC  Adaptive Lasso Dense     0.124868
16  25  0.50     BIC  Adaptive Lasso Dense     0

# Ridge with AIC/BIC/LOO-CV - Sparse signal

In [11]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
import numpy as np


# Function to estimate the effective degrees of freedom for Ridge
def estimate_degrees_of_freedom(X, alpha):
    U, S, Vt = np.linalg.svd(X, full_matrices=False)
    d = S / (S**2 + alpha)
    return np.sum(d**2)

# Function to calculate AIC and BIC for Ridge regression
def calculate_aic_bic(Y, Y_pred, alpha, X):
    rss = np.sum((Y - Y_pred) ** 2)
    n = len(Y)
    dof = estimate_degrees_of_freedom(X, alpha)
    aic = n * np.log(rss / n) + 2 * dof
    bic = n * np.log(rss / n) + np.log(n) * dof
    return aic, bic

# Function to perform Ridge regression with AIC or BIC tuning
def ridge_regression(X, Y, alphas, tuning_method):
    best_model = None
    best_score = np.inf

    for alpha in alphas:
        model = Ridge(alpha=alpha)
        model.fit(X, Y)
        Y_pred = model.predict(X)
        aic, bic = calculate_aic_bic(Y, Y_pred, alpha, X)
        score = aic if tuning_method == "AIC" else bic

        if score < best_score:
            best_score = score
            best_model = model

    return best_model

# Alphas for Ridge grid search
alphas = np.logspace(-3, 3, 100)

# Initializing the results dictionary for sparse Ridge
results_sparse_ridge = {}

# Loop over different p and rho values for sparse Ridge
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = generate_beta_star_sparse(p, n)
        mse_values_ridge = {"AIC": [], "BIC": []}

        for _ in range(1000):
            X_ridge, Y_ridge = generate_dataset(n, p, rho, beta_star_sparse)

            for method in mse_values_ridge.keys():
                model_ridge = ridge_regression(X_ridge, Y_ridge, alphas, method)
                Y_pred_ridge = model_ridge.predict(X_ridge)
                mse_ridge = mean_squared_error(Y_ridge, Y_pred_ridge)
                mse_values_ridge[method].append(mse_ridge)

        # Compute average MSE for Ridge
        for method in mse_values_ridge:
            avg_mse_ridge = np.mean(mse_values_ridge[method])
            results_sparse_ridge[(p, rho, method, "Ridge Sparse")] = avg_mse_ridge

results_sparse_ridge



{(10, 0, 'AIC', 'Ridge Sparse'): 0.028356492357029514,
 (10, 0, 'BIC', 'Ridge Sparse'): 0.028356657464714587,
 (10, 0.25, 'AIC', 'Ridge Sparse'): 0.041035609943821556,
 (10, 0.25, 'BIC', 'Ridge Sparse'): 0.0410364707722439,
 (10, 0.5, 'AIC', 'Ridge Sparse'): 0.05821840153275188,
 (10, 0.5, 'BIC', 'Ridge Sparse'): 0.058227617065165244,
 (25, 0, 'AIC', 'Ridge Sparse'): 0.03525392055313876,
 (25, 0, 'BIC', 'Ridge Sparse'): 0.0352564612501253,
 (25, 0.25, 'AIC', 'Ridge Sparse'): 0.05376847319605619,
 (25, 0.25, 'BIC', 'Ridge Sparse'): 0.053781329403454405,
 (25, 0.5, 'AIC', 'Ridge Sparse'): 0.08296448293942331,
 (25, 0.5, 'BIC', 'Ridge Sparse'): 0.08306401251518472,
 (50, 0, 'AIC', 'Ridge Sparse'): 0.03108603955481364,
 (50, 0, 'BIC', 'Ridge Sparse'): 0.031117472479003906,
 (50, 0.25, 'AIC', 'Ridge Sparse'): 0.04814015545471513,
 (50, 0.25, 'BIC', 'Ridge Sparse'): 0.04824581768464309,
 (50, 0.5, 'AIC', 'Ridge Sparse'): 0.07762461114375446,
 (50, 0.5, 'BIC', 'Ridge Sparse'): 0.0780918850280

In [12]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_sparse_ridge.items()]

# Create a DataFrame
df4_1 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df4_1)

     p   rho Method Estimator&Signal  Average MSE
0   10  0.00    AIC     Ridge Sparse     0.028356
1   10  0.00    BIC     Ridge Sparse     0.028357
2   10  0.25    AIC     Ridge Sparse     0.041036
3   10  0.25    BIC     Ridge Sparse     0.041036
4   10  0.50    AIC     Ridge Sparse     0.058218
5   10  0.50    BIC     Ridge Sparse     0.058228
6   25  0.00    AIC     Ridge Sparse     0.035254
7   25  0.00    BIC     Ridge Sparse     0.035256
8   25  0.25    AIC     Ridge Sparse     0.053768
9   25  0.25    BIC     Ridge Sparse     0.053781
10  25  0.50    AIC     Ridge Sparse     0.082964
11  25  0.50    BIC     Ridge Sparse     0.083064
12  50  0.00    AIC     Ridge Sparse     0.031086
13  50  0.00    BIC     Ridge Sparse     0.031117
14  50  0.25    AIC     Ridge Sparse     0.048140
15  50  0.25    BIC     Ridge Sparse     0.048246
16  50  0.50    AIC     Ridge Sparse     0.077625
17  50  0.50    BIC     Ridge Sparse     0.078092


In [19]:
from sklearn.linear_model import RidgeCV

# Function to perform Ridge regression with LOO-CV
def ridge_regression_loocv(X, Y):
    model = RidgeCV(cv=LeaveOneOut(), fit_intercept=True)
    model.fit(X, Y)
    return model

# Initializing the results dictionary for sparse Ridge with LOO-CV
results_ridge_sparse_loocv = {}

# Loop over different p and rho values for sparse Ridge with LOO-CV
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = generate_beta_star_sparse(p, n)
        mse_values_ridge_loocv = []

        for _ in range(1000):
            X_ridge, Y_ridge = generate_dataset(n, p, rho, beta_star_sparse)
            model_ridge_loocv = ridge_regression_loocv(X_ridge, Y_ridge)
            Y_pred_ridge = model_ridge_loocv.predict(X_ridge)
            mse_ridge_loocv = mean_squared_error(Y_ridge, Y_pred_ridge)
            mse_values_ridge_loocv.append(mse_ridge_loocv)

        # Compute average MSE for Ridge with LOO-CV
        avg_mse_ridge_loocv = np.mean(mse_values_ridge_loocv)
        results_ridge_sparse_loocv[(p, rho, "LOO-CV", "Ridge Sparse")] = avg_mse_ridge_loocv

results_ridge_sparse_loocv



[1;30;43mStreaming output truncated to the last 5000 lines.[0m


{(10, 0, 'LOO-CV', 'Ridge Sparse'): 0.028408511443536167,
 (10, 0.25, 'LOO-CV', 'Ridge Sparse'): 0.040676255632760505,
 (10, 0.5, 'LOO-CV', 'Ridge Sparse'): 0.05831606754435595,
 (25, 0, 'LOO-CV', 'Ridge Sparse'): 0.03561544087935016,
 (25, 0.25, 'LOO-CV', 'Ridge Sparse'): 0.053443078484835024,
 (25, 0.5, 'LOO-CV', 'Ridge Sparse'): 0.08244931769317865,
 (50, 0, 'LOO-CV', 'Ridge Sparse'): 0.030862269416478516,
 (50, 0.25, 'LOO-CV', 'Ridge Sparse'): 0.04889395053305507,
 (50, 0.5, 'LOO-CV', 'Ridge Sparse'): 0.07827559428941078}

In [20]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_ridge_sparse_loocv.items()]

# Create a DataFrame
df4_2 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df4_2)

    p   rho  Method Estimator&Signal  Average MSE
0  10  0.00  LOO-CV     Ridge Sparse     0.028409
1  10  0.25  LOO-CV     Ridge Sparse     0.040676
2  10  0.50  LOO-CV     Ridge Sparse     0.058316
3  25  0.00  LOO-CV     Ridge Sparse     0.035615
4  25  0.25  LOO-CV     Ridge Sparse     0.053443
5  25  0.50  LOO-CV     Ridge Sparse     0.082449
6  50  0.00  LOO-CV     Ridge Sparse     0.030862
7  50  0.25  LOO-CV     Ridge Sparse     0.048894
8  50  0.50  LOO-CV     Ridge Sparse     0.078276


In [22]:
df4 = pd.concat([df4_1, df4_2], ignore_index=True)

print(df4)

df4.to_csv('ridge_sparse.csv', index=False)

     p   rho  Method Estimator&Signal  Average MSE
0   10  0.00     AIC     Ridge Sparse     0.028356
1   10  0.00     BIC     Ridge Sparse     0.028357
2   10  0.25     AIC     Ridge Sparse     0.041036
3   10  0.25     BIC     Ridge Sparse     0.041036
4   10  0.50     AIC     Ridge Sparse     0.058218
5   10  0.50     BIC     Ridge Sparse     0.058228
6   25  0.00     AIC     Ridge Sparse     0.035254
7   25  0.00     BIC     Ridge Sparse     0.035256
8   25  0.25     AIC     Ridge Sparse     0.053768
9   25  0.25     BIC     Ridge Sparse     0.053781
10  25  0.50     AIC     Ridge Sparse     0.082964
11  25  0.50     BIC     Ridge Sparse     0.083064
12  50  0.00     AIC     Ridge Sparse     0.031086
13  50  0.00     BIC     Ridge Sparse     0.031117
14  50  0.25     AIC     Ridge Sparse     0.048140
15  50  0.25     BIC     Ridge Sparse     0.048246
16  50  0.50     AIC     Ridge Sparse     0.077625
17  50  0.50     BIC     Ridge Sparse     0.078092
18  10  0.00  LOO-CV     Ridge 

# Ridge with AIC/BIC/LOO-CV - Dense signal

In [29]:
# Alphas for Ridge grid search
alphas = np.logspace(-3, 3, 100)

# Initializing the results dictionary for sparse Ridge
results_dense_ridge = {}

# Loop over different p and rho values for sparse Ridge
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = calculate_beta_star_dense(p, n)
        mse_values_ridge = {"AIC": [], "BIC": []}

        for _ in range(1000):
            X_ridge, Y_ridge = generate_dataset(n, p, rho, beta_star_sparse)

            for method in mse_values_ridge.keys():
                model_ridge = ridge_regression(X_ridge, Y_ridge, alphas, method)
                Y_pred_ridge = model_ridge.predict(X_ridge)
                mse_ridge = mean_squared_error(Y_ridge, Y_pred_ridge)
                mse_values_ridge[method].append(mse_ridge)

        # Compute average MSE for Ridge
        for method in mse_values_ridge:
            avg_mse_ridge = np.mean(mse_values_ridge[method])
            results_dense_ridge[(p, rho, method, "Ridge Dense")] = avg_mse_ridge

results_dense_ridge

{(10, 0, 'AIC', 'Ridge Dense'): 0.06884924226333453,
 (10, 0, 'BIC', 'Ridge Dense'): 0.0688496469113973,
 (10, 0.25, 'AIC', 'Ridge Dense'): 0.092749817367798,
 (10, 0.25, 'BIC', 'Ridge Dense'): 0.09275147668034019,
 (10, 0.5, 'AIC', 'Ridge Dense'): 0.13085160681942917,
 (10, 0.5, 'BIC', 'Ridge Dense'): 0.13086863308880195,
 (25, 0, 'AIC', 'Ridge Dense'): 0.05895463384516501,
 (25, 0, 'BIC', 'Ridge Dense'): 0.05895878296066749,
 (25, 0.25, 'AIC', 'Ridge Dense'): 0.08081414263464948,
 (25, 0.25, 'BIC', 'Ridge Dense'): 0.08083019608875602,
 (25, 0.5, 'AIC', 'Ridge Dense'): 0.11710490001875373,
 (25, 0.5, 'BIC', 'Ridge Dense'): 0.11722337970019404,
 (50, 0, 'AIC', 'Ridge Dense'): 0.03976090143783883,
 (50, 0, 'BIC', 'Ridge Dense'): 0.0398012998008116,
 (50, 0.25, 'AIC', 'Ridge Dense'): 0.05399684245630557,
 (50, 0.25, 'BIC', 'Ridge Dense'): 0.054101660325607814,
 (50, 0.5, 'AIC', 'Ridge Dense'): 0.07793806637158077,
 (50, 0.5, 'BIC', 'Ridge Dense'): 0.07836810930017557}

In [30]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_dense_ridge.items()]

# Create a DataFrame
df5_1 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df5_1)

     p   rho Method Estimator&Signal  Average MSE
0   10  0.00    AIC      Ridge Dense     0.068849
1   10  0.00    BIC      Ridge Dense     0.068850
2   10  0.25    AIC      Ridge Dense     0.092750
3   10  0.25    BIC      Ridge Dense     0.092751
4   10  0.50    AIC      Ridge Dense     0.130852
5   10  0.50    BIC      Ridge Dense     0.130869
6   25  0.00    AIC      Ridge Dense     0.058955
7   25  0.00    BIC      Ridge Dense     0.058959
8   25  0.25    AIC      Ridge Dense     0.080814
9   25  0.25    BIC      Ridge Dense     0.080830
10  25  0.50    AIC      Ridge Dense     0.117105
11  25  0.50    BIC      Ridge Dense     0.117223
12  50  0.00    AIC      Ridge Dense     0.039761
13  50  0.00    BIC      Ridge Dense     0.039801
14  50  0.25    AIC      Ridge Dense     0.053997
15  50  0.25    BIC      Ridge Dense     0.054102
16  50  0.50    AIC      Ridge Dense     0.077938
17  50  0.50    BIC      Ridge Dense     0.078368


In [31]:
# Initializing the results dictionary for sparse Ridge with LOO-CV
results_ridge_dense_loocv = {}

# Loop over different p and rho values for sparse Ridge with LOO-CV
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = calculate_beta_star_dense(p, n)
        mse_values_ridge_loocv = []

        for _ in range(1000):
            X_ridge, Y_ridge = generate_dataset(n, p, rho, beta_star_sparse)
            model_ridge_loocv = ridge_regression_loocv(X_ridge, Y_ridge)
            Y_pred_ridge = model_ridge_loocv.predict(X_ridge)
            mse_ridge_loocv = mean_squared_error(Y_ridge, Y_pred_ridge)
            mse_values_ridge_loocv.append(mse_ridge_loocv)

        # Compute average MSE for Ridge with LOO-CV
        avg_mse_ridge_loocv = np.mean(mse_values_ridge_loocv)
        results_ridge_dense_loocv[(p, rho, "LOO-CV", "Ridge Dense")] = avg_mse_ridge_loocv

results_ridge_dense_loocv


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


{(10, 0, 'LOO-CV', 'Ridge Dense'): 0.06870390835459086,
 (10, 0.25, 'LOO-CV', 'Ridge Dense'): 0.09159423380302754,
 (10, 0.5, 'LOO-CV', 'Ridge Dense'): 0.13127345619687883,
 (25, 0, 'LOO-CV', 'Ridge Dense'): 0.05871135996188535,
 (25, 0.25, 'LOO-CV', 'Ridge Dense'): 0.08086550667844816,
 (25, 0.5, 'LOO-CV', 'Ridge Dense'): 0.11453132351732757,
 (50, 0, 'LOO-CV', 'Ridge Dense'): 0.039503408438187566,
 (50, 0.25, 'LOO-CV', 'Ridge Dense'): 0.05341316653834592,
 (50, 0.5, 'LOO-CV', 'Ridge Dense'): 0.07974412584555665}

In [32]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_ridge_dense_loocv.items()]

# Create a DataFrame
df5_2 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df5_2)

    p   rho  Method Estimator&Signal  Average MSE
0  10  0.00  LOO-CV      Ridge Dense     0.068704
1  10  0.25  LOO-CV      Ridge Dense     0.091594
2  10  0.50  LOO-CV      Ridge Dense     0.131273
3  25  0.00  LOO-CV      Ridge Dense     0.058711
4  25  0.25  LOO-CV      Ridge Dense     0.080866
5  25  0.50  LOO-CV      Ridge Dense     0.114531
6  50  0.00  LOO-CV      Ridge Dense     0.039503
7  50  0.25  LOO-CV      Ridge Dense     0.053413
8  50  0.50  LOO-CV      Ridge Dense     0.079744


In [34]:
df5 = pd.concat([df5_1, df5_2], ignore_index=True)

print(df5)

df5.to_csv('ridge_dense.csv', index=False)

     p   rho  Method Estimator&Signal  Average MSE
0   10  0.00     AIC      Ridge Dense     0.068849
1   10  0.00     BIC      Ridge Dense     0.068850
2   10  0.25     AIC      Ridge Dense     0.092750
3   10  0.25     BIC      Ridge Dense     0.092751
4   10  0.50     AIC      Ridge Dense     0.130852
5   10  0.50     BIC      Ridge Dense     0.130869
6   25  0.00     AIC      Ridge Dense     0.058955
7   25  0.00     BIC      Ridge Dense     0.058959
8   25  0.25     AIC      Ridge Dense     0.080814
9   25  0.25     BIC      Ridge Dense     0.080830
10  25  0.50     AIC      Ridge Dense     0.117105
11  25  0.50     BIC      Ridge Dense     0.117223
12  50  0.00     AIC      Ridge Dense     0.039761
13  50  0.00     BIC      Ridge Dense     0.039801
14  50  0.25     AIC      Ridge Dense     0.053997
15  50  0.25     BIC      Ridge Dense     0.054102
16  50  0.50     AIC      Ridge Dense     0.077938
17  50  0.50     BIC      Ridge Dense     0.078368
18  10  0.00  LOO-CV      Ridge

# Adaptive Ridge with AIC/BIC/LOO-CV - Sparse signal

In [6]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
import warnings

# Suppress future warnings from sklearn
warnings.filterwarnings('ignore', category=FutureWarning)

# Function to perform adaptive Ridge regression
def adaptive_ridge_regression(X, Y, weights):
    # Using Ridge regression with weighted penalty
    model = Ridge(alpha=1.0, fit_intercept=True)
    model.fit(X * weights, Y)
    return model

# Function to compute AIC or BIC for Ridge regression
def compute_aic_bic(Y, Y_pred, n, p, method):
    mse = mean_squared_error(Y, Y_pred)
    rss = mse * n  # Residual sum of squares
    if method == "AIC":
        return n * np.log(rss / n) + 2 * p
    elif method == "BIC":
        return n * np.log(rss / n) + np.log(n) * p
    else:
        raise ValueError("Invalid method")

# Initializing the results dictionary for sparse adaptive Ridge
results_adaptive_ridge_sparse = {}

# Loop over different p and rho values for sparse adaptive Ridge
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = generate_beta_star_sparse(p, n)
        mse_values_adaptive_ridge = {"AIC": [], "BIC": []}

        for _ in range(1000):
            X_adaptive_ridge, Y_adaptive_ridge = generate_dataset(n, p, rho, beta_star_sparse)
            # Initial Ridge fit to obtain coefficients for adaptive weights
            initial_ridge = Ridge(alpha=1.0).fit(X_adaptive_ridge, Y_adaptive_ridge)
            initial_coefs = initial_ridge.coef_
            weights = 1 / (np.abs(initial_coefs) + 1e-3)  # Adding a small constant to avoid division by zero

            for method in mse_values_adaptive_ridge.keys():
                model_adaptive_ridge = adaptive_ridge_regression(X_adaptive_ridge, Y_adaptive_ridge, weights)
                Y_pred_adaptive_ridge = model_adaptive_ridge.predict(X_adaptive_ridge)
                mse_adaptive_ridge = mean_squared_error(Y_adaptive_ridge, Y_pred_adaptive_ridge)
                criterion = compute_aic_bic(Y_adaptive_ridge, Y_pred_adaptive_ridge, n, p, method)
                mse_values_adaptive_ridge[method].append((mse_adaptive_ridge, criterion))

        # Compute average MSE and AIC/BIC for adaptive Ridge
        for method in mse_values_adaptive_ridge:
            avg_mse, avg_criterion = np.mean(mse_values_adaptive_ridge[method], axis=0)
            results_adaptive_ridge_sparse[(p, rho, method, "Adaptive Ridge Sparse")] = (avg_mse)

results_adaptive_ridge_sparse



{(10, 0, 'AIC', 'Adaptive Ridge Sparse'): 0.13290775892485024,
 (10, 0, 'BIC', 'Adaptive Ridge Sparse'): 0.13290775892485024,
 (10, 0.25, 'AIC', 'Adaptive Ridge Sparse'): 0.19111221371208417,
 (10, 0.25, 'BIC', 'Adaptive Ridge Sparse'): 0.19111221371208417,
 (10, 0.5, 'AIC', 'Adaptive Ridge Sparse'): 0.27214178953473556,
 (10, 0.5, 'BIC', 'Adaptive Ridge Sparse'): 0.27214178953473556,
 (25, 0, 'AIC', 'Adaptive Ridge Sparse'): 0.19773620831376226,
 (25, 0, 'BIC', 'Adaptive Ridge Sparse'): 0.19773620831376226,
 (25, 0.25, 'AIC', 'Adaptive Ridge Sparse'): 0.29592737747938386,
 (25, 0.25, 'BIC', 'Adaptive Ridge Sparse'): 0.29592737747938386,
 (25, 0.5, 'AIC', 'Adaptive Ridge Sparse'): 0.45225410767483926,
 (25, 0.5, 'BIC', 'Adaptive Ridge Sparse'): 0.45225410767483926,
 (50, 0, 'AIC', 'Adaptive Ridge Sparse'): 0.26011802315864385,
 (50, 0, 'BIC', 'Adaptive Ridge Sparse'): 0.26011802315864385,
 (50, 0.25, 'AIC', 'Adaptive Ridge Sparse'): 0.3997763380468953,
 (50, 0.25, 'BIC', 'Adaptive Ridg

In [9]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_adaptive_ridge_sparse.items()]

# Create a DataFrame
df6_1 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df6_1)

     p   rho Method       Estimator&Signal  Average MSE
0   10  0.00    AIC  Adaptive Ridge Sparse     0.132908
1   10  0.00    BIC  Adaptive Ridge Sparse     0.132908
2   10  0.25    AIC  Adaptive Ridge Sparse     0.191112
3   10  0.25    BIC  Adaptive Ridge Sparse     0.191112
4   10  0.50    AIC  Adaptive Ridge Sparse     0.272142
5   10  0.50    BIC  Adaptive Ridge Sparse     0.272142
6   25  0.00    AIC  Adaptive Ridge Sparse     0.197736
7   25  0.00    BIC  Adaptive Ridge Sparse     0.197736
8   25  0.25    AIC  Adaptive Ridge Sparse     0.295927
9   25  0.25    BIC  Adaptive Ridge Sparse     0.295927
10  25  0.50    AIC  Adaptive Ridge Sparse     0.452254
11  25  0.50    BIC  Adaptive Ridge Sparse     0.452254
12  50  0.00    AIC  Adaptive Ridge Sparse     0.260118
13  50  0.00    BIC  Adaptive Ridge Sparse     0.260118
14  50  0.25    AIC  Adaptive Ridge Sparse     0.399776
15  50  0.25    BIC  Adaptive Ridge Sparse     0.399776
16  50  0.50    AIC  Adaptive Ridge Sparse     0

In [13]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import RidgeCV
from sklearn.model_selection import GridSearchCV
import numpy as np

# Function to perform adaptive Ridge regression with LOO-CV
def adaptive_ridge_regression_loocv(X, Y, weights):
    model = RidgeCV(cv=LeaveOneOut(), alphas=[1.0], fit_intercept=True)
    model.fit(X * weights, Y)
    return model

# Initializing the results dictionary for sparse adaptive Ridge with LOO-CV
results_adaptive_ridge_sparse_loocv = {}

# Loop over different p and rho values for sparse adaptive Ridge with LOO-CV
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = generate_beta_star_sparse(p, n)
        mse_values_adaptive_ridge_loocv = []

        for _ in range(1000):
            X_adaptive_ridge_loocv, Y_adaptive_ridge_loocv = generate_dataset(n, p, rho, beta_star_sparse)
            # Initial Ridge fit to obtain coefficients for adaptive weights
            initial_ridge_loocv = Ridge(alpha=1.0).fit(X_adaptive_ridge_loocv, Y_adaptive_ridge_loocv)
            initial_coefs_loocv = initial_ridge_loocv.coef_
            weights_loocv = 1 / (np.abs(initial_coefs_loocv) + 1e-3)  # Adding a small constant to avoid division by zero

            model_adaptive_ridge_loocv = adaptive_ridge_regression_loocv(X_adaptive_ridge_loocv, Y_adaptive_ridge_loocv, weights_loocv)
            Y_pred_adaptive_ridge_loocv = model_adaptive_ridge_loocv.predict(X_adaptive_ridge_loocv)
            mse_adaptive_ridge_loocv = mean_squared_error(Y_adaptive_ridge_loocv, Y_pred_adaptive_ridge_loocv)
            mse_values_adaptive_ridge_loocv.append(mse_adaptive_ridge_loocv)

        # Compute average MSE for adaptive Ridge with LOO-CV
        avg_mse_adaptive_ridge_loocv = np.mean(mse_values_adaptive_ridge_loocv)
        results_adaptive_ridge_sparse_loocv[(p, rho, "LOO-CV", "Adaptive Ridge Sparse")] = avg_mse_adaptive_ridge_loocv

results_adaptive_ridge_sparse_loocv



[1;30;43mStreaming output truncated to the last 5000 lines.[0m


{(10, 0, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.133627121569875,
 (10, 0.25, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.1909761563181386,
 (10, 0.5, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.269515658733021,
 (25, 0, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.19758144186776638,
 (25, 0.25, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.2960655497027128,
 (25, 0.5, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.45236534603479933,
 (50, 0, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.2592963418662467,
 (50, 0.25, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.3971946373312668,
 (50, 0.5, 'LOO-CV', 'Adaptive Ridge Sparse'): 0.612779548330158}

In [14]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_adaptive_ridge_sparse_loocv.items()]

# Create a DataFrame
df6_2 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df6_2)

    p   rho  Method       Estimator&Signal  Average MSE
0  10  0.00  LOO-CV  Adaptive Ridge Sparse     0.133627
1  10  0.25  LOO-CV  Adaptive Ridge Sparse     0.190976
2  10  0.50  LOO-CV  Adaptive Ridge Sparse     0.269516
3  25  0.00  LOO-CV  Adaptive Ridge Sparse     0.197581
4  25  0.25  LOO-CV  Adaptive Ridge Sparse     0.296066
5  25  0.50  LOO-CV  Adaptive Ridge Sparse     0.452365
6  50  0.00  LOO-CV  Adaptive Ridge Sparse     0.259296
7  50  0.25  LOO-CV  Adaptive Ridge Sparse     0.397195
8  50  0.50  LOO-CV  Adaptive Ridge Sparse     0.612780


In [16]:
df6 = pd.concat([df6_1, df6_2], ignore_index=True)

print(df6)

df6.to_csv('adaptive_ridge_sparse.csv')

     p   rho  Method       Estimator&Signal  Average MSE
0   10  0.00     AIC  Adaptive Ridge Sparse     0.132908
1   10  0.00     BIC  Adaptive Ridge Sparse     0.132908
2   10  0.25     AIC  Adaptive Ridge Sparse     0.191112
3   10  0.25     BIC  Adaptive Ridge Sparse     0.191112
4   10  0.50     AIC  Adaptive Ridge Sparse     0.272142
5   10  0.50     BIC  Adaptive Ridge Sparse     0.272142
6   25  0.00     AIC  Adaptive Ridge Sparse     0.197736
7   25  0.00     BIC  Adaptive Ridge Sparse     0.197736
8   25  0.25     AIC  Adaptive Ridge Sparse     0.295927
9   25  0.25     BIC  Adaptive Ridge Sparse     0.295927
10  25  0.50     AIC  Adaptive Ridge Sparse     0.452254
11  25  0.50     BIC  Adaptive Ridge Sparse     0.452254
12  50  0.00     AIC  Adaptive Ridge Sparse     0.260118
13  50  0.00     BIC  Adaptive Ridge Sparse     0.260118
14  50  0.25     AIC  Adaptive Ridge Sparse     0.399776
15  50  0.25     BIC  Adaptive Ridge Sparse     0.399776
16  50  0.50     AIC  Adaptive 

# Adaptive Ridge with AIC/BIC/LOO-CV - Dense signal

In [20]:
# Initializing the results dictionary for sparse adaptive Ridge
results_adaptive_ridge_dense = {}

# Loop over different p and rho values for sparse adaptive Ridge
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = calculate_beta_star_dense(p, n)
        mse_values_adaptive_ridge = {"AIC": [], "BIC": []}

        for _ in range(1000):
            X_adaptive_ridge, Y_adaptive_ridge = generate_dataset(n, p, rho, beta_star_sparse)
            # Initial Ridge fit to obtain coefficients for adaptive weights
            initial_ridge = Ridge(alpha=1.0).fit(X_adaptive_ridge, Y_adaptive_ridge)
            initial_coefs = initial_ridge.coef_
            weights = 1 / (np.abs(initial_coefs) + 1e-3)  # Adding a small constant to avoid division by zero

            for method in mse_values_adaptive_ridge.keys():
                model_adaptive_ridge = adaptive_ridge_regression(X_adaptive_ridge, Y_adaptive_ridge, weights)
                Y_pred_adaptive_ridge = model_adaptive_ridge.predict(X_adaptive_ridge)
                mse_adaptive_ridge = mean_squared_error(Y_adaptive_ridge, Y_pred_adaptive_ridge)
                criterion = compute_aic_bic(Y_adaptive_ridge, Y_pred_adaptive_ridge, n, p, method)
                mse_values_adaptive_ridge[method].append((mse_adaptive_ridge, criterion))

        # Compute average MSE and AIC/BIC for adaptive Ridge
        for method in mse_values_adaptive_ridge:
            avg_mse, avg_criterion = np.mean(mse_values_adaptive_ridge[method], axis=0)
            results_adaptive_ridge_dense[(p, rho, method, "Adaptive Ridge Dense")] = (avg_mse)

results_adaptive_ridge_dense



{(10, 0, 'AIC', 'Adaptive Ridge Dense'): 0.22852984724891132,
 (10, 0, 'BIC', 'Adaptive Ridge Dense'): 0.22852984724891132,
 (10, 0.25, 'AIC', 'Adaptive Ridge Dense'): 0.3240141341423454,
 (10, 0.25, 'BIC', 'Adaptive Ridge Dense'): 0.3240141341423454,
 (10, 0.5, 'AIC', 'Adaptive Ridge Dense'): 0.4829284805005878,
 (10, 0.5, 'BIC', 'Adaptive Ridge Dense'): 0.4829284805005878,
 (25, 0, 'AIC', 'Adaptive Ridge Dense'): 0.24282901405482604,
 (25, 0, 'BIC', 'Adaptive Ridge Dense'): 0.24282901405482604,
 (25, 0.25, 'AIC', 'Adaptive Ridge Dense'): 0.34675946376063527,
 (25, 0.25, 'BIC', 'Adaptive Ridge Dense'): 0.34675946376063527,
 (25, 0.5, 'AIC', 'Adaptive Ridge Dense'): 0.5218414121986094,
 (25, 0.5, 'BIC', 'Adaptive Ridge Dense'): 0.5218414121986094,
 (50, 0, 'AIC', 'Adaptive Ridge Dense'): 0.2466753819904896,
 (50, 0, 'BIC', 'Adaptive Ridge Dense'): 0.2466753819904896,
 (50, 0.25, 'AIC', 'Adaptive Ridge Dense'): 0.35205192396985946,
 (50, 0.25, 'BIC', 'Adaptive Ridge Dense'): 0.352051923

In [21]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_adaptive_ridge_dense.items()]

# Create a DataFrame
df7_1 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df7_1)

     p   rho Method      Estimator&Signal  Average MSE
0   10  0.00    AIC  Adaptive Ridge Dense     0.228530
1   10  0.00    BIC  Adaptive Ridge Dense     0.228530
2   10  0.25    AIC  Adaptive Ridge Dense     0.324014
3   10  0.25    BIC  Adaptive Ridge Dense     0.324014
4   10  0.50    AIC  Adaptive Ridge Dense     0.482928
5   10  0.50    BIC  Adaptive Ridge Dense     0.482928
6   25  0.00    AIC  Adaptive Ridge Dense     0.242829
7   25  0.00    BIC  Adaptive Ridge Dense     0.242829
8   25  0.25    AIC  Adaptive Ridge Dense     0.346759
9   25  0.25    BIC  Adaptive Ridge Dense     0.346759
10  25  0.50    AIC  Adaptive Ridge Dense     0.521841
11  25  0.50    BIC  Adaptive Ridge Dense     0.521841
12  50  0.00    AIC  Adaptive Ridge Dense     0.246675
13  50  0.00    BIC  Adaptive Ridge Dense     0.246675
14  50  0.25    AIC  Adaptive Ridge Dense     0.352052
15  50  0.25    BIC  Adaptive Ridge Dense     0.352052
16  50  0.50    AIC  Adaptive Ridge Dense     0.518725
17  50  0.

In [22]:
# Initializing the results dictionary for sparse adaptive Ridge with LOO-CV
results_adaptive_ridge_dense_loocv = {}

# Loop over different p and rho values for sparse adaptive Ridge with LOO-CV
for p in p_values:
    for rho in rho_values:
        beta_star_sparse = calculate_beta_star_dense(p, n)
        mse_values_adaptive_ridge_loocv = []

        for _ in range(1000):
            X_adaptive_ridge_loocv, Y_adaptive_ridge_loocv = generate_dataset(n, p, rho, beta_star_sparse)
            # Initial Ridge fit to obtain coefficients for adaptive weights
            initial_ridge_loocv = Ridge(alpha=1.0).fit(X_adaptive_ridge_loocv, Y_adaptive_ridge_loocv)
            initial_coefs_loocv = initial_ridge_loocv.coef_
            weights_loocv = 1 / (np.abs(initial_coefs_loocv) + 1e-3)  # Adding a small constant to avoid division by zero

            model_adaptive_ridge_loocv = adaptive_ridge_regression_loocv(X_adaptive_ridge_loocv, Y_adaptive_ridge_loocv, weights_loocv)
            Y_pred_adaptive_ridge_loocv = model_adaptive_ridge_loocv.predict(X_adaptive_ridge_loocv)
            mse_adaptive_ridge_loocv = mean_squared_error(Y_adaptive_ridge_loocv, Y_pred_adaptive_ridge_loocv)
            mse_values_adaptive_ridge_loocv.append(mse_adaptive_ridge_loocv)

        # Compute average MSE for adaptive Ridge with LOO-CV
        avg_mse_adaptive_ridge_loocv = np.mean(mse_values_adaptive_ridge_loocv)
        results_adaptive_ridge_dense_loocv[(p, rho, "LOO-CV", "Adaptive Ridge Dense")] = avg_mse_adaptive_ridge_loocv

results_adaptive_ridge_dense_loocv


[1;30;43mStreaming output truncated to the last 5000 lines.[0m


{(10, 0, 'LOO-CV', 'Adaptive Ridge Dense'): 0.22975536552568718,
 (10, 0.25, 'LOO-CV', 'Adaptive Ridge Dense'): 0.3261921549998127,
 (10, 0.5, 'LOO-CV', 'Adaptive Ridge Dense'): 0.48200031886038164,
 (25, 0, 'LOO-CV', 'Adaptive Ridge Dense'): 0.24326688298353638,
 (25, 0.25, 'LOO-CV', 'Adaptive Ridge Dense'): 0.3484308511748398,
 (25, 0.5, 'LOO-CV', 'Adaptive Ridge Dense'): 0.5207748991103942,
 (50, 0, 'LOO-CV', 'Adaptive Ridge Dense'): 0.24620892202424688,
 (50, 0.25, 'LOO-CV', 'Adaptive Ridge Dense'): 0.3504464136915196,
 (50, 0.5, 'LOO-CV', 'Adaptive Ridge Dense'): 0.5186504737189745}

In [26]:
data_list = [(key[0], key[1], key[2], key[3], value) for key, value in results_adaptive_ridge_dense_loocv.items()]

# Create a DataFrame
df7_2 = pd.DataFrame(data_list, columns=['p', 'rho', 'Method','Estimator&Signal', 'Average MSE'])

print(df7_2)

    p   rho  Method      Estimator&Signal  Average MSE
0  10  0.00  LOO-CV  Adaptive Ridge Dense     0.229755
1  10  0.25  LOO-CV  Adaptive Ridge Dense     0.326192
2  10  0.50  LOO-CV  Adaptive Ridge Dense     0.482000
3  25  0.00  LOO-CV  Adaptive Ridge Dense     0.243267
4  25  0.25  LOO-CV  Adaptive Ridge Dense     0.348431
5  25  0.50  LOO-CV  Adaptive Ridge Dense     0.520775
6  50  0.00  LOO-CV  Adaptive Ridge Dense     0.246209
7  50  0.25  LOO-CV  Adaptive Ridge Dense     0.350446
8  50  0.50  LOO-CV  Adaptive Ridge Dense     0.518650


In [25]:
df7 = pd.concat([df7_1, df7_2], ignore_index=True)

print(df7)

df7.to_csv('adaptive_ridge_dense.csv')

     p   rho  Method      Estimator&Signal  Average MSE
0   10  0.00     AIC  Adaptive Ridge Dense     0.228530
1   10  0.00     BIC  Adaptive Ridge Dense     0.228530
2   10  0.25     AIC  Adaptive Ridge Dense     0.324014
3   10  0.25     BIC  Adaptive Ridge Dense     0.324014
4   10  0.50     AIC  Adaptive Ridge Dense     0.482928
5   10  0.50     BIC  Adaptive Ridge Dense     0.482928
6   25  0.00     AIC  Adaptive Ridge Dense     0.242829
7   25  0.00     BIC  Adaptive Ridge Dense     0.242829
8   25  0.25     AIC  Adaptive Ridge Dense     0.346759
9   25  0.25     BIC  Adaptive Ridge Dense     0.346759
10  25  0.50     AIC  Adaptive Ridge Dense     0.521841
11  25  0.50     BIC  Adaptive Ridge Dense     0.521841
12  50  0.00     AIC  Adaptive Ridge Dense     0.246675
13  50  0.00     BIC  Adaptive Ridge Dense     0.246675
14  50  0.25     AIC  Adaptive Ridge Dense     0.352052
15  50  0.25     BIC  Adaptive Ridge Dense     0.352052
16  50  0.50     AIC  Adaptive Ridge Dense     0

In [33]:
df1 = pd.read_csv('/content/lasso_dense1.csv')
df2 = pd.read_csv('/content/adaptive_lasso_sparse.csv')
df3 = pd.read_csv('/content/adaptive_lasso_dense.csv')
df4 = pd.read_csv('/content/ridge_sparse.csv')
df5 = pd.read_csv('/content/ridge_dense.csv')


final = pd.concat([df,df1,df2,df3,df4,df5,df6,df7], ignore_index=True)

final

Unnamed: 0,p,rho,Method,Estimator&Signal,Average MSE
0,10,0.00,AIC,Lasso Sparse,0.028818
1,10,0.00,BIC,Lasso Sparse,0.030238
2,10,0.00,LOO-CV,Lasso Sparse,0.028833
3,10,0.25,AIC,Lasso Sparse,0.041784
4,10,0.25,BIC,Lasso Sparse,0.043555
...,...,...,...,...,...
211,25,0.25,LOO-CV,Adaptive Ridge Dense,0.348431
212,25,0.50,LOO-CV,Adaptive Ridge Dense,0.520775
213,50,0.00,LOO-CV,Adaptive Ridge Dense,0.246209
214,50,0.25,LOO-CV,Adaptive Ridge Dense,0.350446


In [34]:
final.to_csv('Final_results.csv')

In [None]:
from google.colab import files
files.download('Final_results.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [35]:
# First, let's set the 'Method' as an index and then unstack it to pivot the table.
df_pivoted = final.set_index(['p', 'rho', 'Estimator&Signal', 'Method']).unstack()

# Now, we need to flatten the MultiIndex in columns created by unstacking.
df_pivoted.columns = [f'{method} MSE' for method in df_pivoted.columns.get_level_values(1)]

# Reset the index to turn it back into columns
df_pivoted.reset_index(inplace=True)

# Let's convert the pivoted DataFrame into a dictionary.
data_dict = df_pivoted.to_dict(orient='list')

data_dict

{'p': [10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  25,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50,
  50],
 'rho': [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.25,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5,
  0.5],
 'Estimator&Signal': ['Ada

In [38]:
final_version2 = pd.DataFrame(data_dict)


Unnamed: 0,p,rho,Estimator&Signal,AIC MSE,BIC MSE,LOO-CV MSE
0,10,0.0,Adaptive Lasso Dense,0.068763,0.069466,0.068636
1,10,0.0,Adaptive Lasso Sparse,0.028904,0.030271,0.028152
2,10,0.0,Adaptive Ridge Dense,0.228530,0.228530,0.229755
3,10,0.0,Adaptive Ridge Sparse,0.132908,0.132908,0.133627
4,10,0.0,Lasso Dense,0.068433,0.069245,0.068408
...,...,...,...,...,...,...
67,50,0.5,Adaptive Ridge Sparse,0.621091,0.621091,0.612780
68,50,0.5,Lasso Dense,0.112032,0.157589,0.113290
69,50,0.5,Lasso Sparse,0.124853,0.150925,0.127259
70,50,0.5,Ridge Dense,0.077938,0.078368,0.079744


In [47]:
avg_aic_lasso_sparse = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Lasso Sparse']
avg_aic_lasso_sparse

avg_aic_lasso_dense = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Lasso Dense']
avg_aic_lasso_dense

avg_aic_addaptive_lasso_sparse = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Adaptive Lasso Sparse']
avg_aic_addaptive_lasso_sparse

avg_aic_addaptive_lasso_dense = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Adaptive Lasso Dense']
avg_aic_addaptive_lasso_dense

avg_aic_ridge_sparse = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Ridge Sparse']
avg_aic_ridge_sparse

avg_aic_ridge_dense = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Ridge Dense']
avg_aic_ridge_dense

avg_aic_addaptive_ridge_sparse = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Adaptive Ridge Sparse']
avg_aic_addaptive_ridge_sparse

avg_aic_addaptive_ridge_dense = final_version2.groupby('Estimator&Signal')['AIC MSE'].mean()['Adaptive Ridge Dense']
avg_aic_addaptive_ridge_dense


0.3627060760697293

In [48]:
print(avg_aic_lasso_sparse)
print(avg_aic_lasso_dense)
print(avg_aic_addaptive_lasso_sparse)
print(avg_aic_addaptive_lasso_dense)
print(avg_aic_ridge_sparse)
print(avg_aic_ridge_dense)
print(avg_aic_addaptive_ridge_sparse)
print(avg_aic_addaptive_ridge_dense)

0.06385486559550008
0.09023799666449024
0.0639433463215143
0.08959528968885798
0.05071646518616711
0.08011335035720607
0.3136738941969011
0.3627060760697293


In [49]:
avg_bic_lasso_sparse = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Lasso Sparse']
avg_bic_lasso_sparse

avg_bic_lasso_dense = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Lasso Dense']
avg_bic_lasso_dense

avg_bic_addaptive_lasso_sparse = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Adaptive Lasso Sparse']
avg_bic_addaptive_lasso_sparse

avg_bic_addaptive_lasso_dense = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Adaptive Lasso Dense']
avg_bic_addaptive_lasso_dense

avg_bic_ridge_sparse = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Ridge Sparse']
avg_bic_ridge_sparse

avg_bic_ridge_dense = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Ridge Dense']
avg_bic_ridge_dense

avg_bic_addaptive_ridge_sparse = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Adaptive Ridge Sparse']
avg_bic_addaptive_ridge_sparse

avg_bic_addaptive_ridge_dense = final_version2.groupby('Estimator&Signal')['BIC MSE'].mean()['Adaptive Ridge Dense']
avg_bic_addaptive_ridge_dense

0.3627060760697293

In [51]:
print(avg_bic_lasso_sparse)
print(avg_bic_lasso_dense)
print(avg_bic_addaptive_lasso_sparse)
print(avg_bic_addaptive_lasso_dense)
print(avg_bic_ridge_sparse)
print(avg_bic_ridge_dense)
print(avg_bic_addaptive_ridge_sparse)
print(avg_bic_addaptive_ridge_dense)

0.07380476790852092
0.1090837206688651
0.0739855765250904
0.10839554412039533
0.050797524851396846
0.08019479831741684
0.3136738941969011
0.3627060760697293


In [54]:
avg_loocv_lasso_sparse = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Lasso Sparse']
avg_loocv_lasso_sparse

avg_loocv_lasso_dense = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Lasso Dense']

avg_loocv_addaptive_lasso_sparse = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Adaptive Lasso Sparse']
avg_loocv_addaptive_lasso_sparse

avg_loocv_addaptive_lasso_dense = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Adaptive Lasso Dense']
avg_loocv_addaptive_lasso_dense


avg_loocv_ridge_sparse = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Ridge Sparse']
avg_loocv_ridge_sparse

avg_loocv_addaptive_ridge_sparse = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Adaptive Ridge Sparse']
avg_loocv_addaptive_ridge_sparse

avg_loocv_ridge_dense = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Ridge Dense']
avg_loocv_ridge_dense

avg_loocv_addaptive_ridge_dense = final_version2.groupby('Estimator&Signal')['LOO-CV MSE'].mean()['Adaptive Ridge Dense']

In [55]:
print(avg_loocv_lasso_sparse)
print(avg_loocv_lasso_dense)
print(avg_loocv_addaptive_lasso_sparse)
print(avg_loocv_addaptive_lasso_dense)
print(avg_loocv_ridge_sparse)
print(avg_loocv_ridge_dense)
print(avg_loocv_addaptive_ridge_sparse)
print(avg_loocv_addaptive_ridge_dense)

0.06459410587699721
0.09097010703875066
0.05104796498618861
0.08079030316941366
0.05077116510188449
0.07981560992602756
0.31215575575044274
0.3628584757877103
