In [1]:
import time

import numpy as np
from torch import nn

from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_validate

from randalo import RandALO

In [2]:
# Generate a large synthetic sparse linear-regression problem:
#   y = X @ beta + noise, where only the first `s` coefficients are nonzero.
n = 10000  # number of observations (rows of X, length of y)
p = 5000  # number of features (columns of X, length of beta)
s = 500  # number of nonzero entries in the true coefficient vector
sigma = 1.0  # standard deviation of the additive Gaussian noise

# Seeded generator. The three draws below consume it in a fixed order
# (X, then beta, then the noise), so reordering them changes the data.
rng = np.random.default_rng(0)
# Draw integers from {0, 1} and map them to -1/+1 entries.
X = rng.integers(0, 2, (n, p)) * 2 - 1
beta = np.zeros(p)
# Nonzero coefficients have standard deviation 1/sqrt(s), so E[||beta||^2] = 1.
beta[:s] = rng.normal(0, 1 / np.sqrt(s), (s,))
# Noisy linear response.
y = X @ beta + rng.normal(0, sigma, (n,))

In [3]:
# Lasso penalty strength, shared by both risk estimators below.
alpha = 0.5 / np.sqrt(s)

# --- Baseline: 5-fold cross-validation with scikit-learn --------------------
tic = time.monotonic()
lasso = Lasso(alpha=alpha, fit_intercept=False)
skl_results = cross_validate(
    lasso,
    X,
    y,
    cv=5,
    scoring="neg_mean_squared_error",
)
# The scorer reports *negated* MSE per fold, so flip the sign of the average.
skl_risk = -skl_results["test_score"].mean()
toc = time.monotonic()
skl_time = toc - tic

# --- RandALO: a single fit on the full data, then a risk estimate -----------
tic = time.monotonic()
# The fit is deliberately inside the timed region, mirroring the CV timing.
lasso = Lasso(alpha=alpha, fit_intercept=False).fit(X, y)
ra = RandALO.from_sklearn(lasso, X, y)
ra_risk = ra.evaluate(nn.MSELoss())
toc = time.monotonic()
ra_time = toc - tic

# --- Reference: true conditional risk of the full-data fit ------------------
# Squared coefficient error plus the noise variance. This closed form relies
# on the features being uncorrelated with zero mean and unit variance, as in
# the +/-1 design generated earlier in this notebook.
true_risk = sigma**2 + np.linalg.norm(beta - lasso.coef_) ** 2

# NOTE(review): `:4f` sets a minimum field width of 4, not 4 decimal places,
# so six decimals are printed. Confirm whether `.4f` was intended.
print(f"5-fold CV : risk = {skl_risk:4f} in {skl_time:01.3f} seconds")
print(f"RandALO   : risk = {ra_risk:4f} in {ra_time:01.3f} seconds")
print(f"True      : risk = {true_risk:4f}")

5-fold CV : risk = 1.241288 in 2.344 seconds
RandALO   : risk = 1.212009 in 0.825 seconds
True      : risk = 1.219970
