In [1]:
import numpy as np

rng = np.random.default_rng()

In [9]:
def sample_sparse_linear(n_samples=100, n_features=20, n_nonzero=5, noise_std=0.1):
    """
    Generate data from a sparse linear model with Gaussian noise.

    Parameters
    ----------
    n_samples : int
        Number of samples
    n_features : int
        Number of features
    n_nonzero : int
        Number of nonzero coefficients
    noise_std : float
        Standard deviation of Gaussian noise
    random_state : int
        Random seed

    Returns
    -------
    X : array, shape (n_samples, n_features)
        Feature matrix
    y : array, shape (n_samples,)
        Target vector
    beta : array, shape (n_features,)
        True coefficients
    """
    # Generate random feature matrix
    X = rng.standard_normal((n_samples, n_features))

    # Generate sparse coefficients
    beta = np.zeros(n_features)
    nonzero_idx = rng.choice(n_features, size=n_nonzero, replace=False)
    beta[nonzero_idx] = rng.standard_normal(n_nonzero)

    # Generate target with noise
    y = X @ beta + noise_std * rng.standard_normal(n_samples)

    return X, y, beta

In [29]:
X, y, beta_true = sample_sparse_linear()
# Adding interecept column to X
X = np.hstack([np.ones((X.shape[0], 1)), X])

In [35]:
def loss_func(y, y_hat):
    return np.sum(np.square(y - y_hat))


def loss_func_deriv(X, beta, y):
    return -2 * X.T @ (y - X @ beta)

In [178]:
beta = np.zeros(X.shape[1])

while True:
    del_f = loss_func_deriv(X, beta, y)
    print(np.linalg.norm(del_f))
    p = -del_f

    if np.linalg.norm(del_f) < 1e-5:
        break

    def find_step_size(alpha=1, c=0.5, tau=0.5):
        # Backtracking line search
        # https://en.wikipedia.org/wiki/Backtracking_line_search
        m = loss_func_deriv(X, beta, y).T @ p
        t = -c * m
        while loss_func(X @ beta, y) - loss_func(X @ (beta + alpha * p), y) < alpha * t:
            alpha = tau * alpha
        return alpha

    beta = beta + find_step_size() * p

363.12812855208125
87.27298557619102
36.46370998819902
19.895147183186314
11.987691964145755
8.35399559754459
3.701235715009708
2.3377276835873495
1.9910790334238124
0.8934007909907962
0.5833964544757163
0.4884681375872682
0.2352943821945301
0.15782539655824188
0.12180071610014197
0.06388404273139896
0.05879545332212064
0.02658806138362245
0.01757982267950459
0.01458714184341767
0.0072094787315127125
0.004876745886226179
0.003660584223004862
0.0019753485857999064
0.0017566797270165383
0.0008183572241305832
0.0005457197730554001
0.00043761491805957526
0.0002228206509660239
0.00015173561545591134
0.00011030726598024979
6.12432389319367e-05
5.25880459689169e-05
2.5238430857500903e-05
1.69601364235768e-05
1.315294096931655e-05
6.895830079312801e-06


In [179]:
beta[1:].round(2)

array([-0.01,  0.  , -0.89,  0.36, -0.02, -0.01,  0.  ,  0.02, -1.03,
       -0.01,  0.51,  0.  , -0.01,  0.01, -0.  , -0.01, -0.01,  0.83,
       -0.  ,  0.02])

In [146]:
beta_true.round(2)

array([ 0.  ,  0.  , -0.89,  0.36,  0.  ,  0.  ,  0.  ,  0.  , -1.03,
        0.  ,  0.52,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.82,
        0.  ,  0.  ])

In [180]:
def prox_op(x, lam):
    return np.sign(x) * np.maximum((np.abs(x) - lam), 0)


def check_obj_stop(f_old, f_new, eps):
    return np.abs(f_new - f_old) < eps * np.abs(f_old)


def check_params_stop(params_old, params_new, eps):
    return np.linalg.norm(params_new - params_old) < eps * np.linalg.norm(params_old)

In [181]:
beta = np.zeros(X.shape[1])
lam = 0.01
eps_params = 1e-5
eps_obj = 1e-5

while True:
    del_f = loss_func_deriv(X, beta, y)
    print(np.linalg.norm(del_f))
    p = -del_f

    def find_step_size(alpha=1, c=0.5, tau=0.5):
        # Backtracking line search
        # https://en.wikipedia.org/wiki/Backtracking_line_search
        m = loss_func_deriv(X, beta, y).T @ p
        t = -c * m
        while loss_func(X @ beta, y) - loss_func(X @ (beta + alpha * p), y) < alpha * t:
            alpha = tau * alpha
        return alpha

    beta_new = prox_op(beta + find_step_size() * p, lam)
    if check_obj_stop(
        loss_func(y, X @ beta), loss_func(y, X @ beta_new), eps_obj
    ) or check_params_stop(beta, beta_new, eps_params):
        break

    beta = beta_new

363.12812855208125
85.23859031376442
38.41457244666897
19.284178218852183
13.515919323776275
10.89686507183256
9.823744955507648
9.153580933764609
8.940330247727411
8.95715966262864
8.971009892868336
8.982869681619233
8.988510538352989
8.99148772721902
8.992899107424954
8.993595822645311


In [182]:
beta[1:].round(2)

array([-0.  ,  0.  , -0.87,  0.34, -0.01, -0.  ,  0.  ,  0.01, -1.02,
       -0.  ,  0.5 ,  0.  , -0.  ,  0.  , -0.  , -0.  , -0.  ,  0.81,
       -0.  ,  0.  ])

In [183]:
beta_true.round(2)

array([ 0.  ,  0.  , -0.89,  0.36,  0.  ,  0.  ,  0.  ,  0.  , -1.03,
        0.  ,  0.52,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.82,
        0.  ,  0.  ])