In [1]:
import numpy as np

rng = np.random.default_rng()

In [9]:
def sample_sparse_linear(n_samples=100, n_features=20, n_nonzero=5, noise_std=0.1):
    """
    Generate data from a sparse linear model with Gaussian noise.

    Parameters
    ----------
    n_samples : int
        Number of samples
    n_features : int
        Number of features
    n_nonzero : int
        Number of nonzero coefficients
    noise_std : float
        Standard deviation of Gaussian noise
    random_state : int
        Random seed

    Returns
    -------
    X : array, shape (n_samples, n_features)
        Feature matrix
    y : array, shape (n_samples,)
        Target vector
    beta : array, shape (n_features,)
        True coefficients
    """
    # Generate random feature matrix
    X = rng.standard_normal((n_samples, n_features))

    # Generate sparse coefficients
    beta = np.zeros(n_features)
    nonzero_idx = rng.choice(n_features, size=n_nonzero, replace=False)
    beta[nonzero_idx] = rng.standard_normal(n_nonzero)

    # Generate target with noise
    y = X @ beta + noise_std * rng.standard_normal(n_samples)

    return X, y, beta

In [221]:
X, y, beta_true = sample_sparse_linear()
# Adding interecept column to X
X = np.hstack([np.ones((X.shape[0], 1)), X])

In [222]:
def loss_func(y, y_hat):
    return np.sum(np.square(y - y_hat))


def loss_func_deriv(X, beta, y):
    return -2 * X.T @ (y - X @ beta)


def check_obj_stop(f_old, f_new, eps):
    return np.abs(f_new - f_old) < eps * np.abs(f_old)


def check_params_stop(params_old, params_new, eps):
    return np.linalg.norm(params_new - params_old) < eps * np.linalg.norm(params_old)

In [223]:
beta = np.zeros(X.shape[1])
eps_params = 1e-5
eps_obj = 1e-5

while True:
    del_f = loss_func_deriv(X, beta, y)
    print(np.linalg.norm(del_f))
    p = -del_f

    if np.linalg.norm(del_f) < 1e-5:
        break

    def find_step_size(alpha=1, c=0.5, tau=0.5):
        # Backtracking line search
        # https://en.wikipedia.org/wiki/Backtracking_line_search
        m = loss_func_deriv(X, beta, y).T @ p
        t = -c * m
        while loss_func(X @ beta, y) - loss_func(X @ (beta + alpha * p), y) < alpha * t:
            alpha = tau * alpha
        return alpha
    
    beta_new = beta + find_step_size() * p
    if check_obj_stop(
        loss_func(y, X @ beta), loss_func(y, X @ beta_new), eps_obj
    ) or check_params_stop(beta, beta_new, eps_params):
        break

    beta = beta_new

541.4135539090395
279.10667125123746
108.55689488879295
64.28545479279573
51.05068269366922
20.785950373463145
14.503608617061625
13.452627811048428
5.8050310858997705
5.485389783944708
2.464939556538909
2.25815797047769
1.072198525567442
0.9373203509884254
0.47205735117954706
0.39266460852929
0.20943198093366375
0.16639721933515805
0.09347361608884752
0.07158274921983224
0.04195342699057493
0.03141528324492165


In [224]:
beta[1:].round(2)

array([ 1.2 , -0.87, -0.  ,  0.01,  0.01,  0.  ,  0.  ,  0.02,  0.01,
        0.01,  0.01, -0.  ,  1.94,  0.  , -0.01, -0.08, -0.02,  0.02,
        0.02,  1.42])

In [225]:
beta_true.round(2)

array([ 1.19, -0.86,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
        0.  ,  0.  ,  0.  ,  1.95,  0.  ,  0.  , -0.09,  0.  ,  0.  ,
        0.  ,  1.42])

In [226]:
def prox_op(x, lam):
    return np.sign(x) * np.maximum((np.abs(x) - lam), 0)

In [227]:
beta = np.zeros(X.shape[1])
lam = 0.01
eps_params = 1e-5
eps_obj = 1e-5

while True:
    del_f = loss_func_deriv(X, beta, y)
    print(np.linalg.norm(del_f))
    p = -del_f

    def find_step_size(alpha=1, c=0.5, tau=0.5):
        # Backtracking line search
        # https://en.wikipedia.org/wiki/Backtracking_line_search
        m = loss_func_deriv(X, beta, y).T @ p
        t = -c * m
        while loss_func(X @ beta, y) - loss_func(X @ (beta + alpha * p), y) < alpha * t:
            alpha = tau * alpha
        return alpha

    beta_new = prox_op(beta + find_step_size() * p, lam)
    if check_obj_stop(
        loss_func(y, X @ beta), loss_func(y, X @ beta_new), eps_obj
    ) or check_params_stop(beta, beta_new, eps_params):
        break

    beta = beta_new

541.4135539090395
285.65277450286624
109.1312130168254
66.14627260064026
41.71815139363993
21.651352730184286
16.579251666347574
13.196636407925562
10.962235540739448
9.456258863557371
8.67982452753725
8.412241227895677
8.31200662609785
8.271020673826975
8.254444131450096
8.246824356397559
8.243404538445965
8.241776053638095


In [228]:
beta[1:].round(2)

array([ 1.18, -0.84, -0.  , -0.  ,  0.  , -0.  ,  0.  ,  0.  ,  0.  ,
        0.  ,  0.  , -0.  ,  1.92,  0.  , -0.  , -0.07, -0.  ,  0.  ,
        0.  ,  1.4 ])

In [229]:
beta_true.round(2)

array([ 1.19, -0.86,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ,
        0.  ,  0.  ,  0.  ,  1.95,  0.  ,  0.  , -0.09,  0.  ,  0.  ,
        0.  ,  1.42])