In [1]:
%matplotlib inline


# Generalized Linear Model


## Logistic Regressions

Logistic regression is an important model for solving classification problems. It is specified as:
$$
\begin{aligned}
& P(y=1 \mid x)=\frac{1}{1+\exp \left(-x^T \beta\right)}, \\
& P(y=0 \mid x)=\frac{1}{1+\exp \left(x^T \beta\right)},
\end{aligned}
$$
where $\beta$ is an unknown parameter vector to be estimated. Since we expect only a few explanatory variables to contribute to predicting $y$, we assume $\beta$ is a sparse vector with sparsity level $s$.

Given $n$ independent observations $(x_i, y_i)$ of the explanatory variables $x$ and the response variable $y$, we can estimate $\beta$ by minimizing the negative log-likelihood function under a sparsity constraint:
$$
\arg \min _{\beta \in R^p} L(\beta):=-\frac{1}{n} \sum_{i=1}^n\left\{y_i x_i^T \beta-\log \left(1+\exp \left(x_i^T \beta\right)\right)\right\}, \text { s.t. }\|\beta\|_0 \leq s
$$

In [1]:
import jax.numpy as jnp
import numpy as np
from scope import ScopeSolver
import numpy as np
# Seed NumPy's global random number generator with a fixed value.
np.random.seed(0)

def data_generator(n, p, s, rho, random_state=None):
    r"""Simulate a sparse logistic-regression data set.

    * $\beta^*_i$ ~ N(0, 1), $\forall i \in supp(\beta^*)$; every other entry of $\beta^*$ is 0
    * $x = (x_1, \cdots, x_p)^T$, $x_{i+1}=\rho x_i+\sqrt{1-\rho^2}z_i$, where $x_1, z_i$ ~ N(0, 1)
    * $y\in\{0,1\}$, $P(y=1 \mid x)=\frac{1}{1+\exp(-x^T\beta^*)}$

    Parameters
    ----------
    n : int
        Number of samples (rows of ``X``).
    p : int
        Number of explanatory variables (columns of ``X``).
    s : int
        Number of non-zero coefficients in ``beta``.
    rho : float
        Coefficient linking consecutive columns of ``X`` (see the recursion above).
    random_state : int or None, optional
        Value passed to ``np.random.seed``. Note that this reseeds NumPy's
        *global* random number generator as a side effect.

    Returns
    -------
    X : numpy.ndarray of shape (n, p)
        Design matrix.
    y : numpy.ndarray of length n
        Binary responses drawn with ``np.random.binomial``.
    beta : numpy.ndarray of shape (p,)
        True coefficient vector; zero outside ``true_support_set``.
    true_support_set : numpy.ndarray of shape (s,)
        Indices of the non-zero coefficients, in the order they were drawn
        (not sorted).
    """
    np.random.seed(random_state)

    # Coefficients: pick s distinct positions, fill them with N(0, 1) draws.
    beta = np.zeros(p)
    true_support_set = np.random.choice(p, s, replace=False)
    beta[true_support_set] = np.random.normal(0, 1, s)

    # Design matrix: the first column is N(0, 1); each later column mixes the
    # previous one with fresh noise, scaled by sqrt(1 - rho^2).
    X = np.empty((n, p))
    X[:, 0] = np.random.normal(0, 1, n)
    for j in range(1, p):
        X[:, j] = rho * X[:, j - 1] + np.sqrt(1 - rho**2) * np.random.normal(0, 1, n)

    # Responses: bound the linear predictor before exponentiating, then draw
    # y ~ Bernoulli(sigmoid(x^T beta)). The probability vector gets its own
    # name so it does not shadow the dimension parameter `p`.
    xbeta = np.clip(X @ beta, -30, 30)
    prob_y1 = 1 / (1 + np.exp(-xbeta))
    y = np.random.binomial(1, prob_y1)

    return X, y, beta, true_support_set

# Problem size: n samples, p variables, s non-zero coefficients, rho for the column recursion.
n = 100
p = 10
s = 3
rho = 0.0
X, y, true_params, true_support_set = data_generator(n, p, s, rho, random_state=0)
# Objective for the solver: negative log-likelihood of logistic regression
def logistic_loss(params):
    """Return the mean negative log-likelihood of `params` on the global data `X`, `y`.

    The linear predictor is clipped to [-30, 30] before it is exponentiated.
    """
    linear_predictor = jnp.clip(X @ params, -30, 30)
    per_sample_nll = jnp.log(1 + jnp.exp(linear_predictor)) - y * linear_predictor
    return jnp.mean(per_sample_nll)

# Fit the sparsity-constrained model: p parameters, s of them allowed to be non-zero.
solver = ScopeSolver(p, s)
solver.solve(logistic_loss, jit=True)

# Compare the ground truth with the estimate, one labelled line per quantity.
report = [
    ("True support set: ", np.sort(true_support_set)),
    ("Estimated support set: ", np.sort(solver.support_set)),
    ("True parameters: ", true_params),
    ("True loss value: ", logistic_loss(true_params)),
    ("Estimated parameters: ", solver.params),
    ("Estimated loss value: ", logistic_loss(solver.params)),
]
for label, value in report:
    print(label, value)



True support set:  [2 4 8]
Estimated support set:  [2 7 8]
True parameters:  [ 0.          0.          0.95008842  0.         -0.10321885  0.
  0.          0.         -0.15135721  0.        ]
True loss value:  0.6396969
Estimated parameters:  [ 0.          0.          0.86563358  0.          0.          0.
  0.         -0.32055541  0.2376089   0.        ]
Estimated loss value:  0.60980713
