In [1]:
import numpy as np
import jax.numpy as jnp
from scope import ScopeSolver

from sklearn.metrics.pairwise import rbf_kernel

# 1. positive effect linear regression

In [2]:
# Simulated sparse linear model: only the first three coefficients are
# non-zero, and all of them are positive.
n, p = 500, 100
rng = np.random.default_rng(0)

# The design matrix is drawn before the noise so the random stream is consumed
# in the same order on every run.
X = rng.normal(loc=0, scale=1, size=(n, p))

# Ground-truth coefficient vector: (1, 2, 3, 0, ..., 0).
beta = np.concatenate(([1.0, 2.0, 3.0], np.zeros(p - 3)))

# Response = linear signal + standard normal noise.
y = np.matmul(X, beta) + rng.normal(loc=0, scale=1, size=n)

In [3]:
def ols_loss(params):
    """Mean squared residual of ``y`` on ``X`` using ``|params|`` as coefficients.

    ``X`` and ``y`` are read from the notebook's global namespace. Passing the
    parameters through ``jnp.abs`` means the coefficients that enter the fit
    are never negative, whatever sign the optimiser gives ``params``.
    """
    coefficients = jnp.abs(params)
    residual = y - X @ coefficients
    return jnp.mean(residual ** 2)

# Build a solver over p parameters with sparsity=3, matching the three
# non-zero entries placed in `beta` when the data were simulated.
solver = ScopeSolver(p, sparsity=3)
# Minimise the objective defined by `ols_loss`.
params = solver.solve(ols_loss)
# `ols_loss` only ever sees |params|, so the estimate that is comparable to
# `beta` is |params|; report the squared estimation error rounded to 3 decimals.
print('Square-Loss Error: ', np.sum((jnp.abs(params)-beta)**2).round(3))

Square-Loss Error:  0.001


# 2. non-linear feature selection via HSIC-Splicing
In this example, we show an application of positive-effect linear regression, i.e. regression whose coefficients are non-negative.  
We aim to identify the relevant features $X_k, k\in[p]$ of $X\in\mathbb{R}^{n\times p}$ on which $y\in\mathbb{R}^n$ depends nonlinearly.  
Motivated by [Yamada et al.](http://www.ms.k.u-tokyo.ac.jp/sugi/2014/HSICLasso.pdf), we consider the following sparse optimization problem with non-negative coefficients:
$$\min_{\alpha\in\mathbb{R}^p} \left\|\bar{L}-\sum_{k=1}^p\alpha_k\bar{K}^{(k)}\right\|_F^2,\quad\text{ s.t. } \|\alpha\|_0\leq s,\ \alpha_k\geq 0\ (k\in[p])$$
where $\bar{K}^{(k)}=\Gamma K^{(k)}\Gamma\in\mathbb{R}^{n\times n}$ and $\bar{L}=\Gamma L\Gamma\in\mathbb{R}^{n\times n}$ are centered Gram matrices, and $\Gamma=I_n-n^{-1}1_n1_n^{\top}\in\mathbb{R}^{n\times n}$ is the centering matrix.  
The matrices $K_{i,j}^{(k)}=K(X_{i,k}, X_{j,k})$ and $L_{i,j}=L(y_i, y_j)$ are generated by the kernels $K$ and $L$, respectively.  
In the following, we choose both $K$ and $L$ to be the Gaussian kernel.

In [4]:
def _center_gram(gram):
    """Double-center a square Gram matrix.

    Algebraically identical to ``Gamma @ gram @ Gamma`` with
    ``Gamma = I_n - 1_n 1_n^T / n``, but evaluated from the column, row and
    grand means. That costs O(n^2) instead of two O(n^3) matrix products.
    """
    col_mean = gram.mean(axis=0, keepdims=True)
    row_mean = gram.mean(axis=1, keepdims=True)
    return gram - col_mean - row_mean + gram.mean()


def hsic(X, y, sparsity, gamma_x=0.7, gamma_y=0.7):
    """Select features by sparse regression of centered Gram matrices.

    The centered Gram matrix of ``y`` is flattened into a response vector and
    the centered Gram matrix of every column of ``X`` is flattened into one
    column of a design matrix. ``ScopeSolver`` then minimises the mean squared
    residual of that regression, with the coefficients entering as
    ``|alpha|`` so that they are never negative.

    Parameters
    ----------
    X : array-like of shape (n, p)
        Feature matrix; one Gram matrix is built per column.
    y : array-like with n entries
        Response; it is reshaped to a single column before the kernel is
        applied.
    sparsity : int
        Passed to ``ScopeSolver(p, sparsity=...)``; presumably the number of
        non-zero entries allowed in the solution.
    gamma_x, gamma_y : float
        ``gamma`` arguments handed to ``rbf_kernel`` for the feature columns
        and for ``y`` respectively.

    Returns
    -------
    alpha
        Whatever ``ScopeSolver.solve`` returns for the objective. This is the
        raw parameter vector: the objective only uses ``|alpha|``, so the
        signs of its entries carry no meaning, only the support and the
        magnitudes do.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional or ``y`` does not have one entry per
        row of ``X``.

    Notes
    -----
    The design matrix is dense with shape ``(n * n, p)`` in NumPy's default
    float dtype, so memory grows quadratically with the number of samples.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
    n, p = X.shape

    y_column = np.asarray(y).reshape(-1, 1)
    if y_column.shape[0] != n:
        raise ValueError(
            f"y has {y_column.shape[0]} entries but X has {n} rows"
        )

    # Flattened, centered Gram matrix of the response.
    response = _center_gram(rbf_kernel(y_column, gamma=gamma_y)).reshape(-1)

    # Column k holds the flattened, centered Gram matrix of feature k.
    # Every column is overwritten below, so no zero-initialisation is needed.
    covariate = np.empty((n * n, p))
    for k in range(p):
        gram_k = rbf_kernel(X[:, k].reshape(-1, 1), gamma=gamma_x)
        covariate[:, k] = _center_gram(gram_k).reshape(-1)

    def custom_objective(alpha):
        # |alpha| keeps the effective coefficients non-negative.
        return jnp.mean((response - covariate @ jnp.abs(alpha)) ** 2)

    solver = ScopeSolver(p, sparsity=sparsity)
    return solver.solve(custom_objective)

## 2.1 additive model:
$y=-2\sin(2X_1)+X_2^2+X_3+\exp(-X_4)+\epsilon$

In [5]:
# Additive model: y depends (nonlinearly) on the first four columns of X only;
# the remaining columns are pure noise features.
n, p, s = 500, 1000, 4
rng = np.random.default_rng(0)

# Features first, noise second, so the random stream is consumed in a fixed order.
X = rng.normal(loc=0, scale=1, size=(n, p))
noise = rng.normal(loc=0, scale=1, size=n)

y = (
    -2 * np.sin(2 * X[:, 0])
    + X[:, 1] ** 2
    + X[:, 2]
    + np.exp(-X[:, 3])
    + noise
)

In [6]:
# Fit with sparsity level s, then show the zero-based indices of the
# features whose coefficient is non-zero.
alpha = hsic(X, y, s)
np.flatnonzero(alpha)

array([0, 1, 2, 3])

## 2.2 non-additive model: 
$y=X_1\exp(2X_2)+X_3^2+\epsilon$

In [7]:
# Non-additive model: the first two columns of X interact multiplicatively and
# the third enters through its square; all other columns are noise features.
n, p, s = 500, 1000, 3
rng = np.random.default_rng(1)

# Features first, noise second, so the random stream is consumed in a fixed order.
X = rng.normal(loc=0, scale=1, size=(n, p))
noise = rng.normal(loc=0, scale=1, size=n)

y = (
    X[:, 0] * np.exp(2 * X[:, 1])
    + X[:, 2] ** 2
    + noise
)

In [8]:
# Fit with sparsity level s, then show the zero-based indices of the
# features whose coefficient is non-zero.
alpha = hsic(X, y, s)
np.flatnonzero(alpha)

array([0, 1, 2])