In [1]:
%matplotlib inline


# Generalized Linear Model


## Gamma Regression
Gamma regression can be used when you have positive continuous response variables such as payments for insurance claims,
or the lifetime of a redundant system.
The density of the Gamma distribution can be parameterized by
a mean parameter ($\mu$) and a shape parameter ($\alpha$):
$$
f(y \mid \mu, \alpha)=\frac{1}{y \Gamma(\alpha)}\left(\frac{\alpha y}{\mu}\right)^{\alpha} e^{-\alpha y / \mu} {I}_{(0, \infty)}(y),
$$
where $I(\cdot)$ denotes the indicator function. In the Gamma regression model,
response variables are assumed to follow Gamma distributions. Specifically,

\begin{align}y_i \sim \operatorname{Gamma}(\mu_i, \alpha),\end{align}


where $1/\mu_i = x_i^T\beta$ (the inverse link). Since $\mu_i > 0$, the linear predictor $x_i^T\beta$ must be strictly positive.

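Given $n$ independent observations of the explanatory variables $x$ and the response variable $y$, we can estimate $\beta$ by minimizing the negative log-likelihood function under a sparsity constraint:
$$
\arg \min _{\beta \in R^p} L(\beta):=-\frac{1}{n} \sum_{i=1}^n\left\{-\alpha \left( y_i x_i^T \beta - \log \left(x_i^T \beta\right)\right) + \alpha \log \alpha + \left(\alpha - 1\right) \log y_i - \log \Gamma \left(\alpha\right) \right\}, \text { s.t. }\|\beta\|_0 \leq s .
$$

Since $\alpha > 0$ and the remaining terms do not depend on $\beta$, this is equivalent to minimizing $\frac{1}{n} \sum_{i=1}^n \left( y_i x_i^T \beta - \log \left(x_i^T \beta\right) \right)$ under the same constraint, which is the loss implemented below.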

Here is the Python code for solving the sparse Gamma regression problem:


In [6]:
import numpy as np
from abess.datasets import make_glm_data
import jax.numpy as jnp
from scope import ScopeSolver, HTPSolver, GraspSolver
from scope.numeric_solver import convex_solver_nlopt
# Seed NumPy's global random state so the run is repeatable
# (presumably make_glm_data draws from it — confirm against abess).
np.random.seed(123)

# Problem dimensions: n samples, p candidate predictors, and s passed as
# `k` to the data generator (the requested number of nonzero coefficients).
n, p, s = 1000, 10, 3

# Simulated Gamma-family GLM data.
data = make_glm_data(n=n, p=p, k=s, family="gamma")
y = data.y

# Design matrix with a leading column of ones for the intercept.
X = np.concatenate((np.ones((n, 1)), data.x), axis=1)

# Reference coefficients, with a 0.0 entry prepended in the intercept slot.
true_params = np.concatenate(([0.0], data.coef_))

# Define function to calculate negative log-likelihood of Gamma regression
def gamma_loss(params):
    """Beta-dependent part of the Gamma regression negative log-likelihood.

    Computes ``mean(y * eta - log(eta))`` with ``eta = X @ params``, using the
    module-level design matrix ``X`` and response ``y``. Under the inverse
    link ``1 / mu = eta`` this is the negative log-likelihood with the
    positive factor ``alpha`` and the terms that do not involve ``params``
    dropped.

    Parameters
    ----------
    params : array-like
        Coefficient vector multiplied with ``X``.

    Returns
    -------
    Scalar loss value (the result of ``jnp.mean``).
    """
    # The logarithm needs a strictly positive argument, so the lower clip
    # bound is a small positive floor rather than a negative number (which
    # would let non-positive values through and yield NaN/inf). The upper
    # bound of 30 is kept unchanged.
    eta = jnp.clip(X @ params, 1e-8, 30)
    return jnp.mean(y * eta - jnp.log(eta))

def convex_solver_gamma(
    loss_fn,
    value_and_grad,
    params,
    optim_variable_set,
    data,
):
    """
    Run ``convex_solver_nlopt`` from an initial point with ``X @ params > 0``.

    If the smallest entry of ``X @ params`` (using the module-level ``X``) is
    not positive, the first parameter is shifted in place so that the
    smallest entry becomes a small positive margin. This relies on the first
    column of ``X`` being all ones, so changing ``params[0]`` shifts every
    entry of ``X @ params`` by the same amount.

    Parameters
    ----------
    loss_fn, value_and_grad, optim_variable_set, data
        Forwarded unchanged to ``convex_solver_nlopt``.
    params : numpy.ndarray
        Initial parameter vector; ``params[0]`` may be modified in place.

    Returns
    -------
    Whatever ``convex_solver_nlopt`` returns.
    """
    # Subtracting the minimum alone would leave it at exactly 0, where the
    # logarithm in the loss is still undefined, so shift to a margin above 0.
    positive_margin = 1e-3
    m = np.min(X @ params)
    if m <= 0.0:
        params[0] += positive_margin - m
    return convex_solver_nlopt(loss_fn, value_and_grad, params, optim_variable_set, data)

    

# Sparse solver over p + 1 parameters (intercept plus p predictors) with a
# sparsity level of s + 1; index 0 (the intercept) is always selected, and the
# custom convex solver defined above is used for the inner optimization.
solver = ScopeSolver(
    p + 1,
    s + 1,
    always_select=[0],
    convex_solver=convex_solver_gamma,
)

# Start from intercept 1.0 and all-zero coefficients, so X @ params == 1 > 0.
solver.solve(
    gamma_loss,
    init_params=np.concatenate(([1.0], np.zeros(p))),
    jit=True,
)

# Report ground truth next to the estimates. The leading (intercept) entry is
# dropped from the parameter vectors and from the sorted estimated support.
print("True support set: ", np.flatnonzero(true_params))
print("True parameters: ", true_params[1:])
# NOTE(review): printing gamma_loss(true_params) was disabled in the original;
# true_params carries 0.0 in the intercept slot, so that value may not be
# meaningful — confirm before re-enabling.
#print("True loss value: ", gamma_loss(true_params))
print("Estimated support set: ", np.sort(solver.support_set)[1:])
print("Estimated parameters: ", solver.params[1:])
print("Estimated loss value: ", gamma_loss(solver.params))

True support set:  [1 7 9]
True parameters:  [11.82346289  0.          0.          0.          0.          0.
 19.56456813  0.         23.29703584  0.        ]
Estimated support set:  [1 7 9]
Estimated parameters:  [10.25436016  0.          0.          0.          0.          0.
 21.66317082  0.         19.96831658  0.        ]
Estimated loss value:  -1.8518866
