In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from utils import generate_data
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
np.random.seed(1)
data = generate_data()

In [3]:
data.x_df.head()

Unnamed: 0,intercept,days_since_start,days_since_start_squared,day_of_week_1,day_of_week_2,day_of_week_3,day_of_week_4,day_of_week_5,day_of_week_6,seasonality_cos,seasonality_sin
0,1.0,-1.729679,2.228932,0,0,0,0,0,1,0.999852,0.017213
1,1.0,-1.726517,2.216707,0,0,0,0,0,0,0.999407,0.034422
2,1.0,-1.723355,2.204505,1,0,0,0,0,0,0.998667,0.05162
3,1.0,-1.720193,2.192325,0,1,0,0,0,0,0.99763,0.068802
4,1.0,-1.717031,2.180168,0,0,1,0,0,0,0.996298,0.085965


In [4]:
gpois_mdl = sm.GeneralizedPoisson(data.y, data.x_df)

In [5]:
gpois_res = gpois_mdl.fit()

Optimization terminated successfully.
         Current function value: 6.042689
         Iterations: 20
         Function evaluations: 23
         Gradient evaluations: 23


In [6]:
print(gpois_res.summary2())

                       Results: GeneralizedPoisson
Model:                 GeneralizedPoisson  Pseudo R-squared:  0.046      
Dependent Variable:    y                   AIC:               13257.4897 
Date:                  2022-03-31 14:11    BIC:               13317.4718 
No. Observations:      1095                Log-Likelihood:    -6616.7    
Df Model:              10                  LL-Null:           -6938.4    
Df Residuals:          1084                LLR p-value:       9.1750e-132
Converged:             1.0000              Scale:             1.0000     
-------------------------------------------------------------------------
                          Coef.  Std.Err.    z     P>|z|   [0.025  0.975]
-------------------------------------------------------------------------
intercept                 5.2691   0.0390 135.0435 0.0000  5.1927  5.3456
days_since_start          0.3443   0.0149  23.0411 0.0000  0.3150  0.3736
days_since_start_squared -0.1218   0.0140  -8.7024 0.0000 -0.

## Extend Generic Likelihood Model

In [7]:
def _ll_latentnorm(y, X, beta, alph):
    mu = (np.dot(X, beta)) # Should we exponentiate this??
    sigma = np.exp(np.dot(X, alph))
    Phi_bar = stats.norm(mu, sigma).cdf(np.log1p(y))
    Phi_underbar = stats.norm(mu, sigma).cdf(np.log(y))
    ll = np.log(Phi_bar - Phi_underbar)
    #print(Phi_bar.sum())
    #print(Phi_underbar.sum())
    #print(ll.sum())
    return ll

def _gradutils(y, X, beta, alph):
    mu = (np.dot(X, beta)) # Should we exponentiate this??
    sigma = np.exp(np.dot(X, alph))

    z_bar = (np.log1p(y) - mu) / sigma 
    z_underbar = (np.log(y) - mu) / sigma

    Phi_bar = stats.norm.cdf(z_bar)
    Phi_underbar = stats.norm.cdf(z_underbar)
    Phi  = Phi_bar - Phi_underbar

    phi_bar = stats.norm.pdf(z_bar)
    phi_underbar = stats.norm.pdf(z_underbar)
    phi = phi_bar - phi_underbar
    
    kappa_0 = phi / Phi
    kappa_1 = (z_bar * phi_bar - z_underbar * phi_underbar) / Phi
    kappa_2 = (z_bar**2 * phi_bar - z_underbar**2 * phi_underbar) / Phi
    kappa_3 = (z_bar**3 * phi_bar - z_underbar**3 * phi_underbar) / Phi
    
    return kappa_0, kappa_1, kappa_2, kappa_3, mu, sigma
    
def _vec_matrix_multiply(a, B):
    return np.apply_along_axis(lambda x: x * a, 0, B)

In [53]:
penalty = 1.

class MyLatentNormal(GenericLikelihoodModel):
    def __init__(self, endog, exog, **kwds):
        super(MyLatentNormal, self).__init__(endog, exog, **kwds)
        self.nparams = 22
        
        
    def nloglikeobs(self, params):
        exog = self.exog
        endog = self.endog
        beta = params[:11] #first 11 are for mu
        alph = params[11:] #last 11 are for sigma
        ll = _ll_latentnorm(endog, exog, beta, alph)
        params_alt = params.copy()
        params_alt[0] = 0.
#         params_alt[11] = 0.
        return -ll + penalty*np.sum(params_alt**2)
    
    def score(self, params):
        y = self.endog
        X = self.exog
        beta = params[:11] #first 11 are for mu
        alph = params[11:] #last 11 are for sigma
        
        kappa_0, kappa_1, kappa_2, kappa_3, mu, sigma = _gradutils(y, X, beta, alph)
        
        beta_alt = beta.copy()
        beta_alt[0] = 0
        alph_alt = alph.copy()
#         alph_alt[0] = 0.
        
        grad_beta = -(kappa_0 / sigma) @ X - penalty*2 * beta_alt
        grad_alph = -kappa_1 @ X - penalty*2 * alph_alt
        
        return np.append(grad_beta, grad_alph)
    
    def hessian(self, params):
        y = self.endog
        X = self.exog
        beta = params[:11] #first 11 are for mu
        alph = params[11:] #last 11 are for sigma
        
        kappa_0, kappa_1, kappa_2, kappa_3, mu, sigma = _gradutils(y, X, beta, alph)
        
        #xx = np.einsum('ij,ij-> i', X, X) # row-wise dot product?
        k_beta = (kappa_0**2 + kappa_1) / sigma**2
        k_alph = kappa_1 * (kappa_1 - 1) + kappa_3
        k_beta_alph = (kappa_2 + kappa_0*(kappa_1 - 1)) / sigma
        H_beta = np.zeros([11, 11])
        H_alph = np.zeros([11, 11])
        H_beta_alph = np.zeros([11, 11])
                  
        for i in range(X.shape[0]):
            x = X[i]
            xxT = np.outer(x, x)
            H_beta -= k_beta[i] * xxT
            H_alph -= k_alph[i] * xxT
            H_beta_alph -= k_beta_alph[i] * xxT
        
        H_all = np.block([[H_beta, H_beta_alph], [H_beta_alph.T, H_alph]]) # 22 x 22
        penalty_matrix = penalty*2 * np.eye(22)
        penalty_matrix[0, 0] = 0.
#         penalty_matrix[11, 11] = 0.

        return H_all - penalty_matrix
        
    
    def fit(self, start_params=None, maxiter=10000, maxfun=5000, **kwds):
        if start_params is None:
            # Reasonable starting values
            start_params = np.zeros(self.nparams)
            start_params[0] = np.log(np.mean(self.endog)) # beta
        return super(MyLatentNormal, self).fit(start_params=start_params,
                                     maxiter=maxiter, maxfun=maxfun,
                                     **kwds)

In [54]:
names = list(data.x_df)
names_alpha = [s + "_alpha" for s in names]

In [55]:
mod = MyLatentNormal(data.y, data.x_df, extra_params_names=names_alpha)

In [57]:
# start_params = np.zeros(22)
# start_params[:11] = gpois_res.params[:11] # Warm start with estimates of betas
# start_params[11] = -1. # It's really sensitive to this starting value
mod_res = mod.fit(method="newton", maxiter=100, start_params=start_params) 

Optimization terminated successfully.
         Current function value: 8.787365
         Iterations 8


In [34]:
print(mod_res.summary())
print(f"True Beta: {data.beta}")
print(f"True Alpha: {data.alpha}")

                            MyLatentNormal Results                            
Dep. Variable:                      y   Log-Likelihood:                -8013.0
Model:                 MyLatentNormal   AIC:                         1.605e+04
Method:            Maximum Likelihood   BIC:                         1.610e+04
Date:                Thu, 31 Mar 2022                                         
Time:                        14:17:34                                         
No. Observations:                1095                                         
Df Residuals:                    1084                                         
Df Model:                          10                                         
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
intercept                          4.6898        nan        nan        nan         nan     

In [17]:
# Negative Hessian is PD at optimal values
starting_vals = np.array([ 5., 0.4, -0.17, 0.33, 0.36, 0.4, 0.39, 0.26, 0.07, 0.2, -0.1, -1., -0.2, -0.03, 0.33, 0.36, 0.4, 0.39, 0.26, 0.07, 0.16, -0.05])
starting_vals = np.array([ 5., 0.4, -0.17, 0.33, 0.36, 0.4, 0.39, 0.26, 0.07, 0.2, -0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
np.linalg.eigvals(-mod.hessian(starting_vals))

array([1239.40909447, 1169.52333052, 1111.64149088,  864.30006269,
        672.26168113,  533.12565814,  488.33293005,  379.91785823,
        277.77613696,    7.36287204,   21.80117537,  220.94008611,
         57.09792013,   68.82228866,   83.88336387,  104.34541364,
        112.89569071,  153.64095429,  161.01961849,  155.23095091,
        158.95180193,  157.82744299])

In [None]:
bs = mod_res.bootstrap(nrep=10, method="ncg", store = 0, disp = 0) # we can bootstrap SE
bs[0] #mean estimates
bs[1] # se