See the statsmodels Poisson example at https://www.statsmodels.org/dev/examples/notebooks/generated/discrete_choice_overview.html#Poisson

In [1]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
import sys
# Put the parent directory on the module search path; presumably this is where
# the local `linearlab` package imported in the next cell lives -- TODO confirm.
sys.path.append("..")

In [3]:
import linearlab as ll

In [4]:
# Print the dataset's bundled description (observation count and variable definitions).
print(sm.datasets.randhie.NOTE)

::

    Number of observations - 20,190
    Number of variables - 10
    Variable name definitions::

        mdvis   - Number of outpatient visits to an MD
        lncoins - ln(coinsurance + 1), 0 <= coninsurance <= 100
        idp     - 1 if individual deductible plan, 0 otherwise
        lpi     - ln(max(1, annual participation incentive payment))
        fmde    - 0 if idp = 1; ln(max(1, MDE/(0.01 coinsurance))) otherwise
        physlm  - 1 if the person has a physical limitation
        disea   - number of chronic diseases
        hlthg   - 1 if self-rated health is good
        hlthf   - 1 if self-rated health is fair
        hlthp   - 1 if self-rated health is poor
        (Omitted category is excellent self-rated health)



In [5]:
# Load statsmodels' bundled `randhie` dataset as a pandas DataFrame.
# load_pandas() is called explicitly so the result is a DataFrame regardless of
# what the installed statsmodels version returns from the generic load().
randhie = sm.datasets.randhie.load_pandas().data

In [6]:
# Bare expression so the notebook renders the frame; the rendered output elides
# the middle rows and shows only the head and tail.
randhie

Unnamed: 0,mdvis,lncoins,idp,lpi,fmde,physlm,disea,hlthg,hlthf,hlthp
0,0,4.615120,1,6.907755,0.000000,0.000000,13.73189,1,0,0
1,2,4.615120,1,6.907755,0.000000,0.000000,13.73189,1,0,0
2,0,4.615120,1,6.907755,0.000000,0.000000,13.73189,1,0,0
3,0,4.615120,1,6.907755,0.000000,0.000000,13.73189,1,0,0
4,0,4.615120,1,6.907755,0.000000,0.000000,13.73189,1,0,0
...,...,...,...,...,...,...,...,...,...,...
20185,2,0.000000,0,5.377498,0.000000,0.144292,10.57626,0,0,0
20186,0,0.000000,0,5.377498,0.000000,0.144292,10.57626,0,0,0
20187,8,3.258096,0,6.874819,8.006368,0.144292,10.57626,0,0,0
20188,8,3.258096,0,5.156178,6.542472,0.144292,10.57626,0,0,0


In [7]:
# Model formula shared by the statsmodels and linearlab fits below:
# outpatient visit count regressed on every other column of the dataset.
formula = (
    "mdvis ~ "
    "lncoins + idp + lpi + fmde + "
    "physlm + disea + hlthg + hlthf + hlthp"
)

In [8]:
# Reference model: Poisson-family GLM built through the statsmodels formula API.
# This only constructs the model; fitting happens in the next cell.
sm_pois_model = smf.glm(
    formula=formula,
    data=randhie,
    family=sm.families.Poisson(),
)

In [9]:
# Fit the statsmodels GLM with default settings (the summary below reports IRLS as the method).
sm_pois_fit = sm_pois_model.fit()

In [10]:
# Render the fit summary: overall fit statistics plus the per-coefficient table.
sm_pois_fit.summary()

0,1,2,3
Dep. Variable:,mdvis,No. Observations:,20190.0
Model:,GLM,Df Residuals:,20180.0
Model Family:,Poisson,Df Model:,9.0
Link Function:,Log,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-62420.0
Date:,"Sat, 12 Aug 2023",Deviance:,83934.0
Time:,06:20:14,Pearson chi2:,127000.0
No. Iterations:,5,Pseudo R-squ. (CS):,0.3422
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.7004,0.011,62.741,0.000,0.678,0.722
lncoins,-0.0525,0.003,-18.216,0.000,-0.058,-0.047
idp,-0.2471,0.011,-23.272,0.000,-0.268,-0.226
lpi,0.0353,0.002,19.302,0.000,0.032,0.039
fmde,-0.0346,0.002,-21.439,0.000,-0.038,-0.031
physlm,0.2717,0.012,22.200,0.000,0.248,0.296
disea,0.0339,0.001,60.098,0.000,0.033,0.035
hlthg,-0.0126,0.009,-1.366,0.172,-0.031,0.005
hlthf,0.0541,0.015,3.531,0.000,0.024,0.084


In [11]:
# Same data and formula, now specified through linearlab with a Poisson
# likelihood and its default link (presumably log, since the coefficients
# below match the statsmodels log-link fit -- TODO confirm).
ll_pois_model = ll.glm(
    randhie,
    formula,
    lik=ll.lik.poisson(),
)

In [12]:
# Fit the linearlab Poisson model using the default fit arguments.
ll_pois_fit = ll_pois_model.fit()

In [13]:
# Log-likelihood of the linearlab fit; it rounds to the -62420 reported in the
# statsmodels summary above.
ll_pois_fit.loglik

-62419.58856444893

In [14]:
# Estimated coefficients (the output shows a single `mu` group); they agree with
# the statsmodels coefficients shown earlier to the displayed precision.
ll_pois_fit.beta_grouped

mu  Intercept    0.700353
    lncoins     -0.052535
    idp         -0.247087
    lpi          0.035290
    fmde        -0.034578
    physlm       0.271714
    disea        0.033941
    hlthg       -0.012635
    hlthf        0.054056
    hlthp        0.206115
dtype: float64

Note that statsmodels does not include the softplusinv link function, so the fit below cannot easily be compared against a statsmodels reference.

In [15]:
# Poisson model with the softplusinv link passed to the likelihood in place of
# the default one.
# NOTE(review): the variable name says "softmax" but the link used here is
# softplusinv; the name is kept because the following cells refer to it.
ll_pois_softmax_model = ll.glm(
    randhie,
    formula,
    lik=ll.lik.poisson(ll.link.softplusinv),
)

In [16]:
# Fit the softplusinv-link Poisson model using the default fit arguments.
ll_pois_softmax_fit = ll_pois_softmax_model.fit()

In [17]:
# Log-likelihood of the softplusinv-link fit; compare with ll_pois_fit.loglik
# from the default-link fit above.
ll_pois_softmax_fit.loglik

-62357.706991445295

In [18]:
# Coefficients under the softplusinv link; these are on a different scale from
# the default-link coefficients, so the two sets are not directly comparable.
ll_pois_softmax_fit.beta_grouped

mu  Intercept    1.672007
    lncoins     -0.179115
    idp         -0.806327
    lpi          0.120109
    fmde        -0.111938
    physlm       1.038660
    disea        0.123859
    hlthg       -0.122138
    hlthf        0.089108
    hlthp        1.124529
dtype: float64