In [1]:
import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
from biogeme.expressions import (
    Beta,
    log,
    Elem,
    bioNormalCdf,
    Variable,
    bioMultSum,
)

from optima import (
    read_data,
    male,
    age,
    haveChildren,
    highEducation,
    SocioProfCat,
)

# Create a logger through biogeme's `get_screen_logger` helper at INFO level,
# then emit a banner line identifying which example is running.
logger = blog.get_screen_logger(level=blog.INFO)
logger.info('Example m01_latent_variable.py')

Example m01_latent_variable.py 


In [2]:
# Coefficients of the structural equation of the latent variable. They all
# share the same specification: starting value 0.0, no lower or upper bound,
# and status 0 (to be estimated). The Beta objects are created in the same
# order as the names on the left-hand side.
(
    coef_intercept,
    coef_age_50_less,
    coef_male,
    coef_haveChildren,
    coef_highEducation,
    coef_employees,
) = (
    Beta(name, 0.0, None, None, 0)
    for name in (
        'coef_intercept',
        'coef_age_50_less',
        'coef_male',
        'coef_haveChildren',
        'coef_highEducation',
        'coef_employees',
    )
)

In [3]:
# Dummy regressors built from the socio-economic variables.
# NOTE(review): category 6 of SocioProfCat is paired with `coef_employees`,
# so it presumably codes employees -- confirm against the data dictionary.
is_employee = SocioProfCat == 6
is_50_or_younger = age <= 50

# Structural equation: the latent variable written as a linear combination of
# socio-economic characteristics. Only the deterministic part is specified
# here; no error term appears in this expression.
ACTIVELIFE = (
    coef_intercept
    + coef_highEducation * highEducation
    + coef_employees * is_employee
    + coef_age_50_less * is_50_or_younger
    + coef_male * male
    + coef_haveChildren * haveChildren
)

In [4]:
# Names of the indicator variables used by the measurement equations, grouped
# by family ('ResidCh' items first, then 'LifSty' items).
# Order matters: the first entry is the reference indicator whose intercept,
# coefficient and scale are held fixed further down in the notebook.
indicators = [
    'ResidCh01', 'ResidCh04', 'ResidCh05', 'ResidCh06',
    'LifSty07', 'LifSty10',
]

In [5]:
# Intercepts of the measurement equations, keyed by indicator name.
# Every indicator except the first gets a free intercept (status 0) starting
# at 0; the first indicator's intercept is fixed at 0 (status 1). The fixed
# entry is inserted last, after all the free ones.
inter = {
    **{k: Beta(f'inter_{k}', 0, None, None, 0) for k in indicators[1:]},
    indicators[0]: Beta(f'INTER_{indicators[0]}', 0, None, None, 1),
}

In [6]:
# Coefficients multiplying the latent variable in the measurement equations,
# keyed by indicator name. Every indicator except the first gets a free
# coefficient (status 0) starting at 0; the first indicator's coefficient is
# fixed at 1 (status 1). The fixed entry is inserted last.
coefficients = {
    **{k: Beta(f'coeff_{k}', 0, None, None, 0) for k in indicators[1:]},
    indicators[0]: Beta(f'B_{indicators[0]}', 1, None, None, 1),
}

In [7]:
# Measurement equations: for each indicator, its intercept plus its
# coefficient times the latent variable.
models = {
    name: inter[name] + coefficients[name] * ACTIVELIFE
    for name in indicators
}

In [8]:
# Scale parameters dividing the thresholded measurement equations, keyed by
# indicator name. Every indicator except the first gets a free scale (status 0)
# starting at 1 with a lower bound of 1.0e-5, which keeps it strictly
# positive; the first indicator's scale is fixed at 1 (status 1). The fixed
# entry is inserted last.
sigma_star = {
    **{k: Beta(f'sigma_star_{k}', 1, 1.0e-5, None, 0) for k in indicators[1:]},
    indicators[0]: Beta(f'sigma_star_{indicators[0]}', 1, None, None, 1),
}

In [9]:
# The four thresholds are symmetric around zero and built from two increments.
# Both increments have a lower bound of 1.0e-5, so they stay strictly positive
# and the thresholds are ordered: tau_1 < tau_2 < tau_3 < tau_4.
delta_1 = Beta('delta_1', 0.1, 1.0e-5, None, 0)
delta_2 = Beta('delta_2', 0.2, 1.0e-5, None, 0)
tau_1, tau_2, tau_3, tau_4 = (
    -delta_1 - delta_2,
    -delta_1,
    delta_1,
    delta_1 + delta_2,
)

In [10]:
# Standardized thresholds: each threshold minus the indicator's measurement
# equation, divided by the indicator's scale. One dict (keyed by indicator
# name) is built per threshold, in threshold order.
tau_1_residual, tau_2_residual, tau_3_residual, tau_4_residual = (
    {k: (tau - models[k]) / sigma_star[k] for k in indicators}
    for tau in (tau_1, tau_2, tau_3, tau_4)
)


def _response_probabilities(k):
    """Map every response code of indicator ``k`` to its probability.

    Codes 1 to 5 receive the normal-CDF mass lying between consecutive
    standardized thresholds (below the first one for code 1, above the last
    one for code 5). Codes 6, -1 and -2 are given probability 1.0.
    NOTE(review): those three codes presumably mark missing or
    non-substantive answers -- confirm against the data dictionary.
    """
    z_1 = tau_1_residual[k]
    z_2 = tau_2_residual[k]
    z_3 = tau_3_residual[k]
    z_4 = tau_4_residual[k]
    return {
        1: bioNormalCdf(z_1),
        2: bioNormalCdf(z_2) - bioNormalCdf(z_1),
        3: bioNormalCdf(z_3) - bioNormalCdf(z_2),
        4: bioNormalCdf(z_4) - bioNormalCdf(z_3),
        5: 1 - bioNormalCdf(z_4),
        6: 1.0,
        -1: 1.0,
        -2: 1.0,
    }


# Probability table of every indicator, keyed by indicator name.
dict_prob_indicators = {k: _response_probabilities(k) for k in indicators}

In [11]:
# For each indicator, `Elem` selects from the indicator's probability table
# the entry keyed by the value of the variable of the same name; its log is
# that indicator's contribution. `bioMultSum` then adds the contributions of
# all indicators into the log-likelihood expression.
log_proba = {
    name: log(Elem(dict_prob_indicators[name], Variable(name)))
    for name in indicators
}
loglike = bioMultSum(log_proba)

In [12]:
# Load the estimation data through the `read_data` helper imported from the
# project's `optima` module.
database = read_data()

In [13]:
# Build the estimation object from the database and the log-likelihood
# expression. The model name appears in the results summary and in the name
# of the '__<model name>.iter' file mentioned in the estimation log.
biogeme = bio.BIOGEME(database, loglike)
biogeme.modelName = 'm01_latent_variable'

Default values of the Biogeme parameters are used. 
File biogeme.toml has been created 


In [14]:
# Estimate the model parameters; the optimizer's progress is reported in the
# log output of this cell.
results = biogeme.estimate()

As the model is not too complex, we activate the calculation of second derivatives. If you want to change it, change the name of the algorithm in the TOML file from "automatic" to "simple_bounds" 
*** Initial values of the parameters are obtained from the file __m01_latent_variable.iter 
Cannot read file __m01_latent_variable.iter. Statement is ignored. 
As the model is not too complex, we activate the calculation of second derivatives. If you want to change it, change the name of the algorithm in the TOML file from "automatic" to "simple_bounds" 
Optimization algorithm: hybrid Newton/BFGS with simple bounds [simple_bounds] 
** Optimization: Newton with trust region for simple bounds 
Iter.     Function    Relgrad   Radius      Rho      
    0      2.2e+04        1.2      0.5        0    - 
    1      1.6e+04       0.38        5      1.1   ++ 
    2      1.6e+04       0.38      2.5      1.1    - 
    3      1.6e+04       0.38      1.2      1.1    - 
    4      1.6e+04       0.38     0.

In [15]:
# Display the table of estimates: value, robust standard error, robust t-test
# and robust p-value for each estimated parameter.
results.get_estimated_parameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
coef_age_50_less,0.002615,0.016496,0.158526,0.874042
coef_employees,-0.03956,0.021237,-1.862768,0.062495
coef_haveChildren,-0.06303,0.025933,-2.430439,0.015081
coef_highEducation,-0.069685,0.036515,-1.908384,0.056342
coef_intercept,-0.433845,0.031169,-13.919188,0.0
coef_male,0.089341,0.035363,2.5264,0.011524
coeff_LifSty07,3.571325,1.225731,2.913629,0.003573
coeff_LifSty10,1.937198,0.773894,2.503183,0.012308
coeff_ResidCh04,-0.877963,0.603826,-1.454001,0.145946
coeff_ResidCh05,1.521038,1.067413,1.424976,0.154164


In [16]:
# The short summary is printed rather than left as the cell's last
# expression: its output is laid out with tabs and line breaks, which a bare
# expression would show as escape sequences.
print(results.short_summary())

Results for model m01_latent_variable
Nbr of parameters:		23
Sample size:			1906
Excluded data:			0
Final log likelihood:		-14206.46
Akaike Information Criterion:	28458.92
Bayesian Information Criterion:	28586.63

