In [3]:
# Translated to .py by Evanthia Kazagli
# 2017
# Adapted to PandasBiogeme by Michel Bierlaire
# Thu Nov  1 17:50:01 2018

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.models import lognested, loglogit
from biogeme.expressions import Beta, DefineVariable

# Load the tab-separated Swissmetro data file.
# The separator is passed by keyword: pandas deprecated positional arguments
# after the file path in 1.3 (FutureWarning) and made them keyword-only in
# 2.0, where the positional form raises a TypeError.
df = pd.read_csv("swissmetro.dat", sep='\t')
database = db.Database("swissmetro", df)

# Render floats with three significant digits in printed tables.
pd.options.display.float_format = '{:.3g}'.format

# Publish the database variables as module-level names so that the
# expressions below can refer to them directly (PURPOSE, CHOICE, AGE, ...).
globals().update(database.variables)


# Flag the observations to discard: PURPOSE is neither 1 nor 3, or CHOICE is
# 0, or AGE is 6.  The flagged rows are then removed from the database.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0) + (AGE == 6)) > 0
database.remove(exclude)
  
# Parameters to be estimated.
# Positional arguments of Beta, in order:
#   1. name used in the report (typically the same as the variable)
#   2. starting value
#   3. lower bound
#   4. upper bound
#   5. status flag -- 0: estimate the parameter, 1: keep it fixed

# Constants of the three alternatives; ASC_SBB is kept fixed at its
# starting value (status flag 1).
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 1)
ASC_SM = Beta('ASC_SM', 0, None, None, 0)

# Coefficients, all estimated, unbounded, starting from 0.
B_HE = Beta('B_HE', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)
B_CAR_TIME = Beta('B_CAR_TIME', 0, None, None, 0)
B_SBB_TIME = Beta('B_SBB_TIME', 0, None, None, 0)
B_SM_TIME = Beta('B_SM_TIME', 0, None, None, 0)
B_SENIOR = Beta('B_SENIOR', 0, None, None, 0)
B_GA = Beta('B_GA', 0, None, None, 0)
B_MALE = Beta('B_MALE', 0, None, None, 0)
B_INCOME = Beta('B_INCOME', 0, None, None, 0)
B_SEATS = Beta('B_SEATS', 0, None, None, 0)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
# Nest parameter: starts at 1, with 1 as its lower bound and no upper bound.
MU_rail_based = Beta('MU_rail_based', 1, 1, None, 0)

# Arithmetic expressions for quantities that are not directly available
# from the data; each one is registered in the database under the given name.
INCOME_FIL = DefineVariable('INCOME_FIL', INCOME != 4, database)
SENIOR = DefineVariable('SENIOR', AGE == 5, database)
CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)
SM_COST = DefineVariable('SM_COST', SM_CO * (GA == 0), database)
TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)
TRAIN_COST = DefineVariable('TRAIN_COST', TRAIN_CO * (GA == 0), database)

# Scaled attributes: every TT, cost and HE variable is divided by 100.
TRAIN_TT_SCALED = DefineVariable(
    'TRAIN_TT_SCALED', TRAIN_TT / 100.0, database
)
TRAIN_COST_SCALED = DefineVariable(
    'TRAIN_COST_SCALED', TRAIN_COST / 100, database
)
SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)
SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)
CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)
CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)
TRAIN_HE_SCALED = DefineVariable('TRAIN_HE_SCALED', TRAIN_HE / 100, database)
SM_HE_SCALED = DefineVariable('SM_HE_SCALED', SM_HE / 100, database)

# Utility functions, one per alternative.
Car_SP = (
    ASC_CAR
    + B_CAR_TIME * CAR_TT_SCALED
    + B_COST * CAR_CO_SCALED
    + B_SENIOR * SENIOR
)
SBB_SP = (
    ASC_SBB
    + B_SBB_TIME * TRAIN_TT_SCALED
    + B_COST * TRAIN_COST_SCALED
    + B_HE * TRAIN_HE_SCALED
    + B_GA * GA
)
SM_SP = (
    ASC_SM
    + B_SM_TIME * SM_TT_SCALED
    + B_COST * SM_COST_SCALED
    + B_HE * SM_HE_SCALED
    + B_GA * GA
    + B_SENIOR * SENIOR
)
# Disabled extra terms, kept for reference:
#   + B_SENIOR * SENIOR + B_MALE * MALE + B_INCOME * INCOME_FIL

# Utilities and availability conditions, keyed by alternative id.
V = {3: Car_SP, 1: SBB_SP, 2: SM_SP}
av = {3: CAR_AV_SP, 1: TRAIN_AV_SP, 2: SM_AV}

# Nest structure: each nest is a (nest parameter, list of alternative ids)
# pair.  Alternatives 1 and 2 share the estimated parameter MU_rail_based;
# alternative 3 sits alone in a nest whose parameter is the constant 1.0.
#
# Alternative specification, not used here:
#   innovative = 1.0, [2]
#   classic = MU_classic, [1, 3]
#   nests = classic, innovative
rail_based = (MU_rail_based, [1, 2])
car = (1.0, [3])
nests = (rail_based, car)

# NL (Nested Logit model), with availability conditions.
logprob = lognested(V, av, nests, CHOICE)

# Estimate the nested model.
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "MEV_SM_NL"
results = biogeme.estimate()

# Report: parameter table (as a pandas table) followed by summary statistics.
pandasResults = results.getEstimatedParameters()
print(pandasResults)
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

# Benchmark: estimate a logit model on the same utilities and availability
# conditions, then compare its fit with the nested model.
logprob_logit = loglogit(V, av, CHOICE)
biogeme_logit = bio.BIOGEME(database, logprob_logit)
biogeme_logit.modelName = "MEV_SM_NL_logit"
results_logit = biogeme_logit.estimate()

# Final log likelihood and rho-bar-squared of each model.
ll_nested = results.data.logLike
rhobar_nested = results.data.rhoBarSquare
ll_logit = results_logit.data.logLike
rhobar_logit = results_logit.data.rhoBarSquare

print(
    f"LL logit:  {ll_logit:.3f}  rhobar: {rhobar_logit:.3f}  Parameters: {results_logit.data.nparam}"
)
print(
    f"LL nested: {ll_nested:.3f}  rhobar: {rhobar_nested:.3f}  Parameters: {results.data.nparam}"
)

# Likelihood ratio statistic: -2 * (LL of logit - LL of nested).
lr = -2 * (ll_logit - ll_nested)
print(f"Likelihood ratio: {lr:.3f}")

               Value  Active bound  Std err  t-test  p-value  Rob. Std err  \
ASC_CAR        0.034             0    0.149   0.228    0.819         0.172   
ASC_SM         0.303             0     0.12    2.53   0.0114          0.13   
B_CAR_TIME     -1.15             0   0.0671   -17.1        0         0.125   
B_COST        -0.984             0    0.055   -17.9        0        0.0757   
B_GA           0.919             0    0.186    4.95 7.61e-07         0.189   
B_HE          -0.573             0    0.116   -4.96 7.12e-07         0.119   
B_SBB_TIME     -1.46             0    0.142   -10.3        0         0.214   
B_SENIOR       -2.08             0    0.242   -8.57        0         0.289   
B_SM_TIME      -1.14             0   0.0894   -12.7        0         0.195   
MU_rail_based      1             1    0.123    8.13 4.44e-16          0.16   

               Rob. t-test  Rob. p-value  
ASC_CAR              0.198         0.843  
ASC_SM                2.34        0.0193  
B_CAR_TIME  

In [2]:
# Show the table of estimated parameters as this cell's output.
# NOTE(review): the saved output of this cell lists MU_classic, whereas the
# model estimated in this notebook uses MU_rail_based -- the output looks
# stale (cell run out of order?); re-run it after the estimation cell.
pandasResults

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR,-0.273,0.079,-3.46,0.000533,0.0875,-3.13,0.00177
ASC_SM,0.026,0.0862,0.302,0.763,0.104,0.25,0.803
B_CAR_TIME,-0.741,0.0572,-12.9,0.0,0.117,-6.33,2.37e-10
B_COST,-0.718,0.0458,-15.7,0.0,0.0598,-12.0,0.0
B_GA,0.719,0.0997,7.22,5.32e-13,0.103,6.97,3.09e-12
B_HE,-0.352,0.065,-5.41,6.17e-08,0.0661,-5.32,1.02e-07
B_SBB_TIME,-1.04,0.0669,-15.5,0.0,0.107,-9.68,0.0
B_SM_TIME,-0.807,0.0813,-9.92,0.0,0.168,-4.79,1.63e-06
MU_classic,2.33,0.139,16.8,0.0,0.176,13.3,0.0


In [None]:
# Side-by-side look at the estimates of two models, printed under the labels
# "Linear" and "Box-Cox".
# NOTE(review): results_M1 and results_M2 are not defined in the cells visible
# here -- confirm they are created before this cell runs.
r1 = results_M1.getEstimatedParameters()
r2 = results_M2.getEstimatedParameters()

# Columns left out of the printed tables; labels that are absent from a
# table are simply skipped.
cols_ex = ['Std err', 't-test', 'p-value']

print('Linear')
print(r1.drop(columns=cols_ex, errors='ignore'))
print('\nBox-Cox')
print(r2.drop(columns=cols_ex, errors='ignore'))