In [1]:
# Translated to .py by Marti Montesinos
# Adapted to PandasBiogeme by Michel Bierlaire
# Sun Oct 21 23:41:03 2018

%reset -f
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta, DefineVariable, Variable
import math

# Load the tab-separated Swissmetro data file. The separator is passed by
# keyword: pandas 2.x accepts only the file path positionally, so the former
# positional '\t' raises a TypeError there (and a FutureWarning before that).
df = pd.read_csv("SwissMetro/swissmetro.dat", sep='\t')

database = db.Database("swissmetro", df)
pd.options.display.float_format = '{:.3g}'.format

# Dictionary of the database's variables, indexed by name. The cells below
# also store the derived variables and the utility expressions in it.
v = database.variables

# Observations to discard: PURPOSE is neither 1 nor 3, or CHOICE is 0.
exclude = ((v['PURPOSE'] != 1) * (v['PURPOSE'] != 3) + (v['CHOICE'] == 0)) > 0
database.remove(exclude)

# Define here arithmetic expressions for names that are not directly
# available from the data.

# Availabilities: the raw availability, zeroed when SP == 0.
v['CAR_AV_SP'] = DefineVariable('CAR_AV_SP', v['CAR_AV'] * (v['SP'] != 0), database)
v['SM_AV_SP'] = DefineVariable('SM_AV_SP', v['SM_AV'] * (v['SP'] != 0), database)
v['TRAIN_AV_SP'] = DefineVariable('TRAIN_AV_SP', v['TRAIN_AV'] * (v['SP'] != 0), database)

# Costs: the train and SM costs are set to zero unless GA == 0; the car cost
# is a plain copy of CAR_CO.
v['CAR_COST'] = DefineVariable('CAR_COST', v['CAR_CO'], database)
v['SM_COST'] = DefineVariable('SM_COST', v['SM_CO'] * (v['GA'] == 0), database)
v['TRAIN_COST'] = DefineVariable('TRAIN_COST', v['TRAIN_CO'] * (v['GA'] == 0), database)

# Scaling factors applied to travel times, costs and headways (all 1 here,
# i.e. the scaled variables equal the unscaled ones).
tt_scale = 1
cost_scale = 1
he_scale = 1

v['TRAIN_TT_SCALED'] = DefineVariable('TRAIN_TT_SCALED', v['TRAIN_TT'] / tt_scale, database)
v['TRAIN_COST_SCALED'] = DefineVariable('TRAIN_COST_SCALED', v['TRAIN_COST'] / cost_scale, database)
v['SM_TT_SCALED'] = DefineVariable('SM_TT_SCALED', v['SM_TT'] / tt_scale, database)
v['SM_COST_SCALED'] = DefineVariable('SM_COST_SCALED', v['SM_COST'] / cost_scale, database)
v['CAR_TT_SCALED'] = DefineVariable('CAR_TT_SCALED', v['CAR_TT'] / tt_scale, database)
# The scaled car cost is built from CAR_CO directly (same values as CAR_COST).
v['CAR_CO_SCALED'] = DefineVariable('CAR_CO_SCALED', v['CAR_CO'] / cost_scale, database)
v['TRAIN_HE_SCALED'] = DefineVariable('TRAIN_HE_SCALED', v['TRAIN_HE'] / he_scale, database)
v['SM_HE_SCALED'] = DefineVariable('SM_HE_SCALED', v['SM_HE'] / he_scale, database)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
# Generic model: one cost coefficient (B_COST) shared by all alternatives.

# Parameters to be estimated.
# Beta(name, starting value, lower bound, upper bound, status)
#   name:   label used in the report, typically the same as the variable
#   status: 0 = estimate the parameter, 1 = keep it fixed
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 1)  # kept fixed at 0
ASC_SM = Beta('ASC_SM', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)
B_HE = Beta('B_HE', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)

# Utility functions, stored alongside the variables in `v`.
v['Car_SP'] = (
    ASC_CAR
    + B_TIME * v['CAR_TT_SCALED']
    + B_COST * v['CAR_CO_SCALED']
)
v['SBB_SP'] = (
    ASC_SBB
    + B_TIME * v['TRAIN_TT_SCALED']
    + B_COST * v['TRAIN_COST_SCALED']
    + B_HE * v['TRAIN_HE_SCALED']
)
v['SM_SP'] = (
    ASC_SM
    + B_TIME * v['SM_TT_SCALED']
    + B_COST * v['SM_COST_SCALED']
    + B_HE * v['SM_HE_SCALED']
)

# Utilities and availabilities, keyed by the alternative id found in CHOICE.
Vs = {
    3: v['Car_SP'],
    1: v['SBB_SP'],
    2: v['SM_SP'],
}
avs = {
    3: v['CAR_AV_SP'],
    1: v['TRAIN_AV_SP'],
    2: v['SM_AV_SP'],
}

# Logit model with availability conditions: contribution to the log likelihood.
logprob = models.loglogit(Vs, avs, v['CHOICE'])
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_SM_generic"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print('-----Parameter Statistics------')
print(pandasResults)

# Goodness of fit, computed by hand rather than read from results.data
# (initLogLike / rhoBarSquare).
# NOTE(review): LL0 = -N*log(Jn) treats all Jn alternatives as available for
# every observation; the availability conditions in `avs` are ignored here --
# confirm this is intended.
N = database.getNumberOfObservations()
Jn = len(Vs)
LL0 = -N * math.log(Jn)
LLB = results.data.logLike
K = results.numberOfFreeParameters()
rho_bar_squared = 1 - (LLB - K) / LL0

summary_lines = [
    '\n-----Summary Statistics------',
    f'Nbr of alternatives: {Jn}',
    f'Nbr of free paremeters: {K}',
    f'Nbr of observations: {N}',
    f'LL(0) =    {LL0}',
    f'LL(beta) = {LLB}',
    f'rho bar square = {rho_bar_squared}',
    f'Output file: {results.data.htmlFileName}',
]
for summary_line in summary_lines:
    print(summary_line)

-----Parameter Statistics------
           Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR    0.189   0.0773    2.45   0.0144        0.0798         2.37   
ASC_SM     0.451   0.0697    6.47 9.66e-11        0.0932         4.84   
B_COST   -0.0108 0.000518   -20.9        0      0.000682        -15.9   
B_HE    -0.00535 0.000964   -5.55 2.78e-08      0.000983        -5.45   
B_TIME   -0.0128 0.000569   -22.4        0       0.00104        -12.2   

         Rob. p-value  
ASC_CAR        0.0177  
ASC_SM       1.32e-06  
B_COST              0  
B_HE         5.14e-08  
B_TIME              0  

-----Summary Statistics------
Nbr of alternatives: 3
Nbr of free paremeters: 5
Nbr of observations: 6768
LL(0) =    -7435.407969705767
LL(beta) = -5315.386329180514
rho bar square = 0.28445266879000164
Output file: logit_SM_generic~00.html


In [3]:
# Specific model: one cost coefficient per alternative
# (B_CAR_COST, B_TRAIN_COST, B_SM_COST) instead of a shared one.

# Parameters to be estimated.
# Beta(name, starting value, lower bound, upper bound, status)
#   name:   label used in the report, typically the same as the variable
#   status: 0 = estimate the parameter, 1 = keep it fixed
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 1)  # kept fixed at 0
ASC_SM = Beta('ASC_SM', 0, None, None, 0)
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_SM_COST = Beta('B_SM_COST', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)
B_HE = Beta('B_HE', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)

# Utility functions, stored alongside the variables in `v`.
v['Car_SP'] = (
    ASC_CAR
    + B_TIME * v['CAR_TT_SCALED']
    + B_CAR_COST * v['CAR_CO_SCALED']
)
v['SBB_SP'] = (
    ASC_SBB
    + B_TIME * v['TRAIN_TT_SCALED']
    + B_TRAIN_COST * v['TRAIN_COST_SCALED']
    + B_HE * v['TRAIN_HE_SCALED']
)
v['SM_SP'] = (
    ASC_SM
    + B_TIME * v['SM_TT_SCALED']
    + B_SM_COST * v['SM_COST_SCALED']
    + B_HE * v['SM_HE_SCALED']
)

# Utilities and availabilities, keyed by the alternative id found in CHOICE.
Vs = {
    3: v['Car_SP'],
    1: v['SBB_SP'],
    2: v['SM_SP'],
}
avs = {
    3: v['CAR_AV_SP'],
    1: v['TRAIN_AV_SP'],
    2: v['SM_AV_SP'],
}

# Logit model with availability conditions: contribution to the log likelihood.
logprob = models.loglogit(Vs, avs, v['CHOICE'])
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_SM_specific"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print('-----Parameter Statistics------')
print(pandasResults)

# Goodness of fit, computed by hand rather than read from results.data
# (initLogLike / rhoBarSquare).
# NOTE(review): LL0 = -N*log(Jn) treats all Jn alternatives as available for
# every observation; the availability conditions in `avs` are ignored here --
# confirm this is intended.
N = database.getNumberOfObservations()
Jn = len(Vs)
LL0 = -N * math.log(Jn)
LLB = results.data.logLike
K = results.numberOfFreeParameters()
rho_bar_squared = 1 - (LLB - K) / LL0

summary_lines = [
    '\n-----Results------',
    f'Nbr of alternatives: {Jn}',
    f'Nbr of free paremeters: {K}',
    f'Nbr of observations: {N}',
    f'LL(0) =    {LL0}',
    f'LL(beta) = {LLB}',
    f'rho bar square = {rho_bar_squared}',
    f'Output file: {results.data.htmlFileName}',
]
for summary_line in summary_lines:
    print(summary_line)


-----Parameter Statistics------
                Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR        -0.971    0.115   -8.45        0         0.134        -7.22   
ASC_SM         -0.444   0.0802   -5.53 3.14e-08         0.102        -4.34   
B_CAR_COST   -0.00949 0.000903   -10.5        0       0.00116        -8.21   
B_HE         -0.00542    0.001    -5.4 6.69e-08       0.00101        -5.35   
B_SM_COST     -0.0109 0.000526   -20.7        0      0.000703        -15.5   
B_TIME        -0.0111 0.000609   -18.2        0        0.0012        -9.26   
B_TRAIN_COST  -0.0293  0.00113   -26.1        0       0.00169        -17.3   

              Rob. p-value  
ASC_CAR           5.11e-13  
ASC_SM            1.44e-05  
B_CAR_COST        2.22e-16  
B_HE               8.6e-08  
B_SM_COST                0  
B_TIME                   0  
B_TRAIN_COST             0  

-----Results------
Nbr of alternatives: 3
Nbr of free paremeters: 7
Nbr of observations: 6768
LL(0) =    -7435

In [4]:
# Specific model with gender: alternative-specific cost coefficients plus a
# B_MALE * MALE term in the car and SM utilities.

# Parameters to be estimated.
# Beta(name, starting value, lower bound, upper bound, status)
#   name:   label used in the report, typically the same as the variable
#   status: 0 = estimate the parameter, 1 = keep it fixed
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 1)  # kept fixed at 0
ASC_SM = Beta('ASC_SM', 0, None, None, 0)
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_SM_COST = Beta('B_SM_COST', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)
B_HE = Beta('B_HE', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_MALE = Beta('B_MALE', 0, None, None, 0)

# Utility functions, stored alongside the variables in `v`.
v['Car_SP'] = (
    ASC_CAR
    + B_TIME * v['CAR_TT_SCALED']
    + B_CAR_COST * v['CAR_CO_SCALED']
    + B_MALE * v['MALE']
)
v['SBB_SP'] = (
    ASC_SBB
    + B_TIME * v['TRAIN_TT_SCALED']
    + B_TRAIN_COST * v['TRAIN_COST_SCALED']
    + B_HE * v['TRAIN_HE_SCALED']
)
v['SM_SP'] = (
    ASC_SM
    + B_TIME * v['SM_TT_SCALED']
    + B_SM_COST * v['SM_COST_SCALED']
    + B_HE * v['SM_HE_SCALED']
    + B_MALE * v['MALE']
)

# Utilities and availabilities, keyed by the alternative id found in CHOICE.
Vs = {
    3: v['Car_SP'],
    1: v['SBB_SP'],
    2: v['SM_SP'],
}
avs = {
    3: v['CAR_AV_SP'],
    1: v['TRAIN_AV_SP'],
    2: v['SM_AV_SP'],
}

# Logit model with availability conditions: contribution to the log likelihood.
logprob = models.loglogit(Vs, avs, v['CHOICE'])
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_SM_gender"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print('-----Parameter Statistics------')
print(pandasResults)

# Goodness of fit, computed by hand rather than read from results.data
# (initLogLike / rhoBarSquare).
# NOTE(review): LL0 = -N*log(Jn) treats all Jn alternatives as available for
# every observation; the availability conditions in `avs` are ignored here --
# confirm this is intended.
N = database.getNumberOfObservations()
Jn = len(Vs)
LL0 = -N * math.log(Jn)
LLB = results.data.logLike
K = results.numberOfFreeParameters()
rho_bar_squared = 1 - (LLB - K) / LL0

summary_lines = [
    '\n-----Results------',
    f'Nbr of alternatives: {Jn}',
    f'Nbr of free paremeters: {K}',
    f'Nbr of observations: {N}',
    f'LL(0) =    {LL0}',
    f'LL(beta) = {LLB}',
    f'rho bar square = {rho_bar_squared}',
    f'Output file: {results.data.htmlFileName}',
]
for summary_line in summary_lines:
    print(summary_line)


-----Parameter Statistics------
                Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR         -1.63    0.127   -12.9        0         0.146        -11.2   
ASC_SM          -1.08   0.0947   -11.4        0         0.115        -9.46   
B_CAR_COST   -0.00962 0.000904   -10.6        0       0.00116        -8.32   
B_HE         -0.00594  0.00103   -5.78 7.64e-09       0.00104         -5.7   
B_MALE           1.07   0.0809    13.2        0        0.0815         13.1   
B_SM_COST      -0.011 0.000525     -21        0      0.000699        -15.7   
B_TIME        -0.0111 0.000611   -18.2        0        0.0012        -9.22   
B_TRAIN_COST  -0.0276  0.00113   -24.5        0        0.0017        -16.3   

              Rob. p-value  
ASC_CAR                  0  
ASC_SM                   0  
B_CAR_COST               0  
B_HE               1.2e-08  
B_MALE                   0  
B_SM_COST                0  
B_TIME                   0  
B_TRAIN_COST             0  

---

In [5]:
# Specific model with gender and seats: alternative-specific cost
# coefficients, B_MALE * MALE in the car and SM utilities, and a
# B_SM_SEATS * SM_SEATS term.

# Parameters to be estimated.
# Beta(name, starting value, lower bound, upper bound, status)
#   name:   label used in the report, typically the same as the variable
#   status: 0 = estimate the parameter, 1 = keep it fixed
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 1)  # kept fixed at 0
ASC_SM = Beta('ASC_SM', 0, None, None, 0)
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_SM_COST = Beta('B_SM_COST', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)
B_HE = Beta('B_HE', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_MALE = Beta('B_MALE', 0, None, None, 0)
B_SM_SEATS = Beta('B_SM_SEATS', 0, None, None, 0)

# Utility functions, stored alongside the variables in `v`.
v['Car_SP'] = (
    ASC_CAR
    + B_TIME * v['CAR_TT_SCALED']
    + B_CAR_COST * v['CAR_CO_SCALED']
    + B_MALE * v['MALE']
)
# NOTE(review): the SM_SEATS term enters the SBB (train) utility, not the SM
# one -- confirm this placement is intended.
v['SBB_SP'] = (
    ASC_SBB
    + B_TIME * v['TRAIN_TT_SCALED']
    + B_TRAIN_COST * v['TRAIN_COST_SCALED']
    + B_HE * v['TRAIN_HE_SCALED']
    + B_SM_SEATS * v['SM_SEATS']
)
v['SM_SP'] = (
    ASC_SM
    + B_TIME * v['SM_TT_SCALED']
    + B_SM_COST * v['SM_COST_SCALED']
    + B_HE * v['SM_HE_SCALED']
    + B_MALE * v['MALE']
)

# Utilities and availabilities, keyed by the alternative id found in CHOICE.
Vs = {
    3: v['Car_SP'],
    1: v['SBB_SP'],
    2: v['SM_SP'],
}
avs = {
    3: v['CAR_AV_SP'],
    1: v['TRAIN_AV_SP'],
    2: v['SM_AV_SP'],
}

# Logit model with availability conditions: contribution to the log likelihood.
logprob = models.loglogit(Vs, avs, v['CHOICE'])
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_SM_gender_seats"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print('-----Parameter Statistics------')
print(pandasResults)

# Goodness of fit, computed by hand rather than read from results.data
# (initLogLike / rhoBarSquare).
# NOTE(review): LL0 = -N*log(Jn) treats all Jn alternatives as available for
# every observation; the availability conditions in `avs` are ignored here --
# confirm this is intended.
N = database.getNumberOfObservations()
Jn = len(Vs)
LL0 = -N * math.log(Jn)
LLB = results.data.logLike
K = results.numberOfFreeParameters()
rho_bar_squared = 1 - (LLB - K) / LL0

summary_lines = [
    '\n-----Results------',
    f'Nbr of alternatives: {Jn}',
    f'Nbr of free paremeters: {K}',
    f'Nbr of observations: {N}',
    f'LL(0) =    {LL0}',
    f'LL(beta) = {LLB}',
    f'rho bar square = {rho_bar_squared}',
    f'Output file: {results.data.htmlFileName}',
]
for summary_line in summary_lines:
    print(summary_line)


-----Parameter Statistics------
                Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR         -1.43    0.131   -10.9        0         0.153        -9.33   
ASC_SM         -0.872    0.101   -8.64        0         0.123        -7.07   
B_CAR_COST   -0.00963 0.000904   -10.7        0       0.00116        -8.33   
B_HE         -0.00589  0.00103    -5.7 1.18e-08       0.00105        -5.61   
B_MALE              1   0.0819    12.2        0        0.0831           12   
B_SM_COST      -0.011 0.000524     -21        0      0.000698        -15.8   
B_SM_SEATS       0.61   0.0963    6.33 2.44e-10         0.103         5.91   
B_TIME         -0.011 0.000611     -18        0        0.0012        -9.16   
B_TRAIN_COST  -0.0269  0.00113   -23.8        0       0.00169        -15.9   

              Rob. p-value  
ASC_CAR                  0  
ASC_SM            1.51e-12  
B_CAR_COST               0  
B_HE              1.99e-08  
B_MALE                   0  
B_SM_COST    

In [6]:
# Specific model with gender, seats, senior and GA: alternative-specific cost
# coefficients, B_MALE and B_SENIOR in the car and SM utilities, B_SM_SEATS in
# the SBB utility, and B_GA in the SBB and SM utilities.

# Parameters to be estimated.
# Beta(name, starting value, lower bound, upper bound, status)
#   name:   label used in the report, typically the same as the variable
#   status: 0 = estimate the parameter, 1 = keep it fixed
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 1)  # kept fixed at 0
ASC_SM = Beta('ASC_SM', 0, None, None, 0)
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_SM_COST = Beta('B_SM_COST', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)
B_HE = Beta('B_HE', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_MALE = Beta('B_MALE', 0, None, None, 0)
B_SM_SEATS = Beta('B_SM_SEATS', 0, None, None, 0)
B_SENIOR = Beta('B_SENIOR', 0, None, None, 0)
B_GA = Beta('B_GA', 0, None, None, 0)

# Indicator of AGE == 5, added to the database as a new variable.
# NOTE(review): presumably category 5 is the oldest age class -- verify
# against the data description.
v['SENIOR'] = DefineVariable('SENIOR', v['AGE'] == 5, database)

# Utility functions, stored alongside the variables in `v`.
v['Car_SP'] = (
    ASC_CAR
    + B_TIME * v['CAR_TT_SCALED']
    + B_CAR_COST * v['CAR_CO_SCALED']
    + B_MALE * v['MALE']
    + B_SENIOR * v['SENIOR']
)
# NOTE(review): the SM_SEATS term enters the SBB (train) utility, not the SM
# one -- confirm this placement is intended.
v['SBB_SP'] = (
    ASC_SBB
    + B_TIME * v['TRAIN_TT_SCALED']
    + B_TRAIN_COST * v['TRAIN_COST_SCALED']
    + B_HE * v['TRAIN_HE_SCALED']
    + B_SM_SEATS * v['SM_SEATS']
    + B_GA * v['GA']
)
v['SM_SP'] = (
    ASC_SM
    + B_TIME * v['SM_TT_SCALED']
    + B_SM_COST * v['SM_COST_SCALED']
    + B_HE * v['SM_HE_SCALED']
    + B_MALE * v['MALE']
    + B_SENIOR * v['SENIOR']
    + B_GA * v['GA']
)

# Utilities and availabilities, keyed by the alternative id found in CHOICE.
Vs = {
    3: v['Car_SP'],
    1: v['SBB_SP'],
    2: v['SM_SP'],
}
avs = {
    3: v['CAR_AV_SP'],
    1: v['TRAIN_AV_SP'],
    2: v['SM_AV_SP'],
}

# Logit model with availability conditions: contribution to the log likelihood.
logprob = models.loglogit(Vs, avs, v['CHOICE'])
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_SM_gender_seats_senior_ga"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print('-----Parameter Statistics------')
print(pandasResults)

# Goodness of fit, computed by hand rather than read from results.data
# (initLogLike / rhoBarSquare).
# NOTE(review): LL0 = -N*log(Jn) treats all Jn alternatives as available for
# every observation; the availability conditions in `avs` are ignored here --
# confirm this is intended.
N = database.getNumberOfObservations()
Jn = len(Vs)
LL0 = -N * math.log(Jn)
LLB = results.data.logLike
K = results.numberOfFreeParameters()
rho_bar_squared = 1 - (LLB - K) / LL0

summary_lines = [
    '\n-----Results------',
    f'Nbr of alternatives: {Jn}',
    f'Nbr of free paremeters: {K}',
    f'Nbr of observations: {N}',
    f'LL(0) =    {LL0}',
    f'LL(beta) = {LLB}',
    f'rho bar square = {rho_bar_squared}',
    f'Output file: {results.data.htmlFileName}',
]
for summary_line in summary_lines:
    print(summary_line)


-----Parameter Statistics------
                Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR         -1.03    0.138   -7.51    6e-14          0.16        -6.47   
ASC_SM         -0.545    0.106   -5.16 2.45e-07         0.127        -4.29   
B_CAR_COST   -0.00949 0.000907   -10.5        0       0.00117        -8.13   
B_GA            0.592    0.188    3.15  0.00163         0.194         3.06   
B_HE         -0.00636  0.00107   -5.94 2.85e-09       0.00109        -5.85   
B_MALE          0.935   0.0852      11        0        0.0862         10.8   
B_SENIOR        -1.82     0.12   -15.2        0         0.113        -16.1   
B_SM_COST     -0.0105 0.000549   -19.1        0      0.000738        -14.2   
B_SM_SEATS      0.661   0.0993    6.65 2.88e-11         0.106         6.24   
B_TIME         -0.011 0.000614     -18        0       0.00121        -9.12   
B_TRAIN_COST  -0.0246  0.00116   -21.3        0       0.00172        -14.3   

              Rob. p-value  
AS