In [10]:
# Translated to .py by Meritxell Pacheco (December 2016)
########################################################
# Updated by Evanthia Kazagli (January 2017)
########################################################
# Adapted for PandasBiogeme by Michel Bierlaire
# Fri Nov  2 15:44:49 2018
########################################################

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.models import logit
from datetime import datetime
import math

# Compact numeric display (3 significant digits) for the tables printed below.
pd.options.display.float_format = '{:.3g}'.format

# Read the raw data file and wrap the resulting DataFrame in a Biogeme
# database.
pandas = pd.read_table("swissmetro.dat")
database = db.Database("swissmetro", pandas)

# Publish the entries of database.variables as module-level names (PURPOSE,
# CHOICE, ... are used as such below), then bring the Biogeme expression
# operators into scope.
globals().update(database.variables)
from biogeme.expressions import *

# Flag, and remove from the database, every observation whose PURPOSE is
# neither 1 nor 3, or whose CHOICE is 0.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)

# Parameters to be estimated.
# Beta(name, start, lower, upper, status):
#   name    label used in the report, typically the same as the variable
#   start   starting value
#   lower   lower bound
#   upper   upper bound
#   status  0: estimate the parameter, 1: keep it fixed

# Alternative specific constants; ASC_SBB is kept fixed (status 1).
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 1)
ASC_SM = Beta('ASC_SM', 0, None, None, 0)

# Coefficients of the explanatory variables, all estimated from 0.
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_HE = Beta('B_HE', 0, None, None, 0)
B_SM_COST = Beta('B_SM_COST', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)
B_SENIOR = Beta('B_SENIOR', 0, None, None, 0)
B_GA = Beta('B_GA', 0, None, None, 0)

# Variables that are not directly available in the data file are derived
# here and registered in the database.

# Indicator of the AGE == 5 category.
SENIOR = DefineVariable('SENIOR', AGE == 5, database)
# Car availability, switched off when SP == 0.
CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)
# Swissmetro cost, set to zero whenever GA != 0.
SM_COST = DefineVariable('SM_COST', SM_CO * (GA == 0), database)
# Train availability, switched off when SP == 0.
TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)
# Train cost, set to zero whenever GA != 0.
TRAIN_COST = DefineVariable('TRAIN_COST', TRAIN_CO * (GA == 0), database)

# Travel times, costs and headways divided by 100.
TRAIN_TT_SCALED = DefineVariable(
    'TRAIN_TT_SCALED', TRAIN_TT / 100.0, database
)
TRAIN_COST_SCALED = DefineVariable(
    'TRAIN_COST_SCALED', TRAIN_COST / 100, database
)
SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)
SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)
CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)
CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)
TRAIN_HE_SCALED = DefineVariable('TRAIN_HE_SCALED', TRAIN_HE / 100, database)
SM_HE_SCALED = DefineVariable('SM_HE_SCALED', SM_HE / 100, database)

# Utility functions of the three alternatives. B_TIME is shared by all of
# them; the cost coefficients are specific to each alternative.
Car_SP = (
    ASC_CAR
    + B_TIME * CAR_TT_SCALED
    + B_CAR_COST * CAR_CO_SCALED
    + B_SENIOR * SENIOR
)
SBB_SP = (
    ASC_SBB
    + B_TIME * TRAIN_TT_SCALED
    + B_TRAIN_COST * TRAIN_COST_SCALED
    + B_HE * TRAIN_HE_SCALED
    + B_GA * GA
)
SM_SP = (
    ASC_SM
    + B_TIME * SM_TT_SCALED
    + B_SM_COST * SM_COST_SCALED
    + B_HE * SM_HE_SCALED
    + B_GA * GA
    + B_SENIOR * SENIOR
)

# Utilities and availability conditions, keyed by the alternative
# identifiers that the model below matches against CHOICE.
V = {3: Car_SP, 1: SBB_SP, 2: SM_SP}
av = {3: CAR_AV_SP, 1: TRAIN_AV_SP, 2: SM_AV}

# Logit model with availability conditions: log of the probability of the
# chosen alternative.
logprob = models.loglogit(V, av, CHOICE)

# Set up the estimation problem, name it and run the estimation.
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_SM_generic"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print('-----Parameter Statistics------')
print(pandasResults)

# Summary statistics of the estimated model.
N = database.getNumberOfObservations()
Jn = len(V)  # number of alternatives in the choice set
K = results.numberOfFreeParameters()

# Log likelihood of a model giving the same probability to all Jn
# alternatives.
# NOTE(review): -N*log(Jn) assumes that all Jn alternatives are available to
# every observation, whereas the model is estimated with the availability
# conditions in `av`. Confirm that this is the intended reference value;
# Biogeme's own results.data.initLogLike is left commented out below.
LL0 = -N * math.log(Jn)
LLB = results.data.logLike

# Adjusted likelihood ratio index: 1 - (LL(beta) - K) / LL(0).
rho_bar_squared = 1 - (LLB - K) / LL0

print('\n-----Summary Statistics------')
print(f'Nbr of alternatives: {Jn}')
print(f'Nbr of free parameters: {K}')
print(f"Nbr of observations: {N}")
# print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(0) =    {LL0}")
print(f"LL(beta) = {LLB}")
# print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"rho bar square = {rho_bar_squared}")
print(f"Output file: {results.data.htmlFileName}")

-----Parameter Statistics------
              Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR      -0.632    0.121   -5.21 1.91e-07         0.143        -4.41   
ASC_SM       -0.159    0.084   -1.89   0.0584         0.106         -1.5   
B_CAR_COST   -0.934   0.0906   -10.3        0         0.117        -8.01   
B_GA          0.554    0.188    2.95  0.00314         0.191          2.9   
B_HE         -0.595    0.104    -5.7 1.21e-08         0.105        -5.65   
B_SENIOR      -1.87    0.116   -16.1        0         0.109        -17.2   
B_SM_COST     -1.04   0.0551   -18.9        0        0.0745          -14   
B_TIME        -1.12   0.0613   -18.3        0         0.121        -9.25   
B_TRAIN_COST  -2.69    0.116   -23.1        0         0.176        -15.3   

              Rob. p-value  
ASC_CAR           1.02e-05  
ASC_SM               0.134  
B_CAR_COST        1.11e-15  
B_GA               0.00373  
B_HE              1.58e-08  
B_SENIOR                 0  
B_SM

In [7]:
# Unidentified model: same specification as the previous logit, except that
# all three alternative specific constants are estimated (none is kept
# fixed).

# Compact numeric display (3 significant digits) for the tables printed below.
pd.options.display.float_format = '{:.3g}'.format

# Reload the raw data so that this cell starts from a fresh Biogeme database.
pandas = pd.read_table("swissmetro.dat")
database = db.Database("swissmetro", pandas)

# Publish the entries of database.variables as module-level names, then
# bring the Biogeme expression operators into scope.
globals().update(database.variables)
from biogeme.expressions import *

# Flag, and remove from the database, every observation whose PURPOSE is
# neither 1 nor 3, or whose CHOICE is 0.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)

# Parameters to be estimated.
# Beta(name, start, lower, upper, status):
#   name    label used in the report, typically the same as the variable
#   start   starting value
#   lower   lower bound
#   upper   upper bound
#   status  0: estimate the parameter, 1: keep it fixed

# Alternative specific constants: all three are free (status 0) in this
# cell.
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_SBB = Beta('ASC_SBB', 0, None, None, 0)
ASC_SM = Beta('ASC_SM', 0, None, None, 0)

# Coefficients of the explanatory variables, all estimated from 0.
B_CAR_COST = Beta('B_CAR_COST', 0, None, None, 0)
B_HE = Beta('B_HE', 0, None, None, 0)
B_SM_COST = Beta('B_SM_COST', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', 0, None, None, 0)
B_SENIOR = Beta('B_SENIOR', 0, None, None, 0)
B_GA = Beta('B_GA', 0, None, None, 0)

# Variables that are not directly available in the data file are derived
# here and registered in the database.

# Indicator of the AGE == 5 category.
SENIOR = DefineVariable('SENIOR', AGE == 5, database)
# Car availability, switched off when SP == 0.
CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)
# Swissmetro cost, set to zero whenever GA != 0.
SM_COST = DefineVariable('SM_COST', SM_CO * (GA == 0), database)
# Train availability, switched off when SP == 0.
TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)
# Train cost, set to zero whenever GA != 0.
TRAIN_COST = DefineVariable('TRAIN_COST', TRAIN_CO * (GA == 0), database)

# Travel times, costs and headways divided by 100.
TRAIN_TT_SCALED = DefineVariable(
    'TRAIN_TT_SCALED', TRAIN_TT / 100.0, database
)
TRAIN_COST_SCALED = DefineVariable(
    'TRAIN_COST_SCALED', TRAIN_COST / 100, database
)
SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)
SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)
CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)
CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)
TRAIN_HE_SCALED = DefineVariable('TRAIN_HE_SCALED', TRAIN_HE / 100, database)
SM_HE_SCALED = DefineVariable('SM_HE_SCALED', SM_HE / 100, database)

# Utility functions of the three alternatives. B_TIME is shared by all of
# them; the cost coefficients are specific to each alternative.
Car_SP = (
    ASC_CAR
    + B_TIME * CAR_TT_SCALED
    + B_CAR_COST * CAR_CO_SCALED
    + B_SENIOR * SENIOR
)
SBB_SP = (
    ASC_SBB
    + B_TIME * TRAIN_TT_SCALED
    + B_TRAIN_COST * TRAIN_COST_SCALED
    + B_HE * TRAIN_HE_SCALED
    + B_GA * GA
)
SM_SP = (
    ASC_SM
    + B_TIME * SM_TT_SCALED
    + B_SM_COST * SM_COST_SCALED
    + B_HE * SM_HE_SCALED
    + B_GA * GA
    + B_SENIOR * SENIOR
)

# Utilities and availability conditions, keyed by the alternative
# identifiers that the model below matches against CHOICE.
V = {3: Car_SP, 1: SBB_SP, 2: SM_SP}
av = {3: CAR_AV_SP, 1: TRAIN_AV_SP, 2: SM_AV}

# Logit model with availability conditions: log of the probability of the
# chosen alternative.
logprob = models.loglogit(V, av, CHOICE)

# Set up the estimation problem and run the estimation. The model name is
# specific to this cell: reusing "logit_SM_generic" from the identified logit
# would make the output files of the two models share the same base name.
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "logit_SM_unidentified"
results = biogeme.estimate()

# Estimated parameters as a pandas table.
pandasResults = results.getEstimatedParameters()
print('-----Parameter Statistics------')
print(pandasResults)

# Summary statistics of the estimated model.
N = database.getNumberOfObservations()
Jn = len(V)  # number of alternatives in the choice set
K = results.numberOfFreeParameters()

# Log likelihood of a model giving the same probability to all Jn
# alternatives.
# NOTE(review): -N*log(Jn) assumes that all Jn alternatives are available to
# every observation, whereas the model is estimated with the availability
# conditions in `av`. Confirm that this is the intended reference value;
# Biogeme's own results.data.initLogLike is left commented out below.
LL0 = -N * math.log(Jn)
LLB = results.data.logLike

# Adjusted likelihood ratio index: 1 - (LL(beta) - K) / LL(0).
rho_bar_squared = 1 - (LLB - K) / LL0

print('\n-----Summary Statistics------')
print(f'Nbr of alternatives: {Jn}')
print(f'Nbr of free parameters: {K}')
print(f"Nbr of observations: {N}")
# print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(0) =    {LL0}")
print(f"LL(beta) = {LLB}")
# print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"rho bar square = {rho_bar_squared}")
print(f"Output file: {results.data.htmlFileName}")

-----Parameter Statistics------
              Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR      -0.632   0.0633   -9.98        0        0.0699        -9.03   
ASC_SBB           0   0.0639       0        1        0.0791            0   
ASC_SM       -0.159   0.0381   -4.17 3.03e-05        0.0426        -3.73   
B_CAR_COST   -0.934   0.0906   -10.3        0         0.117        -8.01   
B_GA          0.554    0.188    2.95  0.00314         0.191          2.9   
B_HE         -0.595    0.104    -5.7 1.21e-08         0.105        -5.65   
B_SENIOR      -1.87    0.116   -16.1        0         0.109        -17.2   
B_SM_COST     -1.04   0.0551   -18.9        0        0.0745          -14   
B_TIME        -1.12   0.0613   -18.3        0         0.121        -9.25   
B_TRAIN_COST  -2.69    0.116   -23.1        0         0.176        -15.3   

              Rob. p-value  
ASC_CAR                  0  
ASC_SBB                  1  
ASC_SM             0.00019  
B_CAR_COST     

In [11]:
# Alternative specific variance: mixture of logit in which the constants of
# the car and SBB alternatives are random, built from 'NORMAL' draws.

# Compact numeric display (3 significant digits) for the tables printed below.
pd.options.display.float_format = '{:.3g}'.format

# Reload the raw data so that this cell starts from a fresh Biogeme database.
pandas = pd.read_table("swissmetro.dat")
database = db.Database("swissmetro", pandas)

# Publish the entries of database.variables as module-level names, then
# bring the Biogeme expression operators into scope.
globals().update(database.variables)
from biogeme.expressions import *

# Flag, and remove from the database, every observation whose PURPOSE is
# neither 1 nor 3, or whose CHOICE is 0.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)

# Parameters to be estimated.
# Beta(name, start, lower, upper, status):
#   name    label used in the report, typically the same as the variable
#   start   starting value
#   lower   lower bound
#   upper   upper bound
#   status  0: estimate the parameter, 1: keep it fixed

# Mean and standard deviation of the two random alternative specific
# constants.
ASC_CAR_mean = Beta('ASC_CAR_mean', -0.632, None, None, 0)
ASC_CAR_std = Beta('ASC_CAR_std', 0.0633, None, None, 0)
ASC_SBB_mean = Beta('ASC_SBB_mean', 0, None, None, 0)
ASC_SBB_std = Beta('ASC_SBB_std', 0.0639, None, None, 0)
# The Swissmetro constant is kept fixed (status 1) in this cell.
ASC_SM = Beta('ASC_SM', 0, None, None, 1)

# Coefficients of the explanatory variables, started from non-zero values.
B_CAR_COST = Beta('B_CAR_COST', -0.934, None, None, 0)
B_HE = Beta('B_HE', -0.595, None, None, 0)
B_SM_COST = Beta('B_SM_COST', -1.04, None, None, 0)
B_TIME = Beta('B_TIME', -1.12, None, None, 0)
B_TRAIN_COST = Beta('B_TRAIN_COST', -2.69, None, None, 0)
B_SENIOR = Beta('B_SENIOR', -1.87, None, None, 0)
B_GA = Beta('B_GA', 0.554, None, None, 0)

# Random parameters: mean + std * draw, with draws of type 'NORMAL'.
ASC_CAR_random = (
    ASC_CAR_mean + ASC_CAR_std * bioDraws('ASC_CAR_random', 'NORMAL')
)
ASC_SBB_random = (
    ASC_SBB_mean + ASC_SBB_std * bioDraws('ASC_SBB_random', 'NORMAL')
)

# Variables that are not directly available in the data file are derived
# here and registered in the database.

# Indicator of the AGE == 5 category.
SENIOR = DefineVariable('SENIOR', AGE == 5, database)
# Car availability, switched off when SP == 0.
CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)
# Swissmetro cost, set to zero whenever GA != 0.
SM_COST = DefineVariable('SM_COST', SM_CO * (GA == 0), database)
# Train availability, switched off when SP == 0.
TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)
# Train cost, set to zero whenever GA != 0.
TRAIN_COST = DefineVariable('TRAIN_COST', TRAIN_CO * (GA == 0), database)

# Travel times, costs and headways divided by 100.
TRAIN_TT_SCALED = DefineVariable(
    'TRAIN_TT_SCALED', TRAIN_TT / 100.0, database
)
TRAIN_COST_SCALED = DefineVariable(
    'TRAIN_COST_SCALED', TRAIN_COST / 100, database
)
SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)
SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)
CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)
CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)
TRAIN_HE_SCALED = DefineVariable('TRAIN_HE_SCALED', TRAIN_HE / 100, database)
SM_HE_SCALED = DefineVariable('SM_HE_SCALED', SM_HE / 100, database)

# Utility functions of the three alternatives. The car and SBB utilities use
# the random constants; the Swissmetro utility uses the fixed ASC_SM.
V_Car_SP = (
    ASC_CAR_random
    + B_TIME * CAR_TT_SCALED
    + B_CAR_COST * CAR_CO_SCALED
    + B_SENIOR * SENIOR
)
V_SBB_SP = (
    ASC_SBB_random
    + B_TIME * TRAIN_TT_SCALED
    + B_TRAIN_COST * TRAIN_COST_SCALED
    + B_HE * TRAIN_HE_SCALED
    + B_GA * GA
)
V_SM_SP = (
    ASC_SM
    + B_TIME * SM_TT_SCALED
    + B_SM_COST * SM_COST_SCALED
    + B_HE * SM_HE_SCALED
    + B_GA * GA
    + B_SENIOR * SENIOR
)

# Utilities and availability conditions, keyed by the alternative
# identifiers that the model below matches against CHOICE.
V = {3: V_Car_SP, 1: V_SBB_SP, 2: V_SM_SP}
av = {3: CAR_AV_SP, 1: TRAIN_AV_SP, 2: SM_AV}

# Choice model: the logit probability, which depends on the draws through
# the random constants, is integrated by Monte-Carlo before taking the log.
prob = logit(V, av, CHOICE)
logprob = log(MonteCarlo(prob))

# A fixed seed makes the draws, and therefore the estimates, identical from
# one run of this cell to the next.
biogeme = bio.BIOGEME(database, logprob, numberOfDraws=1000, seed=1223)
biogeme.modelName = "Mixture_SM_Heteroskedastic"

# Time the estimation, which is by far the most expensive step of the cell.
start_time = datetime.now()
results = biogeme.estimate()
print(f"Estimation time: {datetime.now() - start_time}")

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)
print(f"Nbr of observations: {database.getNumberOfObservations()}")
# initLogLike is the log likelihood at the starting values, which are not
# zero in this cell, so it is reported as LL(init) rather than LL(0).
print(f"LL(init) = {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"Output file: {results.data.htmlFileName}")

Estimation time: 0:04:08.162523
                Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR_mean   -0.472   0.0827   -5.71  1.1e-08        0.0846        -5.58   
ASC_CAR_std  -0.00741     0.15 -0.0493    0.961         0.022       -0.337   
ASC_SBB_mean    0.159   0.0841    1.89   0.0591         0.106          1.5   
ASC_SBB_std    0.0376    0.197   0.191    0.848        0.0322         1.17   
B_CAR_COST     -0.934   0.0907   -10.3        0         0.117        -8.01   
B_GA            0.554    0.188    2.95  0.00316         0.191          2.9   
B_HE           -0.595    0.105    -5.7 1.22e-08         0.105        -5.65   
B_SENIOR        -1.87    0.116   -16.1        0         0.109        -17.2   
B_SM_COST       -1.04   0.0551   -18.9        0        0.0745          -14   
B_TIME          -1.12   0.0613   -18.3        0         0.121        -9.24   
B_TRAIN_COST    -2.69    0.116   -23.1        0         0.176        -15.3   

              Rob. p-value  
AS

In [None]:
# Rich display of the parameter table produced by the most recently executed
# estimation cell (pandasResults is reassigned by each of them).
pandasResults