Mixtures of Logit Swissmetro


Options for the few_draws.toml:
- Second derivative set to 0
- Number of draws: 100 (for now)

In [2]:
import pandas as pd
import numpy as np
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, Variable, bioDraws, MonteCarlo, log, Power, exp, Derive, RandomVariable, PanelLikelihoodTrajectory
from biogeme.tools import TemporaryFile

Preparing the data

In [3]:
url = "http://transp-or.epfl.ch/data/swissmetro.dat"

# Read the data into a DataFrame
df = pd.read_csv(url, sep='\t')
database_swissmetro = db.Database('swissmetro', df)

#Definition of the variables:

GROUP = Variable('GROUP')
SURVEY = Variable('SURVEY')
SP = Variable('SP')
ID = Variable('ID')
PURPOSE = Variable('PURPOSE')
FIRST = Variable('FIRST')
TICKET = Variable('TICKET')
WHO = Variable('WHO')
LUGGAGE = Variable('LUGGAGE')
AGE = Variable('AGE')
MALE = Variable('MALE')
INCOME = Variable('INCOME')
GA = Variable('GA')
ORIGIN = Variable('ORIGIN')
DEST = Variable('DEST')
TRAIN_AV = Variable('TRAIN_AV')
CAR_AV = Variable('CAR_AV')
SM_AV = Variable('SM_AV')
TRAIN_TT = Variable('TRAIN_TT')
TRAIN_CO = Variable('TRAIN_CO')
TRAIN_HE = Variable('TRAIN_HE')
SM_TT = Variable('SM_TT')
SM_CO = Variable('SM_CO')
SM_HE = Variable('SM_HE')
SM_SEATS = Variable('SM_SEATS')
CAR_TT = Variable('CAR_TT')
CAR_CO = Variable('CAR_CO')
CHOICE = Variable('CHOICE')

#We estimate the parameters of the model using all observations in the data set associated with work trips. 
#Observations such that the dependent variable CHOICE is 0 are also removed.

exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database_swissmetro.remove(exclude)

#Definition of new variables:
SM_COST = database_swissmetro.DefineVariable('SM_COST', SM_CO * (GA == 0))
TRAIN_COST = database_swissmetro.DefineVariable('TRAIN_COST', TRAIN_CO * (GA == 0))
CAR_AV_SP = database_swissmetro.DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0))
TRAIN_AV_SP = database_swissmetro.DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0))
TRAIN_TT_SCALED = database_swissmetro.DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100)
TRAIN_COST_SCALED = database_swissmetro.DefineVariable('TRAIN_COST_SCALED', TRAIN_COST / 100)
SM_TT_SCALED = database_swissmetro.DefineVariable('SM_TT_SCALED', SM_TT / 100)
SM_COST_SCALED = database_swissmetro.DefineVariable('SM_COST_SCALED', SM_COST / 100)
CAR_TT_SCALED = database_swissmetro.DefineVariable('CAR_TT_SCALED', CAR_TT / 100)
CAR_CO_SCALED = database_swissmetro.DefineVariable('CAR_CO_SCALED', CAR_CO / 100)

database_swissmetro.panel('ID') 



Defining Model Parameters

In [4]:
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = Beta('ASC_SM', 0, None, None, 1) #Setting it to 0, no estimation
B_COST = Beta('B_COST', 0, None, None, 0)

Defining Random parameter, with Halton Draw, for Monte-Carlo Simulation

In [45]:
#Importing the coefficients from Apollo

df_coeff = pd.read_csv(r"C:\Users\alexi\Desktop\GITS\Semester-project\Code\Apollo\Swissmetro\random_coefficients_Apollo .csv", index_col=0) 
    #indicating that the first columns is index, 752 rows x 100 columns
Random_Coefficients = db.Database('Random_Coefficients', df_coeff)




Help on class bioDraws in module biogeme.expressions:

class bioDraws(Elementary)
 |  bioDraws(name, drawType)
 |  
 |  Draws for Monte-Carlo integration
 |  
 |  Method resolution order:
 |      bioDraws
 |      Elementary
 |      Expression
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __init__(self, name, drawType)
 |      Constructor
 |      
 |      :param name: name of the random variable with a series of draws.
 |      :type name: string
 |      :param drawType: type of draws.
 |      :type drawType: string
 |  
 |  __str__(self)
 |      string method
 |      
 |      :return: name of the expression
 |      :rtype: str
 |  
 |  check_draws(self)
 |      List of draws defined outside of 'MonteCarlo'
 |      
 |      :return: List of names of variables
 |      :rtype: list(str)
 |  
 |  dict_of_elementary_expression(self, the_type)
 |      Extract a dict with all elementary expressions of a dpecific type
 |      
 |      :param the_type: the type of expression
 |

In [39]:
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_TIME_S = Beta('B_TIME_S', 1, None, None, 0) 
B_TIME_RND = B_TIME + B_TIME_S * bioDraws('B_TIME_RND', 'Random_Coefficients')


#maybe script the draws in Apollo and import them to Biogeme (Apollo should write a list of the draws and then import it to Biogeme)

Defining the Model

In [40]:
V1 = ASC_TRAIN + B_TIME_RND * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
V2 = ASC_SM + B_TIME_RND * SM_TT_SCALED + B_COST * SM_COST_SCALED
V3 = ASC_CAR + B_TIME_RND * CAR_TT_SCALED + B_COST * CAR_CO_SCALED

V = {1: V1, 2: V2, 3: V3}

av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

Estimating the Model

In [44]:
prob = models.logit(V, None, CHOICE)

condprodIndiv = PanelLikelihoodTrajectory(prob) #will use panel data in this case

logprob = log(MonteCarlo(condprodIndiv))


USER_NOTES = (
    'Example of a mixture of logit models with three alternatives, '
    'approximated using Monte-Carlo integration.'
)

the_biogeme = bio.BIOGEME(
    database_swissmetro, logprob, userNotes=USER_NOTES, parameter_file='few_draws.toml'
)
the_biogeme.modelName = 'swissmetro_Halton_Mixture_imported'

results = the_biogeme.estimate()


BiogemeError: Unknown type of draws for variable B_TIME_RND: Random_Coefficients. Native types: {'UNIFORM': (<function getUniform at 0x000002BDC95F8790>, 'Uniform U[0, 1]'), 'UNIFORM_ANTI': (<function Database.uniform_antithetic at 0x000002BDC95F8A60>, 'Antithetic uniform U[0, 1]'), 'UNIFORM_HALTON2': (<function Database.halton2 at 0x000002BDC95F8EE0>, 'Halton draws with base 2, skipping the first 10'), 'UNIFORM_HALTON3': (<function Database.halton3 at 0x000002BDC95F8F70>, 'Halton draws with base 3, skipping the first 10'), 'UNIFORM_HALTON5': (<function Database.halton5 at 0x000002BDC95FE040>, 'Halton draws with base 5, skipping the first 10'), 'UNIFORM_MLHS': (<function getLatinHypercubeDraws at 0x000002BDC95F8820>, 'Modified Latin Hypercube Sampling on [0, 1]'), 'UNIFORM_MLHS_ANTI': (<function Database.MLHS_anti at 0x000002BDC95FE0D0>, 'Antithetic Modified Latin Hypercube Sampling on [0, 1]'), 'UNIFORMSYM': (<function Database.symm_uniform at 0x000002BDC95FE160>, 'Uniform U[-1, 1]'), 'UNIFORMSYM_ANTI': (<function Database.symm_uniform_antithetic at 0x000002BDC95FE1F0>, 'Antithetic uniform U[-1, 1]'), 'UNIFORMSYM_HALTON2': (<function Database.symm_halton2 at 0x000002BDC95FE280>, 'Halton draws on [-1, 1] with base 2, skipping the first 10'), 'UNIFORMSYM_HALTON3': (<function Database.symm_halton3 at 0x000002BDC95FE310>, 'Halton draws on [-1, 1] with base 3, skipping the first 10'), 'UNIFORMSYM_HALTON5': (<function Database.symm_halton5 at 0x000002BDC95FE3A0>, 'Halton draws on [-1, 1] with base 5, skipping the first 10'), 'UNIFORMSYM_MLHS': (<function Database.symm_MLHS at 0x000002BDC95FE430>, 'Modified Latin Hypercube Sampling on [-1, 1]'), 'UNIFORMSYM_MLHS_ANTI': (<function Database.symm_MLHS_anti at 0x000002BDC95FE4C0>, 'Antithetic Modified Latin Hypercube Sampling on [-1, 1]'), 'NORMAL': (<function getNormalWichuraDraws at 0x000002BDC95F89D0>, 'Normal N(0, 1) draws'), 'NORMAL_ANTI': (<function Database.normal_antithetic at 0x000002BDC95FE550>, 'Antithetic normal draws'), 'NORMAL_HALTON2': (<function Database.normal_halton2 at 0x000002BDC95FE5E0>, 'Normal draws from Halton base 2 sequence'), 'NORMAL_HALTON3': (<function Database.normal_halton3 at 0x000002BDC95FE670>, 'Normal draws from Halton base 3 sequence'), 'NORMAL_HALTON5': (<function Database.normal_halton5 at 0x000002BDC95FE700>, 'Normal draws from Halton base 5 sequence'), 'NORMAL_MLHS': (<function Database.normal_MLHS at 0x000002BDC95FE790>, 'Normal draws from Modified Latin Hypercube Sampling'), 'NORMAL_MLHS_ANTI': (<function Database.normal_MLHS_anti at 0x000002BDC95FE820>, 'Antithetic normal draws from Modified Latin Hypercube Sampling')}. User defined: {}

In [7]:
# Retrieve the general statistics from the results
general_stats = results.getGeneralStatistics()
print(results.printGeneralStatistics())

Number of estimated parameters:	5
Sample size:	752
Observations:	6768
Excluded observations:	3960
Init log likelihood:	-4977.489
Final log likelihood:	-4977.489
Likelihood ratio test for the init. model:	-0
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	-0.001
Akaike Information Criterion:	9964.977
Bayesian Information Criterion:	9988.091
Final gradient norm:	2.9032E-02
Number of draws:	100
Draws generation time:	0:00:00.068549
Types of draws:	['B_TIME_RND: NORMAL_HALTON2']
Nbr of threads:	8



In [8]:
pandas_results = results.getEstimatedParameters()
pandas_results

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR,-0.507747,0.094015,-5.400703,6.638012e-08
ASC_TRAIN,-1.873896,0.145914,-12.842441,0.0
B_COST,-1.184824,0.206141,-5.747639,9.0498e-09
B_TIME,-1.072751,0.103637,-10.351085,0.0
B_TIME_S,3.881079,0.240249,16.154387,0.0
