In [1]:
import pandas as pd

import biogeme.biogeme_logging as blog
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, bioDraws, log, MonteCarlo
from biogeme.parameters import Parameters
import biogeme.database as db
from biogeme.expressions import Variable

In [2]:
# Create a screen logger at INFO level and use it to announce which example is running.
logger = blog.get_screen_logger(level=blog.INFO)
logger.info('Example b05normal_mixtures.py')

Example b05normal_mixtures.py 


In [3]:
# Load the tab-separated Swissmetro file and wrap it in a Biogeme database.
df = pd.read_csv("http://transp-or.epfl.ch/data/swissmetro.dat", sep='\t')
database = db.Database('swissmetro', df)

# Expose each data column as a Biogeme Variable bound to a Python name identical
# to the column name. The columns are grouped below by the role their names
# suggest; the grouping is cosmetic and the creation order is unchanged.

# Survey / respondent / trip descriptors.
(
    GROUP, SURVEY, SP, ID, PURPOSE,
    FIRST, TICKET, WHO, LUGGAGE, AGE,
    MALE, INCOME, GA, ORIGIN, DEST,
) = (
    Variable(column)
    for column in (
        'GROUP', 'SURVEY', 'SP', 'ID', 'PURPOSE',
        'FIRST', 'TICKET', 'WHO', 'LUGGAGE', 'AGE',
        'MALE', 'INCOME', 'GA', 'ORIGIN', 'DEST',
    )
)

# Availability indicators of the three alternatives.
TRAIN_AV, CAR_AV, SM_AV = (
    Variable(column) for column in ('TRAIN_AV', 'CAR_AV', 'SM_AV')
)

# Attributes of the train alternative.
TRAIN_TT, TRAIN_CO, TRAIN_HE = (
    Variable(column) for column in ('TRAIN_TT', 'TRAIN_CO', 'TRAIN_HE')
)

# Attributes of the Swissmetro (SM) alternative.
SM_TT, SM_CO, SM_HE, SM_SEATS = (
    Variable(column) for column in ('SM_TT', 'SM_CO', 'SM_HE', 'SM_SEATS')
)

# Attributes of the car alternative.
CAR_TT, CAR_CO = (Variable(column) for column in ('CAR_TT', 'CAR_CO'))

# Observed choice, used later as the dependent variable of the logit model.
CHOICE = Variable('CHOICE')

In [4]:
# Flag the observations to drop: PURPOSE is neither 1 nor 3, or CHOICE is 0.
# Biogeme expressions use * as "and" and + as "or" on 0/1 indicators, hence the
# final "> 0" to turn the sum back into an indicator.
exclude = (
    (PURPOSE != 1) * (PURPOSE != 3)
    + (CHOICE == 0)
) > 0
database.remove(exclude)

# Costs are multiplied by the indicator (GA == 0), i.e. set to zero whenever GA
# is non-zero (presumably travellers holding an annual pass -- confirm against
# the data description).
SM_COST = database.define_variable('SM_COST', SM_CO * (GA == 0))
TRAIN_COST = database.define_variable('TRAIN_COST', TRAIN_CO * (GA == 0))

# Car and train availabilities are multiplied by the indicator (SP != 0).
CAR_AV_SP = database.define_variable('CAR_AV_SP', CAR_AV * (SP != 0))
TRAIN_AV_SP = database.define_variable('TRAIN_AV_SP', TRAIN_AV * (SP != 0))

# Times and costs divided by 100; the columns are added to the database in the
# order listed here.
(
    TRAIN_TT_SCALED,
    TRAIN_COST_SCALED,
    SM_TT_SCALED,
    SM_COST_SCALED,
    CAR_TT_SCALED,
    CAR_CO_SCALED,
) = (
    database.define_variable(scaled_name, unscaled / 100)
    for scaled_name, unscaled in (
        ('TRAIN_TT_SCALED', TRAIN_TT),
        ('TRAIN_COST_SCALED', TRAIN_COST),
        ('SM_TT_SCALED', SM_TT),
        ('SM_COST_SCALED', SM_COST),
        ('CAR_TT_SCALED', CAR_TT),
        ('CAR_CO_SCALED', CAR_CO),
    )
)

In [5]:
# Display the shape of database.data as left by the exclusion and
# variable-definition steps in the preceding cell.
database.data.shape

(6768, 38)

In [None]:
# Mixed logit with normally distributed error components on the alternative
# specific constants (ASCs), estimated by simulated maximum likelihood.

# Cost and travel-time coefficients, shared by the three alternatives.
B_COST = Beta('B_COST', 0, None, None, 0)
B_TIME = Beta('B_TIME', 0, None, None, 0)

# Means of the ASCs. The last Beta argument is the status flag: 0 = estimated,
# 1 = held at its initial value. ASC_SM_mu is therefore fixed to 0.
ASC_CAR_mu = Beta('ASC_CAR_mu', 0, None, None, 0)
ASC_TRAIN_mu = Beta('ASC_TRAIN_mu', 0, None, None, 0)
ASC_SM_mu = Beta('ASC_SM_mu', 0, None, None, 1)

# Coefficients multiplying the standard normal draws. Despite the "_var" suffix
# they enter linearly, so they act as scale (standard deviation) parameters,
# not variances. ASC_TRAIN_var is fixed to 0, so the train ASC is not random.
ASC_CAR_var = Beta('ASC_CAR_var', 1, None, None, 0)
ASC_TRAIN_var = Beta('ASC_TRAIN_var', 0, None, None, 1)
ASC_SM_var = Beta('ASC_SM_var', 1, None, None, 0)

# One distinct Halton base per random term. Biogeme's Halton generators are
# deterministic for a given base, so drawing several terms from the same base
# (all three previously used 'NORMAL_HALTON3') gives them identical values and
# makes the error components perfectly correlated instead of independent.
ASC_SM_draws = bioDraws('ASC_SM_draws', 'NORMAL_HALTON2')
ASC_TRAIN_draws = bioDraws('ASC_TRAIN_draws', 'NORMAL_HALTON5')
ASC_CAR_draws = bioDraws('ASC_CAR_draws', 'NORMAL_HALTON3')

# Random ASCs: mean + scale * standard normal draw.
ASC_CAR = ASC_CAR_mu + ASC_CAR_var * ASC_CAR_draws
ASC_TRAIN = ASC_TRAIN_mu + ASC_TRAIN_var * ASC_TRAIN_draws
ASC_SM = ASC_SM_mu + ASC_SM_var * ASC_SM_draws

# Utility functions (1: train, 2: Swissmetro, 3: car).
V1 = ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
V2 = ASC_SM + B_TIME * SM_TT_SCALED + B_COST * SM_COST_SCALED
V3 = ASC_CAR + B_TIME * CAR_TT_SCALED + B_COST * CAR_CO_SCALED

# Utilities and availability conditions, keyed by the value CHOICE takes for
# each alternative.
V = {1: V1, 2: V2, 3: V3}
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

# Logit probability conditional on the draws, integrated over the draws by
# Monte Carlo; its log is the contribution to the simulated log likelihood.
prob = models.logit(V, av, CHOICE)
logprob = log(MonteCarlo(prob))

the_biogeme = bio.BIOGEME(
    database, logprob, number_of_draws=1000, seed=1223
)
the_biogeme.modelName = 'b05normal_mixture'
# Do not write the pickle and HTML result files to disk.
the_biogeme.generate_pickle = False
the_biogeme.generate_html = False

results = the_biogeme.estimate()

print(results.short_summary())

# Table of estimated parameters, displayed as the cell output.
pandas_results = results.get_estimated_parameters()
pandas_results

Biogeme parameters read from biogeme.toml. 
