In [9]:
import pandas as pd
import numpy as np
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models, tools
from biogeme.expressions import Beta, Variable, bioDraws, MonteCarlo, log, Power, exp, Derive, RandomVariable
import biogeme.distributions as dist
import scipy.stats as st
import time

Data description

The dataset `apollo_swissRouteChoiceData` comes from an actual SP survey of public transport route choice conducted in
Switzerland (Axhausen et al., 2008). A set of 388 people were faced with 9 choices each between two public transport routes, both using train (leading to 3'492 observations in the data). The two alternatives are described on the basis of travel time, travel cost, headway (time between subsequent trains/busses) and the number of interchanges. For each individual, the dataset
additionally contains information on income, car availability in the household, and whether the
journey was made for commuting, shopping, business or leisure.

See slide 79 Apollo Documentation for more info on Mixtures

Preparing the data

In [10]:
# Read the CSV file
df = pd.read_csv(r"C:\Users\alexi\Desktop\GITS\Semester-project\Data\apollo_swissRouteChoiceData.csv")
df.columns

Index(['ID', 'choice', 'tt1', 'tc1', 'hw1', 'ch1', 'tt2', 'tc2', 'hw2', 'ch2',
       'hh_inc_abs', 'car_availability', 'commute', 'shopping', 'business',
       'leisure'],
      dtype='object')

In [11]:
#fill nAn values with 0 (same as in Apollo)
df = df.fillna(0) 

#creating biogeme database

database_MMNL = db.Database('MMNL_Preference_Model', df)
globals().update(database_MMNL.variables) #transform all columns into variables

Defining the model parameters

In [12]:
mu_log_b_tt = Beta('mu_log_b_tt', -3, None, None, 0)
sigma_log_b_tt = Beta('sigma_log_b_tt', -0.01, None, None, 0)
mu_log_b_tc = Beta('mu_log_b_tc', -3, None, None, 0)
sigma_log_b_tc = Beta('sigma_log_b_tc', -0.01, None, None, 0)
mu_log_b_hw = Beta('mu_log_b_hw', -3, None, None, 0)
sigma_log_b_hw = Beta('sigma_log_b_hw', -0.01, None, None, 0)
mu_log_b_ch = Beta('mu_log_b_tt', -3, None, None, 0)
sigma_log_b_ch = Beta('sigma_log_b_ch', -0.01, None, None, 0)

#no fixed parameters



Generate draws from Random Distribution (Halton Draws)

In [13]:
b_tt = -exp(mu_log_b_tt + sigma_log_b_tt*bioDraws('b_tt', 'NORMAL_HALTON5'))
b_tc = -exp(mu_log_b_tc + sigma_log_b_tc*bioDraws('b_tc', 'NORMAL_HALTON5'))
b_hw = -exp(mu_log_b_hw + sigma_log_b_hw*bioDraws('b_hw', 'NORMAL_HALTON5'))
b_ch = -exp(mu_log_b_ch + sigma_log_b_ch*bioDraws('b_ch', 'NORMAL_HALTON5'))



Defining the Model

In [14]:
#Availabilities not provided for 'apollo_choiceAnalysis', so full availability is assumed.

V_1 = b_tt * tt1 + b_tc * tc1 + b_hw * hw1 + b_ch * ch1
V_2 = b_tt * tt2 + b_tc * tc2 + b_hw * hw2 + b_ch * ch2


V_MMNL = {1: V_1, 2: V_2}


Estimating the model

In [15]:
# Define the model
prob_MMNL = models.logit(V_MMNL, None, choice)
logprob_MMNL = log(MonteCarlo(prob_MMNL))

USER_NOTES = (
    'Example of a mixture of logit models with two alternatives, '
    'approximated using Monte-Carlo integration with Halton draws.'
)


#**call cpu/ram function**

# Estimate the model
the_biogeme_MMNL = bio.BIOGEME(
    database_MMNL, logprob_MMNL, userNotes=USER_NOTES, parameter_file='few_draws.toml'
)

the_biogeme_MMNL.modelName = 'b24halton_mixture'

the_biogeme_MMNL.generateHtml = True  # Disable HTML file generation
the_biogeme_MMNL.generatePickle = False  # Disable PICKLE file generation
the_biogeme_MMNL.save_iterations = False  # Disable ITER file 

start_time = time.time() #mesure time of estimation

results_MMNL = the_biogeme_MMNL.estimate()

end_time = time.time()
elapsed_time = end_time - start_time

# Output + time taken
print(results_MMNL.getEstimatedParameters())

print(f"Elapsed Time: {elapsed_time} seconds")

File few_draws.toml has been created
Obsolete syntax. Use generate_html instead of generateHtml
Obsolete syntax. Use generate_pickle instead of generatePickle


                   Value  Rob. Std err  Rob. t-test  Rob. p-value
mu_log_b_hw    -1.687205      0.238339    -7.079013  1.451950e-12
mu_log_b_tc     0.296600      0.251117     1.181121  2.375545e-01
mu_log_b_tt    -0.622556      0.254752    -2.443771  1.453464e-02
sigma_log_b_ch -5.541070      0.310897   -17.822876  0.000000e+00
sigma_log_b_hw -2.152781      0.315931    -6.814096  9.485968e-12
sigma_log_b_tc -3.781815      0.313130   -12.077470  0.000000e+00
sigma_log_b_tt -1.354288      0.277317    -4.883531  1.042027e-06
Elapsed Time: 2754.447288274765 seconds


In [17]:
# Retrieve the general statistics from the results
general_stats_model_MMNL = results_MMNL.getGeneralStatistics()
print(results_MMNL.printGeneralStatistics())

Number of estimated parameters:	7
Sample size:	3492
Excluded observations:	0
Init log likelihood:	-2253.898
Final log likelihood:	-1786.633
Likelihood ratio test for the init. model:	934.5287
Rho-square for the init. model:	0.207
Rho-square-bar for the init. model:	0.204
Akaike Information Criterion:	3587.266
Bayesian Information Criterion:	3630.374
Final gradient norm:	5.0587E+01
Number of draws:	500
Draws generation time:	0:00:07.138568
Types of draws:	['b_ch: NORMAL_HALTON5', 'b_hw: NORMAL_HALTON5', 'b_tc: NORMAL_HALTON5', 'b_tt: NORMAL_HALTON5']
Nbr of threads:	8

