In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models, tools
from biogeme.expressions import Beta, Variable, bioDraws, MonteCarlo, log, Power, exp, Derive, RandomVariable
import biogeme.distributions as dist
import scipy.stats as st
import time
import psutil

Defining CPU and RAM monitoring function

In [2]:
def monitor_system(duration_minute):
    duration = duration_minute*60 #transforming the argument so it's in minutes instead of seconds
    data = {'Time': [], 'CPU Usage (%)': [], 'RAM Usage (%)': []}
    start_time = time.time()

    while time.time() - start_time < duration:
        elapsed_time = time.time() - start_time
        cpu_percent = psutil.cpu_percent(interval=None)
        ram_info = psutil.virtual_memory()

        data['Time'].append(elapsed_time)
        data['CPU Usage (%)'].append(cpu_percent)
        data['RAM Usage (%)'].append(ram_info.percent)

        time.sleep(60) #mesure the CPU + RAM every minute

    df_monitor = pd.DataFrame(data)
    return df_monitor

Data description

The dataset `apollo_swissRouteChoiceData` comes from an actual SP survey of public transport route choice conducted in
Switzerland (Axhausen et al., 2008). A set of 388 people were faced with 9 choices each between two public transport routes, both using train (leading to 3'492 observations in the data). The two alternatives are described on the basis of travel time, travel cost, headway (time between subsequent trains/busses) and the number of interchanges. For each individual, the dataset
additionally contains information on income, car availability in the household, and whether the
journey was made for commuting, shopping, business or leisure.

See slide 79 Apollo Documentation for more info on Mixtures

Preparing the data

In [3]:
# Read the CSV file
df = pd.read_csv(r"C:\Users\alexi\Desktop\GITS\Semester-project\Data\apollo_swissRouteChoiceData.csv")
df.columns

Index(['ID', 'choice', 'tt1', 'tc1', 'hw1', 'ch1', 'tt2', 'tc2', 'hw2', 'ch2',
       'hh_inc_abs', 'car_availability', 'commute', 'shopping', 'business',
       'leisure'],
      dtype='object')

In [4]:
#fill nAn values with 0 (same as in Apollo)
df = df.fillna(0) 

#creating biogeme database

database_MMNL = db.Database('MMNL_Preference_Model', df)
globals().update(database_MMNL.variables) #transform all columns into variables

Defining the model parameters

In [5]:
#We're changing the starting parameters with the values found with Apollo, and also adding a constant to V_1 in
#order to make sure all mu's and sigma's are estimated

asc = Beta('asc', 0, None, None, 1) #fixed parameter, to 0
mu_log_b_tt = Beta('mu_log_b_tt', -1.984, None, None, 0)
sigma_log_b_tt = Beta('sigma_log_b_tt', -0.442, None, None, 0)
mu_log_b_tc = Beta('mu_log_b_tc', -1.016, None, None, 0)
sigma_log_b_tc = Beta('sigma_log_b_tc', -0.991, None, None, 0)
mu_log_b_hw = Beta('mu_log_b_hw', -2.938, None, None, 0)
sigma_log_b_hw = Beta('sigma_log_b_hw', -0.834, None, None, 0)
mu_log_b_ch = Beta('mu_log_b_tt', -0.631, None, None, 0)
sigma_log_b_ch = Beta('sigma_log_b_ch', 0.858, None, None, 0)

#no fixed parameters



Generate draws from Random Distribution (Halton Draws)

In [6]:
b_tt = -exp(mu_log_b_tt + sigma_log_b_tt*bioDraws('b_tt', 'NORMAL_HALTON5'))
b_tc = -exp(mu_log_b_tc + sigma_log_b_tc*bioDraws('b_tc', 'NORMAL_HALTON5'))
b_hw = -exp(mu_log_b_hw + sigma_log_b_hw*bioDraws('b_hw', 'NORMAL_HALTON5'))
b_ch = -exp(mu_log_b_ch + sigma_log_b_ch*bioDraws('b_ch', 'NORMAL_HALTON5'))



Defining the Model

In [7]:
#Availabilities not provided for 'apollo_choiceAnalysis', so full availability is assumed.
#We're adding an asc in order to have all other parameters estimated

V_1 = asc + b_tt * tt1 + b_tc * tc1 + b_hw * hw1 + b_ch * ch1
V_2 = b_tt * tt2 + b_tc * tc2 + b_hw * hw2 + b_ch * ch2


V_MMNL_1 = {1: V_1, 2: V_2}


Estimating the model

In [8]:
# Define the model
prob_MMNL_1 = models.logit(V_MMNL_1, None, choice)
logprob_MMNL_1 = log(MonteCarlo(prob_MMNL_1))

USER_NOTES = (
    'Example of a mixture of logit models with two alternatives, '
    'approximated using Monte-Carlo integration with Halton draws.'
)


# Estimate the model
the_biogeme_MMNL_1 = bio.BIOGEME(
    database_MMNL, logprob_MMNL_1, userNotes=USER_NOTES, parameter_file='few_draws.toml'
)

the_biogeme_MMNL_1.modelName = 'b24halton_mixture'

the_biogeme_MMNL_1.generateHtml = True  # Disable HTML file generation
the_biogeme_MMNL_1.generatePickle = False  # Disable PICKLE file generation
the_biogeme_MMNL_1.save_iterations = False  # Disable ITER file 

start_time = time.time() #mesure time of estimation

results_MMNL_1 = the_biogeme_MMNL_1.estimate()

end_time = time.time()
elapsed_time = end_time - start_time

# Output + time taken
print(results_MMNL_1.getEstimatedParameters())

print(f"Elapsed Time: {elapsed_time} seconds")

Obsolete syntax. Use generate_html instead of generateHtml
Obsolete syntax. Use generate_pickle instead of generatePickle


KeyboardInterrupt: 

In [None]:
# Retrieve the general statistics from the results
general_stats_model_MMNL_1 = results_MMNL_1.getGeneralStatistics()
print(results_MMNL_1.printGeneralStatistics())