In [None]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable
from biogeme.models import loglogit
from biogeme.tools import likelihood_ratio_test

from data_preparation import *

# Model 0

In [None]:
# Model 0: multinomial logit with generic (shared) cost and travel-time
# coefficients and one alternative-specific constant per non-walking mode.

# --- Data -------------------------------------------------------------------
# Tab-separated file, wrapped in a Biogeme database named 'lpmc01'.
df = pd.read_csv("lpmc01.dat", sep='\t')
database = db.Database('lpmc01', df)

# --- Parameters -------------------------------------------------------------
# Beta(name, starting value, lower bound, upper bound, status).
# Every parameter starts at 0, is unbounded, and has status 0.
# Walking gets no constant, so the other constants are measured against it.
ASC_CYCLE, ASC_PT, ASC_DRIVE = (
    Beta(name, 0, None, None, 0) for name in ('ASC_CYCLE', 'ASC_PT', 'ASC_DRIVE')
)
B_COST, B_TIME = (Beta(name, 0, None, None, 0) for name in ('B_COST', 'B_TIME'))

# --- Utilities --------------------------------------------------------------
# Cost enters only the public-transport and driving utilities.
# The dur_* / cost_* expressions and travel_mode are not defined in this cell
# (presumably they come from the data_preparation star import -- confirm).
V_WALK = B_TIME * dur_walking
V_CYCLE = ASC_CYCLE + B_TIME * dur_cycling
V_PT = ASC_PT + B_COST * cost_transit + B_TIME * dur_pt
V_DRIVE = ASC_DRIVE + B_COST * cost_driving + B_TIME * dur_driving

# Alternative coding used by both dictionaries below:
# 1 = walking, 2 = cycling, 3 = public transport, 4 = driving.
V = {1: V_WALK, 2: V_CYCLE, 3: V_PT, 4: V_DRIVE}

# --- Availability -----------------------------------------------------------
# Every mode is treated as available to every observation.
availability_walk = availability_cycle = availability_pt = availability_drive = 1
availability = {
    1: availability_walk,
    2: availability_cycle,
    3: availability_pt,
    4: availability_drive,
}

# --- Estimation -------------------------------------------------------------
# loglogit gives the log-probability of the chosen alternative (travel_mode).
model_0 = loglogit(V, availability, travel_mode)

biogeme = bio.BIOGEME(database, model_0)
biogeme.modelName = "Model_0"
results = biogeme.estimate()

print("Estimation results for Model 0:")
print(results.get_estimated_parameters())

# --- Diagnostic (computed, not displayed) -----------------------------------
# Rows where driving (mode 4) was chosen although car_ownership is 0. The count
# is stored but not printed; it is a sanity check on the "driving is always
# available" assumption above.
no_car_but_drove = (df['car_ownership'] == 0) & (df['travel_mode'] == 4)
driving_without_car = df[no_car_but_drove]
num_rows_driving_without_car = len(driving_without_car)

File biogeme.toml has been created


Estimation results for Model 0:
              Value  Rob. Std err  Rob. t-test  Rob. p-value
ASC_CYCLE -3.778175      0.103171   -36.620564           0.0
ASC_DRIVE -1.288003      0.079431   -16.215324           0.0
ASC_PT    -0.525660      0.054777    -9.596426           0.0
B_COST    -0.190833      0.014635   -13.039920           0.0
B_TIME    -5.598770      0.203407   -27.524922           0.0


In [27]:
# Model 1: same structure as Model 0, but the single B_TIME coefficient is
# replaced by one travel-time coefficient per alternative. The constants
# (ASC_*), B_COST, `availability` and `database` are reused from the Model 0 cell.
B_TIME_WALK, B_TIME_CYCLE, B_TIME_PT, B_TIME_DRIVE = (
    Beta(name, 0, None, None, 0)
    for name in ('B_TIME_WALK', 'B_TIME_CYCLE', 'B_TIME_PT', 'B_TIME_DRIVE')
)

# Utilities: each mode's duration is weighted by its own time coefficient.
V_WALK = B_TIME_WALK * dur_walking
V_CYCLE = ASC_CYCLE + B_TIME_CYCLE * dur_cycling
V_PT = ASC_PT + B_COST * cost_transit + B_TIME_PT * dur_pt
V_DRIVE = ASC_DRIVE + B_COST * cost_driving + B_TIME_DRIVE * dur_driving

# Alternative coding: 1 = walking, 2 = cycling, 3 = public transport, 4 = driving.
utilities_model_1 = {1: V_WALK, 2: V_CYCLE, 3: V_PT, 4: V_DRIVE}
model_1 = loglogit(utilities_model_1, availability, travel_mode)

# Estimate under its own model name, then report the parameter table.
biogeme_model_1 = bio.BIOGEME(database, model_1)
biogeme_model_1.modelName = "Model_1"
results_model_1 = biogeme_model_1.estimate()

print("Estimation results for Model 1:")
print(results_model_1.get_estimated_parameters())

Estimation results for Model 1:
                 Value  Rob. Std err  Rob. t-test  Rob. p-value
ASC_CYCLE    -4.602378      0.197139   -23.345861           0.0
ASC_DRIVE    -2.115318      0.144923   -14.596201           0.0
ASC_PT       -2.599549      0.146553   -17.737963           0.0
B_COST       -0.180812      0.017547   -10.304242           0.0
B_TIME_CYCLE -6.462945      0.483849   -13.357359           0.0
B_TIME_DRIVE -6.623754      0.379926   -17.434326           0.0
B_TIME_PT    -3.494668      0.244093   -14.316929           0.0
B_TIME_WALK  -9.065278      0.456118   -19.874847           0.0


In [37]:
# Likelihood ratio test: Model 0 (generic B_TIME, restricted) against
# Model 1 (alternative-specific time coefficients, unrestricted).

# Final log-likelihood and number of estimated parameters of each model.
model_0_loglike = results.data.logLike
model_0_numParam = len(results.get_beta_values())

model_1_loglike = results_model_1.data.logLike
model_1_numParam = len(results_model_1.get_beta_values())

# Significance level of the test: 0.05, i.e. 5%.
alpha = 0.05

# Each model is passed as [log-likelihood, number of parameters].
restricted_model = [model_0_loglike, model_0_numParam]
unrestricted_model = [model_1_loglike, model_1_numParam]
lr_test_result = likelihood_ratio_test(restricted_model, unrestricted_model, alpha)

# Element 0 of the result is the conclusion message (whether H0 can be
# rejected at the chosen level), not the numeric test statistic.
verdict = lr_test_result[0]
print("Likelihood Ratio Test Results:")
print(f" {verdict}")


Likelihood Ratio Test Results:
 H0 can be rejected at level 5.0%


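**Model_pref = Model 1**: the likelihood ratio test above rejects Model 0 (generic travel-time coefficient) at the 5% level, so Model 1 is the preferred model.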

# Model 2

In [29]:
# Standardise age to a z-score: subtract the sample mean and divide by the
# standard deviation as computed by pandas' Series.std().
age_mean = df['age'].mean()
age_std = df['age'].std()
df['age_scaled'] = (df['age'] - age_mean) / age_std

# Rebuild the Biogeme database so the new column is visible to the models,
# then expose the column as a Biogeme variable.
database = db.Database('lpmc01', df)
age_scaled = Variable('age_scaled')

In [32]:
# Model 2, specification 1: each alternative-specific constant is allowed to
# shift linearly with standardised age. Travel time keeps the generic B_TIME
# coefficient.

# Age-shift coefficients (start at 0, unbounded, status 0).
beta_cycle_age = Beta('ASC_CYCLE_AGE', 0, None, None, 0)
beta_pt_age = Beta('ASC_PT_AGE', 0, None, None, 0)
beta_drive_age = Beta('ASC_DRIVE_AGE', 0, None, None, 0)

# NOTE: the Python names below hold the full "constant + age shift" expression,
# whereas the Biogeme parameters with the same labels are only the age shifts.
ASC_CYCLE_AGE = ASC_CYCLE + beta_cycle_age * age_scaled
ASC_PT_AGE = ASC_PT + beta_pt_age * age_scaled
ASC_DRIVE_AGE = ASC_DRIVE + beta_drive_age * age_scaled

# Utilities: walking stays the reference alternative (no constant, no shift).
V_WALK = B_TIME * dur_walking
V_CYCLE = ASC_CYCLE_AGE + B_TIME * dur_cycling
V_PT = ASC_PT_AGE + B_COST * cost_transit + B_TIME * dur_pt
V_DRIVE = ASC_DRIVE_AGE + B_COST * cost_driving + B_TIME * dur_driving

# Alternative coding: 1 = walking, 2 = cycling, 3 = public transport, 4 = driving.
utilities_spec1 = {1: V_WALK, 2: V_CYCLE, 3: V_PT, 4: V_DRIVE}
model_2_spec1 = loglogit(utilities_spec1, availability, travel_mode)

# Estimate on the database that contains the age_scaled column.
biogeme_spec1 = bio.BIOGEME(database, model_2_spec1)
biogeme_spec1.modelName = "Model_2_spec1"
results_spec1 = biogeme_spec1.estimate()

In [31]:
# Model 2, specification 2: the travel-time sensitivity of each alternative is
# allowed to shift linearly with standardised age, on top of the generic
# B_TIME coefficient. The constants are the plain ASC_* parameters.

# Age-shift coefficients for travel time (start at 0, unbounded, status 0).
beta_time_walk_age = Beta('B_TIME_WALK_AGE', 0, None, None, 0)
beta_time_cycle_age = Beta('B_TIME_CYCLE_AGE', 0, None, None, 0)
beta_time_pt_age = Beta('B_TIME_PT_AGE', 0, None, None, 0)
beta_time_drive_age = Beta('B_TIME_DRIVE_AGE', 0, None, None, 0)

# Each *_AGE name holds the product "coefficient * age_scaled", i.e. the
# age-dependent part of that alternative's time coefficient.
B_TIME_WALK_AGE = beta_time_walk_age * age_scaled
B_TIME_CYCLE_AGE = beta_time_cycle_age * age_scaled
B_TIME_PT_AGE = beta_time_pt_age * age_scaled
B_TIME_DRIVE_AGE = beta_time_drive_age * age_scaled

# Utilities: effective time coefficient = B_TIME + age-dependent shift.
V_WALK = (B_TIME + B_TIME_WALK_AGE) * dur_walking
V_CYCLE = ASC_CYCLE + (B_TIME + B_TIME_CYCLE_AGE) * dur_cycling
V_PT = ASC_PT + B_COST * cost_transit + (B_TIME + B_TIME_PT_AGE) * dur_pt
V_DRIVE = ASC_DRIVE + B_COST * cost_driving + (B_TIME + B_TIME_DRIVE_AGE) * dur_driving

# Alternative coding: 1 = walking, 2 = cycling, 3 = public transport, 4 = driving.
utilities_spec2 = {1: V_WALK, 2: V_CYCLE, 3: V_PT, 4: V_DRIVE}
model_2_spec2 = loglogit(utilities_spec2, availability, travel_mode)

# Estimate on the database that contains the age_scaled column.
biogeme_spec2 = bio.BIOGEME(database, model_2_spec2)
biogeme_spec2.modelName = "Model_2_spec2"
results_spec2 = biogeme_spec2.estimate()

In [41]:
# Show the estimated-parameter tables of both Model 2 specifications.

# Specification 1: age interacts with the alternative-specific constants.
spec1_parameters = results_spec1.get_estimated_parameters()
print("Estimation results for Model 2 - Specification 1 (Interaction with ASCs):")
print(spec1_parameters)

# Specification 2: age interacts with the travel-time coefficients.
# The leading "\n" separates the two tables with a blank line.
spec2_parameters = results_spec2.get_estimated_parameters()
print("\nEstimation results for Model 2 - Specification 2 (Interaction with Travel Time):")
print(spec2_parameters)

Estimation results for Model 2 - Specification 1 (Interaction with ASCs):
                  Value  Rob. Std err  Rob. t-test  Rob. p-value
ASC_CYCLE     -3.780802      0.103558   -36.509040  0.000000e+00
ASC_CYCLE_AGE  0.085611      0.079369     1.078644  2.807462e-01
ASC_DRIVE     -1.290644      0.079768   -16.179921  0.000000e+00
ASC_DRIVE_AGE  0.297452      0.043608     6.821057  9.037437e-12
ASC_PT        -0.519548      0.054986    -9.448776  0.000000e+00
ASC_PT_AGE     0.130969      0.040394     3.242261  1.185853e-03
B_COST        -0.191840      0.014779   -12.980697  0.000000e+00
B_TIME        -5.607906      0.203078   -27.614548  0.000000e+00

Estimation results for Model 2 - Specification 2 (Interaction with Travel Time):
                     Value  Rob. Std err  Rob. t-test  Rob. p-value
ASC_CYCLE        -3.796371      0.103459   -36.694392      0.000000
ASC_DRIVE        -1.307115      0.080222   -16.293646      0.000000
ASC_PT           -0.537913      0.054957    -9.787858  

In [None]:
# Likelihood ratio tests: Model 0 (restricted) against each Model 2 specification.
#
# Both Model 2 specifications are built on the generic B_TIME coefficient, so
# Model 0 is the restricted model in both tests. Its statistics are read from
# `results` (the Model 0 estimation) and stored under model_0_* names. They were
# previously stored under model_1_* names, which overwrote the real Model 1
# values with Model 0 numbers. Computing them here also removes the dependency
# on an earlier cell having defined model_0_loglike / model_0_numParam.
model_0_loglike = results.data.logLike
model_0_numParam = len(results.get_beta_values())

# Final log-likelihood and number of estimated parameters of each specification.
spec1_loglike = results_spec1.data.logLike
spec1_numParam = len(results_spec1.get_beta_values())

spec2_loglike = results_spec2.data.logLike
spec2_numParam = len(results_spec2.get_beta_values())

# One significance level for both tests: 0.05, i.e. 5%.
significance_level = 0.05

# Model 0 vs Model 2 - Specification 1 (age interacting with the constants).
# Each model is passed as [log-likelihood, number of parameters].
lr_test_spec1 = likelihood_ratio_test(
    [model_0_loglike, model_0_numParam],
    [spec1_loglike, spec1_numParam],
    significance_level,
)

# Element 0 of the result is the conclusion message, not the test statistic.
print("Likelihood Ratio Test Results for Model 0 vs Model 2 - Specification 1:")
print(f" {lr_test_spec1[0]}")

# Model 0 vs Model 2 - Specification 2 (age interacting with travel time).
lr_test_spec2 = likelihood_ratio_test(
    [model_0_loglike, model_0_numParam],
    [spec2_loglike, spec2_numParam],
    significance_level,
)

print("\nLikelihood Ratio Test Results for Model 0 vs Model 2 - Specification 2:")
print(f": {lr_test_spec2[0]}")




Likelihood Ratio Test Results for Model 0 vs Model 2 - Specification 1:
 H0 can be rejected at level 5.0%

Likelihood Ratio Test Results for Model 0 vs Model 2 - Specification 2:
: H0 can be rejected at level 5.0%
