## Preprocessing

### Imports

In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from IPython.core.display_functions import display
from biogeme.expressions import Beta, Variable
from biogeme.models import loglogit, boxcox
from biogeme.segmentation import DiscreteSegmentationTuple, segmented_beta

### Load Data and Filter

Remove all those who shouldn't have access to cars but still chose it

In [2]:
data = pd.read_csv("lpmc01.dat", sep = '\t')

# Step 1: Identify whether each household has a driving license
data['household_has_license'] = data.groupby('household_id')['driving_license'].transform(lambda x: x.max())

# Step 2: Define a filter for the availability conditions not being met
car_availability_unmet = (data['car_ownership'] == 0) & (data['driving_license'] == 0) & (data['household_has_license'] == 0)

# Step 3: Identify rows where travel_mode is 4 (car) but the availability conditions are not met
car_without_availability = data[(data['travel_mode'] == 4) & car_availability_unmet]

# Step 4: Remove those rows from the original DataFrame
data_filtered = data[~data.index.isin(car_without_availability.index)]

database = db.Database('london', data_filtered)

## Variable Definition

In [3]:
trip_id = Variable('trip_id')
household_id = Variable('household_id')
person_n = Variable('person_n')
trip_n = Variable('trip_n')
travel_mode = Variable('travel_mode')
purpose = Variable('purpose')
fueltype = Variable('fueltype')
faretype = Variable('faretype')
bus_scale = Variable('bus_scale')
survey_year = Variable('survey_year')
travel_year = Variable('travel_year')
travel_month = Variable('travel_month')
travel_date = Variable('travel_date')
day_of_week = Variable('day_of_week')
start_time = Variable('start_time')
age = Variable('age')
female = Variable('female')
driving_license = Variable('driving_license')
car_ownership = Variable('car_ownership')
distance = Variable('distance')
dur_walking = Variable('dur_walking')
dur_cycling = Variable('dur_cycling')
dur_pt_access = Variable('dur_pt_access') # Predicted total access and egress time for public transport route in hours
dur_pt_rail = Variable('dur_pt_rail')
dur_pt_bus = Variable('dur_pt_bus')
dur_pt_int = Variable('dur_pt_int') # Time taken (hrs) at each interchange point
pt_interchanges = Variable('pt_interchanges')   # Number of interchange points in public transport route
dur_driving = Variable('dur_driving')
cost_transit = Variable('cost_transit')
cost_driving_fuel = Variable('cost_driving_fuel')   # Estimated fuel cost of driving route in GBP
cost_driving_ccharge = Variable('cost_driving_ccharge')  # Estimated congestion charge cost of driving route in GBP
driving_traffic_percent = Variable('driving_traffic_percent')

household_has_license = Variable('household_has_license')


In [4]:
# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
time_pt = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int 

time_cycling = dur_cycling
time_walking = dur_walking
time_driving = dur_driving

## Model 0

Only generic ASCs and parameters

In [5]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)


In [6]:
# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time * time_driving

v_walk = asc_walk + beta_time * time_walking

v_pt = asc_pt + beta_cost * cost_transit + beta_time * time_pt

v_cycling = beta_time * time_cycling

In [7]:
# Availability

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

In [8]:
logprob_m0 = loglogit(V, av, travel_mode)

model_0 = bio.BIOGEME(database, logprob_m0)
model_0.modelName = 'model_0'

null_log_likelihood_m0 = model_0.calculate_null_loglikelihood(av)

results_m0 = model_0.estimate()

print(results_m0.print_general_statistics())

Number of estimated parameters:	5
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-4007.051
Likelihood ratio test for the null model:	5206.557
Rho-square for the null model:	0.394
Rho-square-bar for the null model:	0.393
Likelihood ratio test for the init. model:	5206.557
Rho-square for the init. model:	0.394
Rho-square-bar for the init. model:	0.393
Akaike Information Criterion:	8024.101
Bayesian Information Criterion:	8056.628
Final gradient norm:	2.4196E-03
Nbr of threads:	16



In [9]:
pd_results_m0 = results_m0.get_estimated_parameters()
display(pd_results_m0)


Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.904088,0.091323,31.800297,0.0
asc_pt,3.355414,0.094523,35.498546,0.0
asc_walk,3.945274,0.112873,34.953192,0.0
beta_cost,-0.206632,0.015116,-13.669511,0.0
beta_time,-5.974888,0.227355,-26.280001,0.0


## Model 1: Time alternative specific

In [10]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time_car = Beta(name='beta_time_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_pt = Beta(name='beta_time_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_walk = Beta(name='beta_time_walk', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_cycling = Beta(name='beta_time_cycling', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time_car * time_driving

v_walk = asc_walk + beta_time_walk * time_walking

v_pt = asc_pt + beta_cost * cost_transit + beta_time_pt * time_pt

v_cycling = beta_time_cycling * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

logprob_m1 = loglogit(V, av, travel_mode)

model_1 = bio.BIOGEME(database, logprob_m1)
model_1.modelName = 'model_1'

null_log_likelihood_m1 = model_1.calculate_null_loglikelihood(av)

results_m1 = model_1.estimate()
print(results_m1.print_general_statistics())


Number of estimated parameters:	8
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-3663.213
Likelihood ratio test for the null model:	5894.233
Rho-square for the null model:	0.446
Rho-square-bar for the null model:	0.445
Likelihood ratio test for the init. model:	5894.233
Rho-square for the init. model:	0.446
Rho-square-bar for the init. model:	0.445
Akaike Information Criterion:	7342.425
Bayesian Information Criterion:	7394.468
Final gradient norm:	6.3734E-03
Nbr of threads:	16



In [11]:
pd_results_m1 = results_m1.get_estimated_parameters()
display(pd_results_m1)


Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.873129,0.149264,19.248647,0.0
asc_pt,1.983868,0.152605,13.000034,0.0
asc_walk,4.639939,0.204647,22.672896,0.0
beta_cost,-0.191306,0.018074,-10.584411,0.0
beta_time_car,-6.689821,0.392282,-17.05359,0.0
beta_time_cycling,-6.543603,0.512912,-12.757758,0.0
beta_time_pt,-3.462775,0.264295,-13.10194,0.0
beta_time_walk,-9.155925,0.469862,-19.486417,0.0


### Comparison between Model 0 and Model 1

In [12]:
loglikehood_m0 = results_m0.data.logLike
num_params_m0 = results_m0.data.nparam

loglikehood_m1 = results_m1.data.logLike
num_params_m1 = results_m1.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_m1 - loglikehood_m0)

# Degrees of freedom
df = num_params_m1 - num_params_m0

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.95, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model 1 is significantly better than Model 0.")
else:
    print("No significant improvement in Model 1 over Model 0.")



Likelihood Ratio: 687.6759050711189
Degrees of Freedom: 3
Critical Chi-Square Value (0.05 significance): 7.814727903251179
Model 1 is significantly better than Model 0.


We find that Model 1 is better than Model 0 based on the likelihood test

## Model 2: Socio-economic interactions (Sex)

In [None]:
# Time segmentation
sex_segmentation = DiscreteSegmentationTuple(
    variable=female, mapping={0: 'other', 1: 'female'}
)

### Model 2A: Sex interaction with Time

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_time_car = Beta(name='beta_time_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_pt = Beta(name='beta_time_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_walk = Beta(name='beta_time_walk', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_cycling = Beta(name='beta_time_cycling', value=0, lowerbound=None, upperbound=None, status=0)

segmented_B_time_car = segmented_beta(beta_time_car, [sex_segmentation])
segmented_B_time_pt = segmented_beta(beta_time_pt, [sex_segmentation])
segmented_B_time_walk = segmented_beta(beta_time_walk, [sex_segmentation])
segmented_B_time_cycling = segmented_beta(beta_time_cycling, [sex_segmentation])


# Define alternative specific parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time_car * time_driving

v_walk = asc_walk + beta_time_walk * time_walking

v_pt = asc_pt + beta_cost * cost_transit + beta_time_pt * time_pt

v_cycling = beta_time_cycling * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

In [None]:
logprob_m2a = loglogit(V, av, travel_mode)

model_2a = bio.BIOGEME(database, logprob_m2a)
model_2a.modelName = 'model_2a'

null_log_likelihood_m2a = model_2a.calculate_null_loglikelihood(av)

results_m2a = model_2a.estimate()
print(results_m2a.print_general_statistics())


Number of estimated parameters:	7
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-3943.101
Likelihood ratio test for the null model:	5334.455
Rho-square for the null model:	0.403
Rho-square-bar for the null model:	0.402
Likelihood ratio test for the init. model:	5334.455
Rho-square for the init. model:	0.403
Rho-square-bar for the init. model:	0.402
Akaike Information Criterion:	7900.203
Bayesian Information Criterion:	7945.74
Final gradient norm:	3.8666E-03
Nbr of threads:	16



In [None]:
pd_results_m2a = results_m2a.get_estimated_parameters()
display(pd_results_m2a)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.943807,0.089851,32.763102,0.0
asc_pt,2.938448,0.10132,29.001769,0.0
asc_walk,3.856007,0.113037,34.112845,0.0
beta_cost_car,-0.265288,0.026525,-10.001362,0.0
beta_cost_pt,0.050593,0.030476,1.660084,0.096897
beta_time,-5.987109,0.277836,-21.549062,0.0
beta_time_female,0.097284,0.26277,0.370225,0.711215


### Model 2B: Sex interaction with all ASCs

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_cost_car = Beta(name='beta_cost_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_cost_pt = Beta(name='beta_cost_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Segmented Walk ASC

segmented_ASC_walk = segmented_beta(asc_walk, [sex_segmentation])
segmented_ASC_car = segmented_beta(asc_car, [sex_segmentation])
segmented_ASC_pt = segmented_beta(asc_pt, [sex_segmentation])
ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

# v_car = segmented_ASC_car + beta_cost_car * cost_driving + beta_time * time_driving
# v_walk = segmented_ASC_walk + beta_time * time_walking
# v_pt = segmented_ASC_pt + beta_cost_pt * cost_transit + beta_time * time_pt


v_car = asc_car + beta_cost_car * cost_driving + beta_time * time_driving
v_walk = asc_walk + beta_time * time_walking
v_pt = asc_pt + beta_cost_pt * cost_transit + beta_time * time_pt

v_cycling = ASC_cycling_female * (female == 1) + beta_time * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

In [None]:
logprob_m2b = loglogit(V, av, travel_mode)

model_2b = bio.BIOGEME(database, logprob_m2b)
model_2b.modelName = 'model_2b'

null_log_likelihood_m2b = model_2b.calculate_null_loglikelihood(av)

results_m2b = model_2b.estimate()
print(results_m2b.print_general_statistics())


Number of estimated parameters:	7
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-3912.876
Likelihood ratio test for the null model:	5394.906
Rho-square for the null model:	0.408
Rho-square-bar for the null model:	0.407
Likelihood ratio test for the init. model:	5394.906
Rho-square for the init. model:	0.408
Rho-square-bar for the init. model:	0.407
Akaike Information Criterion:	7839.752
Bayesian Information Criterion:	7885.289
Final gradient norm:	3.6695E-03
Nbr of threads:	16



In [None]:
pd_results_m2b = results_m2b.get_estimated_parameters()
display(pd_results_m2b)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.406768,0.104246,23.087389,0.0
asc_cycling_female,-1.334305,0.188679,-7.071818,1.529221e-12
asc_pt,2.404008,0.113327,21.213038,0.0
asc_walk,3.328543,0.122239,27.229691,0.0
beta_cost_car,-0.263671,0.026386,-9.99266,0.0
beta_cost_pt,0.053245,0.030315,1.756423,0.07901614
beta_time,-5.969256,0.232429,-25.682077,0.0


### Compare Model 2b with Model 1

In [None]:
loglikehood_m2b = results_m2b.data.logLike
num_params_m2b = results_m2b.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_m2b - loglikehood_m1)

# Degrees of freedom
df = num_params_m2b - num_params_m1

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.95, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model 2b is significantly better than Model 1.")
else:
    print("No significant improvement in Model 2b over Model 1.")



Likelihood Ratio: 60.60571605403584
Degrees of Freedom: 1
Critical Chi-Square Value (0.05 significance): 3.841458820694124
Model 2b is significantly better than Model 1.


### Compare Segmenting all ASCs vs just Cycling

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_cost_car = Beta(name='beta_cost_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_cost_pt = Beta(name='beta_cost_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Segmented Walk ASC

segmented_ASC_walk = segmented_beta(asc_walk, [sex_segmentation])
segmented_ASC_car = segmented_beta(asc_car, [sex_segmentation])
segmented_ASC_pt = segmented_beta(asc_pt, [sex_segmentation])
ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = segmented_ASC_car + beta_cost_car * cost_driving + beta_time * time_driving
v_walk = segmented_ASC_walk + beta_time * time_walking
v_pt = segmented_ASC_pt + beta_cost_pt * cost_transit + beta_time * time_pt

v_cycling = ASC_cycling_female * (female == 1) + beta_time * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

In [None]:
logprob_m2b_all = loglogit(V, av, travel_mode)

model_2b_all = bio.BIOGEME(database, logprob_m2b_all)
model_2b_all.modelName = 'model_2b_all'

null_log_likelihood_m2b_all = model_2b_all.calculate_null_loglikelihood(av)

results_m2b_all = model_2b_all.estimate()
print(results_m2b_all.print_general_statistics())


Number of estimated parameters:	10
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-3910.55
Likelihood ratio test for the null model:	5399.559
Rho-square for the null model:	0.408
Rho-square-bar for the null model:	0.407
Likelihood ratio test for the init. model:	5399.559
Rho-square for the init. model:	0.408
Rho-square-bar for the init. model:	0.407
Akaike Information Criterion:	7841.099
Bayesian Information Criterion:	7906.153
Final gradient norm:	4.6180E-03
Nbr of threads:	16



In [None]:
loglikehood_m2b_all = results_m2b_all.data.logLike
num_params_m2b_all = results_m2b_all.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_m2b_all - loglikehood_m2b)

# Degrees of freedom
df = num_params_m2b_all - num_params_m2b

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.95, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model 2b_all is significantly better than Model 2b.")
else:
    print("No significant improvement in Model 2b_all over Model 2b.")



Likelihood Ratio: 4.652473069847474
Degrees of Freedom: 3
Critical Chi-Square Value (0.05 significance): 7.814727903251179
No significant improvement in Model 2b_all over Model 2b.


## Model 3: Non-linear specification

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)
LAMBDA = Beta('LAMBDA', 0, None, None, 0)

# Define alternative specific parameters

beta_cost_car = Beta(name='beta_cost_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_cost_pt = Beta(name='beta_cost_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Segmented Walk ASC

ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost_car * cost_driving + beta_time * boxcox(time_driving,LAMBDA)
v_walk = asc_walk + beta_time * boxcox(time_walking,LAMBDA)
v_pt = asc_pt + beta_cost_pt * cost_transit + beta_time * boxcox(time_pt,LAMBDA)

v_cycling = ASC_cycling_female * (female == 1) + beta_time * boxcox(time_cycling,LAMBDA)

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)


In [None]:
logprob_m3 = loglogit(V, av, travel_mode)

model_3 = bio.BIOGEME(database, logprob_m3)
model_3.modelName = 'model_3'

null_log_likelihood_m3 = model_3.calculate_null_loglikelihood(av)

results_m3 = model_3.estimate()
print(results_m3.print_general_statistics())


Number of estimated parameters:	8
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-3822.292
Likelihood ratio test for the null model:	5576.074
Rho-square for the null model:	0.422
Rho-square-bar for the null model:	0.421
Likelihood ratio test for the init. model:	5576.074
Rho-square for the init. model:	0.422
Rho-square-bar for the init. model:	0.421
Akaike Information Criterion:	7660.584
Bayesian Information Criterion:	7712.626
Final gradient norm:	1.0293E-01
Nbr of threads:	16



In [None]:
pd_results_m3 = results_m3.get_estimated_parameters()
display(pd_results_m3)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
LAMBDA,0.480279,0.023742,20.228786,0.0
asc_car,2.137815,0.104259,20.504826,0.0
asc_cycling_female,-1.358128,0.187009,-7.262372,3.803624e-13
asc_pt,3.029652,0.123595,24.512744,0.0
asc_walk,4.407632,0.14712,29.959525,0.0
beta_cost_car,-0.196301,0.021652,-9.066256,0.0
beta_cost_pt,-0.038949,0.029644,-1.313875,0.1888884
beta_time,-5.384872,0.165841,-32.470084,0.0


### Compare Model 3 with Model 2   

#### Model C

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_lambda = Beta(name='beta_time_lambda', value=0, lowerbound=None, upperbound=None, status=0)
LAMBDA = Beta('LAMBDA', 0, None, None, 0)

# Define alternative specific parameters

beta_cost_car = Beta(name='beta_cost_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_cost_pt = Beta(name='beta_cost_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Segmented Walk ASC

ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost_car * cost_driving + beta_time_lambda * boxcox(time_driving,LAMBDA) + beta_time * time_driving
v_walk = asc_walk + beta_time_lambda * boxcox(time_walking,LAMBDA) + beta_time * time_walking
v_pt = asc_pt + beta_cost_pt * cost_transit + beta_time_lambda * boxcox(time_pt,LAMBDA) + beta_time * time_pt

v_cycling = ASC_cycling_female * (female == 1) + beta_time_lambda * boxcox(time_cycling,LAMBDA) + beta_time * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)


In [None]:
logprob_mC = loglogit(V, av, travel_mode)

model_C = bio.BIOGEME(database, logprob_mC)
model_C.modelName = 'model_C'

null_log_likelihood_mC = model_C.calculate_null_loglikelihood(av)

results_mC = model_C.estimate()
print(results_mC.print_general_statistics())


Number of estimated parameters:	9
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-3820.209
Likelihood ratio test for the null model:	5580.24
Rho-square for the null model:	0.422
Rho-square-bar for the null model:	0.421
Likelihood ratio test for the init. model:	5580.24
Rho-square for the init. model:	0.422
Rho-square-bar for the init. model:	0.421
Akaike Information Criterion:	7658.418
Bayesian Information Criterion:	7716.966
Final gradient norm:	1.9448E-01
Nbr of threads:	16



#### Test C vs 2b

In [None]:
loglikehood_mC = results_mC.data.logLike
num_params_mC = results_mC.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_mC - loglikehood_m2b)

# Degrees of freedom
df = num_params_mC - num_params_m2b

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.99, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model C is significantly better than Model 2b.")
else:
    print("No significant improvement in Model C over Model 2b.")



Likelihood Ratio: 185.33375483536565
Degrees of Freedom: 2
Critical Chi-Square Value (0.05 significance): 9.21034037197618
Model C is significantly better than Model 2b.


#### Test C vs 3

In [None]:
loglikehood_m3 = results_m3.data.logLike
num_params_m3 = results_m3.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_mC - loglikehood_m3)

# Degrees of freedom
df = num_params_mC - num_params_m3

from scipy.stats import chi2

# Critical value at 0.01 significance level
critical_value = chi2.ppf(0.99, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.01 significance):", critical_value)

if LR > critical_value:
    print("Model C is significantly better than Model 3.")
else:
    print("No significant improvement in Model C over Model 3.")



Likelihood Ratio: 4.165636125654601
Degrees of Freedom: 1
Critical Chi-Square Value (0.01 significance): 6.6348966010212145
No significant improvement in Model C over Model 3.


## Model 4: Nested or Cross-Nested

In [None]:
from biogeme.models import loglogit, lognested
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit

# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)
LAMBDA = Beta('LAMBDA', 0, None, None, 0)

# Define alternative specific parameters

beta_cost_car = Beta(name='beta_cost_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_cost_pt = Beta(name='beta_cost_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Segmented Walk ASC

ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost_car * cost_driving + beta_time * boxcox(time_driving,LAMBDA)
v_walk = asc_walk + beta_time * boxcox(time_walking,LAMBDA)
v_pt = asc_pt + beta_cost_pt * cost_transit + beta_time * boxcox(time_pt,LAMBDA)

v_cycling = ASC_cycling_female * (female == 1) + beta_time * boxcox(time_cycling,LAMBDA)

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)


In [None]:
mu_a = Beta('mu_a', 1, 0, None, 0)
mu_b = Beta('mu_b', 1, 0, None, 0)
nest_a = OneNestForNestedLogit(nest_param=mu_a, list_of_alternatives=[1, 2], name='slow modes')
nest_b = OneNestForNestedLogit(nest_param=mu_b, list_of_alternatives=[3, 4], name='faster modes')
nests = NestsForNestedLogit(choice_set=list(V), tuple_of_nests=(nest_a, nest_b))

logprob_m4 = lognested(V, av, nests, travel_mode)


# logprob_m3 = loglogit(V, av, travel_mode)

model_4 = bio.BIOGEME(database, logprob_m4)
model_4.modelName = 'model_4'

null_log_likelihood_m4 = model_4.calculate_null_loglikelihood(av)

results_m4 = model_4.estimate()
print(results_m4.print_general_statistics())


The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, for Biogeme (like for Pandas), the expression (a <= x <= b) is not equivalent to (a <= x) and (x <= b).
The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, for Biogeme (like for Pandas), the expression (a <= x <= b) is not equivalent to (a <= x) and (x <= b).
The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, for Biogeme (like for Pandas), the expression (a <= x <= b) is not equivalent to (a <= x) and (x <= b).
The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, 

Number of estimated parameters:	10
Sample size:	4941
Excluded observations:	0
Null log likelihood:	-6610.329
Init log likelihood:	-6610.329
Final log likelihood:	-3807.078
Likelihood ratio test for the null model:	5606.502
Rho-square for the null model:	0.424
Rho-square-bar for the null model:	0.423
Likelihood ratio test for the init. model:	5606.502
Rho-square for the init. model:	0.424
Rho-square-bar for the init. model:	0.423
Akaike Information Criterion:	7634.156
Bayesian Information Criterion:	7699.209
Final gradient norm:	7.6265E-02
Nbr of threads:	16



In [None]:
pd_results_m4 = results_m4.get_estimated_parameters()
display(pd_results_m4)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
LAMBDA,0.509533,0.026464,19.25371,0.0
asc_car,2.218673,0.165932,13.370986,0.0
asc_cycling_female,-1.507425,0.234679,-6.42336,1.332985e-10
asc_pt,3.19196,0.191175,16.696573,0.0
asc_walk,4.919867,0.232887,21.125571,0.0
beta_cost_car,-0.261827,0.030239,-8.658677,0.0
beta_cost_pt,-0.070793,0.036535,-1.937687,0.05266144
beta_time,-6.336354,0.285157,-22.220551,0.0
mu_a,0.904861,0.082078,11.024372,0.0
mu_b,0.765282,0.036324,21.068066,0.0
