## Preprocessing

### Imports

In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from IPython.core.display_functions import display
from biogeme.expressions import Beta, Variable
from biogeme.models import loglogit, boxcox
from biogeme.segmentation import DiscreteSegmentationTuple, segmented_beta

### Load Data and Filter

Remove all those who shouldn't have access to cars but still chose it

In [2]:
data = pd.read_csv("lpmc01.dat", sep = '\t')

# # Step 1: Identify whether each household has a driving license
# data['household_has_license'] = data.groupby('household_id')['driving_license'].transform(lambda x: x.max())

# # Step 2: Define a filter for the availability conditions not being met
# car_availability_unmet = (data['car_ownership'] == 0) & (data['driving_license'] == 0) & (data['household_has_license'] == 0)

# # Step 3: Identify rows where travel_mode is 4 (car) but the availability conditions are not met
# car_without_availability = data[(data['travel_mode'] == 4) & car_availability_unmet]

# # Step 4: Remove those rows from the original DataFrame
# data_filtered = data[~data.index.isin(car_without_availability.index)]
data_filtered = data

database = db.Database('london', data_filtered)

## Variable Definition

In [3]:
trip_id = Variable('trip_id')
household_id = Variable('household_id')
person_n = Variable('person_n')
trip_n = Variable('trip_n')
travel_mode = Variable('travel_mode')
purpose = Variable('purpose')
fueltype = Variable('fueltype')
faretype = Variable('faretype')
bus_scale = Variable('bus_scale')
survey_year = Variable('survey_year')
travel_year = Variable('travel_year')
travel_month = Variable('travel_month')
travel_date = Variable('travel_date')
day_of_week = Variable('day_of_week')
start_time = Variable('start_time')
age = Variable('age')
female = Variable('female')
driving_license = Variable('driving_license')
car_ownership = Variable('car_ownership')
distance = Variable('distance')
dur_walking = Variable('dur_walking')
dur_cycling = Variable('dur_cycling')
dur_pt_access = Variable('dur_pt_access') # Predicted total access and egress time for public transport route in hours
dur_pt_rail = Variable('dur_pt_rail')
dur_pt_bus = Variable('dur_pt_bus')
dur_pt_int = Variable('dur_pt_int') # Time taken (hrs) at each interchange point
pt_interchanges = Variable('pt_interchanges')   # Number of interchange points in public transport route
dur_driving = Variable('dur_driving')
cost_transit = Variable('cost_transit')
cost_driving_fuel = Variable('cost_driving_fuel')   # Estimated fuel cost of driving route in GBP
cost_driving_ccharge = Variable('cost_driving_ccharge')  # Estimated congestion charge cost of driving route in GBP
driving_traffic_percent = Variable('driving_traffic_percent')

household_has_license = Variable('household_has_license')


In [4]:
# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
time_pt = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int 

time_cycling = dur_cycling
time_walking = dur_walking
time_driving = dur_driving

## Model 0

Only generic ASCs and parameters

In [5]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)


In [6]:
# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time * time_driving

v_walk = asc_walk + beta_time * time_walking

v_pt = asc_pt + beta_cost * cost_transit + beta_time * time_pt

v_cycling = beta_time * time_cycling

In [7]:
# Availability

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership >= 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

In [8]:
logprob_m0 = loglogit(V, av, travel_mode)

model_0 = bio.BIOGEME(database, logprob_m0)
model_0.modelName = 'model_0'

null_log_likelihood_m0 = model_0.calculate_null_loglikelihood(av)

results_m0 = model_0.estimate()

print(results_m0.print_general_statistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Null log likelihood:	-6931.472
Init log likelihood:	-4650.614
Final log likelihood:	-4581.896
Likelihood ratio test for the null model:	4699.152
Rho-square for the null model:	0.339
Rho-square-bar for the null model:	0.338
Likelihood ratio test for the init. model:	137.4367
Rho-square for the init. model:	0.0148
Rho-square-bar for the init. model:	0.0137
Akaike Information Criterion:	9173.791
Bayesian Information Criterion:	9206.377
Final gradient norm:	1.0870E-04
Nbr of threads:	16



In [9]:
pd_results_m0 = results_m0.get_estimated_parameters()
display(pd_results_m0)


Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.490171,0.085858,29.003358,0.0
asc_pt,3.252517,0.088852,36.605993,0.0
asc_walk,3.778178,0.103171,36.620556,0.0
beta_cost,-0.190833,0.014635,-13.039906,0.0
beta_time,-5.598792,0.203408,-27.524872,0.0


## Model 1: Time alternative specific

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time_car = Beta(name='beta_time_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_pt = Beta(name='beta_time_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_walk = Beta(name='beta_time_walk', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_cycling = Beta(name='beta_time_cycling', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time_car * time_driving

v_walk = asc_walk + beta_time_walk * time_walking

v_pt = asc_pt + beta_cost * cost_transit + beta_time_pt * time_pt

v_cycling = beta_time_cycling * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership >= 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

logprob_m1 = loglogit(V, av, travel_mode)

model_1 = bio.BIOGEME(database, logprob_m1)
model_1.modelName = 'model_1'

null_log_likelihood_m1 = model_1.calculate_null_loglikelihood(av)

results_m1 = model_1.estimate()
print(results_m1.print_general_statistics())


KeyError: 'household_has_license'

In [None]:
pd_results_m1 = results_m1.get_estimated_parameters()
display(pd_results_m1)


Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.873129,0.149264,19.248647,0.0
asc_pt,1.983868,0.152605,13.000034,0.0
asc_walk,4.639939,0.204647,22.672896,0.0
beta_cost,-0.191306,0.018074,-10.584411,0.0
beta_time_car,-6.689821,0.392282,-17.05359,0.0
beta_time_cycling,-6.543603,0.512912,-12.757758,0.0
beta_time_pt,-3.462775,0.264295,-13.10194,0.0
beta_time_walk,-9.155925,0.469862,-19.486417,0.0


### Comparison between Model 0 and Model 1

In [None]:
loglikehood_m0 = results_m0.data.logLike
num_params_m0 = results_m0.data.nparam

loglikehood_m1 = results_m1.data.logLike
num_params_m1 = results_m1.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_m1 - loglikehood_m0)

# Degrees of freedom
df = num_params_m1 - num_params_m0

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.95, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model 1 is significantly better than Model 0.")
else:
    print("No significant improvement in Model 1 over Model 0.")



Likelihood Ratio: nan
Degrees of Freedom: 3
Critical Chi-Square Value (0.05 significance): 7.814727903251179
No significant improvement in Model 1 over Model 0.


We find that Model 1 is better than Model 0 based on the likelihood test

## Model 2: Socio-economic interactions (Sex)

In [None]:
# Time segmentation
sex_segmentation = DiscreteSegmentationTuple(
    variable=female, mapping={0: 'other', 1: 'female'}
)

### Model 2A: Sex interaction with Time

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_time_car = Beta(name='beta_time_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_pt = Beta(name='beta_time_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_walk = Beta(name='beta_time_walk', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_cycling = Beta(name='beta_time_cycling', value=0, lowerbound=None, upperbound=None, status=0)

segmented_B_time_car = segmented_beta(beta_time_car, [sex_segmentation])
segmented_B_time_walk = segmented_beta(beta_time_walk, [sex_segmentation])
segmented_B_time_cycling = segmented_beta(beta_time_cycling, [sex_segmentation])


# Define alternative specific parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost * cost_driving + segmented_B_time_car * time_driving

v_walk = asc_walk + segmented_B_time_walk * time_walking

v_pt = asc_pt + beta_cost * cost_transit + beta_time_pt * time_pt

v_cycling = segmented_B_time_cycling * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

In [None]:
logprob_m2a = loglogit(V, av, travel_mode)

model_2a = bio.BIOGEME(database, logprob_m2a)
model_2a.modelName = 'model_2a'

null_log_likelihood_m2a = model_2a.calculate_null_loglikelihood(av)

results_m2a = model_2a.estimate()
print(results_m2a.print_general_statistics())


The chosen alternative [travel_mode] is not available for the following observations (rownumber[choice]): 44[4.0]-112[4.0]-168[4.0]-227[4.0]-280[4.0]-539[4.0]-646[4.0]-730[4.0]-790[4.0]-813[4.0]-874[4.0]-88...


Number of estimated parameters:	11
Sample size:	5000
Excluded observations:	0
Null log likelihood:	-6675.147
Init log likelihood:	-inf
Final log likelihood:	-inf
Likelihood ratio test for the null model:	-inf
Rho-square for the null model:	-1.8e+308
Rho-square-bar for the null model:	-1.8e+308
Likelihood ratio test for the init. model:	nan
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	0
Akaike Information Criterion:	inf
Bayesian Information Criterion:	inf
Final gradient norm:	1.1699E-02
Nbr of threads:	16



In [None]:
pd_results_m2a = results_m2a.get_estimated_parameters()
display(pd_results_m2a)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.687503,0.16927,15.877003,0.0
asc_pt,1.781266,0.172984,10.297302,0.0
asc_walk,4.459032,0.217132,20.536088,0.0
beta_cost,-0.190481,0.018069,-10.542054,0.0
beta_time_car,-6.516034,0.399651,-16.304308,0.0
beta_time_car_female,-0.951818,0.253896,-3.748845,0.000178
beta_time_cycling,-5.944615,0.463687,-12.820317,0.0
beta_time_cycling_female,-3.898006,0.95295,-4.090461,4.3e-05
beta_time_pt,-3.600084,0.272779,-13.197786,0.0
beta_time_walk,-8.927291,0.486317,-18.356949,0.0


In [None]:
loglikehood_m2a = results_m2a.data.logLike
num_params_m2a = results_m2a.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_m2a - loglikehood_m1)

# Degrees of freedom
df = num_params_m2a - num_params_m1

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.95, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model 2a is significantly better than Model 1.")
else:
    print("No significant improvement in Model 2a over Model 1.")



Likelihood Ratio: nan
Degrees of Freedom: 3
Critical Chi-Square Value (0.05 significance): 7.814727903251179
No significant improvement in Model 2a over Model 1.


### Model 2B: Sex interaction with all ASCs

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_time_car = Beta(name='beta_time_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_pt = Beta(name='beta_time_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_walk = Beta(name='beta_time_walk', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_cycling = Beta(name='beta_time_cycling', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)


# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time_car * time_driving

v_walk = asc_walk + beta_time_walk * time_walking

v_pt = asc_pt + beta_cost * cost_transit + beta_time_pt * time_pt

v_cycling = ASC_cycling_female * (female == 1) + beta_time_cycling * time_cycling


# Define utility functions

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

In [None]:
logprob_m2b = loglogit(V, av, travel_mode)

model_2b = bio.BIOGEME(database, logprob_m2b)
model_2b.modelName = 'model_2b'

null_log_likelihood_m2b = model_2b.calculate_null_loglikelihood(av)

results_m2b = model_2b.estimate()
print(results_m2b.print_general_statistics())


The chosen alternative [travel_mode] is not available for the following observations (rownumber[choice]): 44[4.0]-112[4.0]-168[4.0]-227[4.0]-280[4.0]-539[4.0]-646[4.0]-730[4.0]-790[4.0]-813[4.0]-874[4.0]-88...


Number of estimated parameters:	9
Sample size:	5000
Excluded observations:	0
Null log likelihood:	-6675.147
Init log likelihood:	-inf
Final log likelihood:	-inf
Likelihood ratio test for the null model:	-inf
Rho-square for the null model:	-1.8e+308
Rho-square-bar for the null model:	-1.8e+308
Likelihood ratio test for the init. model:	nan
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	0
Akaike Information Criterion:	inf
Bayesian Information Criterion:	inf
Final gradient norm:	1.3941E-01
Nbr of threads:	16



In [None]:
pd_results_m2b = results_m2b.get_estimated_parameters()
display(pd_results_m2b)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,2.274583,0.161368,14.095635,0.0
asc_cycling_female,-1.352205,0.185643,-7.283915,3.241851e-13
asc_pt,1.385823,0.164287,8.435367,0.0
asc_walk,4.043483,0.212592,19.019948,0.0
beta_cost,-0.190871,0.018124,-10.531572,0.0
beta_time_car,-6.713876,0.393908,-17.044291,0.0
beta_time_cycling,-6.752828,0.511128,-13.211616,0.0
beta_time_pt,-3.481605,0.265758,-13.100666,0.0
beta_time_walk,-9.170549,0.471181,-19.462909,0.0


### Compare Model 2b with Model 1

In [None]:
loglikehood_m2b = results_m2b.data.logLike
num_params_m2b = results_m2b.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_m2b - loglikehood_m1)

# Degrees of freedom
df = num_params_m2b - num_params_m1

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.95, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model 2b is significantly better than Model 1.")
else:
    print("No significant improvement in Model 2b over Model 1.")



Likelihood Ratio: nan
Degrees of Freedom: 1
Critical Chi-Square Value (0.05 significance): 3.841458820694124
No significant improvement in Model 2b over Model 1.


## Model 3: Non-linear specification

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters
beta_time_car = Beta(name='beta_time_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_pt = Beta(name='beta_time_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_walk = Beta(name='beta_time_walk', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_cycling = Beta(name='beta_time_cycling', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

LAMBDA = Beta('LAMBDA', 0, None, None, 0)

# Segmented Walk ASC

ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time_car * boxcox(time_driving,LAMBDA)
v_walk = asc_walk + beta_time_walk * boxcox(time_walking,LAMBDA)
v_pt = asc_pt + beta_cost * cost_transit + beta_time_pt * boxcox(time_pt,LAMBDA)

v_cycling = ASC_cycling_female * (female == 1) + beta_time_cycling * boxcox(time_cycling,LAMBDA)

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership >= 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)


In [None]:
logprob_m3 = loglogit(V, av, travel_mode)

model_3 = bio.BIOGEME(database, logprob_m3)
model_3.modelName = 'model_3'

null_log_likelihood_m3 = model_3.calculate_null_loglikelihood(av)

results_m3 = model_3.estimate()
print(results_m3.print_general_statistics())


The chosen alternative [travel_mode] is not available for the following observations (rownumber[choice]): 44[4.0]-112[4.0]-168[4.0]-227[4.0]-280[4.0]-539[4.0]-646[4.0]-730[4.0]-790[4.0]-813[4.0]-874[4.0]-88...


Number of estimated parameters:	10
Sample size:	5000
Excluded observations:	0
Null log likelihood:	-6675.147
Init log likelihood:	-inf
Final log likelihood:	-inf
Likelihood ratio test for the null model:	-inf
Rho-square for the null model:	-1.8e+308
Rho-square-bar for the null model:	-1.8e+308
Likelihood ratio test for the init. model:	nan
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	0
Akaike Information Criterion:	inf
Bayesian Information Criterion:	inf
Final gradient norm:	1.7361E-01
Nbr of threads:	16



In [None]:
pd_results_m3 = results_m3.get_estimated_parameters()
display(pd_results_m3)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
LAMBDA,0.326846,0.051859,6.302557,2.92774e-10
asc_car,2.52482,0.235737,10.710336,0.0
asc_cycling_female,-1.355404,0.185537,-7.305306,2.766676e-13
asc_pt,3.995528,0.224905,17.765405,0.0
asc_walk,1.872902,0.28604,6.5477,5.842993e-11
beta_cost,-0.182666,0.017698,-10.321398,0.0
beta_time_car,-3.576622,0.305693,-11.70004,0.0
beta_time_cycling,-3.835007,0.314741,-12.184662,0.0
beta_time_pt,-2.657098,0.202039,-13.15141,0.0
beta_time_walk,-6.049449,0.316631,-19.105696,0.0


### Compare Model 3 with Model 2   

#### Model C

In [None]:
# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

beta_time = Beta(name='beta_time', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_lambda = Beta(name='beta_time_lambda', value=0, lowerbound=None, upperbound=None, status=0)
LAMBDA = Beta('LAMBDA', 0, None, None, 0)

# Define alternative specific parameters

beta_cost_car = Beta(name='beta_cost_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_cost_pt = Beta(name='beta_cost_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Segmented Walk ASC

ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost_car * cost_driving + beta_time_lambda * boxcox(time_driving,LAMBDA) + beta_time * time_driving
v_walk = asc_walk + beta_time_lambda * boxcox(time_walking,LAMBDA) + beta_time * time_walking
v_pt = asc_pt + beta_cost_pt * cost_transit + beta_time_lambda * boxcox(time_pt,LAMBDA) + beta_time * time_pt

v_cycling = ASC_cycling_female * (female == 1) + beta_time_lambda * boxcox(time_cycling,LAMBDA) + beta_time * time_cycling

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)


In [None]:
logprob_mC = loglogit(V, av, travel_mode)

model_C = bio.BIOGEME(database, logprob_mC)
model_C.modelName = 'model_C'

null_log_likelihood_mC = model_C.calculate_null_loglikelihood(av)

results_mC = model_C.estimate()
print(results_mC.print_general_statistics())


The chosen alternative [travel_mode] is not available for the following observations (rownumber[choice]): 44[4.0]-112[4.0]-168[4.0]-227[4.0]-280[4.0]-539[4.0]-646[4.0]-730[4.0]-790[4.0]-813[4.0]-874[4.0]-88...


Number of estimated parameters:	9
Sample size:	5000
Excluded observations:	0
Null log likelihood:	-6675.147
Init log likelihood:	-inf
Final log likelihood:	-inf
Likelihood ratio test for the null model:	-inf
Rho-square for the null model:	-1.8e+308
Rho-square-bar for the null model:	-1.8e+308
Likelihood ratio test for the init. model:	nan
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	0
Akaike Information Criterion:	inf
Bayesian Information Criterion:	inf
Final gradient norm:	9.8315E-03
Nbr of threads:	16



#### Test C vs 2b

In [None]:
loglikehood_mC = results_mC.data.logLike
num_params_mC = results_mC.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_mC - loglikehood_m2b)

# Degrees of freedom
df = num_params_mC - num_params_m2b

from scipy.stats import chi2

# Critical value at 0.05 significance level
critical_value = chi2.ppf(0.99, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.05 significance):", critical_value)

if LR > critical_value:
    print("Model C is significantly better than Model 2b.")
else:
    print("No significant improvement in Model C over Model 2b.")



Likelihood Ratio: nan
Degrees of Freedom: 0
Critical Chi-Square Value (0.05 significance): nan
No significant improvement in Model C over Model 2b.


#### Test C vs 3

In [None]:
loglikehood_m3 = results_m3.data.logLike
num_params_m3 = results_m3.data.nparam

# Calculate the LR statistic
LR = 2 * (loglikehood_mC - loglikehood_m3)

# Degrees of freedom
df = num_params_mC - num_params_m3

from scipy.stats import chi2

# Critical value at 0.01 significance level
critical_value = chi2.ppf(0.99, df)

print("Likelihood Ratio:", LR)
print("Degrees of Freedom:", df)
print("Critical Chi-Square Value (0.01 significance):", critical_value)

if LR > critical_value:
    print("Model C is significantly better than Model 3.")
else:
    print("No significant improvement in Model C over Model 3.")



Likelihood Ratio: nan
Degrees of Freedom: -1
Critical Chi-Square Value (0.01 significance): nan
No significant improvement in Model C over Model 3.


## Model 4: Nested or Cross-Nested

In [None]:
from biogeme.models import loglogit, lognested
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit

# Define ASCs 
asc_car = Beta(name='asc_car', value=0, lowerbound=None, upperbound=None, status=0)
asc_walk = Beta(name='asc_walk', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)

# Define generic parameters

LAMBDA = Beta('LAMBDA', 0, None, None, 0)

beta_time_car = Beta(name='beta_time_car', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_pt = Beta(name='beta_time_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_walk = Beta(name='beta_time_walk', value=0, lowerbound=None, upperbound=None, status=0)
beta_time_cycling = Beta(name='beta_time_cycling', value=0, lowerbound=None, upperbound=None, status=0)

# Define alternative specific parameters

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

# Segmented Walk ASC

ASC_cycling_female = Beta(name='asc_cycling_female', value=0, lowerbound=None, upperbound=None, status=0)

# Define utility functions

v_car = asc_car + beta_cost * cost_driving + beta_time_car * boxcox(time_driving,LAMBDA)
v_walk = asc_walk + beta_time_walk * boxcox(time_walking,LAMBDA)
v_pt = asc_pt + beta_cost * cost_transit + beta_time_pt * boxcox(time_pt,LAMBDA)

v_cycling = ASC_cycling_female * (female == 1) + beta_time_cycling * boxcox(time_cycling,LAMBDA)

V = {1: v_walk, 2: v_cycling, 3: v_pt, 4: v_car}

CAR_AV = (car_ownership != 0) | (driving_license != 0) | (household_has_license != 0)
WALK_AV = (car_ownership >= 0)
PT_AV = (car_ownership >= 0)
CYCLING_AV = (car_ownership >= 0)

av = {1: WALK_AV, 2: CYCLING_AV , 3: PT_AV, 4: CAR_AV}

It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)
It is advised to set the bounds on parameter LAMBDA. A value of -10 and 10 should be appropriate: Beta("LAMBDA", 0, -10, 10, 0)


In [None]:
mu_slow = Beta('mu_slow', 1, 0, None, 0)
nest_slow = OneNestForNestedLogit(nest_param = mu_slow, list_of_alternatives=[1, 2], name='slow modes')
nest_fast = OneNestForNestedLogit(nest_param = 1, list_of_alternatives=[3, 4], name='faster modes')
nests = NestsForNestedLogit(choice_set=list(V), tuple_of_nests=(nest_slow, nest_fast))

logprob_m4 = lognested(V, av, nests, travel_mode)

# logprob_m3 = loglogit(V, av, travel_mode)

model_4 = bio.BIOGEME(database, logprob_m4)
model_4.modelName = 'model_4'

null_log_likelihood_m4 = model_4.calculate_null_loglikelihood(av)

results_m4 = model_4.estimate()
print(results_m4.print_general_statistics())


The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, for Biogeme (like for Pandas), the expression (a <= x <= b) is not equivalent to (a <= x) and (x <= b).
The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, for Biogeme (like for Pandas), the expression (a <= x <= b) is not equivalent to (a <= x) and (x <= b).
The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, for Biogeme (like for Pandas), the expression (a <= x <= b) is not equivalent to (a <= x) and (x <= b).
The following expression may potentially be ambiguous: [((car_ownership >= `0.0`) != `0.0`)] if it contains the chaining of two comparisons expressions. Keep in mind that, 

Number of estimated parameters:	11
Sample size:	5000
Excluded observations:	0
Null log likelihood:	-6675.147
Init log likelihood:	-inf
Final log likelihood:	-inf
Likelihood ratio test for the null model:	-inf
Rho-square for the null model:	-1.8e+308
Rho-square-bar for the null model:	-1.8e+308
Likelihood ratio test for the init. model:	nan
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	0
Akaike Information Criterion:	inf
Bayesian Information Criterion:	inf
Final gradient norm:	9.6967E+01
Nbr of threads:	16



In [None]:
pd_results_m4 = results_m4.get_estimated_parameters()
display(pd_results_m4)

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
LAMBDA,0.419292,0.119469,3.509619,0.0004487495
asc_car,2.345378,0.618134,3.794291,0.0001480662
asc_cycling_female,-1.690995,0.371263,-4.554715,5.245661e-06
asc_pt,4.015823,0.404611,9.925151,0.0
asc_walk,1.454344,0.83954,1.73231,0.08321841
beta_cost,-0.202468,0.020373,-9.937845,0.0
beta_time_car,-4.292162,0.774625,-5.540953,3.008304e-08
beta_time_cycling,-4.030771,0.480631,-8.386421,0.0
beta_time_pt,-3.149839,0.407805,-7.723877,1.132427e-14
beta_time_walk,-7.181238,1.055066,-6.806433,1.000489e-11


## Market Shares

In [None]:
populations = {
    'female_45_less': 2841376,
    'female_45_or_more': 1519948,
    'male_45_less': 2929408,
    'male_45_or_more': 1379198,
}

total_pop = sum(populations.values())

filters = {
    'male_45_or_more': (data_filtered.age >= 45) & (data_filtered.female == 0),
    'male_45_less': (data_filtered.age < 45) & (data_filtered.female == 0),
    'female_45_or_more': (data_filtered.age >= 45) & (data_filtered.female == 1),
    'female_45_less': (data_filtered.age < 45) & (data_filtered.female == 1),
}

sample_segments = {
    segment_name: segment_rows.sum() for segment_name, segment_rows in filters.items()
}
print(sample_segments)

total_sample = sum(sample_segments.values())
print(f'Sample size: {total_sample}')

weights = {
    segment_name: populations[segment_name] * total_sample / (segment_size * total_pop)
    for segment_name, segment_size in sample_segments.items()
}
print(weights)


{'male_45_or_more': np.int64(896), 'male_45_less': np.int64(1442), 'female_45_or_more': np.int64(984), 'female_45_less': np.int64(1678)}
Sample size: 5000
{'male_45_or_more': np.float64(0.8877139043468962), 'male_45_less': np.float64(1.1715720875375348), 'female_45_or_more': np.float64(0.890816074423909), 'female_45_less': np.float64(0.9765425353056789)}


In [None]:
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta, Variable, log, exp

from biogeme import models

for segment_name, segment_rows in filters.items():
    data_filtered.loc[segment_rows, 'weight'] = weights[segment_name]


prob_walk = models.nested(V, None, nests, 1)
prob_cycle = models.nested(V, None, nests, 2)
prob_pt = models.nested(V, None, nests, 3)
prob_car = models.nested(V, None, nests, 4)


weight = Variable('weight')
simulate = {
    'weight': weight,
    'Prob. pt': prob_pt,
    'Prob. car': prob_car,
    'Prob. walk': prob_walk,
    'Prob. cycle': prob_cycle,
}

database = db.Database('london', data_filtered)


biosim = BIOGEME(database, simulate)
simulated_values = biosim.simulate(results_m4.get_beta_values())
display(simulated_values)

simulated_values['Weighted pt'] = (
    simulated_values['weight'] * simulated_values['Prob. pt']
)
simulated_values['Weighted car'] = (
    simulated_values['weight'] * simulated_values['Prob. car']
)

simulated_values['Weighted walk'] = (
    simulated_values['weight'] * simulated_values['Prob. walk']
)
simulated_values['Weighted cycle'] = (
    simulated_values['weight'] * simulated_values['Prob. cycle']
)




Unnamed: 0,weight,Prob. pt,Prob. car,Prob. walk,Prob. cycle
0,0.887714,0.228740,0.743843,0.006440,0.020977
1,0.976543,0.058617,0.866179,0.060880,0.014325
2,0.890816,0.180690,0.814945,0.000237,0.004129
3,0.887714,0.279158,0.668614,0.009999,0.042229
4,0.890816,0.114136,0.409632,0.459825,0.016407
...,...,...,...,...,...
4995,0.887714,0.281809,0.682964,0.008506,0.026722
4996,1.171572,0.367082,0.546490,0.020814,0.065614
4997,0.976543,0.223896,0.484621,0.265664,0.025819
4998,0.976543,0.188301,0.809081,0.000366,0.002252


In [None]:
market_share_pt = simulated_values['Weighted pt'].mean()
print(f'Market share for pt: {100*market_share_pt:.1f}%')

market_share_car = simulated_values['Weighted car'].mean()
print(f'Market share for car: {100*market_share_car:.1f}%')

market_share_walk = simulated_values['Weighted walk'].mean()
print(f'Market share for walk: {100*market_share_walk:.1f}%')

market_share_cycle = simulated_values['Weighted cycle'].mean()
print(f'Market share for cycling: {100*market_share_cycle:.1f}%')




Market share for pt: 30.6%
Market share for car: 51.3%
Market share for walk: 15.5%
Market share for cycling: 2.6%


In [None]:
model_4.bootstrap_samples = 100
results_bootstrapping = model_4.estimate(run_bootstrap=True)

betas = model_4.free_beta_names
b = results_bootstrapping.get_betas_for_sensitivity_analysis(betas)
left, right = biosim.confidence_intervals(b, 0.9)

display(left)

display(right)

100%|██████████| 100/100 [00:24<00:00,  4.01it/s]


Unnamed: 0,weight,Prob. pt,Prob. car,Prob. walk,Prob. cycle
0,0.887714,0.228740,0.743843,0.006440,0.020977
1,0.976543,0.058617,0.866179,0.060880,0.014325
2,0.890816,0.180690,0.814945,0.000237,0.004129
3,0.887714,0.279158,0.668614,0.009999,0.042229
4,0.890816,0.114136,0.409632,0.459825,0.016407
...,...,...,...,...,...
4995,0.887714,0.281809,0.682964,0.008506,0.026722
4996,1.171572,0.367082,0.546490,0.020814,0.065614
4997,0.976543,0.223896,0.484621,0.265664,0.025819
4998,0.976543,0.188301,0.809081,0.000366,0.002252


Unnamed: 0,weight,Prob. pt,Prob. car,Prob. walk,Prob. cycle
0,0.887714,0.228740,0.743843,0.006440,0.020977
1,0.976543,0.058617,0.866179,0.060880,0.014325
2,0.890816,0.180690,0.814945,0.000237,0.004129
3,0.887714,0.279158,0.668614,0.009999,0.042229
4,0.890816,0.114136,0.409632,0.459825,0.016407
...,...,...,...,...,...
4995,0.887714,0.281809,0.682964,0.008506,0.026722
4996,1.171572,0.367082,0.546490,0.020814,0.065614
4997,0.976543,0.223896,0.484621,0.265664,0.025819
4998,0.976543,0.188301,0.809081,0.000366,0.002252


In [None]:
# Calculate weighted probabilities
left['Weighted pt'] = left['weight'] * left['Prob. pt']
left['Weighted car'] = left['weight'] * left['Prob. car']
left['Weighted walk'] = left['weight'] * left['Prob. walk']
left['Weighted cycle'] = left['weight'] * left['Prob. cycle']

right['Weighted pt'] = right['weight'] * right['Prob. pt']
right['Weighted car'] = right['weight'] * right['Prob. car']
right['Weighted walk'] = right['weight'] * right['Prob. walk']
right['Weighted cycle'] = right['weight'] * right['Prob. cycle']

# Calculate mean market shares
market_share_pt = simulated_values['Weighted pt'].mean()
market_share_car = simulated_values['Weighted car'].mean()
market_share_walk = simulated_values['Weighted walk'].mean()
market_share_cycle = simulated_values['Weighted cycle'].mean()

# Calculate confidence intervals
left_market_share_pt = left['Weighted pt'].mean()
right_market_share_pt = right['Weighted pt'].mean()

left_market_share_car = left['Weighted car'].mean()
right_market_share_car = right['Weighted car'].mean()

left_market_share_walk = left['Weighted walk'].mean()
right_market_share_walk = right['Weighted walk'].mean()

left_market_share_cycle = left['Weighted cycle'].mean()
right_market_share_cycle = right['Weighted cycle'].mean()

# Print market shares and confidence intervals
print(f"Market share for pt: {100 * market_share_pt:.1f}% "
      f"CI: [{100 * left_market_share_pt:.1f}%-{100 * right_market_share_pt:.1f}%]")

print(f"Market share for car: {100 * market_share_car:.1f}% "
      f"CI: [{100 * left_market_share_car:.1f}%-{100 * right_market_share_car:.1f}%]")

print(f"Market share for walk: {100 * market_share_walk:.1f}% "
      f"CI: [{100 * left_market_share_walk:.1f}%-{100 * right_market_share_walk:.1f}%]")

print(f"Market share for cycling: {100 * market_share_cycle:.1f}% "
      f"CI: [{100 * left_market_share_cycle:.1f}%-{100 * right_market_share_cycle:.1f}%]")


Market share for pt: 30.6% CI: [30.6%-30.6%]
Market share for car: 51.3% CI: [51.3%-51.3%]
Market share for walk: 15.5% CI: [15.5%-15.5%]
Market share for cycling: 2.6% CI: [2.6%-2.6%]


In [None]:
# Example mapping
labels = {1: 'walk', 2: 'cycling', 3: 'pt', 4: 'car'}

# Map the travel_mode column to the labels
data_filtered['mode_label'] = data_filtered['travel_mode'].map(labels)

# Calculate market shares
market_shares = (
    data_filtered['mode_label']
    .value_counts(normalize=True)  # Get proportions
    .sort_index()  # Ensure consistent order
    * 100  # Convert to percentage
)

# Print market shares
for mode, share in market_shares.items():
    print(f"Market share for {mode}: {share:.1f}%")


Market share for car: 44.0%
Market share for cycling: 3.3%
Market share for pt: 35.3%
Market share for walk: 17.4%
