In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from IPython.core.display_functions import display
from biogeme.expressions import Beta, Variable
from biogeme.models import loglogit
from biogeme.segmentation import DiscreteSegmentationTuple, segmented_beta
from scipy.stats import chi2

In [2]:
# Loading the data
df = pd.read_csv('lpmc06.dat', sep='\t')

In [3]:
display(df.columns)

Index(['trip_id', 'household_id', 'person_n', 'trip_n', 'travel_mode',
       'purpose', 'fueltype', 'faretype', 'bus_scale', 'survey_year',
       'travel_year', 'travel_month', 'travel_date', 'day_of_week',
       'start_time', 'age', 'female', 'driving_license', 'car_ownership',
       'distance', 'dur_walking', 'dur_cycling', 'dur_pt_access',
       'dur_pt_rail', 'dur_pt_bus', 'dur_pt_int', 'pt_interchanges',
       'dur_driving', 'cost_transit', 'cost_driving_fuel',
       'cost_driving_ccharge', 'driving_traffic_percent'],
      dtype='object')

In [4]:
display(df.head())

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,7,0,2,1,4,3,1,3,0.0,1,...,0.109444,0.0,0.055556,0.0,0,0.059444,0.0,0.15,0.0,0.11215
1,21,5,1,1,4,3,1,5,0.0,1,...,0.083889,0.0,0.293611,0.0,0,0.167778,0.0,0.46,0.0,0.243377
2,27,7,1,0,4,4,2,1,1.0,1,...,0.193889,0.516667,0.0,0.1,1,0.340833,1.5,2.04,0.0,0.280359
3,52,12,1,2,4,5,2,1,1.0,1,...,0.0625,0.0,0.491944,0.094722,1,0.355556,3.0,1.19,0.0,0.249219
4,53,12,1,3,4,3,2,1,1.0,1,...,0.0825,0.0,0.061944,0.0,0,0.0625,1.5,0.17,0.0,0.124444


In [5]:
database = db.Database('lpmc', df)

## Model 0

We identify the variables that will enter the model specification.

In [6]:
# Time related variables
dur_walking = Variable('dur_walking') # in hours
dur_cycling = Variable('dur_cycling') # in hours
dur_pt_access = Variable('dur_pt_access') # in hours
dur_pt_rail = Variable('dur_pt_rail') # in hours
dur_pt_bus = Variable('dur_pt_bus') # in hours
dur_pt_int = Variable('dur_pt_int') # in hours
dur_driving = Variable('dur_driving') # in hours

dur_pt = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int

# Cost related variables
cost_transit = Variable('cost_transit') # in GBP
cost_driving_fuel = Variable('cost_driving_fuel') # in GBP
cost_driving_ccharge = Variable('cost_driving_ccharge') # in GBP

cost_driving = cost_driving_fuel + cost_driving_ccharge

# Choice taken by the individual
travel_mode = Variable('travel_mode') # 1 = walk, 2 = cycle, 3 = PT, 4 = car

Parameters to be estimated

In [7]:
# ASC_WALK = Beta('asc_walk', 0, None, None, 0)
ASC_CYCLE = Beta('asc_cycle', 0, None, None, 0)
ASC_PT = Beta('asc_pt', 0, None, None, 0)
ASC_CAR = Beta('asc_car', 0, None, None, 0)

B_TIME = Beta('b_time', 0, None, None, 0)
B_COST = Beta('b_cost', 0, None, None, 0)

Definition of the utility functions.

In [8]:
# Walk
V1 = (
  # ASC_WALK -> Normalized with respect to walk
  B_TIME * dur_walking
)

# Cycle
V2 = (
  ASC_CYCLE
  + B_TIME * dur_cycling
)

# Public transport
V3 = (
  ASC_PT
  + B_TIME * dur_pt
  #+ B_TIME * (dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int)
  + B_COST * cost_transit
)

# Car
V4 = (
  ASC_CAR
  + B_TIME * dur_driving
  + B_COST * cost_driving
  #+ B_COST * (cost_driving_fuel + cost_driving_ccharge)
)

In [9]:
V = {1: V1, 2: V2, 3: V3, 4: V4}

Definition of the model.

In [10]:
# All alternatives are available to all individuals.
logprob = loglogit(V, None, travel_mode)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_0'

File biogeme.toml has been created


Estimate the parameters.

In [11]:
results = biogeme.estimate()

Summary statistics.

In [12]:
print(results.print_general_statistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4658.601
Likelihood ratio test for the init. model:	4545.742
Rho-square for the init. model:	0.328
Rho-square-bar for the init. model:	0.327
Akaike Information Criterion:	9327.202
Bayesian Information Criterion:	9359.788
Final gradient norm:	1.0183E-03
Nbr of threads:	8



In [13]:
display(results.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,-1.264832,0.078562,-16.099815,0.0
asc_cycle,-3.742817,0.103011,-36.334033,0.0
asc_pt,-0.55221,0.05387,-10.250839,0.0
b_cost,-0.160728,0.014288,-11.248889,0.0
b_time,-5.340745,0.197809,-26.999514,0.0


## Model 1
In this version of the model, we introduce alternative-specific coefficients for either b_time (model 1a) or b_cost (model 1b), and we compare the results to determine which model yields the best results 

### Model 1a (alternative-specific b_time)


Parameters to be estimated

In [14]:
# Alternative specific constants (ASC_walk is normalized to 0)
ASC_CYCLE_1A = Beta('ASC_CYCLE_1A', 0, None, None, 0)
ASC_PT_1A    = Beta('ASC_PT_1A', 0, None, None, 0)
ASC_CAR_1A   = Beta('ASC_CAR_1A', 0, None, None, 0)

# Alternative specific time coefficient
B_TIME_WALK_1A  = Beta('B_TIME_WALK_1A', 0, None, None, 0)
B_TIME_CYCLE_1A = Beta('B_TIME_CYCLE_1A', 0, None, None, 0)
B_TIME_PT_1A    = Beta('B_TIME_PT_1A', 0, None, None, 0)
B_TIME_CAR_1A   = Beta('B_TIME_CAR_1A', 0, None, None, 0)

# Generic cost coefficient
B_COST_1A = Beta('B_COST_1A', 0, None, None, 0)

Definition of the utility functions

In [15]:
V_walk_1A = (
    B_TIME_WALK_1A * dur_walking
)

V_cycle_1A = (
    ASC_CYCLE_1A
    + B_TIME_CYCLE_1A * dur_cycling
)

V_PT_1A = (
    ASC_PT_1A
    + B_TIME_PT_1A * dur_pt
    + B_COST_1A * cost_transit
)

V_car_1A = (
    ASC_CAR_1A
    + B_TIME_CAR_1A * dur_driving
    + B_COST_1A * cost_driving
)

V_1A = {1: V_walk_1A, 2: V_cycle_1A, 3: V_PT_1A, 4: V_car_1A}

Definition of the model

In [16]:
logprob_1A = loglogit(V_1A, None, travel_mode)
biogeme_1A = bio.BIOGEME(database, logprob_1A)
biogeme_1A.modelName = 'model_1A'

Estimation and display of the results

In [17]:
results_1A = biogeme_1A.estimate()
print(results_1A.print_general_statistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4319.913
Likelihood ratio test for the init. model:	5223.118
Rho-square for the init. model:	0.377
Rho-square-bar for the init. model:	0.376
Akaike Information Criterion:	8655.826
Bayesian Information Criterion:	8707.963
Final gradient norm:	9.8655E-03
Nbr of threads:	8



In [18]:
display(results_1A.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR_1A,-2.087661,0.133307,-15.660567,0.0
ASC_CYCLE_1A,-4.799358,0.19322,-24.838871,0.0
ASC_PT_1A,-2.552549,0.135051,-18.900568,0.0
B_COST_1A,-0.14059,0.015325,-9.174017,0.0
B_TIME_CAR_1A,-6.12053,0.405937,-15.07754,0.0
B_TIME_CYCLE_1A,-5.19715,0.467915,-11.107043,0.0
B_TIME_PT_1A,-3.256306,0.254676,-12.786051,0.0
B_TIME_WALK_1A,-8.643432,0.418678,-20.644602,0.0


_Observations for the report_

First, we observe that all the B coefficients are negative, which makes sense, because a longer and/or more expensive travel mode is less attractive. 

The cost coefficient of model 1A is very close to the one for the model 0. On the other hand, the time coefficient, which was made alternative specific in model 1A, now strongly depends on the chosen mode. In particular, we observe it is the smallest (in absolute value) for PT; an interpretation could be that commuters are more prone to long PT travel times because they can read, sleep, etc., activites they cannot do while driving or cycling. On the other hand, the largest time_coefficient (again, in absolute value) is for walking, probably because beyond 20-30 minutes, people consider that doing the route by foot is too long.

If we compare the Akaike or Bayesian information criterion between model 0 and model 1a, we observe they are both lower in the case of model 1a; it means the latter fits the data better than model 0.

### Model 1b (alternative-specific b_cost)

We reproduce exactly the same steps as for model 1A, but we now assume a generic time coefficient, and a alternative specific cost coefficient

Parameters to be estimated

In [19]:
# Alternative specific constants (ASC_walk is normalized to 0)
ASC_CYCLE_1B = Beta('ASC_CYCLE_1B', 0, None, None, 0)
ASC_PT_1B    = Beta('ASC_PT_1B', 0, None, None, 0)
ASC_CAR_1B   = Beta('ASC_CAR_1B', 0, None, None, 0)

# Alternative specific cost coefficient
B_COST_PT_1B    = Beta('B_COST_PT_1B', 0, None, None, 0)
B_COST_CAR_1B   = Beta('B_COST_CAR_1B', 0, None, None, 0)

# Generic time coefficient
B_TIME_1B = Beta('B_TIME_1B', 0, None, None, 0)

Definition of the utility functions 

In [20]:
V_walk_1B = (
    B_TIME_1B * dur_walking
)

V_cycle_1B = (
    ASC_CYCLE_1B
    + B_TIME_1B * dur_cycling
)

V_PT_1B = (
    ASC_PT_1B
    + B_TIME_1B * dur_pt
    + B_COST_PT_1B * cost_transit
)

V_car_1B = (
    ASC_CAR_1B
    + B_TIME_1B * dur_driving
    + B_COST_CAR_1B * cost_driving
)

V_1B = {1: V_walk_1B, 2: V_cycle_1B, 3: V_PT_1B, 4: V_car_1B}

Definition of the model

In [21]:
logprob_1B = loglogit(V_1B, None, travel_mode)
biogeme_1B = bio.BIOGEME(database, logprob_1B)
biogeme_1B.modelName = 'model_1B'

Estimation and display of the results

In [22]:
results_1B = biogeme_1B.estimate()
print(results_1B.print_general_statistics())

Number of estimated parameters:	6
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4607.516
Likelihood ratio test for the init. model:	4647.911
Rho-square for the init. model:	0.335
Rho-square-bar for the init. model:	0.334
Akaike Information Criterion:	9227.032
Bayesian Information Criterion:	9266.136
Final gradient norm:	5.3980E-04
Nbr of threads:	8



In [23]:
display(results_1B.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR_1B,-1.181056,0.078388,-15.066824,0.0
ASC_CYCLE_1B,-3.698346,0.103376,-35.775502,0.0
ASC_PT_1B,-0.836648,0.065575,-12.758696,0.0
B_COST_CAR_1B,-0.205583,0.021929,-9.374982,0.0
B_COST_PT_1B,0.052828,0.029809,1.772231,0.076356
B_TIME_1B,-5.339547,0.200044,-26.691801,0.0


In [24]:
lr_result1A = results_1A.likelihood_ratio_test(results, 0.05)
print(f'{lr_result1A.statistic=:.3g}')
print(f'{lr_result1A.threshold=:.3g}')
print(lr_result1A.message)

lr_result1A.statistic=677
lr_result1A.threshold=7.81
H0 can be rejected at level 5.0%


In [25]:
lr_result1B = results_1B.likelihood_ratio_test(results, 0.05)
print(f'{lr_result1B.statistic=:.3g}')
print(f'{lr_result1B.threshold=:.3g}')
print(lr_result1B.message)

lr_result1B.statistic=102
lr_result1B.threshold=3.84
H0 can be rejected at level 5.0%


_Observations for the report_

This model seems clearly less effective than model 1A. We see it because

1) The Bayesian and Akaike criterion are higher for model 1B
2) the t-test are closer to 0
3) the cost coefficient for PT is higher than 0, which doesn't make much sense

We could have guessed that model 1B would be less interesting than model 1A, because only two alternatives out of four have a cost parameter, which means that making B_cost alternative specific allows less flexibility in the model than making B_time alternative specific

So our preferred model for the rest of the project will be model 1A (alternative specific time coefficients)

To formalize this, we perform a Cox test. We define a general model containing both an alternative-specific b_time and alt-spec b_cost

In [26]:
# Utility functions
V_walk_1Cox = (B_TIME_WALK_1A * dur_walking)

V_cycle_1Cox = (
    ASC_CYCLE
    + B_TIME_CYCLE_1A * dur_cycling
)

V_PT_1Cox = (
    ASC_PT
    + B_TIME_PT_1A * dur_pt
    + B_COST_PT_1B * cost_transit
)

V_car_1Cox = (
    ASC_CAR
    + B_TIME_CAR_1A * dur_driving
    + B_COST_CAR_1B * cost_driving
)

V_1Cox = {1: V_walk_1Cox, 2: V_cycle_1Cox, 3: V_PT_1Cox, 4: V_car_1Cox}

# Estimation of the results
logprob_1Cox = loglogit(V_1Cox, None, travel_mode)
biogeme_1Cox = bio.BIOGEME(database, logprob_1Cox)
biogeme_1Cox.modelName = 'model_1Cox'
results_1Cox = biogeme_1Cox.estimate()

We compare model 1C with model 1A

In [27]:
lr_result1AC = results_1A.likelihood_ratio_test(results_1Cox, 0.005)
print(f'{lr_result1AC.statistic=:.3g}')
print(f'{lr_result1AC.threshold=:.3g}')
print(lr_result1AC.message)

lr_result1AC.statistic=7.03
lr_result1AC.threshold=7.88
H0 cannot be rejected at level 0.5%


We compare model 1C with model 1B

In [28]:
lr_result1BC = results_1B.likelihood_ratio_test(results_1Cox, 0.005)
print(f'{lr_result1BC.statistic=:.3g}')
print(f'{lr_result1BC.threshold=:.3g}')
print(lr_result1BC.message)

lr_result1BC.statistic=582
lr_result1BC.threshold=12.8
H0 can be rejected at level 0.5%


To conclude, model 1A is better than model 1B because, at the level $\alpha = 0.5 \%$, model B can be rejected against the composite model, but model A cannot. By the Cox test, 1A is better than 1B

## Model 2

In this model we chose a socio-economic characteristic, ownership of a driving liecense, and interacted it with both the ASC and one of the attributes.

It is easy to see that having a driving license has a significant impact on the choice.

We first add the variable driving license, and its segmentation

In [29]:
# driving_license = Variable('purpose')
# driving_license_segmentation = DiscreteSegmentationTuple(
#     variable=driving_license, mapping={1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F'}
# )

# segmented_ASC_CAR_2A = segmented_beta(ASC_CAR_1A,[driving_license_segmentation])

# segmented_ASC_CYCLE_2A = segmented_beta(ASC_CYCLE_1A,[driving_license_segmentation])

# segmented_ASC_PT_2A = segmented_beta(ASC_PT_1A,[driving_license_segmentation])

Then we define the model as usual, but the ASCs are segmented by ownership of driving license

In [30]:
# V_walk_2A = (
#     B_TIME_WALK_1A * dur_walking
# )

# V_cycle_2A = (
#     segmented_ASC_CYCLE_2A
#     + B_TIME_CYCLE_1A * dur_cycling
# )

# V_PT_2A = (
#     segmented_ASC_PT_2A
#     + B_TIME_PT_1A * dur_pt
#     + B_COST_1A * cost_transit
# )

# V_car_2A = (
#     segmented_ASC_CAR_2A
#     + B_TIME_CAR_1A * dur_driving
#     + B_COST_1A * cost_driving
# )

# V_2A ={1: V_walk_2A, 2: V_cycle_2A, 3: V_PT_2A, 4: V_car_2A}

# logprob_2A = loglogit(V_2A, None, travel_mode)
# biogeme_2A = bio.BIOGEME(database, logprob_2A)
# biogeme_2A.modelName = 'model_2A'

# results_2A = biogeme_2A.estimate()
# print(results_2A.print_general_statistics())

In [31]:
# Number of estimated parameters:	11
# Sample size:	5000
# Excluded observations:	0
# Init log likelihood:	-5539.169
# Final log likelihood:	-4105.434
# Likelihood ratio test for the init. model:	2867.471
# Rho-square for the init. model:	0.259
# Rho-square-bar for the init. model:	0.257
# Akaike Information Criterion:	8232.867
# Bayesian Information Criterion:	8304.556
# Final gradient norm:	1.5780E-02
# Nbr of threads:	8

In [32]:
# display(results_2A.get_estimated_parameters())

ASC_CAR with driving license is positive, which makes sense

In [33]:
# segmented_B_TIME_WALK_2B = segmented_beta(B_TIME_WALK_1A,[driving_license_segmentation])

# segmented_B_TIME_CAR_2B = segmented_beta(B_TIME_CAR_1A,[driving_license_segmentation])

# segmented_B_TIME_CYCLE_2B = segmented_beta(B_TIME_CYCLE_1A,[driving_license_segmentation])

# segmented_B_TIME_PT_2B = segmented_beta(B_TIME_PT_1A,[driving_license_segmentation])

In [34]:
# V_walk_2B = (
#     segmented_B_TIME_WALK_2B * dur_walking
# )

# V_cycle_2B = (
#     ASC_CYCLE_1A
#     + segmented_B_TIME_CYCLE_2B * dur_cycling
# )

# V_PT_2B = (
#     ASC_PT_1A
#     + segmented_B_TIME_PT_2B * dur_pt
#     + B_COST_1A * cost_transit
# )

# V_car_2B = (
#     ASC_CAR_1A
#     + segmented_B_TIME_CAR_2B * dur_driving
#     + B_COST_1A * cost_driving
# )

# V_2B ={1: V_walk_2B, 2: V_cycle_2B, 3: V_PT_2B, 4: V_car_2B}

# logprob_2B = loglogit(V_2B, None, travel_mode)
# biogeme_2B = bio.BIOGEME(database, logprob_2B)
# biogeme_2B.modelName = 'model_2B'

# results_2B = biogeme_2B.estimate()
# print(results_2B.print_general_statistics())

In [35]:
# Number of estimated parameters:	12
# Sample size:	5000
# Excluded observations:	0
# Init log likelihood:	-4804.767
# Final log likelihood:	-4142.745
# Likelihood ratio test for the init. model:	1324.044
# Rho-square for the init. model:	0.138
# Rho-square-bar for the init. model:	0.135
# Akaike Information Criterion:	8309.491
# Bayesian Information Criterion:	8387.697
# Final gradient norm:	2.6260E-01
# Nbr of threads:	8

In [36]:
# display(results_2B.get_estimated_parameters())

The pt time is worse if you have a driving license

To compare the models we use the log likehood test.

The null hypothesis, H0, is that the model and the restriced version are equivalent.

In [37]:
# lr_result2A = results_2A.likelihood_ratio_test(results_1A, 0.05)
# print(f'{lr_result2A.statistic=:.3g}')
# print(f'{lr_result2A.threshold=:.3g}')
# print(lr_result2A.message)


In [38]:
# lr_result2B = results_2B.likelihood_ratio_test(results_1A, 0.05)
# print(f'{lr_result2B.statistic=:.3g}')
# print(f'{lr_result2B.threshold=:.3g}')
# print(lr_result2B.message)

Both models rejected H0 with the likelihood test, so both are better then the previous prefered model. Since the AIC and BIC are better for 2A that will be our new prefered.

In this model we chose a socio-economic characteristic, agee, and interacted it with both the ASC and one of the attributes.

It is easy to see that age has a significant impact on the choice.

In [46]:
age = Variable('age')
beta_age_cycle = Beta('beta_age_cycle', 0, None, None, 0)
beta_age_pt = Beta('beta_age_pt', 0, None, None, 0)
beta_age_car = Beta('beta_age_car', 0, None, None, 0)

In [47]:
V_walk_2A = (
    B_TIME_WALK_1A * dur_walking
)

V_cycle_2A = (
    ASC_CYCLE_1A
    + beta_age_cycle * age
    + B_TIME_CYCLE_1A * dur_cycling
)

V_PT_2A = (
    ASC_PT_1A
    + beta_age_pt * age
    + B_TIME_PT_1A * dur_pt
    + B_COST_1A * cost_transit
)

V_car_2A = (
    ASC_CAR_1A
    + beta_age_car * age
    + B_TIME_CAR_1A * dur_driving
    + B_COST_1A * cost_driving
)

V_2A ={1: V_walk_2A, 2: V_cycle_2A, 3: V_PT_2A, 4: V_car_2A}

logprob_2A = loglogit(V_2A, None, travel_mode)
biogeme_2A = bio.BIOGEME(database, logprob_2A)
biogeme_2A.modelName = 'model_2A'

results_2A = biogeme_2A.estimate()
print(results_2A.print_general_statistics())

Number of estimated parameters:	11
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-4408.559
Final log likelihood:	-4285.995
Likelihood ratio test for the init. model:	245.1281
Rho-square for the init. model:	0.0278
Rho-square-bar for the init. model:	0.0253
Akaike Information Criterion:	8593.989
Bayesian Information Criterion:	8665.678
Final gradient norm:	1.1978E-01
Nbr of threads:	8



In [48]:
display(results_2A.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR_1A,-2.875332,0.176802,-16.262979,0.0
ASC_CYCLE_1A,-4.943005,0.262963,-18.797319,0.0
ASC_PT_1A,-3.058859,0.175228,-17.45645,0.0
B_COST_1A,-0.141669,0.015354,-9.226816,0.0
B_TIME_CAR_1A,-6.061088,0.404062,-15.000401,0.0
B_TIME_CYCLE_1A,-5.225049,0.472664,-11.054471,0.0
B_TIME_PT_1A,-3.244592,0.253159,-12.816425,0.0
B_TIME_WALK_1A,-8.761601,0.43081,-20.337516,0.0
beta_age_car,0.018775,0.002321,8.090001,6.661338e-16
beta_age_cycle,0.002599,0.004206,0.617791,0.5367131


In [49]:
lr_result2A = results_2A.likelihood_ratio_test(results_1A, 0.05)
print(f'{lr_result2A.statistic=:.3g}')
print(f'{lr_result2A.threshold=:.3g}')
print(lr_result2A.message)

lr_result2A.statistic=67.8
lr_result2A.threshold=7.81
H0 can be rejected at level 5.0%


In [43]:
age = Variable('age')


V_walk_2B = (
    B_TIME_WALK_1A * dur_walking 
)

V_cycle_2B = (
    ASC_CYCLE_1A
    + B_TIME_CYCLE_1A * dur_cycling 
)

V_PT_2B = (
    ASC_PT_1A
    + B_TIME_PT_1A * dur_pt 
    + B_COST_1A * cost_transit * age
)

V_car_2B = (
    ASC_CAR_1A
    + B_TIME_CAR_1A * dur_driving 
    + B_COST_1A * cost_driving * age
)

V_2B ={1: V_walk_2B, 2: V_cycle_2B, 3: V_PT_2B, 4: V_car_2B}

logprob_2B = loglogit(V_2B, None, travel_mode)
biogeme_2B = bio.BIOGEME(database, logprob_2B)
biogeme_2B.modelName = 'model_2B'

results_2B = biogeme_2B.estimate()
print(results_2B.print_general_statistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-25467.99
Final log likelihood:	-4332.703
Likelihood ratio test for the init. model:	42270.57
Rho-square for the init. model:	0.83
Rho-square-bar for the init. model:	0.83
Akaike Information Criterion:	8681.407
Bayesian Information Criterion:	8733.544
Final gradient norm:	2.6266E+00
Nbr of threads:	8



In [44]:
display(results_2B.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_CAR_1A,-2.093036,0.133416,-15.687995,0.0
ASC_CYCLE_1A,-4.81017,0.193321,-24.881833,0.0
ASC_PT_1A,-2.561057,0.134966,-18.975602,0.0
B_COST_1A,-0.003016,0.000445,-6.77969,1.204348e-11
B_TIME_CAR_1A,-6.261054,0.403188,-15.528877,0.0
B_TIME_CYCLE_1A,-5.18582,0.467727,-11.087282,0.0
B_TIME_PT_1A,-3.334521,0.253651,-13.146087,0.0
B_TIME_WALK_1A,-8.657642,0.418304,-20.697027,0.0


In [45]:
lr_result2B = results_2B.likelihood_ratio_test(results_1A, 0.05)
print(f'{lr_result2B.statistic=:.3g}')
print(f'{lr_result2B.threshold=:.3g}')
print(lr_result2B.message)

lr_result2B.statistic=25.6
lr_result2B.threshold=nan
H0 can be rejected at level 5.0%


## Model 3

Using Model_pref as the base model, include an appropriate non-linear transformation of one of the variables. Report both the specication and the estimation results (as defined previously).

## Model 4