In [3]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from IPython.core.display_functions import display
from biogeme.expressions import Expression, Beta, Variable
from biogeme.models import loglogit, nested
from biogeme.segmentation import DiscreteSegmentationTuple, segmented_beta
from biogeme.tools.likelihood_ratio import likelihood_ratio_test
from biogeme.results import compile_estimation_results
from biogeme.models import boxcox, loglogit, lognested
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit
from pandas import Series, DataFrame


In [4]:
df = pd.read_csv("lpmc01.dat", sep = '\t')
display(df)
database = db.Database('lpmc', df)


Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,12,1,1,0,4,3,1,5,0.0,1,...,0.209167,0.000000,0.123611,0.000000,0,0.141389,0.0,0.51,0.0,0.090373
1,17,3,1,1,3,1,6,1,1.0,1,...,0.123889,0.000000,0.208056,0.091667,1,0.115556,3.0,0.33,0.0,0.033654
2,51,12,1,1,4,5,2,1,1.0,1,...,0.082222,0.000000,0.547500,0.133333,1,0.355556,3.0,1.12,0.0,0.302344
3,67,13,1,6,4,3,1,5,0.0,1,...,0.032500,0.000000,0.391667,0.000000,0,0.206944,0.0,0.67,0.0,0.159732
4,74,14,0,3,4,3,1,5,0.0,1,...,0.136389,0.000000,0.033889,0.000000,0,0.067778,0.0,0.20,0.0,0.151639
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,81003,17593,0,1,4,5,1,1,1.0,3,...,0.069167,0.000000,0.200000,0.000000,0,0.156389,1.5,0.51,0.0,0.314387
4996,81009,17595,0,2,3,3,6,1,1.0,3,...,0.064722,0.000000,0.305278,0.000000,0,0.257500,1.5,0.57,0.0,0.354908
4997,81038,17604,1,4,4,3,5,1,1.0,3,...,0.022778,0.000000,0.233611,0.000000,0,0.157778,1.5,0.55,0.0,0.366197
4998,81071,17610,0,0,4,3,2,1,1.0,3,...,0.206111,0.000000,0.400000,0.000000,0,0.259167,1.5,0.88,0.0,0.233655


## Variable Definition

In [5]:
trip_id = Variable('trip_id')
household_id = Variable('household_id')
person_n = Variable('person_n')
trip_n = Variable('trip_n')
travel_mode = Variable('travel_mode')
purpose = Variable('purpose')
fueltype = Variable('fueltype')
faretype = Variable('faretype')
bus_scale = Variable('bus_scale')
survey_year = Variable('survey_year')
travel_year = Variable('travel_year')
travel_month = Variable('travel_month')
travel_date = Variable('travel_date')
day_of_week = Variable('day_of_week')
start_time = Variable('start_time')
age = Variable('age')
female = Variable('female')
driving_license = Variable('driving_license')
car_ownership = Variable('car_ownership')
distance = Variable('distance')
dur_walking = Variable('dur_walking')
dur_cycling = Variable('dur_cycling')
dur_pt_access = Variable('dur_pt_access') # Predicted total access and egress time for public transport route in hours
dur_pt_rail = Variable('dur_pt_rail')
dur_pt_bus = Variable('dur_pt_bus')
dur_pt_int = Variable('dur_pt_int') # Time taken (hrs) at each interchange point
pt_interchanges = Variable('pt_interchanges')   # Number of interchange points in public transport route
dur_driving = Variable('dur_driving')
cost_transit = Variable('cost_transit')
cost_driving_fuel = Variable('cost_driving_fuel')   # Estimated fuel cost of driving route in GBP
cost_driving_ccharge = Variable('cost_driving_ccharge')  # Estimated congestion charge cost of driving route in GBP
driving_traffic_percent = Variable('driving_traffic_percent')

In [5]:
all_results = {}

In [5]:
# MODEL 0

# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt = Beta(name='beta_tt', value=0, lowerbound=None, upperbound=None, status=0)

v_walking = beta_tt * time_walking
v_cycling = asc_cycling + beta_tt * time_cycling 
v_pt = asc_pt + beta_tt * time_pt + beta_cost * cost_transit
v_driving = asc_driving + beta_tt * time_driving + beta_cost * cost_driving

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_0'

results = biogeme.estimate()
model_0_loglike = results.data.logLike
model_0_numParam = results.get_estimated_parameters().shape[0]

all_results['Model_0'] = biogeme.estimate()


In [6]:
#MODEL 1

# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)

v_walking = beta_tt_walking * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + beta_cost * cost_transit
v_driving = asc_driving + beta_tt_driving * time_driving + beta_cost * cost_driving

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_1'

results = biogeme.estimate()
model_1_loglike = results.data.logLike
model_1_numParam = results.get_estimated_parameters().shape[0]

# I use MLR test, where this is the unrestricted model, and model 0 is the restricted one.
# If I can reject the null hypothesis, then this will become the preferred model.

likelihood_ratio_test([model_0_loglike, model_0_numParam], [model_1_loglike, model_1_numParam], 0.01)

# Ideally, should create 3 models: one with only alternate specific TT, one with alternate specific costs,
# and one with both. Then we should MLR test all possible combinations to have evidence to choose a 
# preferred one. 

print(results.get_estimated_parameters())


all_results['Model_1'] = biogeme.estimate()


                    Value  Rob. Std err  Rob. t-test  Rob. p-value
asc_cycling     -4.602367      0.197138   -23.345903           0.0
asc_driving     -2.115307      0.144921   -14.596239           0.0
asc_pt          -2.599538      0.146552   -17.738021           0.0
beta_cost       -0.180812      0.017547   -10.304241           0.0
beta_tt_cycling -6.462945      0.483849   -13.357354           0.0
beta_tt_driving -6.623753      0.379926   -17.434327           0.0
beta_tt_pt      -3.494667      0.244093   -14.316931           0.0
beta_tt_walking -9.065236      0.456113   -19.874964           0.0


In [7]:
# Adding alternate specifc costs 

asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

beta_cost_pt = Beta(name='beta_cost_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_cost_driving = Beta(name='beta_cost_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)

v_walking = beta_tt_walking * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + beta_cost_pt * cost_transit
v_driving = asc_driving + beta_tt_driving * time_driving + beta_cost_driving * cost_driving

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_1_plus'

results = biogeme.estimate()
model_1_plus_loglike = results.data.logLike
model_1_plus_numParam = results.get_estimated_parameters().shape[0]


In [8]:
# Interaction Age / Travel Time

# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

mx_age = df['age'].max()

# Interacting age with ASCs and travel time

## Does it make sense to interact with asc??

v_walking = beta_tt_walking * time_walking +  (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * cost_transit
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * cost_driving

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_2_tt'

results = biogeme.estimate()
model_2_tt_loglike = results.data.logLike
model_2_tt__numParam = results.get_estimated_parameters().shape[0]

print(results.get_estimated_parameters())

all_results['Model_2_tt'] = biogeme.estimate()



                             Value  Rob. Std err  Rob. t-test  Rob. p-value
asc_cycling              -4.618741      0.200583   -23.026576  0.000000e+00
asc_driving              -2.156608      0.150105   -14.367306  0.000000e+00
asc_pt                   -2.612314      0.149861   -17.431624  0.000000e+00
beta_cost                -0.182681      0.017772   -10.278879  0.000000e+00
beta_tt_cycling          -4.682452      0.865111    -5.412543  6.213609e-08
beta_tt_cycling_interact -4.489886      1.941357    -2.312756  2.073604e-02
beta_tt_driving          -5.824642      0.882429    -6.600696  4.092326e-11
beta_tt_driving_interact -1.944159      2.054091    -0.946481  3.439032e-01
beta_tt_pt               -2.274892      0.486566    -4.675398  2.933843e-06
beta_tt_pt_interact      -3.055052      1.128707    -2.706683  6.795919e-03
beta_tt_walking          -7.455478      0.504921   -14.765621  0.000000e+00
beta_tt_walking_interact -4.133963      1.082997    -3.817151  1.350013e-04


In [9]:
# Interaction Age / ASCs

# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_pt_Age = Beta(name='asc_pt_Age', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling_Age = Beta(name='asc_cycling_Age', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving_Age = Beta(name='asc_driving_Age', value=0, lowerbound=None, upperbound=None, status=0)

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)

mx_age = df['age'].max()

# Interacting age with ASCs and travel time

## Does it make sense to interact with asc??

v_walking = beta_tt_walking * time_walking
v_cycling = asc_cycling + (1/mx_age) * age * asc_cycling_Age  + beta_tt_cycling * time_cycling 
v_pt = asc_pt + (1/mx_age) * age * asc_pt_Age  + beta_tt_pt * time_pt + beta_cost * cost_transit
v_driving = asc_driving + (1/mx_age) * age * asc_driving_Age  + beta_tt_driving * time_driving + beta_cost * cost_driving

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_2_asc'

results = biogeme.estimate()
model_2_asc_loglike = results.data.logLike
model_2_asc_numParam = results.get_estimated_parameters().shape[0]

all_results['Model_2_Asc'] = biogeme.estimate()


In [10]:
# MODEL 3


# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

ell_cost = Beta('lambda_cost', 1, -10, 10, 0)
boxcox_cost_pt = boxcox(cost_transit, ell_cost)
boxcox_cost_driving = boxcox(cost_driving, ell_cost)

mx_age = df['age'].max()

v_walking = beta_tt_walking * time_walking +  (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * boxcox_cost_pt
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * boxcox_cost_driving

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_3_boxcox'

results = biogeme.estimate()
model_3_boxcox_loglike = results.data.logLike
model_3_boxcox_numParam = results.get_estimated_parameters().shape[0]

all_results['Model_3_boxcox'] = biogeme.estimate()



In [11]:
# MODEL 3


# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

square_tt_coef = Beta('square_cost_coef', 0, None, None, 0)
cube_tt_coef = Beta('cube_cost_coef', 0, None, None, 0)

def power_series(the_variable: Expression) -> Expression:
    """Generate the expression of a polynomial of degree 3

    :param the_variable: variable of the polynomial
    """
    return (
        the_variable
        + square_tt_coef * the_variable**2
        + cube_tt_coef * the_variable * the_variable**3
    )


beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

cost_drive_power = power_series(cost_driving)
cost_pt_power = power_series(cost_transit)

mx_age = df['age'].max()

v_walking = beta_tt_walking * time_walking +  (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * cost_pt_power
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * cost_drive_power

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_3_power_series'

results = biogeme.estimate()
model_3_power_Series_loglike = results.data.logLike
model_3_power_Series_numParam = results.get_estimated_parameters().shape[0]

all_results['Model_3_power_series'] = biogeme.estimate()



In [12]:
# MODEL 3


# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

square_tt_coef = Beta('square_cost_coef', 0, None, None, 0)
cube_tt_coef = Beta('cube_cost_coef', 0, None, None, 0)

def power_series(the_variable: Expression) -> Expression:
    """Generate the expression of a polynomial of degree 3

    :param the_variable: variable of the polynomial
    """
    return (
        the_variable
        + square_tt_coef * the_variable**2
        + cube_tt_coef * the_variable * the_variable**3
    )

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

cost_drive_power = power_series(cost_driving)
cost_pt_power = power_series(cost_transit)

mx_age = df['age'].max()

v_walking = beta_tt_walking * time_walking +  (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * cost_pt_power
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * cost_drive_power

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_3_power_series'

results = biogeme.estimate()
model_3_power_Series_loglike = results.data.logLike
model_3_power_Series_numParam = results.get_estimated_parameters().shape[0]

all_results['Model_3_power_series'] = biogeme.estimate()



In [13]:
# MODEL 3


# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

square_tt_coef = Beta('square_cost_coef', 0, None, None, 0)
cube_tt_coef = Beta('cube_cost_coef', 0, None, None, 0)

def power_series(the_variable: Expression) -> Expression:
    """Generate the expression of a polynomial of degree 3

    :param the_variable: variable of the polynomial
    """
    return (
        the_variable
        + square_tt_coef * the_variable**2
        + cube_tt_coef * the_variable * the_variable**3
    )

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

cost_drive_power = power_series(cost_driving)
cost_pt_power = power_series(cost_transit)

mx_age = df['age'].max()

v_walking = beta_tt_walking * time_walking +  (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * cost_pt_power
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * cost_drive_power

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_3'

results = biogeme.estimate()
model_3_loglike = results.data.logLike
model_3_numParam = results.get_estimated_parameters().shape[0]

all_results['Model_3'] = biogeme.estimate()


In [19]:
# MODEL 4


# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

square_tt_coef = Beta('square_cost_coef', 0, None, None, 0)
cube_tt_coef = Beta('cube_cost_coef', 0, None, None, 0)

def power_series(the_variable: Expression) -> Expression:
    """Generate the expression of a polynomial of degree 3

    :param the_variable: variable of the polynomial
    """
    return (
        the_variable
        + square_tt_coef * the_variable**2
        + cube_tt_coef * the_variable * the_variable**3
    )

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

cost_drive_power = power_series(cost_driving)
cost_pt_power = power_series(cost_transit)

mx_age = df['age'].max()

v_walking = beta_tt_walking * time_walking +  (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * cost_pt_power
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * cost_drive_power

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

mu_a = Beta('mu_a', 1, 0, None, 0)
mu_b = Beta('mu_b', 1, 0, None, 0)
nest_a = OneNestForNestedLogit(nest_param=mu_a, list_of_alternatives=[1, 2], name='slow modes')
nest_b = OneNestForNestedLogit(nest_param=mu_b, list_of_alternatives=[3, 4], name='faster modes')
nests = NestsForNestedLogit(choice_set=list(V), tuple_of_nests=(nest_a, nest_b))

logprob_m4 = lognested(V, None, nests, travel_mode)

model_4 = bio.BIOGEME(database, logprob_m4)
model_4.modelName = 'model_4'

results_m4 = model_4.estimate()
model_4_loglike = results_m4.data.logLike
model_4__numParam = results_m4.get_estimated_parameters().shape[0]

results_m4.get_estimated_parameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-6.31487,0.296982,-21.263465,0.0
asc_driving,-2.560703,0.172858,-14.813899,0.0
asc_pt,-3.309398,0.232062,-14.260846,0.0
beta_cost,0.002815,0.000411,6.855414,7.110534e-12
beta_tt_cycling,-4.034149,1.024664,-3.937045,8.249118e-05
beta_tt_cycling_interact,-4.832371,2.224809,-2.172038,0.02985276
beta_tt_driving,-8.124176,1.4317,-5.674497,1.390964e-08
beta_tt_driving_interact,-3.503264,2.579252,-1.358248,0.174385
beta_tt_pt,-2.980188,0.696276,-4.280183,1.867399e-05
beta_tt_pt_interact,-4.449464,1.51757,-2.931966,0.003368238


In [15]:
comparison_table, _ = compile_estimation_results(all_results)
display(comparison_table)



Unnamed: 0,Model_0,Model_1,Model_2_tt,Model_2_Asc,Model_3_boxcox,Model_3_power_series,Model_3,model_4
Number of estimated parameters,5,8,12,11,13,14,14,14
Sample size,5000,5000,5000,5000,5000,5000,5000,5000
Final log likelihood,-4581.895651,-4223.985956,-4193.755258,-4202.066678,-4194.800317,-4185.483739,-4185.484509,-4185.484509
Akaike Information Criterion,9173.791303,8463.971913,8411.510515,8426.133356,8415.600634,8398.967477,8398.969017,8398.969017
Bayesian Information Criterion,9206.377269,8516.109458,8489.716833,8497.822481,8500.324145,8490.208182,8490.209722,8490.209722
asc_cycling (t-test),-3.78 (-36.6),-4.6 (-23.3),-4.62 (-23),-4.81 (-18.1),-4.75 (-21.9),-4.62 (-22.9),-4.62 (-22.9),-4.62 (-22.9)
asc_driving (t-test),-1.29 (-16.2),-2.12 (-14.6),-2.16 (-14.4),-2.76 (-14.8),-2.89 (-13.6),-2.14 (-14.2),-2.14 (-14.2),-2.14 (-14.2)
asc_pt (t-test),-0.526 (-9.6),-2.6 (-17.7),-2.61 (-17.4),-3.01 (-16.1),-2.79 (-17.4),-2.53 (-16.4),-2.53 (-16.4),-2.53 (-16.4)
beta_cost (t-test),-0.191 (-13),-0.181 (-10.3),-0.183 (-10.3),-0.182 (-10.3),-0.468 (-8.91),-0.3 (-4.6),-0.3 (-4.61),-0.3 (-4.61)
beta_tt (t-test),-5.6 (-27.5),,,,,,,


In [8]:
# FORECASTING


# Define driving cost
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Define time taken by each mode of transport
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail  # Public transport (external) time 

time_pt = dur_pt
time_cycling = dur_cycling
time_walking = dur_walking  
time_driving = dur_driving

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

square_tt_coef = Beta('square_cost_coef', 0, None, None, 0)
cube_tt_coef = Beta('cube_cost_coef', 0, None, None, 0)

def power_series(the_variable: Expression) -> Expression:
    """Generate the expression of a polynomial of degree 3

    :param the_variable: variable of the polynomial
    """
    return (
        the_variable
        + square_tt_coef * the_variable**2
        + cube_tt_coef * the_variable * the_variable**3
    )

beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_walking_interact = Beta(name='beta_tt_walking_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling_interact = Beta(name='beta_tt_cycling_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt_interact = Beta(name='beta_tt_pt_interact', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving_interact = Beta(name='beta_tt_driving_interact', value=0, lowerbound=None, upperbound=None, status=0)

cost_drive_power = power_series(cost_driving)
cost_pt_power = power_series(cost_transit)

mx_age = df['age'].max()

v_walking = beta_tt_walking * time_walking +  (1/mx_age) * beta_tt_walking_interact * age * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling + (1/mx_age) * beta_tt_cycling_interact * age * time_cycling 
v_pt = asc_pt + beta_tt_pt * time_pt + (1/mx_age) * beta_tt_pt_interact * age * time_pt + beta_cost * cost_pt_power
v_driving = asc_driving + beta_tt_driving * time_driving + (1/mx_age) * beta_tt_driving_interact * age * time_driving + beta_cost * cost_drive_power

V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

mu_a = Beta('mu_a', 1, 0, None, 0)
mu_b = Beta('mu_b', 1, 0, None, 0)
nest_a = OneNestForNestedLogit(nest_param=mu_a, list_of_alternatives=[1, 2], name='slow modes')
nest_b = OneNestForNestedLogit(nest_param=mu_b, list_of_alternatives=[3, 4], name='faster modes')
nests = NestsForNestedLogit(choice_set=list(V), tuple_of_nests=(nest_a, nest_b))

logprob_forecasting = lognested(V, None, nests, travel_mode)

model_forecasting = bio.BIOGEME(database, logprob_forecasting)
model_forecasting.modelName = 'model_forecasting'

results_forecasting = model_forecasting.estimate()

results_forecasting.get_estimated_parameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-6.312002,0.296802,-21.266708,0.0
asc_driving,-2.560928,0.172833,-14.817325,0.0
asc_pt,-3.309353,0.231994,-14.264834,0.0
beta_cost,0.003255,0.000473,6.886758,5.707879e-12
beta_tt_cycling,-4.054607,1.027368,-3.946596,7.927e-05
beta_tt_cycling_interact,-4.774306,2.219886,-2.150698,0.03150003
beta_tt_driving,-8.130233,1.434114,-5.669168,1.434928e-08
beta_tt_driving_interact,-3.460371,2.574472,-1.344109,0.1789131
beta_tt_pt,-2.984632,0.69708,-4.281622,1.855362e-05
beta_tt_pt_interact,-4.42294,1.513237,-2.922833,0.003468626


In [7]:
data_filtered = df

populations = {
    'female_45_less': 2841376,
    'female_45_or_more': 1519948,
    'male_45_less': 2929408,
    'male_45_or_more': 1379198,
}

total_pop = sum(populations.values())

filters = {
    'male_45_or_more': (data_filtered.age >= 45) & (data_filtered.female == 0),
    'male_45_less': (data_filtered.age < 45) & (data_filtered.female == 0),
    'female_45_or_more': (data_filtered.age >= 45) & (data_filtered.female == 1),
    'female_45_less': (data_filtered.age < 45) & (data_filtered.female == 1),
}

sample_segments = {
    segment_name: segment_rows.sum() for segment_name, segment_rows in filters.items()
}
print(sample_segments)

total_sample = sum(sample_segments.values())
print(f'Sample size: {total_sample}')

weights = {
    segment_name: populations[segment_name] * total_sample / (segment_size * total_pop)
    for segment_name, segment_size in sample_segments.items()
}
print(weights)

{'male_45_or_more': np.int64(896), 'male_45_less': np.int64(1442), 'female_45_or_more': np.int64(984), 'female_45_less': np.int64(1678)}
Sample size: 5000
{'male_45_or_more': np.float64(0.8877139043468962), 'male_45_less': np.float64(1.1715720875375348), 'female_45_or_more': np.float64(0.890816074423909), 'female_45_less': np.float64(0.9765425353056789)}


In [None]:
from biogeme.biogeme import BIOGEME
from biogeme.expressions import Beta, Variable, log, exp

from biogeme import models

for segment_name, segment_rows in filters.items():
    data_filtered.loc[segment_rows, 'weight'] = weights[segment_name]


prob_walk = models.nested(V, None, nests, 1)
prob_cycle = models.nested(V, None, nests, 2)
prob_pt = models.nested(V, None, nests, 3)
prob_car = models.nested(V, None, nests, 4)


weight = Variable('weight')
simulate = {
    'weight': weight,
    'Prob. pt': prob_pt,
    'Prob. car': prob_car,
    'Prob. walk': prob_walk,
    'Prob. cycle': prob_cycle,
}

data_filtered["cost_driving_fuel"] += 1.5
database = db.Database('london', data_filtered)

biosim = BIOGEME(database, simulate)
simulated_values = biosim.simulate(results_forecasting.get_beta_values())
display(simulated_values)

simulated_values['Weighted pt'] = (
    simulated_values['weight'] * simulated_values['Prob. pt']
)
simulated_values['Weighted car'] = (
    simulated_values['weight'] * simulated_values['Prob. car']
)

simulated_values['Weighted walk'] = (
    simulated_values['weight'] * simulated_values['Prob. walk']
)
simulated_values['Weighted cycle'] = (
    simulated_values['weight'] * simulated_values['Prob. cycle']
)


market_share_pt = simulated_values['Weighted pt'].mean()
print(f'Market share for pt: {100*market_share_pt:.1f}%')

market_share_car = simulated_values['Weighted car'].mean()
print(f'Market share for car: {100*market_share_car:.1f}%')

market_share_walk = simulated_values['Weighted walk'].mean()
print(f'Market share for walk: {100*market_share_walk:.1f}%')

market_share_cycle = simulated_values['Weighted cycle'].mean()
print(f'Market share for cycling: {100*market_share_cycle:.1f}%')

model_forecasting.bootstrap_samples = 100
results_bootstrapping = model_forecasting.estimate(run_bootstrap=True)

betas = model_forecasting.free_beta_names
b = results_bootstrapping.get_betas_for_sensitivity_analysis(betas)
left, right = biosim.confidence_intervals(b, 0.9)
    
display(left)

display(right)

TypeError: 'Database' object is not subscriptable

In [None]:
# Calculate weighted probabilities
left['Weighted pt'] = left['weight'] * left['Prob. pt']
left['Weighted car'] = left['weight'] * left['Prob. car']
left['Weighted walk'] = left['weight'] * left['Prob. walk']
left['Weighted cycle'] = left['weight'] * left['Prob. cycle']

right['Weighted pt'] = right['weight'] * right['Prob. pt']
right['Weighted car'] = right['weight'] * right['Prob. car']
right['Weighted walk'] = right['weight'] * right['Prob. walk']
right['Weighted cycle'] = right['weight'] * right['Prob. cycle']

# Calculate mean market shares
market_share_pt = simulated_values['Weighted pt'].mean()
market_share_car = simulated_values['Weighted car'].mean()
market_share_walk = simulated_values['Weighted walk'].mean()
market_share_cycle = simulated_values['Weighted cycle'].mean()

# Calculate confidence intervals
left_market_share_pt = left['Weighted pt'].mean()
right_market_share_pt = right['Weighted pt'].mean()

left_market_share_car = left['Weighted car'].mean()
right_market_share_car = right['Weighted car'].mean()

left_market_share_walk = left['Weighted walk'].mean()
right_market_share_walk = right['Weighted walk'].mean()

left_market_share_cycle = left['Weighted cycle'].mean()
right_market_share_cycle = right['Weighted cycle'].mean()

# Print market shares and confidence intervals
print(f"Market share for pt: {100 * market_share_pt:.1f}% "
      f"CI: [{100 * left_market_share_pt:.1f}%-{100 * right_market_share_pt:.1f}%]")

print(f"Market share for car: {100 * market_share_car:.1f}% "
      f"CI: [{100 * left_market_share_car:.1f}%-{100 * right_market_share_car:.1f}%]")

print(f"Market share for walk: {100 * market_share_walk:.1f}% "
      f"CI: [{100 * left_market_share_walk:.1f}%-{100 * right_market_share_walk:.1f}%]")

print(f"Market share for cycling: {100 * market_share_cycle:.1f}% "
      f"CI: [{100 * left_market_share_cycle:.1f}%-{100 * right_market_share_cycle:.1f}%]")

In [None]:
# Example mapping
labels = {1: 'walk', 2: 'cycling', 3: 'pt', 4: 'car'}

# Map the travel_mode column to the labels
data_filtered['mode_label'] = data_filtered['travel_mode'].map(labels)

# Calculate market shares
market_shares = (
    data_filtered['mode_label']
    .value_counts(normalize=True)  # Get proportions
    .sort_index()  # Ensure consistent order
    * 100  # Convert to percentage
)

# Print market shares
for mode, share in market_shares.items():
    print(f"Market share for {mode}: {share:.1f}%")