In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from IPython.core.display_functions import display
from biogeme.expressions import Beta, Variable
from biogeme.models import loglogit
from biogeme.segmentation import DiscreteSegmentationTuple, segmented_beta
from biogeme.tools.likelihood_ratio import likelihood_ratio_test

In [2]:
# Read the tab-separated file lpmc01.dat into a DataFrame and preview it.
df = pd.read_csv("lpmc01.dat", sep="\t")
display(df)

# Wrap the DataFrame in a Biogeme database named 'lpmc'; the model cells
# below estimate against this object.
database = db.Database("lpmc", df)


Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,12,1,1,0,4,3,1,5,0.0,1,...,0.209167,0.000000,0.123611,0.000000,0,0.141389,0.0,0.51,0.0,0.090373
1,17,3,1,1,3,1,6,1,1.0,1,...,0.123889,0.000000,0.208056,0.091667,1,0.115556,3.0,0.33,0.0,0.033654
2,51,12,1,1,4,5,2,1,1.0,1,...,0.082222,0.000000,0.547500,0.133333,1,0.355556,3.0,1.12,0.0,0.302344
3,67,13,1,6,4,3,1,5,0.0,1,...,0.032500,0.000000,0.391667,0.000000,0,0.206944,0.0,0.67,0.0,0.159732
4,74,14,0,3,4,3,1,5,0.0,1,...,0.136389,0.000000,0.033889,0.000000,0,0.067778,0.0,0.20,0.0,0.151639
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,81003,17593,0,1,4,5,1,1,1.0,3,...,0.069167,0.000000,0.200000,0.000000,0,0.156389,1.5,0.51,0.0,0.314387
4996,81009,17595,0,2,3,3,6,1,1.0,3,...,0.064722,0.000000,0.305278,0.000000,0,0.257500,1.5,0.57,0.0,0.354908
4997,81038,17604,1,4,4,3,5,1,1.0,3,...,0.022778,0.000000,0.233611,0.000000,0,0.157778,1.5,0.55,0.0,0.366197
4998,81071,17610,0,0,4,3,2,1,1.0,3,...,0.206111,0.000000,0.400000,0.000000,0,0.259167,1.5,0.88,0.0,0.233655


## Variable Definition

In [3]:
# Symbolic Biogeme variables. Each one is constructed from the string equal to
# its Python name, which is expected to be a column of the database.
# The section headings below group the variables by name only.

# Identifiers
trip_id = Variable("trip_id")
household_id = Variable("household_id")
person_n = Variable("person_n")
trip_n = Variable("trip_n")

# Trip descriptors (travel_mode is the choice variable used by the models)
travel_mode = Variable("travel_mode")
purpose = Variable("purpose")
fueltype = Variable("fueltype")
faretype = Variable("faretype")
bus_scale = Variable("bus_scale")

# Survey / travel date and time
survey_year = Variable("survey_year")
travel_year = Variable("travel_year")
travel_month = Variable("travel_month")
travel_date = Variable("travel_date")
day_of_week = Variable("day_of_week")
start_time = Variable("start_time")

# Traveller attributes
age = Variable("age")
female = Variable("female")
driving_license = Variable("driving_license")
car_ownership = Variable("car_ownership")

# Distance and durations
distance = Variable("distance")
dur_walking = Variable("dur_walking")
dur_cycling = Variable("dur_cycling")
# Predicted total access and egress time for public transport route in hours
dur_pt_access = Variable("dur_pt_access")
dur_pt_rail = Variable("dur_pt_rail")
dur_pt_bus = Variable("dur_pt_bus")
# Time taken (hrs) at each interchange point
dur_pt_int = Variable("dur_pt_int")
# Number of interchange points in public transport route
pt_interchanges = Variable("pt_interchanges")
dur_driving = Variable("dur_driving")

# Costs and traffic
cost_transit = Variable("cost_transit")
# Estimated fuel cost of driving route in GBP
cost_driving_fuel = Variable("cost_driving_fuel")
# Estimated congestion charge cost of driving route in GBP
cost_driving_ccharge = Variable("cost_driving_ccharge")
driving_traffic_percent = Variable("driving_traffic_percent")

In [4]:
# MODEL 0 -- base logit: one generic travel-time coefficient and one generic
# cost coefficient shared by all alternatives.

# --- Attributes --------------------------------------------------------------
# Driving cost is the sum of the congestion charge and the fuel cost.
cost_driving = cost_driving_ccharge + cost_driving_fuel

# Public transport time is the sum of access/egress, interchange, bus and rail.
dur_pt = dur_pt_access + dur_pt_int + dur_pt_bus + dur_pt_rail

# Travel time per mode
time_walking = dur_walking
time_cycling = dur_cycling
time_pt = dur_pt
time_driving = dur_driving

# Product of the two "non-zero" indicators (car ownership, driving licence).
# NOTE: it is not passed to loglogit below -- the availability argument is None.
car_AV = (car_ownership != 0) * (driving_license != 0)

# --- Parameters --------------------------------------------------------------
# Positional arguments are (name, value, lowerbound, upperbound, status).
# Walking carries no constant, i.e. the model is normalized with asc_walking = 0.
asc_pt = Beta('asc_pt', 0, None, None, 0)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_tt = Beta('beta_tt', 0, None, None, 0)

# --- Utility functions -------------------------------------------------------
v_walking = beta_tt * time_walking
v_cycling = asc_cycling + beta_tt * time_cycling
v_pt = asc_pt + beta_tt * time_pt + beta_cost * cost_transit
v_driving = asc_driving + beta_tt * time_driving + beta_cost * cost_driving

# Keys are the travel_mode codes associated with each utility.
V = {
    1: v_walking,
    2: v_cycling,
    3: v_pt,
    4: v_driving,
}

# --- Estimation --------------------------------------------------------------
# No availability conditions (None); the observed choice is travel_mode.
logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_0'

results = biogeme.estimate()

# Kept for the likelihood ratio tests in the following cells.
model_0_loglike = results.data.logLike
model_0_numParam = results.get_estimated_parameters().shape[0]


In [None]:
# MODEL 1 -- alternative-specific travel-time coefficients, generic cost.
#
# The attribute expressions (cost_driving, time_walking, time_cycling, time_pt,
# time_driving) are the ones built in the Model 0 cell. This cell already
# depends on that cell for model_0_loglike / model_0_numParam, so they are
# reused here rather than copy-pasted.

# Model normalized with asc_walking = 0
asc_pt = Beta(name='asc_pt', value=0, lowerbound=None, upperbound=None, status=0)
asc_cycling = Beta(name='asc_cycling', value=0, lowerbound=None, upperbound=None, status=0)
asc_driving = Beta(name='asc_driving', value=0, lowerbound=None, upperbound=None, status=0)

# One cost coefficient shared by public transport and driving.
beta_cost = Beta(name='beta_cost', value=0, lowerbound=None, upperbound=None, status=0)

# One travel-time coefficient per alternative (Model 0 had a single beta_tt).
beta_tt_walking = Beta(name='beta_tt_walking', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_cycling = Beta(name='beta_tt_cycling', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_pt = Beta(name='beta_tt_pt', value=0, lowerbound=None, upperbound=None, status=0)
beta_tt_driving = Beta(name='beta_tt_driving', value=0, lowerbound=None, upperbound=None, status=0)

v_walking = beta_tt_walking * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling
v_pt = asc_pt + beta_tt_pt * time_pt + beta_cost * cost_transit
v_driving = asc_driving + beta_tt_driving * time_driving + beta_cost * cost_driving

# Keys are the travel_mode codes associated with each utility.
V = {1: v_walking, 2: v_cycling, 3: v_pt, 4: v_driving}

# No availability conditions (None); the observed choice is travel_mode.
logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_1'

results = biogeme.estimate()
model_1_loglike = results.data.logLike
model_1_numParam = results.get_estimated_parameters().shape[0]

# Likelihood ratio (LR) test: this model is the unrestricted one and Model 0 is
# the restricted one (Model 0 is obtained by constraining the four travel-time
# coefficients to be equal). If the null hypothesis can be rejected, this model
# becomes the preferred one.
#
# TODO: ideally, estimate three models -- alternative-specific travel time only,
# alternative-specific cost only, and both -- and run the LR test on every
# nested pair to have evidence for choosing a preferred specification.

# Each model is described by a (final log likelihood, number of parameters)
# tuple; the last argument is the significance level.
likelihood_ratio_test(
    (model_0_loglike, model_0_numParam),
    (model_1_loglike, model_1_numParam),
    0.01,
)



LRTuple(message='H0 can be rejected at level 1.0%', statistic=715.819387924601, threshold=np.float64(11.344866730144373))

In [None]:
# MODEL "cost" -- Model 1 plus alternative-specific cost coefficients.
# Reuses the attribute expressions (time_*, cost_driving) built in the earlier
# model cells.

# Positional arguments are (name, value, lowerbound, upperbound, status).
# Alternative-specific constants; walking has none.
asc_pt = Beta('asc_pt', 0, None, None, 0)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

# Cost coefficients, one per alternative that has a cost attribute.
beta_cost_pt = Beta('beta_cost_pt', 0, None, None, 0)
beta_cost_driving = Beta('beta_cost_driving', 0, None, None, 0)

# Travel-time coefficients, one per alternative.
beta_tt_walking = Beta('beta_tt_walking', 0, None, None, 0)
beta_tt_cycling = Beta('beta_tt_cycling', 0, None, None, 0)
beta_tt_pt = Beta('beta_tt_pt', 0, None, None, 0)
beta_tt_driving = Beta('beta_tt_driving', 0, None, None, 0)

# Utility functions
v_walking = beta_tt_walking * time_walking
v_cycling = asc_cycling + beta_tt_cycling * time_cycling
v_pt = asc_pt + beta_tt_pt * time_pt + beta_cost_pt * cost_transit
v_driving = asc_driving + beta_tt_driving * time_driving + beta_cost_driving * cost_driving

# Keys are the travel_mode codes associated with each utility.
V = {
    1: v_walking,
    2: v_cycling,
    3: v_pt,
    4: v_driving,
}

# No availability conditions (None); the observed choice is travel_mode.
logprob = loglogit(V, None, travel_mode)

biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_cost'

results = biogeme.estimate()
model_cost_loglike = results.data.logLike
model_cost_numParam = results.get_estimated_parameters().shape[0]

# Likelihood ratio test of Model 1 (a single beta_cost) against this model,
# at the 1% significance level.
likelihood_ratio_test(
    (model_1_loglike, model_1_numParam),
    (model_cost_loglike, model_cost_numParam),
    0.01,
)


LRTuple(message='H0 can be rejected at level 1.0%', statistic=9.964679045688172, threshold=np.float64(6.6348966010212145))