In [9]:
import os 
import shutil

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit,boxcox

### 1. Data, Alternatives, and Parameters to estimate

In [29]:
pandas = pd.read_table("lpmc02.dat")
database = db.Database("LPMC",pandas)
pd.options.display.float_format = '{:.3g}'.format

globals().update(database.variables) # register variables using those in the 'database

In [30]:
# Choice
chosenAlternative = travel_mode

In [31]:
#Parameters to be estimated+ (  BestAlternative_4   *  4  )
# Arguments:
#   1  Name for report. Typically, the same as the variable
#   2  Starting value
#   3  Lower bound
#   4  Upper bound
#   5  0: estimate the parameter, 1: keep it fixed
Constant1 = Beta('Constant1',0,None,None,1)
Constant2 = Beta('Constant2',0,None,None,0)
Constant3 = Beta('Constant3',0,None,None,0)
Constant4 = Beta('Constant4',0,None,None,0)
Cost = Beta('Cost',0,None,None,0)
Total_TT1 = Beta('Total_TT1',0,None,None,0)
Total_TT2 = Beta('Total_TT2',0,None,None,0)
Total_TT3 = Beta('Total_TT3',0,None,None,0)
Total_TT4 = Beta('Total_TT4',0,None,None,0)
CarOwn = Beta('CarOwn',0,None,None,0)


# socio-economic factors (interacting with Time)
Time_Female = Beta('Time_Female', 0, None, None, 0)
Time_Age = Beta('Time_Age', 0, None, None, 0)

### 2. Variable Specification

### 3. Models
Consulted the following : https://transp-or.epfl.ch/courses/ANTWERP07/06-tests.pdf in the process.

##### a. Power on Time (ATTENTION: NOT TOO SURE ABOUT THIS => should we try adding power on cost instead?)

In [32]:
# Define here arithmetic expressions for name that are not directly
# available from the data

dur_walking_sq = DefineVariable('dur_walking_sq', dur_walking**2, database)
dur_cycling_sq = DefineVariable('dur_cycling_sq', dur_cycling**2, database)
dur_driving_sq = DefineVariable('dur_driving_sq', dur_driving**2, database)
dur_public_sq = DefineVariable('dur_public_sq',
                               (dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int)**2,database)

cost_public = DefineVariable('cost_public', cost_transit, database)
cost_driving = DefineVariable('cost_driving', cost_driving_fuel + cost_driving_ccharge,database)

In [19]:
# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking_sq + Time_Age * dur_walking_sq * age
Opt2 = Constant2 + Total_TT2 * dur_cycling_sq + Time_Age * dur_cycling_sq * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public_sq + Time_Age * dur_public_sq * age
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * dur_driving_sq + CarOwn * car_ownership +\
                    Time_Age * dur_driving_sq * age
    
V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [20]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_Power")

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -3984.507
rho bar square = 0.424
Output file: ./model3-output/logit_lpmc_model3_Power~00.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn,1.48,0.0523,28.3,0.0,0.0511,29.0,0.0
Constant2,-3.38,0.119,-28.3,0.0,0.227,-14.9,0.0
Constant3,-0.938,0.0698,-13.4,0.0,0.195,-4.81,1.53e-06
Constant4,-2.09,0.0885,-23.6,0.0,0.207,-10.1,0.0
Cost,-0.176,0.0158,-11.2,0.0,0.016,-11.0,0.0
Time_Age,-0.0374,0.00575,-6.5,7.94e-11,0.00757,-4.94,7.83e-07
Total_TT1,-4.26,0.343,-12.4,0.0,1.41,-3.03,0.00248
Total_TT2,-2.37,0.476,-4.98,6.2e-07,0.59,-4.02,5.72e-05
Total_TT3,-0.186,0.234,-0.796,0.426,0.287,-0.649,0.516
Total_TT4,-3.12,0.346,-9.02,0.0,0.48,-6.5,8.3e-11


##### b. Box-Cox Transform

In [33]:
dur_walking_bc = DefineVariable('dur_walking_bc', boxcox(dur_walking, 2), database)
dur_cycling_bc = DefineVariable('dur_cycling_bc', boxcox(dur_cycling, 2), database)
dur_public_bc = DefineVariable('dur_public_bc', boxcox(dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int, 2),
                              database)
dur_driving_bc = DefineVariable('dur_driving_bc', boxcox(dur_driving, 2), database)

In [34]:
Opt1 = Constant1 + Total_TT1 * dur_walking_bc + Time_Age * dur_walking_bc * age
Opt2 = Constant2 + Total_TT2 * dur_cycling_bc + Time_Age * dur_cycling_bc * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public_bc + Time_Age * dur_public_bc * age
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * dur_driving_bc + CarOwn * car_ownership +\
                    Time_Age * dur_driving_bc * age


V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [39]:
# Create new folder for output files (and remove old existing files) 
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_Box-Cox")

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)

biogeme.modelName = filepath
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -3984.507
rho bar square = 0.424
Output file: ./model3-output/logit_lpmc_model3_Box-Cox.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn,1.48,0.0523,28.3,0.0,0.0511,29.0,0.0
Constant2,-1.5,0.454,-3.31,0.000946,1.11,-1.35,0.175
Constant3,3.13,0.269,11.6,0.0,1.03,3.04,0.0024
Constant4,-0.954,0.38,-2.51,0.012,1.07,-0.889,0.374
Cost,-0.176,0.0158,-11.2,0.0,0.016,-11.0,0.0
Time_Age,-0.0747,0.0115,-6.5,8.06e-11,0.0151,-4.94,7.92e-07
Total_TT1,-8.51,0.685,-12.4,0.0,2.81,-3.03,0.00248
Total_TT2,-4.75,0.953,-4.99,6.13e-07,1.18,-4.02,5.7e-05
Total_TT3,-0.375,0.468,-0.8,0.424,0.574,-0.652,0.514
Total_TT4,-6.25,0.692,-9.02,0.0,0.961,-6.5,8.23e-11


##### c. Piece-wise