In [1]:
import os 
import shutil

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit,boxcox

### 1. Data, Alternatives, and Parameters to estimate

In [2]:
pandas = pd.read_table("lpmc02.dat")
database = db.Database("LPMC",pandas)
pd.options.display.float_format = '{:.3g}'.format

globals().update(database.variables) # register variables using those in the 'database

In [3]:
# Choice
chosenAlternative = travel_mode

In [4]:
#Parameters to be estimated+ (  BestAlternative_4   *  4  )
# Arguments:
#   1  Name for report. Typically, the same as the variable
#   2  Starting value
#   3  Lower bound
#   4  Upper bound
#   5  0: estimate the parameter, 1: keep it fixed
Constant1 = Beta('Constant1',0,None,None,1)
Constant2 = Beta('Constant2',0,None,None,0)
Constant3 = Beta('Constant3',0,None,None,0)
Constant4 = Beta('Constant4',0,None,None,0)
Cost = Beta('Cost',0,None,None,0)
Total_TT1 = Beta('Total_TT1',0,None,None,0)
Total_TT2 = Beta('Total_TT2',0,None,None,0)
Total_TT3 = Beta('Total_TT3',0,None,None,0)
Total_TT4 = Beta('Total_TT4',0,None,None,0)
Dist = Beta('Dist',0,None,None,0)

### 2. Variable Specification

In [5]:
# Define here arithmetic expressions for name that are not directly
# available from the data

dur_public = DefineVariable('dur_public', dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int ,database)
cost_public = DefineVariable('cost_public', cost_transit ,database)
cost_driving = DefineVariable('cost_driving', cost_driving_fuel + cost_driving_ccharge ,database)

### 3. Models
Consulted the following : https://transp-or.epfl.ch/courses/ANTWERP07/06-tests.pdf in the process.

##### a. Power Series

In [6]:
# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking**2 + Dist * distance**3
Opt2 = Constant2 + Total_TT2 * dur_cycling**2 + Dist * distance**3
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public**2 + Dist * distance**3
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * dur_driving**2 + Dist * distance**3
V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [11]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_Power")

if not os.path.exists(output_dir):
    os.mkdir(output_dir)
if os.path.exists(filepath):
    os.remove(f"{filepath}.html")

logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -4519.228
rho bar square = 0.347
Output file: ./model3-output/logit_lpmc_model3_Power.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
Constant2,-3.37,0.118,-28.6,0,0.222,-15.2,0.0
Constant3,-0.909,0.068,-13.4,0,0.189,-4.82,1.45e-06
Constant4,-0.554,0.0642,-8.64,0,0.192,-2.89,0.0039
Cost,-0.172,0.0143,-12.0,0,0.0136,-12.6,0.0
Dist,-4.59e-06,1.8e+308,-2.55e-314,1,2.38e-14,-193000000.0,0.0
Total_TT1,-5.62,0.285,-19.7,0,1.24,-4.53,5.89e-06
Total_TT2,-3.75,0.431,-8.7,0,0.522,-7.19,6.41e-13
Total_TT3,-1.66,0.151,-11.0,0,0.178,-9.37,0.0
Total_TT4,-4.61,0.311,-14.8,0,0.454,-10.2,0.0


##### b. Box-Cox Transform

In [19]:
Opt1 = Constant1 + Total_TT1 * boxcox(dur_walking,2) + Dist * distance
Opt2 = Constant2 + Total_TT2 * boxcox(dur_cycling,2) + Dist * distance
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * boxcox(dur_public,2) + Dist * distance
Opt4 = Constant4 + Cost * cost_driving + Total_TT4*boxcox(dur_driving,2) + Dist * distance
V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [13]:
# Create new folder for output files (and remove old existing files) 
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_Box-Cox")

if not os.path.exists(output_dir):
    os.mkdir(output_dir)
if os.path.exists(filepath):
    os.remove(f"{filepath}.html")

logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)

biogeme.modelName = filepath
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -4519.228
rho bar square = 0.347
Output file: ./model3-output/logit_lpmc_model3_Box-Cox.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
Constant2,-3.37,0.118,-28.6,0,0.222,-15.2,0.0
Constant3,-0.909,0.068,-13.4,0,0.189,-4.82,1.45e-06
Constant4,-0.554,0.0642,-8.64,0,0.192,-2.89,0.0039
Cost,-0.172,0.0143,-12.0,0,0.0136,-12.6,0.0
Dist,-4.59e-06,1.8e+308,-2.55e-314,1,2.38e-14,-193000000.0,0.0
Total_TT1,-5.62,0.285,-19.7,0,1.24,-4.53,5.89e-06
Total_TT2,-3.75,0.431,-8.7,0,0.522,-7.19,6.41e-13
Total_TT3,-1.66,0.151,-11.0,0,0.178,-9.37,0.0
Total_TT4,-4.61,0.311,-14.8,0,0.454,-10.2,0.0


##### c. Piece-wise