In [11]:
import os, glob

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit, piecewiseFormula

pandas = pd.read_table("lpmc02.dat")
database = db.Database("LPMC",pandas)
pd.options.display.float_format = '{:.3g}'.format

globals().update(database.variables)

# Exclude
# exclude = (  ArrivalTimeHours_1   ==  -1  )
# database.remove(exclude)
  
# Choice
chosenAlternative = travel_mode

## 1. Data and Parameters to estimate

In [12]:
#Parameters to be estimated+ (  BestAlternative_4   *  4  )
# Arguments:
#   1  Name for report. Typically, the same as the variable
#   2  Starting value
#   3  Lower bound
#   4  Upper bound
#   5  0: estimate the parameter, 1: keep it fixed
Constant1 = Beta('Constant1',0,None,None,1)
Constant2 = Beta('Constant2',0,None,None,0)
Constant3 = Beta('Constant3',0,None,None,0)
Constant4 = Beta('Constant4',0,None,None,0)
Cost = Beta('Cost',0,None,None,0)
Total_TT1 = Beta('Total_TT1',0,None,None,0)
Total_TT2 = Beta('Total_TT2',0,None,None,0)
Total_TT3 = Beta('Total_TT3',0,None,None,0)
Total_TT4 = Beta('Total_TT4',0,None,None,0)

CarOwn_2 = Beta('CarOwn_2',0,None,None,0)
CarOwn_3 = Beta('CarOwn_3',0,None,None,0)
CarOwn_4 = Beta('CarOwn_4',0,None,None,0)

LAMBDA = Beta('LAMBDA',1,None,None,0)


# socio-economic factors (interacting with Time)
Time_Age_1 = Beta('Time_Age_1', 0, None, None, 0)
Time_Age_2 = Beta('Time_Age_2', 0, None, None, 0)
Time_Age_3 = Beta('Time_Age_3', 0, None, None, 0)
Time_Age_4 = Beta('Time_Age_4', 0, None, None, 0)

## 2. Model specifications

### 2.1 Power on time

In [13]:
# Define here arithmetic expressions for name that are not directly
# available from the data

dur_walking_squ = DefineVariable('dur_walking_squ', (dur_walking) ** 2, database)
dur_cycling_squ = DefineVariable('dur_cycling_squ', (dur_cycling) ** 2, database)
dur_driving_squ = DefineVariable('dur_driving_squ', (dur_driving) ** 2, database)
dur_public = DefineVariable('dur_public', (dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int) ** 2,database)
dur_public_squ = DefineVariable('dur_public_squ', dur_public ** 2,database)

cost_public = DefineVariable('cost_public', cost_transit ,database)
cost_driving = DefineVariable('cost_driving', cost_driving_fuel + cost_driving_ccharge ,database)


# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking_squ + Time_Age_1 * dur_walking * age
Opt2 = Constant2 + Total_TT2 * dur_cycling_squ + CarOwn_2 * car_ownership + Time_Age_2 * dur_cycling * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public_squ + CarOwn_3 * car_ownership +  Time_Age_3 * dur_public * age
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * dur_driving_squ + CarOwn_4 * car_ownership +\
                    Time_Age_4 * dur_driving * age


V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [14]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_Power")
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    
# delete previously saved html and pickle
for file in glob.glob(f"{filepath}*"):
    os.remove(file)
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -3858.456
rho bar square = 0.441
Output file: ./model3-output/logit_lpmc_model3_Power.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn_2,-0.197,0.136,-1.45,0.148,0.142,-1.39,0.164
CarOwn_3,-0.606,0.075,-8.08,6.66e-16,0.0769,-7.88,3.11e-15
CarOwn_4,1.1,0.0697,15.7,0.0,0.0668,16.4,0.0
Constant2,-3.63,0.178,-20.4,0.0,0.198,-18.3,0.0
Constant3,-1.33,0.104,-12.8,0.0,0.126,-10.5,0.0
Constant4,-2.16,0.11,-19.6,0.0,0.133,-16.3,0.0
Cost,-0.165,0.0162,-10.1,0.0,0.0167,-9.88,0.0
Time_Age_1,-0.0873,0.00661,-13.2,0.0,0.0111,-7.85,4.22e-15
Time_Age_2,-0.0594,0.0147,-4.05,5.19e-05,0.015,-3.96,7.54e-05
Time_Age_3,-0.0473,0.0069,-6.86,7.02e-12,0.00729,-6.49,8.83e-11


### 2.2 Box-Cox Transforms

In [15]:
# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * ((dur_walking) ** LAMBDA -1)/LAMBDA + Time_Age_1 * dur_walking * age
Opt2 = Constant2 + Total_TT2 * ((dur_cycling) ** LAMBDA -1)/LAMBDA+ CarOwn_2 * car_ownership +\
                    Time_Age_2 * dur_cycling * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * (dur_public ** LAMBDA -1)/LAMBDA + CarOwn_3 * car_ownership +\
                    Time_Age_3 * dur_public * age
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * ((dur_driving) ** LAMBDA -1)/LAMBDA +\
                    CarOwn_4 * car_ownership + Time_Age_4 * dur_driving * age


V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [16]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_BoxCox")
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    
# delete previously saved html and pickle
for file in glob.glob(f"{filepath}*"):
    os.remove(file)
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -3659.904
rho bar square = 0.47
Output file: ./model3-output/logit_lpmc_model3_BoxCox.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn_2,-0.295,0.138,-2.13,0.0328,0.145,-2.03,0.0421
CarOwn_3,-0.684,0.0806,-8.49,0.0,0.0839,-8.15,4.44e-16
CarOwn_4,1.0,0.0736,13.6,0.0,0.0718,13.9,0.0
Constant2,-3.67,0.594,-6.19,5.95e-10,0.586,-6.27,3.69e-10
Constant3,1.27,0.334,3.8,0.000142,0.36,3.53,0.000408
Constant4,-2.53,0.498,-5.08,3.85e-07,0.593,-4.26,2.02e-05
Cost,-0.152,0.0167,-9.13,0.0,0.0174,-8.78,0.0
LAMBDA,0.359,0.0691,5.2,2.02e-07,0.0915,3.93,8.59e-05
Time_Age_1,-0.0344,0.00713,-4.83,1.39e-06,0.00703,-4.89,1e-06
Time_Age_2,0.0055,0.0151,0.364,0.716,0.0149,0.37,0.711


### 2.3 Piecewise on Driving time

In [17]:
thresholds = [None, 0.5 * pandas.dur_driving.mean(), 1.5 * pandas.dur_driving.mean(), None]
init_Betas_TT4 = [0,0,0]

In [19]:
# Define here arithmetic expressions for name that are not directly
# available from the data

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking + Time_Age_1 * dur_walking * age
Opt2 = Constant2 + Total_TT2 * dur_cycling + CarOwn_2 * car_ownership + Time_Age_2 * dur_cycling * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public + CarOwn_3 * car_ownership +\
                    Time_Age_3 * dur_public * age
Opt4 = Constant4 + Cost * cost_driving + piecewiseFormula(dur_driving, thresholds, init_Betas_TT4)+\
                    CarOwn_4 * car_ownership + Time_Age_4 * dur_driving * age


V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [20]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_Piecewise")
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    
# delete previously saved html and pickle
for file in glob.glob(f"{filepath}*"):
    os.remove(file)
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -3711.889
rho bar square = 0.462
Output file: ./model3-output/logit_lpmc_model3_Piecewise.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn_2,-0.28,0.138,-2.03,0.0422,0.144,-1.94,0.0519
CarOwn_3,-0.683,0.0797,-8.56,0.0,0.082,-8.32,0.0
CarOwn_4,1.02,0.0725,14.1,0.0,0.0697,14.7,0.0
Constant2,-4.31,0.199,-21.7,0.0,0.236,-18.3,0.0
Constant3,-2.55,0.138,-18.4,0.0,0.186,-13.7,0.0
Constant4,-2.81,0.176,-15.9,0.0,0.172,-16.3,0.0
Cost,-0.16,0.017,-9.41,0.0,0.0182,-8.81,0.0
Time_Age_1,-0.0382,0.00747,-5.11,3.2e-07,0.00775,-4.93,8.42e-07
Time_Age_2,0.0171,0.0175,0.978,0.328,0.0173,0.99,0.322
Time_Age_3,0.00355,0.0107,0.333,0.739,0.0125,0.285,0.776
