In [54]:
# Translated to .py by Meritxell Pacheco
# 2017
# Adapted to PandasBiogeme by Michel Bierlaire
# Sun Oct 21 23:15:31 2018

import os 

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit, piecewiseFormula

pandas = pd.read_table("lpmc02.dat")
database = db.Database("LPMC",pandas)
pd.options.display.float_format = '{:.3g}'.format

globals().update(database.variables)

# Exclude
# exclude = (  ArrivalTimeHours_1   ==  -1  )
# database.remove(exclude)
  
# Choice
chosenAlternative = travel_mode

## 1. Data, Parameters to estimate

In [55]:
#Parameters to be estimated+ (  BestAlternative_4   *  4  )
# Arguments:
#   1  Name for report. Typically, the same as the variable
#   2  Starting value
#   3  Lower bound
#   4  Upper bound
#   5  0: estimate the parameter, 1: keep it fixed
Constant1 = Beta('Constant1',0,None,None,1)
Constant2 = Beta('Constant2',0,None,None,0)
Constant3 = Beta('Constant3',0,None,None,0)
Constant4 = Beta('Constant4',0,None,None,0)
Cost = Beta('Cost',0,None,None,0)
Total_TT1 = Beta('Total_TT1',0,None,None,0)
Total_TT2 = Beta('Total_TT2',0,None,None,0)
Total_TT3 = Beta('Total_TT3',0,None,None,0)
Total_TT4 = Beta('Total_TT4',0,None,None,0)

CarOwn_2 = Beta('CarOwn_2',0,None,None,0)
CarOwn_3 = Beta('CarOwn_3',0,None,None,0)
CarOwn_4 = Beta('CarOwn_4',0,None,None,0)

LAMBDA = Beta('LAMBDA',1,None,None,0)


# socio-economic factors (interacting with Time)
Time_Age_1 = Beta('Time_Age_1', 0, None, None, 0)
Time_Age_2 = Beta('Time_Age_2', 0, None, None, 0)
Time_Age_3 = Beta('Time_Age_3', 0, None, None, 0)
Time_Age_4 = Beta('Time_Age_4', 0, None, None, 0)

## 2. Model specifications

### 2.1 Power on time

In [56]:
# Define here arithmetic expressions for name that are not directly
# available from the data

dur_walking_squ = DefineVariable('dur_walking_squ', (dur_walking) ** 2, database)
dur_cycling_squ = DefineVariable('dur_cycling_squ', (dur_cycling) ** 2, database)
dur_driving_squ = DefineVariable('dur_driving_squ', (dur_driving) ** 2, database)
dur_public = DefineVariable('dur_public', (dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int) ** 2,database)
dur_public_squ = DefineVariable('dur_public_squ', dur_public ** 2,database)

cost_public = DefineVariable('cost_public', cost_transit ,database)
cost_driving = DefineVariable('cost_driving', cost_driving_fuel + cost_driving_ccharge ,database)


# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking_squ + Time_Age_1 * dur_walking * age
Opt2 = Constant2 + Total_TT2 * dur_cycling_squ + CarOwn_2 * car_ownership + Time_Age_2 * dur_cycling * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public_squ + CarOwn_3 * car_ownership +  Time_Age_3 * dur_public * age
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * dur_driving_squ + CarOwn_4 * car_ownership +\
                    Time_Age_4 * dur_driving * age


V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [57]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_power_time")
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

             Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
CarOwn_2    -0.197    0.136   -1.45    0.148         0.142        -1.39   
CarOwn_3    -0.606    0.075   -8.08 6.66e-16        0.0769        -7.88   
CarOwn_4       1.1   0.0697    15.7        0        0.0668         16.4   
Constant2    -3.63    0.178   -20.4        0         0.198        -18.3   
Constant3    -1.33    0.104   -12.8        0         0.126        -10.5   
Constant4    -2.16     0.11   -19.6        0         0.133        -16.3   
Cost        -0.165   0.0162   -10.1        0        0.0167        -9.88   
Time_Age_1 -0.0873  0.00661   -13.2        0        0.0111        -7.85   
Time_Age_2 -0.0594   0.0147   -4.05 5.19e-05         0.015        -3.96   
Time_Age_3 -0.0473   0.0069   -6.86 7.02e-12       0.00729        -6.49   
Time_Age_4  -0.072   0.0111   -6.49 8.35e-11         0.012        -6.03   
Total_TT1    -3.09    0.286   -10.8        0          1.17        -2.63   
Total_TT2    -2.12    0.5

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn_2,-0.197,0.136,-1.45,0.148,0.142,-1.39,0.164
CarOwn_3,-0.606,0.075,-8.08,6.66e-16,0.0769,-7.88,3.11e-15
CarOwn_4,1.1,0.0697,15.7,0.0,0.0668,16.4,0.0
Constant2,-3.63,0.178,-20.4,0.0,0.198,-18.3,0.0
Constant3,-1.33,0.104,-12.8,0.0,0.126,-10.5,0.0
Constant4,-2.16,0.11,-19.6,0.0,0.133,-16.3,0.0
Cost,-0.165,0.0162,-10.1,0.0,0.0167,-9.88,0.0
Time_Age_1,-0.0873,0.00661,-13.2,0.0,0.0111,-7.85,4.22e-15
Time_Age_2,-0.0594,0.0147,-4.05,5.19e-05,0.015,-3.96,7.54e-05
Time_Age_3,-0.0473,0.0069,-6.86,7.02e-12,0.00729,-6.49,8.82e-11


### 2.2 Box Cox Transforms

In [58]:

# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * ((dur_walking) ** LAMBDA -1)/LAMBDA + Time_Age_1 * dur_walking * age
Opt2 = Constant2 + Total_TT2 * ((dur_cycling) ** LAMBDA -1)/LAMBDA+ CarOwn_2 * car_ownership + Time_Age_2 * dur_cycling * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * (dur_public ** LAMBDA -1)/LAMBDA + CarOwn_3 * car_ownership +  Time_Age_3 * dur_public * age
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * ((dur_driving) ** LAMBDA -1)/LAMBDA + CarOwn_4 * car_ownership +\
                    Time_Age_4 * dur_driving * age


V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [59]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_box_cox")
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

              Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
CarOwn_2     -0.294    0.138   -2.13   0.0329         0.145        -2.03   
CarOwn_3     -0.684   0.0806   -8.49        0        0.0839        -8.15   
CarOwn_4          1   0.0736    13.6        0        0.0718         13.9   
Constant2     -3.67    0.593   -6.19 6.03e-10         0.586        -6.27   
Constant3      1.27    0.334    3.81 0.000139          0.36         3.54   
Constant4     -2.52    0.498   -5.07 3.93e-07         0.593        -4.26   
Cost         -0.152   0.0167   -9.13        0        0.0174        -8.78   
LAMBDA        0.359   0.0692    5.19 2.11e-07        0.0915         3.92   
Time_Age_1  -0.0344  0.00713   -4.83 1.37e-06       0.00703        -4.89   
Time_Age_2  0.00542   0.0151   0.359     0.72        0.0149        0.365   
Time_Age_3 -0.00777  0.00798  -0.974     0.33       0.00963       -0.807   
Time_Age_4  0.00027   0.0131  0.0206    0.984        0.0157       0.0172   
Total_TT1   

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn_2,-0.294,0.138,-2.13,0.0329,0.145,-2.03,0.0422
CarOwn_3,-0.684,0.0806,-8.49,0.0,0.0839,-8.15,4.44e-16
CarOwn_4,1.0,0.0736,13.6,0.0,0.0718,13.9,0.0
Constant2,-3.67,0.593,-6.19,6.03e-10,0.586,-6.27,3.69e-10
Constant3,1.27,0.334,3.81,0.000139,0.36,3.54,0.000399
Constant4,-2.52,0.498,-5.07,3.93e-07,0.593,-4.26,2.04e-05
Cost,-0.152,0.0167,-9.13,0.0,0.0174,-8.78,0.0
LAMBDA,0.359,0.0692,5.19,2.11e-07,0.0915,3.92,8.74e-05
Time_Age_1,-0.0344,0.00713,-4.83,1.37e-06,0.00703,-4.89,9.89e-07
Time_Age_2,0.00542,0.0151,0.359,0.72,0.0149,0.365,0.715


### 2.3 Piecewise on car time

In [60]:
thresholds = [None, 0.5 * pandas.dur_driving.mean(), 1.5 * pandas.dur_driving.mean(), None]
init_Betas_TT4 = [0,0,0]

In [61]:
# Define here arithmetic expressions for name that are not directly
# available from the data

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking + Time_Age_1 * dur_walking * age
Opt2 = Constant2 + Total_TT2 * dur_cycling + CarOwn_2 * car_ownership + Time_Age_2 * dur_cycling * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public + CarOwn_3 * car_ownership +  Time_Age_3 * dur_public * age
Opt4 = Constant4 + Cost * cost_driving + piecewiseFormula(dur_driving, thresholds, init_Betas_TT4)+ CarOwn_4 * car_ownership +\
                    Time_Age_4 * dur_driving * age


V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [62]:
output_dir = "./model3-output"
filepath = os.path.join(output_dir, "logit_lpmc_model3_piecewise")
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath
results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

                                                   Value  Std err  t-test  \
CarOwn_2                                           -0.28    0.138   -2.03   
CarOwn_3                                          -0.682   0.0797   -8.56   
CarOwn_4                                            1.02   0.0725    14.1   
Constant2                                          -4.31    0.199   -21.7   
Constant3                                          -2.55    0.138   -18.4   
Constant4                                          -2.81    0.176     -16   
Cost                                               -0.16    0.017   -9.41   
Time_Age_1                                       -0.0382  0.00747   -5.11   
Time_Age_2                                        0.0171   0.0175   0.979   
Time_Age_3                                       0.00353   0.0107   0.331   
Time_Age_4                                        0.0209   0.0154    1.36   
Total_TT1                                          -6.55    0.386     -17   

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn_2,-0.28,0.138,-2.03,0.0425,0.144,-1.94,0.0522
CarOwn_3,-0.682,0.0797,-8.56,0.0,0.082,-8.32,0.0
CarOwn_4,1.02,0.0725,14.1,0.0,0.0697,14.7,0.0
Constant2,-4.31,0.199,-21.7,0.0,0.236,-18.3,0.0
Constant3,-2.55,0.138,-18.4,0.0,0.186,-13.7,0.0
Constant4,-2.81,0.176,-16.0,0.0,0.172,-16.4,0.0
Cost,-0.16,0.017,-9.41,0.0,0.0182,-8.81,0.0
Time_Age_1,-0.0382,0.00747,-5.11,3.18e-07,0.00775,-4.93,8.36e-07
Time_Age_2,0.0171,0.0175,0.979,0.328,0.0173,0.99,0.322
Time_Age_3,0.00353,0.0107,0.331,0.741,0.0125,0.283,0.777


### Let's compare all these models with Model2, and find the best one between all of them

First, we can directly dismiss the Model2.1 (Power on time) since it has the same number of estimates as Model2 but a lower loglikelihood

#### Let's compare Model2.2(Box Cox Transforms) with Model2

In order to compare the two models, we take this one as the unrestricted model, and Model2 is the restricted one. Model 2 produces 15 estimates while Model 2.2 produces 16. Thus we have to compare our test statistic with the chi squared ditribution with 1 degree of freedom $\chi^2_{1}$ .

In [4]:
LL_Model2 = - 3727.612
LL_Model2_2 = - 3659.904
-2 * (LL_Model2 - LL_Model2_2)

135.41600000000017

- 2 *(LLModel_2_2 - LLModel_2) = 135.416 > $\chi^2_{1,0.99}$ = 6.63

Thus we can reject Model2 at 99% level of confidence, and we keep Model2.2 as our new preferred model.

#### Let's compare Model2.3 with Model 2.2

We have that Model2.3 produces higher estimates and produces a lower loglikelihood than Model2.2. Thus we can clearly keep Model 2.2 as our preferred model