In [5]:
import os, glob

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit

# Render floats with three significant digits in every table displayed below.
pd.options.display.float_format = '{:.3g}'.format

# Read the data file and wrap it in a Biogeme database.
# NOTE(review): the DataFrame is bound to the name `pandas`. This works because
# the library is imported as `pd`, but the name is easy to confuse with it.
pandas = pd.read_table("lpmc02.dat")
database = db.Database("LPMC", pandas)

# Publish every database variable as a module-level name, so that the
# specification below can refer to them directly (e.g. `travel_mode`).
globals().update(database.variables)

# Dependent variable of the model: the chosen travel mode.
chosenAlternative = travel_mode

## 1. Data and Parameters to estimate

In [6]:
# Parameters to be estimated.
# Beta arguments, in order:
#   1. name used in the report (typically the same as the Python variable)
#   2. starting value
#   3. lower bound
#   4. upper bound
#   5. status flag -- 0: estimate the parameter, 1: keep it fixed

# Alternative-specific constants. Constant1 is kept fixed (flag 1) at its
# starting value 0; the other three are estimated.
Constant1 = Beta('Constant1', 0, None, None, 1)
Constant2 = Beta('Constant2', 0, None, None, 0)
Constant3 = Beta('Constant3', 0, None, None, 0)
Constant4 = Beta('Constant4', 0, None, None, 0)

# Cost coefficient, shared by the utilities that contain a cost term.
Cost = Beta('Cost', 0, None, None, 0)

# Travel-time coefficients, one per alternative.
Total_TT1 = Beta('Total_TT1', 0, None, None, 0)
Total_TT2 = Beta('Total_TT2', 0, None, None, 0)
Total_TT3 = Beta('Total_TT3', 0, None, None, 0)
Total_TT4 = Beta('Total_TT4', 0, None, None, 0)

# Car-ownership coefficients for alternatives 2 to 4.
CarOwn_2 = Beta('CarOwn_2', 0, None, None, 0)
CarOwn_3 = Beta('CarOwn_3', 0, None, None, 0)
CarOwn_4 = Beta('CarOwn_4', 0, None, None, 0)

# Socio-economic factor: age interacted with travel time, one per alternative.
Time_Age_1 = Beta('Time_Age_1', 0, None, None, 0)
Time_Age_2 = Beta('Time_Age_2', 0, None, None, 0)
Time_Age_3 = Beta('Time_Age_3', 0, None, None, 0)
Time_Age_4 = Beta('Time_Age_4', 0, None, None, 0)

## 2. Variables and Utility Functions

In [7]:
# Derived variables built from the raw database variables.
# Public transport duration: access + rail + bus + interchange components.
dur_public = DefineVariable(
    'dur_public',
    dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int,
    database,
)
cost_public = DefineVariable('cost_public', cost_transit, database)
# Driving cost: fuel plus the `cost_driving_ccharge` component.
cost_driving = DefineVariable(
    'cost_driving',
    cost_driving_fuel + cost_driving_ccharge,
    database,
)

# Utility functions, one per alternative:
#   Opt1 = walking
#   Opt2 = cycling
#   Opt3 = public transport
#   Opt4 = driving
# Each utility has a constant, a travel-time term and a time x age interaction.
# Alternatives 2-4 add car ownership; alternatives 3-4 add the shared cost term.
Opt1 = (
    Constant1
    + Total_TT1 * dur_walking
    + Time_Age_1 * dur_walking * age
)
Opt2 = (
    Constant2
    + Total_TT2 * dur_cycling
    + CarOwn_2 * car_ownership
    + Time_Age_2 * dur_cycling * age
)
Opt3 = (
    Constant3
    + Cost * cost_public
    + Total_TT3 * dur_public
    + CarOwn_3 * car_ownership
    + Time_Age_3 * dur_public * age
)
Opt4 = (
    Constant4
    + Cost * cost_driving
    + Total_TT4 * dur_driving
    + CarOwn_4 * car_ownership
    + Time_Age_4 * dur_driving * age
)

# Map each alternative id to its utility, and to its availability.
# All four alternatives are given availability 1.
V = {1: Opt1, 2: Opt2, 3: Opt3, 4: Opt4}
av = {1: 1, 2: 1, 3: 1, 4: 1}

## 3. Estimation

In [8]:
output_dir = "./model2-output"
filepath = os.path.join(output_dir, "logit_lpmc_model2_CarOwn+Age")

# Create the output folder when it is missing. `exist_ok=True` avoids the
# check-then-create race of exists()/mkdir(), and makedirs also handles a
# nested output_dir.
os.makedirs(output_dir, exist_ok=True)

# Delete the html and pickle files saved by a previous run of this model.
# glob.escape keeps the prefix literal should the path ever contain glob
# metacharacters such as '[' or '?'.
for stale_path in glob.glob(f"{glob.escape(filepath)}*"):
    os.remove(stale_path)

# The choice model is a logit, with availability conditions
logprob = loglogit(V, av, chosenAlternative)
biogeme = bio.BIOGEME(database, logprob)
# The model name doubles as the path prefix of the generated output files.
biogeme.modelName = filepath
results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()

# Summary statistics of the estimation.
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

# Last expression of the cell: display the table of estimated parameters.
pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -3727.612
rho bar square = 0.46
Output file: ./model2-output/logit_lpmc_model2_CarOwn+Age.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
CarOwn_2,-0.195,0.138,-1.42,0.156,0.144,-1.35,0.175
CarOwn_3,-0.589,0.0771,-7.64,2.13e-14,0.0784,-7.51,5.71e-14
CarOwn_4,1.11,0.0722,15.4,0.0,0.0698,15.9,0.0
Constant2,-4.51,0.2,-22.6,0.0,0.233,-19.3,0.0
Constant3,-1.99,0.129,-15.4,0.0,0.157,-12.6,0.0
Constant4,-3.14,0.135,-23.3,0.0,0.167,-18.8,0.0
Cost,-0.165,0.0168,-9.82,0.0,0.0176,-9.38,0.0
Time_Age_1,-0.0584,0.00935,-6.24,4.27e-10,0.0104,-5.62,1.9e-08
Time_Age_2,-0.0321,0.0214,-1.5,0.133,0.0226,-1.42,0.156
Time_Age_3,-0.0362,0.0121,-3.0,0.00267,0.0133,-2.72,0.00653


#### Let's compare this model with the previously developed one (Model 1)

To compare the two models we use a likelihood ratio test, taking this model (Model 2) as the unrestricted model and Model 1 as the restricted one. Model 1 estimates 8 parameters while Model 2 estimates 15, so the test statistic has to be compared with the chi-squared distribution with 7 degrees of freedom, $\chi^2_{7}$.


In [9]:
# Final log likelihood of the restricted model (copied from Model 1's notebook).
LLModel_1 = -4273.573

# Likelihood ratio statistic: -2 * (LL_restricted - LL_unrestricted).
likelihood_ratio = -2 * (LLModel_1 - results.data.logLike)
likelihood_ratio

1091.9213887672895

$2\,(LL_{\text{Model 2}} - LL_{\text{Model 1}}) = 1091.921 > \chi^2_{7,\,0.99} = 18.475$

Thus we can reject Model 1 at the 99% level of confidence, and we keep Model 2 as our new preferred model.
