In [14]:
import os 

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit

In [15]:
pandas = pd.read_table("lpmc02.dat")
database = db.Database("LPMC",pandas)
pd.options.display.float_format = '{:.3g}'.format

globals().update(database.variables)
chosenAlternative = travel_mode

In [16]:
database.variables

{'age': age,
 'bus_scale': bus_scale,
 'car_ownership': car_ownership,
 'cost_driving_ccharge': cost_driving_ccharge,
 'cost_driving_fuel': cost_driving_fuel,
 'cost_transit': cost_transit,
 'day_of_week': day_of_week,
 'distance': distance,
 'driving_license': driving_license,
 'driving_traffic_percent': driving_traffic_percent,
 'dur_cycling': dur_cycling,
 'dur_driving': dur_driving,
 'dur_pt_access': dur_pt_access,
 'dur_pt_bus': dur_pt_bus,
 'dur_pt_int': dur_pt_int,
 'dur_pt_rail': dur_pt_rail,
 'dur_walking': dur_walking,
 'faretype': faretype,
 'female': female,
 'fueltype': fueltype,
 'household_id': household_id,
 'person_n': person_n,
 'pt_interchanges': pt_interchanges,
 'purpose': purpose,
 'start_time': start_time,
 'survey_year': survey_year,
 'travel_date': travel_date,
 'travel_mode': travel_mode,
 'travel_month': travel_month,
 'travel_year': travel_year,
 'trip_id': trip_id,
 'trip_n': trip_n}

In [25]:
#Parameters to be estimated+ (  BestAlternative_4   *  4  )
# Arguments:
#   1  Name for report. Typically, the same as the variable
#   2  Starting value
#   3  Lower bound
#   4  Upper bound
#   5  0: estimate the parameter, 1: keep it fixed
Constant1 = Beta('Constant1',0,None,None,1)
Constant2 = Beta('Constant2',0,None,None,0)
Constant3 = Beta('Constant3',0,None,None,0)
Constant4 = Beta('Constant4',0,None,None,0)
Cost = Beta('Cost',0,None,None,0)
Total_TT1 = Beta('Total_TT1',0,None,None,0)
Total_TT2 = Beta('Total_TT2',0,None,None,0)
Total_TT3 = Beta('Total_TT3',0,None,None,0)
Total_TT4 = Beta('Total_TT4',0,None,None,0)
Dist = Beta('Dist',0,None,None,0)

# socio-economic factors (interacted with Distance)
Dist_Female = Beta('Dist_Female', 0, None, None, 0)
Dist_Age = Beta('Dist_Age', 0, None, None, 0)

In [18]:
# Define here arithmetic expressions for name that are not directly
# available from the data

dur_public = DefineVariable('dur_public', dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int ,database)
cost_public = DefineVariable('cost_public', cost_transit ,database)
cost_driving = DefineVariable('cost_driving', cost_driving_fuel + cost_driving_ccharge ,database)

##### Model with Gender as the socio-econ interaction

In [26]:
# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking + Dist * distance + Dist_Female * distance * female
Opt2 = Constant2 + Total_TT2 * dur_cycling + Dist * distance + Dist_Female * distance * female
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public + Dist * distance + \
                                                    Dist_Female * distance * female
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * dur_driving + Dist * distance + \
                                                    Dist_Female * distance * female
V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [27]:
output_dir = "./model2-output"
filepath = os.path.join(output_dir, "logit_lpmc_model2_DistGen+Gender")

if not os.path.exists(output_dir):
    os.mkdir(output_dir)
if os.path.exists(filepath):
    os.remove(f"{filepath}.html")
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath

results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -4273.573
rho bar square = 0.382
Output file: ./model2-output/logit_lpmc_model2_DistGen+Gender~01.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
Constant2,-4.61,0.172,-26.8,0,0.214,-21.5,0.0
Constant3,-2.27,0.114,-19.8,0,0.149,-15.2,0.0
Constant4,-1.88,0.107,-17.6,0,0.15,-12.6,0.0
Cost,-0.155,0.015,-10.3,0,0.0148,-10.5,0.0
Dist,9.58e-06,1.8e+308,5.33e-314,1,0.00013,0.0736,0.941
Dist_Female,8.43e-06,1.8e+308,4.69e-314,1,0.000142,0.0592,0.953
Total_TT1,-8.18,0.293,-27.9,0,0.453,-18.0,0.0
Total_TT2,-5.84,0.442,-13.2,0,0.506,-11.5,0.0
Total_TT3,-3.6,0.235,-15.3,0,0.241,-15.0,0.0
Total_TT4,-6.49,0.332,-19.6,0,0.372,-17.5,0.0


##### Model with Age as the socio-econ interaction

In [28]:
# Utilities

#Opt1 = walking
#Opt2 = cycling
#Opt3 = public transport
#Opt4 = driving

Opt1 = Constant1 + Total_TT1 * dur_walking + Dist * distance + Age * distance * age
Opt2 = Constant2 + Total_TT2 * dur_cycling + Dist * distance + Age * distance * age
Opt3 = Constant3 + Cost * cost_public + Total_TT3 * dur_public + Dist * distance + Dist_Age * distance * age
Opt4 = Constant4 + Cost * cost_driving + Total_TT4 * dur_driving + Dist * distance + Dist_Age * distance * age
V = {1: Opt1,2: Opt2,3: Opt3,4: Opt4}
av = {1: 1,2: 1,3: 1,4: 1}

In [29]:
output_dir = "./model2-output"
filepath = os.path.join(output_dir, "logit_lpmc_model2_DistGen+Age")

if not os.path.exists(output_dir):
    os.mkdir(output_dir)
if os.path.exists(filepath):
    os.remove(f"{filepath}.html")
    
# The choice model is a logit, with availability conditions
logprob = loglogit(V,av,chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
biogeme.modelName = filepath

results = biogeme.estimate()
# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

pandasResults

Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -4263.612
rho bar square = 0.383
Output file: ./model2-output/logit_lpmc_model2_DistGen+Age~00.html


Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
Age,4.39e-05,4.92e-07,89.1,0.0,5.14e-07,85.3,0.0
Constant2,-4.59,0.172,-26.7,0.0,0.215,-21.3,0.0
Constant3,-2.27,0.114,-19.8,0.0,0.149,-15.3,0.0
Constant4,-1.88,0.107,-17.6,0.0,0.15,-12.6,0.0
Cost,-0.155,0.0151,-10.3,0.0,0.0148,-10.5,0.0
Dist,-9.08e-05,1.33e-12,-68400000.0,0.0,1.31e-12,-69200000.0,0.0
Dist_Age,4.81e-05,4.93e-07,97.6,0.0,5.15e-07,93.5,0.0
Total_TT1,-7.55,0.323,-23.4,0.0,0.496,-15.2,0.0
Total_TT2,-3.99,0.575,-6.94,3.97e-12,0.551,-7.24,4.49e-13
Total_TT3,-3.6,0.235,-15.3,0.0,0.241,-15.0,0.0
