In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit

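$$V_{\text{car}} = \text{ASC}_{\text{car}} + \beta_{\text{time,car}} \cdot \text{time}_{\text{car}} + \beta_{\text{cost}} \cdot \text{cost}_{\text{car}}$$

$$V_{\text{rail}} = \beta_{\text{time,rail}} \cdot \text{time}_{\text{rail}} + \beta_{\text{cost}} \cdot \text{cost}_{\text{rail}} + \beta_{\text{gender}} \cdot \text{gender}$$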

In [8]:
# Render floats with three significant digits in every pandas display.
pd.set_option("display.float_format", '{:.3g}'.format)

# Read the raw data file and wrap the resulting frame in a Biogeme database.
pandas = pd.read_table("netherlands.dat")
database = db.Database("netherlands", pandas)

# Publish the entries of `database.variables` as notebook globals so that they
# can be referenced by bare name (e.g. `sp` just below) when building
# expressions.
globals().update(database.variables)

# Remove from the database the observations flagged by `sp != 0`.
exclude = sp != 0
database.remove(exclude)

In [5]:
# Quick look at the gender column of the loaded frame.
pandas.loc[:, "gender"]

0       1
9       1
18      0
25      0
30      1
       ..
1707    0
1712    0
1720    0
1725    0
1730    1
Name: gender, Length: 228, dtype: int64

In [9]:
# --- Parameters to be estimated ---------------------------------------------
# Beta takes five positional arguments:
#   1  name used in the report (typically the same as the Python variable)
#   2  starting value
#   3  lower bound
#   4  upper bound
#   5  status -- 0: estimate the parameter, 1: keep it fixed
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
# Status 1: the rail constant is kept fixed at its starting value of 0.
ASC_RAIL = Beta('ASC_RAIL', 0, None, None, 1)
BETA_COST = Beta('BETA_COST', 0, None, None, 0)
BETA_TIME_CAR = Beta('BETA_TIME_CAR', 0, None, None, 0)
BETA_TIME_RAIL = Beta('BETA_TIME_RAIL', 0, None, None, 0)
# NOTE(review): the report name is "GENDER", unlike the other parameters whose
# report name matches the variable -- confirm whether "BETA_GENDER" was meant.
BETA_GENDER = Beta("GENDER", 0, None, None, 0)

# --- Derived variables --------------------------------------------------------
# Quantities that are not directly available as columns of the data.
rail_time = DefineVariable('rail_time', rail_ivtt + rail_acc_time + rail_egr_time, database)
car_time = DefineVariable('car_time', car_ivtt + car_walk_time, database)
# NOTE(review): presumably the guilder-to-euro conversion rate (see the name
# and the *_euro variables below). The official fixed rate is 2.20371 NLG per
# EUR, i.e. about 0.45378 EUR per guilder -- confirm the value used here.
rate_G2E = DefineVariable('rate_G2E', 0.44378022, database)
car_cost_euro = DefineVariable('car_cost_euro', car_cost * rate_G2E, database)
rail_cost_euro = DefineVariable('rail_cost_euro', rail_cost * rate_G2E, database)
# `gender` is used below under its bare name, so it gets no DefineVariable.

# --- Utility functions --------------------------------------------------------
Car = ASC_CAR + BETA_COST * car_cost_euro + BETA_TIME_CAR * car_time
Rail = (
    ASC_RAIL
    + BETA_COST * rail_cost_euro
    + BETA_TIME_RAIL * rail_time
    + BETA_GENDER * gender
)
V = {0: Car, 1: Rail}
# Every alternative in V gets availability indicator 1.
av = dict.fromkeys(V, 1)

# --- Estimation ---------------------------------------------------------------
# Logit log-probability of the observed choice, given utilities and
# availabilities.
logprob = loglogit(V, av, choice)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = "binary_socioec_netherlands"
results = biogeme.estimate()

# --- Reporting ----------------------------------------------------------------
# Estimated parameters as a pandas table, followed by summary statistics.
pandasResults = results.getEstimatedParameters()
print(pandasResults)
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

                Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
ASC_CAR          2.85     1.09    2.62  0.00881          1.02          2.8   
BETA_COST       -0.13   0.0251   -5.17 2.35e-07        0.0265        -4.89   
BETA_TIME_CAR   -2.34    0.489   -4.78 1.75e-06         0.495        -4.73   
BETA_TIME_RAIL -0.529    0.418   -1.27    0.205         0.414        -1.28   
GENDER          0.675     0.33    2.05   0.0407         0.329         2.05   

                Rob. p-value  
ASC_CAR              0.00506  
BETA_COST           9.94e-07  
BETA_TIME_CAR       2.29e-06  
BETA_TIME_RAIL         0.201  
GENDER                0.0404  
Nbr of observations: 228
LL(0) =    -158.038
LL(beta) = -115.880
rho bar square = 0.235
Output file: binary_socioec_netherlands.html
