In [1]:
import os 
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable
from biogeme.models import loglogit

In [2]:
# Render floats with 3 significant digits in every table displayed below.
pd.set_option("display.float_format", '{:.3g}'.format)

# Load the data file and wrap it in a Biogeme database named "LPMC".
# NOTE: the DataFrame is bound to the name `pandas`; the library itself is
# imported as `pd`, so the two do not clash.
pandas = pd.read_table("lpmc02.dat")
database = db.Database("LPMC", pandas)

# Publish every database variable as a module-level name so that the model
# specification can refer to columns directly (e.g. `travel_mode`).
globals().update(database.variables)

In [3]:
# Preview the first rows of the loaded table.
pandas.head()

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,14991,3180,0,2,1,3,1,5,0,1,...,0.0906,0.0,0.0333,0.0,0,0.0319,0.0,0.09,0,0.0
1,56951,12230,0,0,4,1,6,5,0,3,...,0.271,0.0,0.0506,0.0,0,0.08,0.0,0.28,0,0.135
2,46487,9923,3,1,3,2,6,3,0,2,...,0.364,0.0,0.0711,0.0,0,0.153,0.0,0.4,0,0.318
3,13626,2896,0,0,4,2,2,1,1,1,...,0.217,0.0,0.085,0.0,0,0.0858,1.5,0.22,0,0.055
4,76528,16553,2,0,3,5,6,4,1,3,...,0.201,0.417,0.05,0.1,1,0.853,4.1,2.22,0,0.588


In [4]:
# List the column names available in the loaded table.
pandas.columns

Index(['trip_id', 'household_id', 'person_n', 'trip_n', 'travel_mode',
       'purpose', 'fueltype', 'faretype', 'bus_scale', 'survey_year',
       'travel_year', 'travel_month', 'travel_date', 'day_of_week',
       'start_time', 'age', 'female', 'driving_license', 'car_ownership',
       'distance', 'dur_walking', 'dur_cycling', 'dur_pt_access',
       'dur_pt_rail', 'dur_pt_bus', 'dur_pt_int', 'pt_interchanges',
       'dur_driving', 'cost_transit', 'cost_driving_fuel',
       'cost_driving_ccharge', 'driving_traffic_percent'],
      dtype='object')

In [5]:
# Choice: the observed alternative is taken from the `travel_mode` variable,
# which is available as a global after `globals().update(database.variables)`.
chosenAlternative = travel_mode

In [6]:
# Distinct values taken by the choice column; these are the alternative
# identifiers used as keys of the utility and availability dictionaries.
pandas["travel_mode"].unique()

array([1, 4, 3, 2])

In [7]:
# --- Parameters to be estimated -------------------------------------------
# Positional arguments of Beta:
#   1  name for the report (typically the same as the Python variable)
#   2  starting value
#   3  lower bound
#   4  upper bound
#   5  status -- 0: estimate the parameter, 1: keep it fixed
Constant1 = Beta('Constant1', 0, None, None, 1)  # fixed at its starting value
Constant2 = Beta('Constant2', 0, None, None, 0)
Constant3 = Beta('Constant3', 0, None, None, 0)
Constant4 = Beta('Constant4', 0, None, None, 0)
Cost = Beta('Cost', 0, None, None, 0)
Total_TT = Beta('Total_TT', 0, None, None, 0)

# --- Derived variables ----------------------------------------------------
# Arithmetic expressions for quantities that are not directly available as
# columns of the data; each one is registered in `database`.
dur_public = DefineVariable(
    'dur_public',
    dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int,
    database,
)
cost_public = DefineVariable('cost_public', cost_transit, database)
cost_driving = DefineVariable(
    'cost_driving',
    cost_driving_fuel + cost_driving_ccharge,
    database,
)

# --- Utility functions ----------------------------------------------------
# Alternative numbering:
#   1 = walking
#   2 = cycling
#   3 = public transport
#   4 = driving
# Travel time enters every utility with the same coefficient; cost enters
# only the public-transport and driving utilities.
Opt1 = Constant1 + Total_TT * dur_walking
Opt2 = Constant2 + Total_TT * dur_cycling
Opt3 = Constant3 + Cost * cost_public + Total_TT * dur_public
Opt4 = Constant4 + Cost * cost_driving + Total_TT * dur_driving

# Utilities keyed by alternative identifier.
V = {
    1: Opt1,
    2: Opt2,
    3: Opt3,
    4: Opt4,
}

# Availability conditions: every alternative is flagged as available (1).
av = {alt: 1 for alt in V}

In [8]:
# The choice model is a logit, with availability conditions.
# This cell estimates it and prints a short summary of the results.
output_dir = "./model0-output"
filepath = os.path.join(output_dir, "logit_lpmc_model0")

# Create the output directory if it is missing. `exist_ok=True` makes the
# call safe to re-run and avoids the check-then-create race of testing
# os.path.exists() before os.mkdir(); makedirs also creates any missing
# parent directories.
os.makedirs(output_dir, exist_ok=True)

# Log-probability of the chosen alternative under the logit model.
logprob = loglogit(V, av, chosenAlternative)
biogeme  = bio.BIOGEME(database,logprob)
# The model name carries the output directory, so that the report files are
# written there (see the "Output file" line printed below).
biogeme.modelName = filepath
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)
print(f"Nbr of observations: {database.getNumberOfObservations()}")
print(f"LL(0) =    {results.data.initLogLike:.3f}")
print(f"LL(beta) = {results.data.logLike:.3f}")
print(f"rho bar square = {results.data.rhoBarSquare:.3g}")
print(f"Output file: {results.data.htmlFileName}")

           Value  Std err  t-test  p-value  Rob. Std err  Rob. t-test  \
Constant2  -3.87    0.103   -37.4        0         0.109        -35.3   
Constant3 -0.511    0.053   -9.65        0        0.0546        -9.36   
Constant4  -1.27   0.0738   -17.2        0        0.0825        -15.4   
Cost      -0.174   0.0136   -12.8        0         0.013        -13.4   
Total_TT   -5.47    0.183   -29.8        0         0.212        -25.8   

           Rob. p-value  
Constant2             0  
Constant3             0  
Constant4             0  
Cost                  0  
Total_TT              0  
Nbr of observations: 5000
LL(0) =    -6931.472
LL(beta) = -4572.870
rho bar square = 0.34
Output file: ./model0-output/logit_lpmc_model0.html


In [9]:
# Rich display of the estimated-parameters table returned by
# results.getEstimatedParameters().
pandasResults

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
Constant2,-3.87,0.103,-37.4,0,0.109,-35.3,0
Constant3,-0.511,0.053,-9.65,0,0.0546,-9.36,0
Constant4,-1.27,0.0738,-17.2,0,0.0825,-15.4,0
Cost,-0.174,0.0136,-12.8,0,0.013,-13.4,0
Total_TT,-5.47,0.183,-29.8,0,0.212,-25.8,0
