In [17]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from IPython.core.display_functions import display
from biogeme.expressions import Beta, Variable
from biogeme.models import loglogit
from biogeme.segmentation import DiscreteSegmentationTuple, segmented_beta

In [18]:
# Read the tab-delimited file 'lpmc06.dat' into a DataFrame.
df = pd.read_csv(
  'lpmc06.dat',
  delimiter='\t',
)

In [19]:
# Preview the first rows to check that the file was parsed as expected.
display(df.head())

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,7,0,2,1,4,3,1,3,0.0,1,...,0.109444,0.0,0.055556,0.0,0,0.059444,0.0,0.15,0.0,0.11215
1,21,5,1,1,4,3,1,5,0.0,1,...,0.083889,0.0,0.293611,0.0,0,0.167778,0.0,0.46,0.0,0.243377
2,27,7,1,0,4,4,2,1,1.0,1,...,0.193889,0.516667,0.0,0.1,1,0.340833,1.5,2.04,0.0,0.280359
3,52,12,1,2,4,5,2,1,1.0,1,...,0.0625,0.0,0.491944,0.094722,1,0.355556,3.0,1.19,0.0,0.249219
4,53,12,1,3,4,3,2,1,1.0,1,...,0.0825,0.0,0.061944,0.0,0,0.0625,1.5,0.17,0.0,0.124444


In [20]:
# Wrap the DataFrame in a Biogeme database labelled 'lpmc'; this is the object
# handed to the estimator further down.
database = db.Database('lpmc', df)

## Model 0

We identify the variables that will enter the model specification.

In [21]:
# Travel-time components, all expressed in hours. The targets on the left and
# the column names on the right are listed in the same order.
(
  dur_walking,
  dur_cycling,
  dur_pt_access,
  dur_pt_rail,
  dur_pt_bus,
  dur_pt_int,
  dur_driving,
) = map(Variable, (
  'dur_walking',
  'dur_cycling',
  'dur_pt_access',
  'dur_pt_rail',
  'dur_pt_bus',
  'dur_pt_int',
  'dur_driving',
))

# Monetary cost components, all expressed in GBP.
(
  cost_transit,
  cost_driving_fuel,
  cost_driving_ccharge,
) = map(Variable, (
  'cost_transit',
  'cost_driving_fuel',
  'cost_driving_ccharge',
))

# Observed choice: the mode actually taken by the individual.
travel_mode = Variable('travel_mode')

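Parameters to be estimated. The walk constant is fixed to zero for normalization, so only the other three alternative-specific constants are defined.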

In [22]:
# Alternative-specific constants. No constant is declared for walk: it is the
# reference alternative, so its constant is normalized to zero.
# Every parameter uses the same Beta arguments after its name (0, None, None, 0)
# -- per the Biogeme API these are presumably the starting value, the lower and
# upper bounds, and the status flag; confirm against the installed version.
ASC_CYCLE, ASC_PT, ASC_CAR = (
  Beta(name, 0, None, None, 0)
  for name in ('asc_cycle', 'asc_pt', 'asc_car')
)

# Generic coefficients shared by all alternatives: travel time and travel cost.
B_TIME, B_COST = (
  Beta(name, 0, None, None, 0)
  for name in ('b_time', 'b_cost')
)

Definition of the utility functions.

In [23]:
# Walk: reference alternative, so no constant appears (ASC_WALK is normalized).
V1 = B_TIME * dur_walking

# Cycle: constant plus travel time.
V2 = ASC_CYCLE + B_TIME * dur_cycling

# Public transport: the time term sums the access, rail, bus and interchange
# durations; the cost term is the transit fare.
V3 = (
  ASC_PT
  + B_TIME * (
    dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int
  )
  + B_COST * cost_transit
)

# Car: the cost term adds the fuel cost and the congestion charge.
V4 = (
  ASC_CAR
  + B_TIME * dur_driving
  + B_COST * (
    cost_driving_fuel + cost_driving_ccharge
  )
)

In [24]:
# Associate each alternative's numeric code with its utility function.
V = {
  1: V1,  # walk
  2: V2,  # cycle
  3: V3,  # public transport
  4: V4,  # car
}

Definition of the model.

In [25]:
# All alternatives are available to all individuals; this is why None is
# passed as the second argument (presumably the availability conditions --
# confirm against the loglogit documentation). travel_mode is the observed
# choice.
logprob = loglogit(V, None, travel_mode)
# Combine the database and the log-probability expression into the object
# whose estimate() method is called in the next step.
biogeme = bio.BIOGEME(database, logprob)
# Label for this specification; presumably also used to name the files Biogeme
# writes -- TODO confirm.
biogeme.modelName = 'model_0'

File biogeme.toml has been created


Estimate the parameters.

In [26]:
# Run the estimation; the returned object is queried below for the summary
# statistics and the table of estimated parameters.
results = biogeme.estimate()

Summary statistics.

In [28]:
# The report is passed through print(): presumably print_general_statistics()
# returns the text instead of writing it out itself -- confirm.
print(results.print_general_statistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4658.601
Likelihood ratio test for the init. model:	4545.742
Rho-square for the init. model:	0.328
Rho-square-bar for the init. model:	0.327
Akaike Information Criterion:	9327.202
Bayesian Information Criterion:	9359.788
Final gradient norm:	1.0183E-03
Nbr of threads:	8



In [29]:
# Show the estimates as a table: value, robust standard error, robust t-test
# and robust p-value for each parameter.
display(results.get_estimated_parameters())

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_car,-1.264832,0.078562,-16.099815,0.0
asc_cycle,-3.742817,0.103011,-36.334033,0.0
asc_pt,-0.55221,0.05387,-10.250839,0.0
b_cost,-0.160728,0.014288,-11.248889,0.0
b_time,-5.340745,0.197809,-26.999514,0.0


## Model 1

## Model 2

## Model 3

## Model 4