In [1]:
import numpy as np
import pandas as pd
from biogeme import models as models, database as db, biogeme as bio

In [2]:
from biogeme.expressions import Beta

In [6]:
# Load the tab-separated trip file into a DataFrame, then show its (rows, columns) size.
df = pd.read_csv(
    "../data/lpmc.dat",
    delimiter='\t',  # `delimiter` is pandas' alias for `sep`
)
df.shape

(81086, 32)

In [7]:
# Preview the first rows of the raw frame to sanity-check the parse.
df.head()

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,0,0,0,0,4,3,1,1,1.0,1,...,0.134444,0.0,0.016667,0.0,0,0.052222,1.5,0.14,0.0,0.111702
1,1,0,0,1,4,3,1,1,1.0,1,...,0.109444,0.0,0.055556,0.0,0,0.059444,1.5,0.15,0.0,0.11215
2,2,0,0,2,4,3,1,1,1.0,1,...,0.203056,0.0,0.210278,0.0,0,0.236667,1.5,0.79,0.0,0.203052
3,3,0,0,3,4,3,1,1,1.0,1,...,0.205556,0.0,0.258611,0.0,0,0.233333,1.5,0.78,0.0,0.160714
4,4,0,1,2,4,3,1,4,1.0,1,...,0.203056,0.0,0.189444,0.0,0,0.229167,1.5,0.78,0.0,0.130909


In [8]:
# List every column name available in the raw frame.
df.columns

Index(['trip_id', 'household_id', 'person_n', 'trip_n', 'travel_mode',
       'purpose', 'fueltype', 'faretype', 'bus_scale', 'survey_year',
       'travel_year', 'travel_month', 'travel_date', 'day_of_week',
       'start_time', 'age', 'female', 'driving_license', 'car_ownership',
       'distance', 'dur_walking', 'dur_cycling', 'dur_pt_access',
       'dur_pt_rail', 'dur_pt_bus', 'dur_pt_int', 'pt_interchanges',
       'dur_driving', 'cost_transit', 'cost_driving_fuel',
       'cost_driving_ccharge', 'driving_traffic_percent'],
      dtype='object')

In [18]:
# Append two aggregate columns:
#   dur_pt_total - sum of the four dur_pt_* component columns
#   cost_driving - sum of the two cost_driving_* component columns
df = df.assign(
    dur_pt_total=(
        df['dur_pt_access'] + df['dur_pt_rail'] + df['dur_pt_bus'] + df['dur_pt_int']
    ),
    cost_driving=df['cost_driving_fuel'] + df['cost_driving_ccharge'],
)

In [20]:
# Columns kept for modelling: identifier, observed choice, one duration per mode,
# and the two cost columns.
cols = [
    'trip_id',
    'travel_mode',
    'dur_walking',
    'dur_cycling',
    'dur_pt_total',
    'dur_driving',
    'cost_transit',
    'cost_driving',
]

# Restrict the frame to exactly those columns, in that order.
df = df.loc[:, cols]

In [21]:
# Number of trips per chosen mode.
# Mode coding: 1 = walk, 2 = cycle, 3 = public transport, 4 = drive.
df['travel_mode'].value_counts()

4    35808
3    28605
1    14268
2     2405
Name: travel_mode, dtype: int64

In [23]:
# Wrap the trimmed DataFrame in a Biogeme Database object named "lmc".
data = db.Database("lmc",df)

In [24]:
# Inspect the mapping from column name to the expression object the Database exposes for it.
data.variables

{'trip_id': trip_id,
 'travel_mode': travel_mode,
 'dur_walking': dur_walking,
 'dur_cycling': dur_cycling,
 'dur_pt_total': dur_pt_total,
 'dur_driving': dur_driving,
 'cost_transit': cost_transit,
 'cost_driving': cost_driving}

In [26]:
# Bind each Database column expression to a notebook-level name of the same spelling,
# so the utility functions below can refer to the columns directly.
trip_id = data.variables['trip_id']
travel_mode = data.variables['travel_mode']
dur_walking = data.variables['dur_walking']
dur_cycling = data.variables['dur_cycling']
dur_pt_total = data.variables['dur_pt_total']
dur_driving = data.variables['dur_driving']
cost_transit = data.variables['cost_transit']
cost_driving = data.variables['cost_driving']

In [28]:
# Create parameters
# Every Beta below is built positionally as Beta(name, 0, None, None, flag): the second
# argument is 0 for all of them and both following slots are None.
# NOTE(review): in Biogeme's Beta signature these are (name, starting value, lower bound,
# upper bound, status), with status 0 = estimated and 1 = held fixed - confirm against
# the installed Biogeme version.

# Alternative-specific constants, one per mode. asc_drive is the only one created with
# flag 1, which (under the reading above) makes driving the reference alternative.
asc_walk = Beta('asc_walk',0,None,None,0)
asc_cycle = Beta('asc_cycle',0,None,None,0)
asc_trsit = Beta('asc_trsit',0,None,None,0)
asc_drive = Beta('asc_drive',0,None,None,1)

# Coefficients shared across modes: one for duration, one for cost.
b_dur = Beta('b_dur',0,None,None,0)
b_cos = Beta('b_cos',0,None,None,0)

In [29]:
# Re-display the retained column names; these are the variable names usable in the utilities.
cols

['trip_id',
 'travel_mode',
 'dur_walking',
 'dur_cycling',
 'dur_pt_total',
 'dur_driving',
 'cost_transit',
 'cost_driving']

In [32]:
# Create utility function
# One linear utility per mode: its alternative-specific constant plus the shared duration
# coefficient times that mode's duration. Transit and driving additionally include the
# shared cost coefficient times their cost column; walking and cycling have no cost term.
v_walk = asc_walk + b_dur * dur_walking
v_cycle = asc_cycle + b_dur * dur_cycling
v_trsit = asc_trsit + b_dur * dur_pt_total + b_cos * cost_transit
v_drive = asc_drive + b_dur * dur_driving + b_cos * cost_driving
# Keys follow the travel_mode coding (1: walk, 2: cycle, 3: public transport, 4: drive).
v = {1:v_walk, 2:v_cycle, 3:v_trsit, 4:v_drive}


In [34]:
# Log-probability of the observed mode choice under the multinomial logit model.
# This expression is handed to BIOGEME as the per-observation log-likelihood, so it must
# be the *logarithm* of the choice probability: models.loglogit. models.logit returns the
# raw probability, which would make the estimator maximise a sum of probabilities instead.
# Passing None as the availability argument treats all four modes as always available.
logprob = models.loglogit(v, None, travel_mode)

In [35]:
# Build the estimation object from the Database and the expression defined as `logprob`.
mlogit = bio.BIOGEME(data, logprob)
# NOTE(review): modelName is presumably what Biogeme uses to name its output files - confirm.
mlogit.modelName = "11_logit_lmc"

In [36]:
%%time
# Run the estimation and keep the returned results object in `res`.
# The %%time cell magic must stay on the first line of the cell; it reports the run time.
res = mlogit.estimate()

In [37]:
# Display the symbolic expression built for the walking utility.
v_walk

(asc_walk(0) + (b_dur(0) * dur_walking))

In [38]:
# Show the class of the utility object: it is an expression node, not a number.
type(v_walk)

biogeme.expressions.Plus