In [1]:
import pandas as pd
import numpy as np
import pylogit as pl
from collections import OrderedDict

# Data Processing

In [2]:
# Load the survey export. `skiprows=[1, 2]` drops the two lines directly below
# the header row (presumably extra label/metadata rows of the export -- TODO
# confirm). Missing cells are replaced by '' so they can be compared to ''.
survey_path = 'CE264_April 15, 2022_10.41.csv'
df = pd.read_csv(survey_path, skiprows=[1, 2])
df = df.fillna('')

# Keep only the responses in which both Q13 and Q8 were answered.
answered_q13 = df['Q13'] != ''
answered_q8 = df['Q8'] != ''
df = df[answered_q13 & answered_q8]

In [3]:
# --- Column selection and renaming -----------------------------------------
# Survey questions kept for the analysis and the short names they receive.
question_names = {
    'Q1': 'is_driver', 'Q2': 'age', 'Q3': 'gender', 'Q4': 'occ', 'Q5': 'income',
    'Q6': 'hh_size', 'Q7': 'have_driven', 'Q8': 'trip_purp', 'Q9': 'start_time',
    'Q10': 'trip_freq',
    'Q13': 'choice_1', 'Q14': 'choice_2', 'Q15': 'choice_3', 'Q16': 'choice_4',
}

# Attribute columns follow the pattern 'F-<situation>-<alternative>-<attribute>'
# and are renamed to '<attribute>_<situation><a|b>', e.g. 'F-2-1-4' -> 'cost_2a'.
attr_labels = ['t_detour', 't_cong', 't_shift', 'cost']
alt_letters = ['a', 'b']
feature_names = {
    f'F-{sit}-{alt}-{attr}': f'{attr_labels[attr - 1]}_{sit}{alt_letters[alt - 1]}'
    for sit in range(1, 5)
    for alt in range(1, 3)
    for attr in range(1, 5)
}

# Full rename map; its key order is also the column order of `sample`.
col_name = {**question_names, **feature_names}

sample = df[list(col_name)]
sample = sample.rename(columns=col_name).reset_index(drop=True)

# --- Categorical answers -> integer codes -----------------------------------
# Answers that do not appear in a mapping become NaN (behaviour of Series.map).
# The keys are reproduced exactly, including the trailing tab in 'Below 18\t'
# and the leading space in ' $50k-150k' -- presumably they match the raw export
# verbatim; verify against the CSV before "cleaning" them.
yes_no = {'Yes': 1, 'No': 0}
one_to_five = {'1': 1, '2': 2, '3': 3, '4': 4, '>=5': 5}
alternative_codes = {'Alternative A': 1, 'Alternative B': 2, 'Alternative C': 3}

encodings = {
    'is_driver': yes_no,
    'age': {'Below 18\t': 1, '18-25': 2, '26-35': 3, '36-45': 4, '45-60': 5, 'above 60': 6},
    'gender': {'Male': 1, 'Female': 2, 'Non-binary / third gender': 3, 'Prefer not to say': 4},
    'occ': {'Student at UC Berkeley': 1, 'Faculty at UC Berkeley': 2, 'Non UC Berkeley': 3},
    'income': {'Less than $50k': 1, ' $50k-150k': 2, 'Greater than $150k': 3},
    'hh_size': one_to_five,
    'have_driven': yes_no,
    'trip_purp': {'Work': 1, 'School': 2, 'Shopping': 3, 'Leisure': 4, 'Other': 5},
    'trip_freq': one_to_five,
}
encodings.update({f'choice_{k}': alternative_codes for k in range(1, 5)})

for column, codes in encodings.items():
    sample[column] = sample[column].map(codes)

# start_time: purely numeric answers become int, anything else stays ''.
start_times = []
for raw_time in sample['start_time']:
    start_times.append(int(raw_time) if raw_time.isdigit() else '')
sample['start_time'] = start_times

In [4]:
# If we only analyze drivers' choice:
# keep the respondents coded is_driver == 1 and renumber the rows from 0.
driver_mask = sample['is_driver'].eq(1)
sample = sample.loc[driver_mask].reset_index(drop=True)

In [5]:
# Give every remaining respondent a 1-based identifier (row label + 1) and put
# it in the first column. `DataFrame.insert` raises ValueError when the column
# already exists, so the guard keeps this cell safe to run more than once.
if 'id' not in sample.columns:
    sample.insert(0, 'id', [row + 1 for row in sample.index])

In [6]:
# Transposed view for inspection: one column per respondent, one row per variable.
sample.transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,33,34,35,36,37,38,39,40,41,42
id,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,34.0,35.0,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0
is_driver,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
age,3.0,4.0,2.0,1.0,1.0,6.0,5.0,2.0,6.0,4.0,...,3.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,2.0,2.0
gender,3.0,3.0,2.0,2.0,2.0,4.0,4.0,3.0,4.0,2.0,...,1.0,2.0,1.0,3.0,2.0,2.0,2.0,2.0,1.0,1.0
occ,3.0,3.0,1.0,2.0,2.0,3.0,2.0,3.0,2.0,2.0,...,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,3.0,3.0
income,2.0,2.0,1.0,1.0,3.0,1.0,3.0,1.0,2.0,2.0,...,2.0,1.0,2.0,2.0,1.0,3.0,2.0,2.0,1.0,1.0
hh_size,5.0,3.0,2.0,4.0,4.0,1.0,2.0,2.0,4.0,2.0,...,3.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,4.0
have_driven,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0
trip_purp,1.0,1.0,5.0,2.0,5.0,2.0,1.0,2.0,1.0,5.0,...,4.0,4.0,4.0,3.0,4.0,4.0,5.0,4.0,5.0,3.0
start_time,,,,,,,,,,,...,12.0,,10.0,20.0,9.0,11.0,10.0,,,


In [7]:
# Reshape the wide survey table (one row per respondent) into long format:
# one row per respondent x choice situation x alternative.
n_situations = 4                       # choice_1 .. choice_4
n_alts = 3                             # alternatives coded 1 (A), 2 (B), 3 (C)
rows_per_resp = n_situations * n_alts  # 12 long rows per respondent

# Respondent-level columns ('id' .. 'trip_freq', the first 11 columns) are
# repeated on each of the respondent's long rows.
sample_long = sample.loc[:, 'id':'trip_freq']
sample_long = sample_long.loc[sample_long.index.repeat(rows_per_resp)].reset_index(drop=True)

t_d, t_c, t_s, c, choice = [[] for _ in range(5)]
# Iterate over however many respondents there are instead of a hard-coded
# count, and address the attribute columns by name instead of by position.
for resp in range(len(sample)):
    for sit in range(1, n_situations + 1):
        # Alternative 1 (A): attribute levels shown in columns '*_<sit>a'.
        t_d.append(sample[f't_detour_{sit}a'].iat[resp])
        t_c.append(sample[f't_cong_{sit}a'].iat[resp])
        t_s.append(int(sample[f't_shift_{sit}a'].iat[resp]))
        c.append(int(sample[f'cost_{sit}a'].iat[resp]))

        # Alternative 2 (B): attribute levels shown in columns '*_<sit>b'.
        # NOTE(review): t_shift is cast with int() for alternative A but not
        # for alternative B (and t_detour / t_cong are never cast). Kept as in
        # the original; confirm whether the asymmetry is intentional.
        t_d.append(sample[f't_detour_{sit}b'].iat[resp])
        t_c.append(sample[f't_cong_{sit}b'].iat[resp])
        t_s.append(sample[f't_shift_{sit}b'].iat[resp])
        c.append(int(sample[f'cost_{sit}b'].iat[resp]))

        # Alternative 3 (C): every attribute is set to 0.
        t_d.append(0)
        t_c.append(0)
        t_s.append(0)
        c.append(0)

        # One-hot choice indicator over the three alternatives of this situation.
        picked = sample[f'choice_{sit}'].iat[resp]
        if pd.isna(picked):
            raise ValueError(
                f"respondent id {sample['id'].iat[resp]} has no valid answer for choice_{sit}"
            )
        picked = int(picked)
        choice.extend(int(alt == picked) for alt in range(1, n_alts + 1))

sample_long['t_detour'] = t_d
sample_long['t_cong'] = t_c
sample_long['t_shift'] = t_s
sample_long['cost'] = c
sample_long['choice'] = choice

# 'alt' cycles 1, 2, 3 within a situation; 'sit' numbers the choice situations
# consecutively across all respondents (1, 2, 3, ...).
sample_long.insert(1, 'alt', [i % n_alts + 1 for i in sample_long.index])
sample_long.insert(2, 'sit', [i // n_alts + 1 for i in sample_long.index])

# Sanity check: exactly one chosen alternative in every choice situation.
assert sample_long.groupby('sit')['choice'].sum().eq(1).all()

In [8]:
# Show the first 12 long-format rows (the block of rows built for the first respondent).
sample_long.iloc[:12]

Unnamed: 0,id,alt,sit,is_driver,age,gender,occ,income,hh_size,have_driven,trip_purp,start_time,trip_freq,t_detour,t_cong,t_shift,cost,choice
0,1,1,1,1,3,3,3,2,5,1,1,,1,35,60,30,20,0
1,1,2,1,1,3,3,3,2,5,1,1,,1,45,40,60,10,1
2,1,3,1,1,3,3,3,2,5,1,1,,1,0,0,0,0,0
3,1,1,2,1,3,3,3,2,5,1,1,,1,25,60,15,15,0
4,1,2,2,1,3,3,3,2,5,1,1,,1,45,60,45,0,1
5,1,3,2,1,3,3,3,2,5,1,1,,1,0,0,0,0,0
6,1,1,3,1,3,3,3,2,5,1,1,,1,65,40,45,20,1
7,1,2,3,1,3,3,3,2,5,1,1,,1,35,40,60,10,0
8,1,3,3,1,3,3,3,2,5,1,1,,1,0,0,0,0,0
9,1,1,4,1,3,3,3,2,5,1,1,,1,65,10,45,0,0


# Model Specification

In [9]:
# Utility specification in pylogit's format: `basic_spec` maps a data column
# (or 'intercept') to the alternatives that receive a coefficient, and
# `basic_names` gives the display name of each resulting coefficient.
basic_spec = OrderedDict()
basic_names = OrderedDict()

# Constant for alternative 3 only. The label previously contained a
# mis-encoded apostrophe ("won’t"); it is restored to the intended character.
basic_spec['intercept'] = [3]
basic_names['intercept'] = ['ASC (won’t drive)']

# Each attribute gets a single coefficient shared by alternatives 1 and 2
# (the nested list [[1, 2]] groups them into one generic coefficient).
generic_vars = [
    ('t_detour', 't_detour (min)'),
    ('t_cong', 't_congestion (min)'),
    ('t_shift', 't_shift (min)'),
    ('cost', 'cost ($)'),
]
for column, label in generic_vars:
    basic_spec[column] = [[1, 2]]
    basic_names[column] = [label]

# Model Estimation

In [10]:
# Coefficients treated as random; entries are the display names from `basic_names`.
random_coef_names = ['t_detour (min)', 't_congestion (min)', 't_shift (min)', 'cost ($)']

# Mixed logit on the long-format data: rows are grouped into choice situations
# by 'sit', alternatives are identified by 'alt', and 'id' is passed as the
# mixing id column (the respondent identifier).
mixed = pl.create_choice_model(
    data=sample_long,
    alt_id_col='alt',
    obs_id_col='sit',
    choice_col='choice',
    specification=basic_spec,
    names=basic_names,
    model_type='Mixed Logit',
    mixing_id_col='id',
    mixing_vars=random_coef_names,
)

In [11]:
# 9 parameters are estimated: the 5 coefficients of the specification plus one
# 'Sigma' term for each of the 4 mixing variables. All start at zero.
n_params = 9
start_values = np.zeros(n_params)

# A fixed seed and number of draws make the simulated likelihood reproducible.
mixed.fit_mle(init_vals=start_values, num_draws=600, seed=123)
mixed.get_statsmodels_summary()

Log-likelihood at zero: -188.9613
Initial Log-likelihood: -188.9613


  warn('Method %s does not use Hessian information (hess).' % method,


Estimation Time for Point Estimation: 5.51 seconds.
Final log-likelihood: -172.1188


  self._store_inferential_results(np.sqrt(np.diag(self.robust_cov)),


0,1,2,3
Dep. Variable:,choice,No. Observations:,172.0
Model:,Mixed Logit Model,Df Residuals:,163.0
Method:,MLE,Df Model:,9.0
Date:,"Mon, 18 Apr 2022",Pseudo R-squ.:,0.089
Time:,19:28:55,Pseudo R-bar-squ.:,0.042
AIC:,362.238,Log-Likelihood:,-172.119
BIC:,390.565,LL-Null:,-188.961

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ASC (won’t drive),-1.4488,0.461,-3.144,0.002,-2.352,-0.545
t_detour (min),-0.0173,0.011,-1.554,0.120,-0.039,0.005
t_congestion (min),-0.0221,0.011,-2.051,0.040,-0.043,-0.001
t_shift (min),0.0021,0.006,0.376,0.707,-0.009,0.013
cost ($),-0.0452,0.026,-1.711,0.087,-0.097,0.007
Sigma t_detour (min),0.0271,0.014,2.003,0.045,0.001,0.054
Sigma t_congestion (min),0.0321,0.013,2.388,0.017,0.006,0.058
Sigma t_shift (min),-0.0216,0.009,-2.458,0.014,-0.039,-0.004
Sigma cost ($),0.0197,0.069,0.283,0.777,-0.116,0.156
