### <center>Choice Model</center>


1. Import packages and load the data
2. Preprocessing
3. Model specification, estimation, and summary

In [5]:
# 1 import packages and data
!pip install pylogit
import pandas as pd
import numpy as np

# regression package
import pylogit as pl
from collections import OrderedDict

travel = pd.read_excel('TravelModeAnalysisFinal.xlsx',sheet_name="Data",)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [6]:
# 2 Data preprocessing
# Reshape from wide to long: columns named "<stub>.<suffix>" (stubs Time, Invc,
# Invt) are stacked so that each row is one (Id, alternative) pair, where
# "alternative" holds the suffix found after the ".".
travel_long = pd.wide_to_long(
    travel,
    stubnames=["Time", "Invc", "Invt"],
    i="Id",
    j="alternative",
    sep=".",
    suffix=r'\w+',
).reset_index()

# Integer code assigned to each alternative label.
mapping = {"car": 0, "train": 1, "air": 2, "bus": 3}

# 1 on the row whose alternative equals the observation's "Choice", else 0.
travel_long['choice_col'] = (travel_long["alternative"] == travel_long['Choice']) * 1

# map() rather than replace(): replace() on a string column depends on pandas'
# deprecated silent downcasting to end up with integers, and it would leave any
# label missing from `mapping` in place as a string. map() turns such labels
# into NaN, which is checked explicitly before casting to int.
alt_codes = travel_long['alternative'].map(mapping)
unmapped_labels = travel_long.loc[alt_codes.isna(), 'alternative'].unique()
if len(unmapped_labels) > 0:
    raise ValueError(
        f"alternatives without an entry in `mapping`: {list(unmapped_labels)}"
    )
travel_long['alt_id'] = alt_codes.astype(int)

# Order rows by observation, then by alternative code.
travel_long = travel_long.sort_values(by=["Id", "alt_id"], ascending=True)

# Rescale the explanatory variables by 1/100 (called "normalizing" in the
# original notebook). The units of these columns are not visible here.
scaled_cols = ["Time", "Invc", "Invt", "Hinc"]
travel_long[scaled_cols] = travel_long[scaled_cols] / 100


In [7]:
# 3 Model1 formatting
# Model 1: MNL with Time, Invc and Invt plus alternative-specific constants.
# "all_same" gives a variable one coefficient shared by every alternative.
model_1_generic = ["Time", "Invc", "Invt"]
model_1_spec = OrderedDict((name, "all_same") for name in model_1_generic)
model_1_names = OrderedDict((name, name) for name in model_1_generic)

# Constants are estimated for alt_id 0, 1 and 3, labelled car/train/bus to
# match the `mapping` built in the preprocessing cell. alt_id 2 gets no
# constant of its own.
model_1_spec["intercept"] = [0, 1, 3]
model_1_names["intercept"] = ["ASC: {}".format(x) for x in ["car", "train", "bus"]]

# One start value per named coefficient: a plain string names one coefficient,
# a list names one per entry. Deriving the count from the names keeps the
# start vector in sync with the specification instead of hard-coding 6.
model_1_n_params = sum(
    1 if isinstance(label, str) else len(label)
    for label in model_1_names.values()
)

# feeding the model
model_1 = pl.create_choice_model(data=travel_long,
                                 alt_id_col="alt_id",
                                 obs_id_col="Id",
                                 choice_col="choice_col",
                                 specification=model_1_spec,
                                 model_type="MNL",
                                 names=model_1_names)
model_1.fit_mle(np.zeros(model_1_n_params), method='newton-cg')  # initial values = zeros

# Look at the estimation summaries
model_1.get_statsmodels_summary()

Log-likelihood at zero: -291.1218
Initial Log-likelihood: -291.1218
Estimation Time for Point Estimation: 0.07 seconds.
Final log-likelihood: -192.8885


  **kwargs)


0,1,2,3
Dep. Variable:,choice_col,No. Observations:,210.0
Model:,Multinomial Logit Model,Df Residuals:,204.0
Method:,MLE,Df Model:,6.0
Date:,"Mon, 31 Oct 2022",Pseudo R-squ.:,0.337
Time:,17:23:05,Pseudo R-bar-squ.:,0.317
AIC:,397.777,Log-Likelihood:,-192.889
BIC:,417.860,LL-Null:,-291.122

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Time,-9.6887,1.034,-9.368,0.000,-11.716,-7.662
Invc,-1.3912,0.665,-2.092,0.036,-2.695,-0.088
Invt,-0.3995,0.085,-4.704,0.000,-0.566,-0.233
ASC: car,-4.7399,0.868,-5.464,0.000,-6.440,-3.040
ASC: train,-0.7867,0.603,-1.305,0.192,-1.968,0.394
ASC: bus,-1.4336,0.681,-2.106,0.035,-2.768,-0.099


In [8]:
# 3 Model2 formatting
# Model 2: same as model 1 (Time, Invc, Invt with one shared coefficient each,
# plus alternative-specific constants), with Hinc added as a variable that has
# a separate coefficient for alt_id 0, 1 and 3.
model_2_generic = ["Time", "Invc", "Invt"]
model_2_spec = OrderedDict((name, "all_same") for name in model_2_generic)
model_2_names = OrderedDict((name, name) for name in model_2_generic)

# Labels car/train/bus match alt_id 0/1/3 in the `mapping` built in the
# preprocessing cell. alt_id 2 gets neither an Hinc coefficient nor a constant.
model_2_spec["Hinc"] = [0, 1, 3]
model_2_names["Hinc"] = [f"Hinc_{x}" for x in ["car", "train", "bus"]]

model_2_spec["intercept"] = [0, 1, 3]
model_2_names["intercept"] = ["ASC: {}".format(x)
                              for x in ["car", "train", "bus"]]

# One start value per named coefficient: a plain string names one coefficient,
# a list names one per entry. Deriving the count from the names keeps the
# start vector in sync with the specification instead of hard-coding 9.
model_2_n_params = sum(
    1 if isinstance(label, str) else len(label)
    for label in model_2_names.values()
)

# feeding the model2
model_2 = pl.create_choice_model(data=travel_long,
                                 alt_id_col="alt_id",
                                 obs_id_col="Id",
                                 choice_col="choice_col",
                                 specification=model_2_spec,
                                 model_type="MNL",
                                 names=model_2_names)
model_2.fit_mle(np.zeros(model_2_n_params), method='newton-cg')  # initial values = zeros

# Look at the estimation summaries
model_2.get_statsmodels_summary()

Log-likelihood at zero: -291.1218
Initial Log-likelihood: -291.1218
Estimation Time for Point Estimation: 0.09 seconds.
Final log-likelihood: -182.2186


0,1,2,3
Dep. Variable:,choice_col,No. Observations:,210.0
Model:,Multinomial Logit Model,Df Residuals:,201.0
Method:,MLE,Df Model:,9.0
Date:,"Mon, 31 Oct 2022",Pseudo R-squ.:,0.374
Time:,17:23:09,Pseudo R-bar-squ.:,0.343
AIC:,382.437,Log-Likelihood:,-182.219
BIC:,412.561,LL-Null:,-291.122

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Time,-9.5284,1.036,-9.202,0.000,-11.558,-7.499
Invc,-0.4499,0.721,-0.624,0.533,-1.863,0.963
Invt,-0.3665,0.087,-4.222,0.000,-0.537,-0.196
Hinc_car,0.2103,1.210,0.174,0.862,-2.160,2.581
Hinc_train,-5.5895,1.536,-3.640,0.000,-8.599,-2.580
Hinc_bus,-2.3111,1.646,-1.404,0.160,-5.537,0.914
ASC: car,-4.2474,1.007,-4.220,0.000,-6.220,-2.275
ASC: train,1.2421,0.817,1.521,0.128,-0.359,2.843
ASC: bus,-0.1844,0.897,-0.206,0.837,-1.942,1.573
