In [1]:
# Source code from: https://github.com/timothyb0912/pylogit

In [4]:
from collections import OrderedDict
import numpy as np
import pandas as pd
import pylogit as pl

### Load Swissmetro data

In [5]:
# Read the Swissmetro table from disk and show its (rows, columns) shape.
# Presumably long format, i.e. one row per observation/alternative pair —
# TODO confirm against the data dictionary.
swissmetro_csv = "./data/swissmetro_long.csv"
sm_long = pd.read_csv(filepath_or_buffer=swissmetro_csv)
sm_long.shape

(19143, 22)

### Create the model specification
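The model specification used in this example is shown below. Note that the code in the next cell zeroes the travel cost of alternatives 1 and 2 when `GA == 1` **or** `WHO == 2` (the `free_ticket` flag), which is a broader condition than the $GA == 0$ indicator written in the formula: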
$$
\begin{aligned}
V_{i, \textrm{Train}} &= \textrm{ASC Train} + \\
&\quad \beta _{ \textrm{tt_train} } \textrm{Travel Time} _{ \textrm{Train}} + \\
&\quad \beta _{ \textrm{tc_train} } \textrm{Travel Cost}_{\textrm{Train}} * \left( GA == 0 \right) + \\
&\quad \beta _{ \textrm{headway_train} } \textrm{Headway} _{\textrm{Train}} \\
\\
V_{i, \textrm{Swissmetro}} &= \textrm{ASC Swissmetro} + \\
&\quad \beta _{ \textrm{tt_sm} } \textrm{Travel Time} _{ \textrm{Swissmetro}} + \\
&\quad \beta _{ \textrm{tc_sm} } \textrm{Travel Cost}_{\textrm{Swissmetro}} * \left( GA == 0 \right) + \\
&\quad \beta _{ \textrm{headway_sm} } \textrm{Headway} _{\textrm{Swissmetro}} \\
% &\quad \beta _{ \textrm{seat} } \left( \textrm{Seat Configuration} == 1 \right) \\
% &\quad \beta _{ \textrm{survey} } \left( \textrm{Train Survey} == 1 \right) \\
% &\quad \beta _{ \textrm{first_class} } \left( \textrm{First Class} == 0 \right) \\
\\
V_{i, \textrm{Car}} &= \beta _{ \textrm{tt_car} } \textrm{Travel Time} _{ \textrm{Car}} + \\
&\quad \beta _{ \textrm{tc_car}} \textrm{Travel Cost}_{\textrm{Car}} \\
% &\quad \beta _{\textrm{luggage}=1} \left( \textrm{Luggage} == 1 \right) + \\
% &\quad \beta _{\textrm{luggage}>1} \left( \textrm{Luggage} > 1 \right)
\end{aligned}
$$

In [6]:
# Flag rows whose ticket costs nothing: GA == 1 or WHO == 2, restricted to
# alternatives 1 and 2 (presumably Train and Swissmetro — TODO confirm the
# mode_id coding). NOTE(review): the written specification only mentions
# GA == 0; the WHO == 2 clause is extra.
no_fare_traveller = (sm_long["GA"] == 1) | (sm_long["WHO"] == 2)
is_alt_1_or_2 = sm_long["mode_id"].isin([1, 2])
sm_long["free_ticket"] = (no_fare_traveller & is_alt_1_or_2).astype(int)

# Zero the travel cost on flagged rows; all other rows keep their cost.
pays_fare = sm_long["free_ticket"] == 0
sm_long["travel_cost"] = sm_long["travel_cost"] * pays_fare

In [7]:
# Utility specification: each variable maps to the alternative ids that get
# their own coefficient for it. Intercepts and headway are only given to
# alternatives 1 and 2; time and cost to all three.
spec = OrderedDict(
    [
        ("intercept", [1, 2]),
        ("travel_time", [1, 2, 3]),
        ("travel_cost", [1, 2, 3]),
        ("headway", [1, 2]),
    ]
)

In [8]:
# Build the multinomial logit (MNL) model object from the long-format data.
mnl_settings = dict(
    data=sm_long,
    alt_id_col="mode_id",
    obs_id_col="custom_id",
    choice_col="CHOICE",
    specification=spec,
    model_type="MNL",
)
sm_mnl = pl.create_choice_model(**mnl_settings)

# Start the optimization from an all-zero vector. It needs one entry per
# coefficient in `spec`: 2 intercepts + 3 travel-time + 3 travel-cost
# + 2 headway = 10.
start_values = np.zeros(10)
sm_mnl.fit_mle(start_values)

# Look at the estimation results
sm_mnl.get_statsmodels_summary()

Log-likelihood at zero: -6,964.6630
Initial Log-likelihood: -6,964.6630
Estimation Time for Point Estimation: 0.11 seconds.
Final log-likelihood: -5,533.4622




0,1,2,3
Dep. Variable:,CHOICE,No. Observations:,6768.0
Model:,Multinomial Logit Model,Df Residuals:,6758.0
Method:,MLE,Df Model:,10.0
Date:,"Thu, 11 Mar 2021",Pseudo R-squ.:,0.205
Time:,14:59:10,Pseudo R-bar-squ.:,0.204
AIC:,11086.924,Log-Likelihood:,-5533.462
BIC:,11155.124,LL-Null:,-6964.663

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
intercept_1,0.3955,0.137,2.896,0.004,0.128,0.663
intercept_2,0.2729,0.105,2.599,0.009,0.067,0.479
travel_time_1,-0.0147,0.001,-19.071,0.000,-0.016,-0.013
travel_time_2,-0.0115,0.001,-13.766,0.000,-0.013,-0.010
travel_time_3,-0.0088,0.001,-12.543,0.000,-0.010,-0.007
travel_cost_1,-0.0049,0.001,-5.466,0.000,-0.007,-0.003
travel_cost_2,-0.0021,0.000,-4.890,0.000,-0.003,-0.001
travel_cost_3,-0.0030,0.001,-3.365,0.001,-0.005,-0.001
headway_1,-0.0052,0.001,-5.085,0.000,-0.007,-0.003


### Evaluation metrics

In [9]:
from sklearn.metrics import accuracy_score, f1_score, classification_report

In [10]:
def model_pred(data, model, alt_id_col, obs_id_col, choice_col):
    """Return the chosen and the predicted alternative for each observation.

    Side effects: two columns are written to ``data`` in place --
    ``'predicted'`` (the raw output of ``model.predict``) and
    ``'predicted_choice'`` (1 on the top-scoring row of each observation,
    0 elsewhere).

    Parameters
    ----------
    data : pandas.DataFrame
        Table with one row per (observation, alternative) pair. Its index
        labels must be unique, because the top-scoring rows are marked by
        label.
    model : object
        Anything exposing ``predict(data)`` that returns one score per row of
        ``data`` (presumably the predicted choice probability for pylogit
        models -- TODO confirm).
    alt_id_col : str
        Name of the column holding the alternative id.
    obs_id_col : str
        Name of the column identifying the observation a row belongs to.
    choice_col : str
        Name of the 0/1 column that marks the chosen alternative.

    Returns
    -------
    actual, pred : pandas.Series
        Alternative ids of the chosen rows and of the predicted rows, each in
        the row order of ``data``. The two line up position by position only
        when every observation has exactly one chosen row and the rows of an
        observation are stored contiguously.
    """
    data['predicted'] = model.predict(data)
    # Index label of the highest-scoring row within each observation.
    best_rows = data.groupby([obs_id_col])['predicted'].idxmax()
    data['predicted_choice'] = 0
    data.loc[best_rows.values, 'predicted_choice'] = 1

    # Use the column names passed in as arguments; relying on notebook-level
    # globals made the function break (or read the wrong columns) whenever
    # those globals were missing or named differently.
    actual = data.loc[data[choice_col] == 1, alt_id_col]
    pred = data.loc[data['predicted_choice'] == 1, alt_id_col]
    return actual, pred

def nll():
    """Unimplemented placeholder: does nothing and returns None.

    TODO: presumably intended to compute a negative log-likelihood metric --
    confirm the intent, then implement or remove.
    """

# Column names of the long-format table (alternative id, observation id,
# choice indicator), the same ones given to the model when it was created.
alt_id_column, obs_id_column, choice_column = "mode_id", "custom_id", "CHOICE"

##### MNL model results

In [12]:
# Chosen vs. predicted alternative per observation for the MNL model,
# summarised as per-class precision / recall / F1.
actual, predict = model_pred(
    data=sm_long,
    model=sm_mnl,
    alt_id_col=alt_id_column,
    obs_id_col=obs_id_column,
    choice_col=choice_column,
)
mnl_report = classification_report(actual, predict)
print(mnl_report)

              precision    recall  f1-score   support

           1       0.00      0.00      0.00       908
           2       0.63      0.95      0.76      4090
           3       0.62      0.22      0.33      1770

    accuracy                           0.63      6768
   macro avg       0.42      0.39      0.36      6768
weighted avg       0.55      0.63      0.55      6768



  _warn_prf(average, modifier, msg_start, len(result))


In [13]:
# Overall accuracy and weighted-average F1 of the MNL predictions, shown as a
# (accuracy, f1) tuple.
(
    accuracy_score(actual, predict),
    f1_score(actual, predict, average="weighted"),
)

(0.6334219858156028, 0.5457266773507111)