# Example of the Conditional Logit Model on ModeCanada Dataset
This tutorial is adapted from the [Random utility model and the multinomial logit model](https://cran.r-project.org/web/packages/mlogit/vignettes/c3.rum.html) vignette in the documentation of the `mlogit` package in R.

In [1]:
import argparse
import os

import pandas as pd
import torch
import torch.nn.functional as F

from deepchoice.data import ChoiceDataset, utils
from deepchoice.model import ConditionalLogitModel
from deepchoice.utils.std import parameter_std

In [2]:
# Report the GPU PyTorch will use, when one is visible; stay silent otherwise.
gpu_visible = torch.cuda.is_available()
if gpu_visible:
    gpu_name = torch.cuda.get_device_name()
    print(f'CUDA device used: {gpu_name}')

CUDA device used: NVIDIA GeForce RTX 3090


In [3]:
# Run configuration shared by the cells below, collected in one namespace so
# every tunable value lives in a single place.
run_config = {
    'data_path': './',
    'batch_size': -1,  # full-batch.
    'shuffle': False,
    'num_epochs': 5000,
    # Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
}
args = argparse.Namespace(**run_config)

## Load Dataset

In [4]:
# Read the ModeCanada table from the directory configured in `args.data_path`
# (previously the path was hard-coded and the configured value was ignored).
df = pd.read_csv(os.path.join(args.data_path, 'ModeCanada.csv'), index_col=0)
# Keep only rows whose `noalt` equals 4, the same subset used by the R
# reference (`subset = noalt == 4`), then order the rows by case.
# `kind='stable'` preserves the file's row order within each case; the default
# sort algorithm does not guarantee that.
df = (
    df.query('noalt == 4')
    .reset_index(drop=True)
    .sort_values(by='case', kind='stable')
)
df.head()

Unnamed: 0,case,alt,choice,dist,cost,ivt,ovt,freq,income,urban,noalt
0,109,train,0,377,58.25,215,74,4,45,0,4
1,109,air,1,377,142.8,56,85,9,45,0,4
2,109,bus,0,377,27.52,301,63,8,45,0,4
3,109,car,0,377,71.63,262,0,0,45,0,4
4,110,train,0,377,58.25,215,74,4,70,0,4


In [5]:
# Keep the rows flagged as chosen (`choice == 1`), order them by case, and
# retain only the name of the chosen alternative with a fresh 0..n-1 index.
chosen_rows = df.loc[df['choice'] == 1].sort_values(by='case')
label = chosen_rows['alt'].reset_index(drop=True)

In [6]:
# Integer-encode the chosen alternatives: 'air' -> 0, 'bus' -> 1, 'car' -> 2,
# 'train' -> 3.
# NOTE(review): presumably this order must agree with the item order produced
# by `utils.pivot3d` for the `alt` dimension; confirm against its implementation.
item_names = ['air', 'bus', 'car', 'train']
num_items = len(item_names)
encoder = {name: code for code, name in enumerate(item_names)}
# Indexing the dict (rather than passing it to `map`) raises KeyError on an
# unknown alternative instead of silently producing NaN.
label = torch.LongTensor(label.map(lambda alt: encoder[alt]))

In [7]:
# Observables that differ across both cases and alternatives: cost, freq and
# ovt are pivoted together (the dataset repr reports this as [2779, 4, 3]).
price_cost_freq_ovt = utils.pivot3d(
    df, dim0='case', dim1='alt', values=['cost', 'freq', 'ovt'])
# Income: take the first recorded value within each case and reshape it into
# a single column.
income_by_case = df.groupby('case')['income'].first()
session_income = torch.Tensor(income_by_case.values).view(-1, 1)
# In-vehicle time is pivoted on its own because it gets its own coefficient
# specification in the model.
price_ivt = utils.pivot3d(df, dim0='case', dim1='alt', values='ivt')

In [8]:
# Bundle the labels and observables into one dataset and move it to the
# configured device.
dataset = ChoiceDataset(
    label=label,
    price_cost_freq_ovt=price_cost_freq_ovt,
    session_income=session_income,
    price_ivt=price_ivt,
).to(args.device)
# Take the batching options from `args` so the configuration cell is the
# single source of truth; previously `shuffle=True` was hard-coded here,
# contradicting `args.shuffle=False`.
data_loader = utils.create_data_loader(dataset,
                                       batch_size=args.batch_size,
                                       shuffle=args.shuffle)

In [9]:
# Display the dataset's repr to check the size of each field and the device.
dataset

ChoiceDataset(label=[2779], user_index=[], session_index=[2779], item_availability=[], observable_prefix=[5], price_cost_freq_ovt=[2779, 4, 3], session_income=[2779, 1], price_ivt=[2779, 4, 1], device=cuda:0)

## Create the Model

In [10]:
# Specification matching the R formula `choice ~ cost + freq + ovt | income | ivt`.
# Judging by the fitted parameter shapes printed later in this notebook:
#   'constant'  -> one coefficient shared by all items (cost, freq, ovt),
#   'item'      -> three coefficients for the four items (income, intercept),
#   'item-full' -> one coefficient for each of the four items (ivt).
# NOTE(review): 'item' presumably drops a reference item, like reflevel='air'
# in R; confirm in the ConditionalLogitModel documentation.
model = ConditionalLogitModel(coef_variation_dict={'price_cost_freq_ovt': 'constant',
                                                   'session_income': 'item',
                                                   'price_ivt': 'item-full',
                                                   'intercept': 'item'},
                              # Number of observables in each group.
                              num_param_dict={'price_cost_freq_ovt': 3,
                                              'session_income': 1,
                                              'price_ivt': 1,
                                              'intercept': 1},
                              # Reuse the item count from the label-encoding
                              # cell instead of repeating the literal 4.
                              num_items=num_items)

model = model.to(args.device)

## Train the Model

In [11]:
# Sanity check before training: run the model on the whole dataset and display
# the resulting tensor (the output below shows four columns per row).
model(dataset)

tensor([[ 42.2821,  -7.0396,  35.9776,   0.8567],
        [ 42.2821,  -7.0396,  35.9776,   0.8567],
        [ 42.2821,  -7.0396,  35.9776,   0.8567],
        ...,
        [ 32.4732, -23.5966,  32.6375, -25.7322],
        [ 32.4732, -23.5966,  32.6375, -25.7322],
        [ 40.9294, -27.8248,  37.0273, -29.7092]], device='cuda:0',
       grad_fn=<AddBackward0>)

In [12]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Log roughly ten times per run. Clamping to at least 1 avoids a
# ZeroDivisionError in the modulo below when num_epochs < 10.
log_every = max(1, args.num_epochs // 10)
for e in range(args.num_epochs):
    # Log-likelihood accumulated over this epoch's batches, evaluated with the
    # parameters as they were before each optimizer step.
    ll = 0.0
    for batch in data_loader:
        y_pred = model(batch)
        loss = F.cross_entropy(y_pred, batch.label, reduction='mean')
        # The mean loss times the batch length gives the summed negative
        # log-likelihood, assuming len(batch) is the number of observations
        # in the batch (TODO confirm against ChoiceDataset.__len__).
        ll -= loss.detach().item() * len(batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if e % log_every == 0:
        print(f'{e=:}: {ll=:}')

e=0: ll=-45907.07237434387
e=500: ll=-1881.2439885139465
e=1000: ll=-1878.6955969929695
e=1500: ll=-1877.7501164078712
e=2000: ll=-1879.0268796086311
e=2500: ll=-1876.5692595243454
e=3000: ll=-1876.5064814686775
e=3500: ll=-1886.098438322544
e=4000: ll=-1881.4160898327827
e=4500: ll=-1882.2823938727379


## Parameter Estimation

### R Output
```r
install.packages("mlogit")
library("mlogit")
data("ModeCanada", package = "mlogit")
MC <- dfidx(ModeCanada, subset = noalt == 4)
ml.MC1 <- mlogit(choice ~ cost + freq + ovt | income | ivt, MC, reflevel='air')

summary(ml.MC1)
```
```
Call:
mlogit(formula = choice ~ cost + freq + ovt | income | ivt, data = MC, 
    reflevel = "air", method = "nr")

Frequencies of alternatives:choice
      air     train       bus       car 
0.3738755 0.1666067 0.0035984 0.4559194 

nr method
9 iterations, 0h:0m:0s 
g'(-H)^-1g = 0.00014 
successive function values within tolerance limits 

Coefficients :
                    Estimate Std. Error  z-value  Pr(>|z|)    
(Intercept):train  3.2741952  0.6244152   5.2436 1.575e-07 ***
(Intercept):bus    0.6983381  1.2802466   0.5455 0.5854292    
(Intercept):car    1.8441129  0.7085089   2.6028 0.0092464 ** 
cost              -0.0333389  0.0070955  -4.6986 2.620e-06 ***
freq               0.0925297  0.0050976  18.1517 < 2.2e-16 ***
ovt               -0.0430036  0.0032247 -13.3356 < 2.2e-16 ***
income:train      -0.0381466  0.0040831  -9.3426 < 2.2e-16 ***
income:bus        -0.0890867  0.0183471  -4.8556 1.200e-06 ***
income:car        -0.0279930  0.0038726  -7.2286 4.881e-13 ***
ivt:air            0.0595097  0.0100727   5.9080 3.463e-09 ***
ivt:train         -0.0014504  0.0011875  -1.2214 0.2219430    
ivt:bus           -0.0067835  0.0044334  -1.5301 0.1259938    
ivt:car           -0.0064603  0.0018985  -3.4029 0.0006668 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Log-Likelihood: -1874.3
McFadden R^2:  0.35443 
Likelihood ratio test : chisq = 2058.1 (p.value = < 2.22e-16)
```

In [13]:
# Print every fitted parameter tensor, detached from the autograd graph and
# copied to the CPU so the printed values carry no device or grad annotations.
for param_name, param in model.named_parameters():
    cpu_values = param.detach().cpu()
    print(f'{param_name} = {cpu_values}')

coef_dict.price_cost_freq_ovt.coef = tensor([-0.0387,  0.0942, -0.0427])
coef_dict.session_income.coef = tensor([[-0.0872],
        [-0.0266],
        [-0.0366]])
coef_dict.price_ivt.coef = tensor([[ 0.0597],
        [-0.0067],
        [-0.0058],
        [-0.0012]])
coef_dict.intercept.coef = tensor([[-0.0642],
        [ 1.0867],
        [ 2.6332]])


## Standard Error Estimation

In [14]:
# Select every observation as one batch. `torch.arange` builds the int64 index
# directly; the previous float32 round-trip (`torch.Tensor(range(n)).long()`)
# cannot represent all integers above 2**24 exactly.
batch = dataset[torch.arange(len(dataset))]


def nll_loss(model):
    """Return the summed cross-entropy of `model` on the full-dataset `batch`.

    Args:
        model: a callable mapping the batch to per-item scores, as the
            trained model in this notebook does.

    Returns:
        A scalar tensor: cross-entropy between the model output and
        `batch.label`, summed over observations (`reduction='sum'`), i.e. the
        negative log-likelihood of the observed choices.
    """
    y_pred = model(batch)
    return F.cross_entropy(y_pred, batch.label, reduction='sum')


# NOTE(review): `parameter_std` presumably derives the standard errors from the
# curvature of this loss at the fitted parameters; confirm in deepchoice.utils.std.
std = parameter_std(model, nll_loss)
print(std)

{'coef_dict.price_cost_freq_ovt.coef': tensor([0.0071, 0.0051, 0.0032]), 'coef_dict.session_income.coef': tensor([[0.0177],
        [0.0039],
        [0.0040]]), 'coef_dict.price_ivt.coef': tensor([[0.0101],
        [0.0043],
        [0.0019],
        [0.0012]]), 'coef_dict.intercept.coef': tensor([[1.2426],
        [0.6981],
        [0.6107]])}
