# Tutorial on Modeling Outside Option
## Conditional Logit Model

In [1]:
import copy
from time import time

import pandas as pd
import torch

from torch_choice import run
from torch_choice.data import ChoiceDataset, utils, example_datasets
from torch_choice.model import ConditionalLogitModel
from torch_choice.model.nested_logit_model import NestedLogitModel

# Choose the device label once: 'cuda' when PyTorch reports a usable CUDA
# device, otherwise 'cpu'. The matching status line is printed afterwards.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    print(f'CUDA device used: {torch.cuda.get_device_name()}')
else:
    print('Running tutorial on CPU.')

Running tutorial on CPU.


In [17]:
# Load the example dataset returned by `load_mode_canada_dataset` and print it
# so its fields and their sizes can be inspected.
dataset = example_datasets.load_mode_canada_dataset()
print(dataset)

No `session_index` is provided, assume each choice instance is in its own session.
ChoiceDataset(num_items=4, num_users=1, num_sessions=2779, label=[], item_index=[2779], user_index=[], session_index=[2779], item_availability=[], itemsession_cost_freq_ovt=[2779, 4, 3], session_income=[2779, 1], itemsession_ivt=[2779, 4, 1], device=cpu)


In [18]:
# Optimizer name forwarded as `model_optimizer` to `run` by the cells that
# reference this constant.
OPTIMIZER = "LBFGS"

### Without the Outside Option (`model_outside_option=False`, by default)

In [19]:
# Baseline fit: no `model_outside_option` argument is passed to the model.
model = ConditionalLogitModel(
    formula=(
        '(itemsession_cost_freq_ovt|constant)'
        ' + (session_income|item)'
        ' + (itemsession_ivt|item-full)'
        ' + (intercept|item)'
    ),
    dataset=dataset,
    num_items=4,
)

# Time the whole `run` call, using the optimizer named by OPTIMIZER.
start_time = time()
run(
    model,
    dataset,
    num_epochs=500,
    learning_rate=0.01,
    model_optimizer=OPTIMIZER,
    batch_size=-1,
)
print('Time taken:', time() - start_time)

ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (itemsession_cost_freq_ovt[constant]): Coefficient(variation=constant, num_items=4, num_users=None, num_params=3, 3 trainable parameters in total, initialization=normal, device=cpu).
    (session_income[item]): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, initialization=normal, device=cpu).
    (itemsession_ivt[item-full]): Coefficient(variation=item-full, num_items=4, num_users=None, num_params=1, 4 trainable parameters in total, initialization=normal, device=cpu).
    (intercept[item]): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, initialization=normal, device=cpu).
  )
)
Conditional logistic discrete choice model, expects input features:

X[itemsession_cost_freq_ovt[constant]] with 3 parameters, with constant level variation.
X[session_income[item]] with 1 parameters, with item level variation.
X[itemsession_ivt[i

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(

  | Name  | Type                  | Params
------------------------------------------------
0 | model | ConditionalLogitModel | 13    
------------------------------------------------
13        Trainable params
0         Non-trainable params
13        Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.


Time taken for training: 11.514221906661987
Skip testing, no test dataset is provided.
Log-likelihood: [Training] -1874.3427734375, [Validation] N/A, [Test] N/A

| Coefficient                           |   Estimation |   Std. Err. |    z-value |    Pr(>|z|) | Significance   |
|:--------------------------------------|-------------:|------------:|-----------:|------------:|:---------------|
| itemsession_cost_freq_ovt[constant]_0 |  -0.0333408  |  0.0070955  |  -4.69886  | 2.61615e-06 | ***            |
| itemsession_cost_freq_ovt[constant]_1 |   0.0925299  |  0.00509756 |  18.1518   | 0           | ***            |
| itemsession_cost_freq_ovt[constant]_2 |  -0.0430029  |  0.00322473 | -13.3354   | 0           | ***            |
| session_income[item]_0                |  -0.0890857  |  0.0183473  |  -4.85553  | 1.20063e-06 | ***            |
| session_income[item]_1                |  -0.0279928  |  0.00387254 |  -7.22854  | 4.88276e-13 | ***            |
| session_income[item]_2         

### With the Outside Option (`model_outside_option=True`; the keyword argument must be set explicitly). No `-1` is assigned in `item_index` here; this is just a consistency check.

In [5]:
# Same specification as the baseline fit, but with `model_outside_option=True`.
model = ConditionalLogitModel(
    formula=(
        '(itemsession_cost_freq_ovt|constant)'
        ' + (session_income|item)'
        ' + (itemsession_ivt|item-full)'
        ' + (intercept|item)'
    ),
    dataset=dataset,
    num_items=4,
    model_outside_option=True,
)

# Time the whole `run` call, using the optimizer named by OPTIMIZER.
start_time = time()
run(
    model,
    dataset,
    num_epochs=500,
    learning_rate=0.01,
    model_optimizer=OPTIMIZER,
    batch_size=-1,
)
print('Time taken:', time() - start_time)

ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (itemsession_cost_freq_ovt[constant]): Coefficient(variation=constant, num_items=4, num_users=None, num_params=3, 3 trainable parameters in total, initialization=normal, device=cpu).
    (session_income[item]): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, initialization=normal, device=cpu).
    (itemsession_ivt[item-full]): Coefficient(variation=item-full, num_items=4, num_users=None, num_params=1, 4 trainable parameters in total, initialization=normal, device=cpu).
    (intercept[item]): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, initialization=normal, device=cpu).
  )
)
Conditional logistic discrete choice model, expects input features:

X[itemsession_cost_freq_ovt[constant]] with 3 parameters, with constant level variation.
X[session_income[item]] with 1 parameters, with item level variation.
X[itemsession_ivt[i

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(

  | Name  | Type                  | Params
------------------------------------------------
0 | model | ConditionalLogitModel | 13    
------------------------------------------------
13        Trainable params
0         Non-trainable params
13        Total params
0.000     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.


Time taken for training: 13.11423945426941
Skip testing, no test dataset is provided.
Log-likelihood: [Training] -1929.06787109375, [Validation] N/A, [Test] N/A

| Coefficient                           |   Estimation |   Std. Err. |    z-value |    Pr(>|z|) | Significance   |
|:--------------------------------------|-------------:|------------:|-----------:|------------:|:---------------|
| itemsession_cost_freq_ovt[constant]_0 |  0.01429     |  0.00519588 |   2.75025  | 0.00595499  | **             |
| itemsession_cost_freq_ovt[constant]_1 |  0.0853419   |  0.00508123 |  16.7955   | 0           | ***            |
| itemsession_cost_freq_ovt[constant]_2 | -0.0421181   |  0.0031688  | -13.2915   | 0           | ***            |
| session_income[item]_0                | -0.0932219   |  0.0184227  |  -5.06016  | 4.18903e-07 | ***            |
| session_income[item]_1                | -0.0313845   |  0.00394436 |  -7.95682  | 1.77636e-15 | ***            |
| session_income[item]_2         

### Set the outside option by marking some entries of `item_index` as `-1` (`model_outside_option=True`; the keyword argument must be set explicitly)

In [20]:
# Work on a deep copy so the shared `dataset` keeps its original `item_index`.
# Mutating `dataset` in place would make every cell that uses it depend on
# whether this cell had already been executed.
dataset_outside = copy.deepcopy(dataset)

# Relabel the first 100 choice instances as outside-option choices (`-1`).
dataset_outside.item_index[:100] = -1

model = ConditionalLogitModel(
    formula=(
        '(itemsession_cost_freq_ovt|constant)'
        ' + (session_income|item)'
        ' + (itemsession_ivt|item-full)'
        ' + (intercept|item)'
    ),
    dataset=dataset_outside,
    num_items=4,
    model_outside_option=True)

start_time = time()
# This cell trains with Adam rather than the notebook-wide OPTIMIZER.
# NOTE(review): the training log-likelihood recorded for this cell (about
# -27111) is far below the earlier fits (about -1874 and -1929), which suggests
# Adam had not converged after 500 epochs at this learning rate -- confirm, and
# consider more epochs, a larger learning rate, or LBFGS.
run(model, dataset_outside, num_epochs=500, learning_rate=0.01, model_optimizer="Adam", batch_size=-1)
print('Time taken:', time() - start_time)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(

  | Name  | Type                  | Params
------------------------------------------------
0 | model | ConditionalLogitModel | 13    
------------------------------------------------
13        Trainable params
0         Non-trainable params
13        Total params
0.000     Total estimated model params size (MB)


ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (itemsession_cost_freq_ovt[constant]): Coefficient(variation=constant, num_items=4, num_users=None, num_params=3, 3 trainable parameters in total, initialization=normal, device=cpu).
    (session_income[item]): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, initialization=normal, device=cpu).
    (itemsession_ivt[item-full]): Coefficient(variation=item-full, num_items=4, num_users=None, num_params=1, 4 trainable parameters in total, initialization=normal, device=cpu).
    (intercept[item]): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, initialization=normal, device=cpu).
  )
)
Conditional logistic discrete choice model, expects input features:

X[itemsession_cost_freq_ovt[constant]] with 3 parameters, with constant level variation.
X[session_income[item]] with 1 parameters, with item level variation.
X[itemsession_ivt[i

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=500` reached.


Time taken for training: 4.497446060180664
Skip testing, no test dataset is provided.
Log-likelihood: [Training] -27110.9609375, [Validation] N/A, [Test] N/A

| Coefficient                           |   Estimation |   Std. Err. |   z-value |    Pr(>|z|) | Significance   |
|:--------------------------------------|-------------:|------------:|----------:|------------:|:---------------|
| itemsession_cost_freq_ovt[constant]_0 |     0.751185 |  0.0512883  |  14.6463  | 0           | ***            |
| itemsession_cost_freq_ovt[constant]_1 |     0.715526 |  0.0510717  |  14.0102  | 0           | ***            |
| itemsession_cost_freq_ovt[constant]_2 |    -0.528915 |  0.0344409  | -15.3572  | 0           | ***            |
| session_income[item]_0                |     0.725119 |  0.0696999  |  10.4034  | 0           | ***            |
| session_income[item]_1                |     1.6348   |  0.107821   |  15.1622  | 0           | ***            |
| session_income[item]_2                |  

### The output has an extra column compared to the outside option model without `-1` in `item_index`

In [23]:
# Display the shape of the tensor the fitted model returns for `dataset`.
model(dataset).shape

torch.Size([2779, 5])

## Nested Logit Model

In [2]:
dataset = example_datasets.load_house_cooling_dataset_v1()

# Nest id -> list of item ids assigned to that nest.
nest_to_item = {
    0: [1, 3, 5, 6],
    1: [0, 2, 4],
}

# Add some outside-option choices: entries 1 and 5 of `item_index` become -1.
dataset.item_index[[1, 5]] = -1

model = NestedLogitModel(
    nest_to_item=nest_to_item,
    nest_formula='',
    item_formula='(price_obs|constant)',
    dataset=dataset,
    shared_lambda=True,
    model_outside_option=True,
)

# Kept as the cell's last expression so the value returned by `run` is displayed.
run(model, dataset, num_epochs=5000, model_optimizer="Adam")

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(

  | Name  | Type             | Params
-------------------------------------------
0 | model | NestedLogitModel | 8     
-------------------------------------------
8         Trainable params
0         Non-trainable params
8         Total params
0.000     Total estimated model params size (MB)


No `session_index` is provided, assume each choice instance is in its own session.
No `session_index` is provided, assume each choice instance is in its own session.
NestedLogitModel(
  (nest_coef_dict): ModuleDict()
  (item_coef_dict): ModuleDict(
    (price_obs[constant]): Coefficient(variation=constant, num_items=7, num_users=None, num_params=7, 7 trainable parameters in total, initialization=normal, device=cpu).
  )
)
[Train dataset] JointDataset with 2 sub-datasets: (
	nest: ChoiceDataset(num_items=7, num_users=1, num_sessions=250, label=[], item_index=[250], user_index=[], session_index=[250], item_availability=[], device=cpu)
	item: ChoiceDataset(num_items=7, num_users=1, num_sessions=250, label=[], item_index=[250], user_index=[], session_index=[250], item_availability=[], price_obs=[250, 7, 7], device=cpu)
)
[Validation dataset] None
[Test dataset] None


  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=5000` reached.


Time taken for training: 45.74995470046997
Skip testing, no test dataset is provided.
Log-likelihood: [Training] -335.226806640625, [Validation] N/A, [Test] N/A

| Coefficient                |   Estimation |   Std. Err. |    z-value |    Pr(>|z|) | Significance   |
|:---------------------------|-------------:|------------:|-----------:|------------:|:---------------|
| lambda_weight_0            |   4.37304    |   1.86999   |  2.33853   | 0.0193596   | *              |
| item_price_obs[constant]_0 |   0.00795997 |   0.0420231 |  0.189419  | 0.849764    |                |
| item_price_obs[constant]_1 |  -0.746037   |   0.292652  | -2.54923   | 0.010796    | *              |
| item_price_obs[constant]_2 |  -0.12708    |   0.0818627 | -1.55235   | 0.120578    |                |
| item_price_obs[constant]_3 |  -0.0514844  |   0.825194  | -0.0623907 | 0.950252    |                |
| item_price_obs[constant]_4 |  -0.256044   |   0.125928  | -2.03325   | 0.0420273   | *              |
| item

NestedLogitModel(
  (nest_coef_dict): ModuleDict()
  (item_coef_dict): ModuleDict(
    (price_obs[constant]): Coefficient(variation=constant, num_items=7, num_users=None, num_params=7, 7 trainable parameters in total, initialization=normal, device=cpu).
  )
)

### We have 7 items (indexed 0–6) here, and the model returns log-probabilities for 8 alternatives; the last one corresponds to the outside option.

In [3]:
# Run the forward pass once and reuse the result for both printed lines.
forward_output = model.forward(dataset.datasets)
print(f"model.forward(dataset.datasets).shape={forward_output.shape}")
print(f"model.forward(dataset.datasets)={forward_output}")

model.forward(dataset.datasets).shape=torch.Size([250, 8])
model.forward(dataset.datasets)=tensor([[ -1.6006,  -2.5346,  -2.7618,  ...,  -2.2190,  -2.1256,  -2.0676],
        [ -5.6120,  -1.2068,  -8.7118,  ...,  -1.1401,  -1.0282,  -6.3757],
        [ -6.7152,  -1.5403,  -9.5321,  ...,  -1.0362,  -0.8852,  -6.4030],
        ...,
        [ -9.4013,  -1.3690, -13.5576,  ...,  -1.0305,  -0.9560,  -9.5871],
        [ -5.6782,  -1.4666,  -8.5401,  ...,  -1.0656,  -0.9184,  -5.7905],
        [ -7.8609,  -1.3691, -11.3919,  ...,  -1.1095,  -0.8994,  -7.6504]],
       grad_fn=<AddBackward0>)
