# Materials for Torch-Choice Paper

In [1]:
from time import time
import numpy as np
import torch
import torch_choice
from tqdm import tqdm
from typing import List
from torch_choice.data import ChoiceDataset, utils
from torch_choice.model import ConditionalLogitModel

## The Car Choice Dataset Example

In [2]:
import pandas as pd
import torch 
import torch_choice
import torch_choice.utils
from torch_choice.utils.easy_data_wrapper import EasyDatasetWrapper
from torch_choice.utils.run_helper import run

## Performance Benchmark
**Copy the following description to the paper**.
We designed a simple performance benchmark based on the transportation choice dataset: we duplicates $K$ copies of the original dataset of 2779 observations and compare time taken by various implementations. We compared the time cost of only the estimation process, since there are ample possibilities for further optimizing the estimation process (e.g., tuning learning rates, early stopping), we could under-estimate performances here. However, we wish to highlight how K 
The metric $\frac{\text{log-likelihood}}{K}$ is used to check that various optimizers converged to the same solution.

In [3]:
! mkdir -p './benchmark_data'

In [30]:
num_copies = 3
df = pd.read_csv('./public_datasets/ModeCanada.csv')
df_list = list()
num_cases = df['case'].max()
for i in range(num_copies):
    df_copy = df.copy()
    df_copy['case'] += num_cases * i
    df_list.append(df_copy)
df = pd.concat(df_list, ignore_index=True)

In [31]:
df

Unnamed: 0.1,Unnamed: 0,case,alt,choice,dist,cost,ivt,ovt,freq,income,urban,noalt
0,1,1,train,0,83,28.25,50,66,4,45,0,2
1,2,1,car,1,83,15.77,61,0,0,45,0,2
2,3,2,train,0,83,28.25,50,66,4,25,0,2
3,4,2,car,1,83,15.77,61,0,0,25,0,2
4,5,3,train,0,83,28.25,50,66,4,70,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...
46555,15516,12970,car,1,347,65.93,267,0,0,35,0,3
46556,15517,12971,train,0,323,60.60,193,200,3,45,0,2
46557,15518,12971,car,1,323,61.37,278,0,0,45,0,2
46558,15519,12972,train,0,150,28.50,63,105,1,70,0,2


In [41]:
def duplicate_mode_canada_datasets(num_copies: int):
    df = pd.read_csv('./public_datasets/ModeCanada.csv', index_col=0)
    df_list = list()
    num_cases = df['case'].max()
    for i in range(num_copies):
        df_copy = df.copy()
        df_copy['case'] += num_cases * i
        df_list.append(df_copy)
    df = pd.concat(df_list, ignore_index=True)
    df = df.query('noalt == 4').reset_index(drop=True)
    df.sort_values(by='case', inplace=True)
    item_index = df[df['choice'] == 1].sort_values(by='case')['alt'].reset_index(drop=True)
    item_names = ['air', 'bus', 'car', 'train']
    num_items = 4
    encoder = dict(zip(item_names, range(num_items)))
    item_index = item_index.map(lambda x: encoder[x])
    item_index = torch.LongTensor(item_index)
    price_cost_freq_ovt = utils.pivot3d(df, dim0='case', dim1='alt',
                                        values=['cost', 'freq', 'ovt'])
    price_ivt = utils.pivot3d(df, dim0='case', dim1='alt', values='ivt')
    session_income = df.groupby('case')['income'].first()
    session_income = torch.Tensor(session_income.values).view(-1, 1)

    # session_index = torch.arange(len(session_income))
    
    dataset = ChoiceDataset(
        # item_index=item_index.repeat(num_copies),
        item_index=item_index,
        session_index=torch.arange(len(session_income)),
        price_cost_freq_ovt=price_cost_freq_ovt,
        session_income=session_income,
        price_ivt=price_ivt)
    return df, dataset.clone()

In [42]:
df, dataset = duplicate_mode_canada_datasets(10)

In [45]:
performance_records = list()
# k_range = [1, 5, 10, 100, 1_000, 10_000]
k_range = [50, 500, 5_000]
dataset_at_k = dict()
for k in tqdm(k_range):
    df, dataset = duplicate_mode_canada_datasets(k)
    dataset_at_k[k] = dataset.clone()
    # df.to_csv(f'./benchmark_data/mode_canada_{k}.csv', index=False)

100%|██████████| 3/3 [03:08<00:00, 62.69s/it]


In [10]:
for k in k_range:
    # run for 3 times.
    for _ in range(3):
        dataset = duplicate_mode_canada_datasets(k)
        model = model = ConditionalLogitModel(
            formula='(price_cost_freq_ovt|constant) + (session_income|item) + (price_ivt|item-full) + (intercept|item)',
            dataset=dataset,
            num_items=4)
        # only time the model estimation.
        start_time = time()
        model, ll = run(model, dataset, batch_size=-1, learning_rate=0.03 , num_epochs=1000, compute_std=True, return_final_training_log_likelihood=True)
        end_time = time()
        performance_records.append(dict(k=k, time=end_time - start_time, ll=ll))

ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (price_cost_freq_ovt): Coefficient(variation=constant, num_items=4, num_users=None, num_params=3, 3 trainable parameters in total, device=cpu).
    (session_income): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, device=cpu).
    (price_ivt): Coefficient(variation=item-full, num_items=4, num_users=None, num_params=1, 4 trainable parameters in total, device=cpu).
    (intercept): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total, device=cpu).
  )
)
Conditional logistic discrete choice model, expects input features:

X[price_cost_freq_ovt] with 3 parameters, with constant level variation.
X[session_income] with 1 parameters, with item level variation.
X[price_ivt] with 1 parameters, with item-full level variation.
X[intercept] with 1 parameters, with item level variation.
device=cpu
ChoiceDataset(label=[], item_index=[27790], 

In [46]:
performance_records

[]

### Simulation Setup (Depreciated)
Example utility construction:
$$
U_{uis} = \lambda_i + \beta_u^\top \bm{x}_\text{item}^{(i)} + \gamma^\top \bm{x}_\text{session}^{(s)} + \epsilon
$$

In [None]:
num_items = 10
num_users = 5
num_sessions = 1
N = 50000
# generate a random user ui.
user_index = torch.LongTensor(np.random.choice(num_users, size=N))
# construct users.
# item_index = torch.LongTensor(np.random.choice(num_items, size=N))
# construct sessions.
session_index = torch.LongTensor(np.random.choice(num_sessions, size=N))
rational_prob = 0.99

: 

In [140]:
user_obs = torch.rand(num_users, 3)
item_obs = torch.rand(num_items, 3)
session_obs = torch.rand(num_sessions, 3)
# price_obs = torch.randn(num_sessions, num_items, 12)
item_index = torch.LongTensor(np.random.choice(num_items, size=N))
user_index = torch.LongTensor(np.random.choice(num_users, size=N))
session_index = torch.LongTensor(np.random.choice(num_sessions, size=N))
item_availability = torch.ones(num_sessions, num_items).bool()

In [141]:
lambda_item = torch.rand(num_items) * 10
lambda_item[0] = 0
beta_user = torch.rand(num_users, item_obs.shape[-1]) * 10
gamma_constant = torch.rand(session_obs.shape[-1]) * 10

In [142]:
item_index = list()

for n in tqdm(range(N)):
    u, s = user_index[n], session_index[n]
    if np.random.rand() <= rational_prob:
        # (num_items, 1)
        # utilities = lambda_item + (beta_user[u].view(1, -1).expand(num_items, -1) * item_obs).sum(dim=-1) + (gamma_constant.view(1, -1).expand(num_items, -1) * session_obs[s].view(1, -1).expand(num_items, -1)).sum(dim=-1)
        utilities = lambda_item
        p = torch.nn.functional.softmax(utilities, dim=0).detach().numpy()
        item_index.append(np.random.choice(num_items, p=p))
        # item_index.append(int(np.argmax(utilities)))
    else:
        item_index.append(int(np.random.choice(num_items, size=1)))
item_index = torch.LongTensor(item_index)

100%|██████████| 50000/50000 [00:00<00:00, 64854.88it/s]


In [155]:
df = pd.DataFrame(data={'item_index': item_index, 'user_index': user_index, 'session_index': session_index})
df.to_csv('./benchmark_data/choice_data.csv', index=False)

In [143]:
dataset = ChoiceDataset(item_index=item_index, user_index=user_index, session_index=session_index, item_obs=item_obs, user_obs=user_obs, session_obs=session_obs, num_items=num_items)
dataset

ChoiceDataset(label=[], item_index=[50000], provided_num_items=[1], user_index=[50000], session_index=[50000], item_availability=[], item_obs=[10, 3], user_obs=[5, 3], session_obs=[1, 3], device=cpu)

In [151]:
# model = ConditionalLogitModel(formula='(1|item-full) + (item_obs|user) + (session_obs|constant)', dataset=dataset, num_items=num_items, num_users=num_users)
model = ConditionalLogitModel(formula='(1|item)', dataset=dataset, num_items=num_items, num_users=num_users)
print(np.mean((model(dataset).argmax(dim=1) == item_index).float().numpy()))
model

0.00134


ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (intercept): Coefficient(variation=item, num_items=10, num_users=5, num_params=1, 9 trainable parameters in total, device=cpu).
  )
)
Conditional logistic discrete choice model, expects input features:

X[intercept] with 1 parameters, with item level variation.
device=cpu

In [154]:
model = run(model, dataset, batch_size=-1, learning_rate=0.3 , num_epochs=1000, compute_std=False)
np.mean((model(dataset).argmax(dim=1) == item_index).float().numpy())

ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (intercept): Coefficient(variation=item, num_items=10, num_users=5, num_params=1, 9 trainable parameters in total, device=cpu).
  )
)
Conditional logistic discrete choice model, expects input features:

X[intercept] with 1 parameters, with item level variation.
device=cpu
ChoiceDataset(label=[], item_index=[50000], provided_num_items=[1], user_index=[50000], session_index=[50000], item_availability=[], item_obs=[10, 3], user_obs=[5, 3], session_obs=[1, 3], device=cpu)
Epoch 100: Log-likelihood=-81310.9375
Epoch 200: Log-likelihood=-81305.359375
Epoch 300: Log-likelihood=-81305.2421875
Epoch 400: Log-likelihood=-81305.2265625
Epoch 500: Log-likelihood=-81305.234375
Epoch 600: Log-likelihood=-81305.2265625
Epoch 700: Log-likelihood=-81308.171875
Epoch 800: Log-likelihood=-81305.2265625
Epoch 900: Log-likelihood=-81339.671875
Epoch 1000: Log-likelihood=-81305.234375


0.40572

# Verify Parameter

In [146]:
beta_user

tensor([[4.7301, 6.1908, 7.1181],
        [3.4178, 8.8197, 4.9632],
        [4.9116, 4.5997, 0.7213],
        [8.3757, 0.5155, 4.8729],
        [8.5097, 6.4045, 2.3534]])

In [147]:
model.coef_dict['item_obs'].coef

KeyError: 'item_obs'

In [148]:
lambda_item

tensor([0.0000, 6.0579, 0.8783, 7.2887, 6.3035, 1.2217, 4.7925, 6.6317, 4.6998,
        5.0522])

In [149]:
model.coef_dict['intercept'].coef.squeeze()

tensor([ 3.1896, -1.4328,  4.4139,  3.4304, -1.5620,  1.9348,  3.7506,  1.8674,
         2.2129], grad_fn=<SqueezeBackward0>)

In [137]:
gamma_constant

tensor([1.1350, 8.2167, 7.8468])

In [138]:
model.coef_dict['session_obs'].coef.squeeze()

tensor([-1.0625, -2.9416, -0.7111], grad_fn=<SqueezeBackward0>)

In [153]:
np.mean((model(dataset).argmax(dim=1) == item_index).float().numpy())

0.40572