# Introduction
This is a very short tutorial on using the `torch-choice` package.

Author: Tianyu Du

Date: Jun. 22, 2022

Update: Jun. 22, 2022

In [1]:
# Module-level author metadata for this notebook.
__author__ = 'Tianyu Du'

In [2]:
import pandas as pd
import torch
import torch_choice
from torch_choice.model import ConditionalLogitModel
from torch_choice.utils.easy_data_wrapper import EasyDatasetWrapperV2
from torch_choice.utils.run_helper import run

In [3]:
# Read the ModeCanada data, keep only rows with noalt == 4 (presumably the
# cases in which all four alternatives are offered -- confirm against the
# dataset documentation), and order the rows by case id.
# The row labels are renumbered *before* the sort, so after sorting the index
# follows the pre-sort row order rather than running 0..n-1.
df_raw = (
    pd.read_csv('./public_datasets/ModeCanada.csv')
    .query('noalt == 4')
    .reset_index(drop=True)
    .sort_values(by='case')
)
df_raw.head()

Unnamed: 0.1,Unnamed: 0,case,alt,choice,dist,cost,ivt,ovt,freq,income,urban,noalt
0,304,109,train,0,377,58.25,215,74,4,45,0,4
1,305,109,air,1,377,142.8,56,85,9,45,0,4
2,306,109,bus,0,377,27.52,301,63,8,45,0,4
3,307,109,car,0,377,71.63,262,0,0,45,0,4
4,308,110,train,0,377,58.25,215,74,4,70,0,4


In [4]:
# Re-label the raw columns with the names used in the rest of the notebook.
# The 'case' column is reused three times: as the purchase record id, the
# user index and the session index.
df = df_raw.assign(
    purchase=df_raw['choice'],
    purchase_record=df_raw['case'],
    item_name=df_raw['alt'],
    user_index=df_raw['case'],
    session_index=df_raw['case'],
)[[
    # identifier / outcome columns first ...
    'purchase', 'purchase_record', 'item_name', 'user_index', 'session_index',
    # ... followed by the covariates carried over unchanged from the raw frame.
    'dist', 'cost', 'ivt', 'ovt', 'freq', 'income', 'urban',
]]
df.head()

Unnamed: 0,purchase,purchase_record,item_name,user_index,session_index,dist,cost,ivt,ovt,freq,income,urban
0,0,109,train,109,109,377,58.25,215,74,4,45,0
1,1,109,air,109,109,377,142.8,56,85,9,45,0
2,0,109,bus,109,109,377,27.52,301,63,8,45,0
3,0,109,car,109,109,377,71.63,262,0,0,45,0
4,0,110,train,110,110,377,58.25,215,74,4,70,0


# Tell the `EasyDatasetWrapperV2` about observables

1. Price observables: `cost`, `freq`, `ovt`, `ivt`.
2. Session observables: `income`.

In [5]:
# Preview the first rows of the assembled frame before naming its observable columns.
df.head()

Unnamed: 0,purchase,purchase_record,item_name,user_index,session_index,dist,cost,ivt,ovt,freq,income,urban
0,0,109,train,109,109,377,58.25,215,74,4,45,0
1,1,109,air,109,109,377,142.8,56,85,9,45,0
2,0,109,bus,109,109,377,27.52,301,63,8,45,0
3,0,109,car,109,109,377,71.63,262,0,0,45,0
4,0,110,train,110,110,377,58.25,215,74,4,70,0


In [6]:
# Run on the GPU when PyTorch can see a CUDA device; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# Echo the chosen device, e.g. `device=cpu`.
print(f'{device=:}')

device=cpu


In [7]:
# Wrap the long-format frame `df` into a torch-choice dataset.
data = EasyDatasetWrapperV2(
    main_data=df,
    # The *_column arguments are the names of the columns of `df` that hold
    # the corresponding identifiers / outcome.
    purchase_record_column='purchase_record',
    choice_column='purchase',
    item_name_column='item_name',
    user_index_column='user_index',
    session_index_column='session_index',
    # Observables are given here as column names of `df`; per the original
    # author's note they can alternatively be supplied as separate data frames.
    session_observable_columns=['income'],
    price_observable_columns=['cost', 'freq', 'ovt', 'ivt'],
    # Device chosen earlier in the notebook ('cuda' or 'cpu').
    device=device,
)

Creating choice dataset from stata format data-frames...


In [8]:
# Print the wrapper's summary of the dataset it built (item space, number of cases, data preview).
data.summary()

* Space of 4 items:
              0    1    2      3
item name  air  bus  car  train
* Number of purchase records/cases: 2779.
* Preview of main data frame:
       purchase  purchase_record item_name  user_index  session_index  dist  \
0             0              109     train         109            109   377   
1             1              109       air         109            109   377   
2             0              109       bus         109            109   377   
3             0              109       car         109            109   377   
4             0              110     train         110            110   377   
...         ...              ...       ...         ...            ...   ...   
11109         0             4320       air        4320           4320   342   
11113         0             4321       air        4321           4321   388   
11114         0             4321       bus        4321           4321   388   
11112         0             4321     train        432

In [10]:
# Conditional logit specification.
#
# `coef_variation_dict` states how each coefficient varies; per the model
# summary printed by this notebook, with num_items=4:
#   'constant'  -> 1 trainable parameter (shared by all items),
#   'item'      -> 3 trainable parameters,
#   'item-full' -> 4 trainable parameters.
# `num_param_dict` gives the number of parameters per coefficient; every
# observable used here is a single column, hence 1 everywhere.
#
# The dataset was created on `device`, so the model is moved to the same
# device; otherwise its parameters stay on the CPU and would not match the
# data when CUDA is available. On a CPU-only machine `.to(device)` is a no-op.
model = ConditionalLogitModel(
    coef_variation_dict={
        'price_cost': 'constant',
        'price_freq': 'constant',
        'price_ovt': 'constant',
        'session_income': 'item',
        'price_ivt': 'item-full',
        'intercept': 'item',
    },
    num_param_dict={
        'price_cost': 1,
        'price_freq': 1,
        'price_ovt': 1,
        'session_income': 1,
        'price_ivt': 1,
        'intercept': 1,
    },
    num_items=4,
).to(device)

In [13]:
# Fit the model on the wrapped choice dataset.
# batch_size=-1 presumably requests full-batch training -- confirm against
# the documentation of `run`.
run(
    model,
    data.choice_dataset,
    batch_size=-1,
    learning_rate=0.01,
    num_epochs=5000,
)

ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (price_cost): Coefficient(variation=constant, num_items=4, num_users=None, num_params=1, 1 trainable parameters in total).
    (price_freq): Coefficient(variation=constant, num_items=4, num_users=None, num_params=1, 1 trainable parameters in total).
    (price_ovt): Coefficient(variation=constant, num_items=4, num_users=None, num_params=1, 1 trainable parameters in total).
    (session_income): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total).
    (price_ivt): Coefficient(variation=item-full, num_items=4, num_users=None, num_params=1, 4 trainable parameters in total).
    (intercept): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total).
  )
)
Conditional logistic discrete choice model, expects input features:

X[price_cost] with 1 parameters, with constant level variation.
X[price_freq] with 1 parameters, with constant level va

ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (price_cost): Coefficient(variation=constant, num_items=4, num_users=None, num_params=1, 1 trainable parameters in total).
    (price_freq): Coefficient(variation=constant, num_items=4, num_users=None, num_params=1, 1 trainable parameters in total).
    (price_ovt): Coefficient(variation=constant, num_items=4, num_users=None, num_params=1, 1 trainable parameters in total).
    (session_income): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total).
    (price_ivt): Coefficient(variation=item-full, num_items=4, num_users=None, num_params=1, 4 trainable parameters in total).
    (intercept): Coefficient(variation=item, num_items=4, num_users=None, num_params=1, 3 trainable parameters in total).
  )
)
Conditional logistic discrete choice model, expects input features:

X[price_cost] with 1 parameters, with constant level variation.
X[price_freq] with 1 parameters, with constant level va