In [1]:
# import required dependencies.
from tqdm import tqdm
import numpy as np
import pandas as pd
import torch
from torch_choice.data import ChoiceDataset, JointDataset

  Referenced from: <B3E58761-2785-34C6-A89B-F37110C88A05> /Users/tianyudu/miniforge3/envs/dev/lib/python3.9/site-packages/torchvision/image.so
  Expected in:     <AE6DCE26-A528-35ED-BB3D-88890D27E6B9> /Users/tianyudu/miniforge3/envs/dev/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib
  warn(f"Failed to load image Python extension: {e}")


In [2]:
# let's get a helper
def print_dict_shape(d):
    for key, val in d.items():
        if torch.is_tensor(val):
            print(f'dict.{key}.shape={val.shape}')

## Creating  `ChoiceDataset` Object

In [3]:
# Feel free to modify it as you want.
num_users = 10
num_items = 4
num_sessions = 500

length_of_dataset = 10000

In [4]:
# create observables/features, the number of parameters are arbitrarily chosen.
# generate 128 features for each user, e.g., race, gender.
user_obs = torch.randn(num_users, 128)
# generate 64 features for each user, e.g., quality.
item_obs = torch.randn(num_items, 64)
# generate 32 features for each user item pair.
useritem_obs = torch.randn(num_users, num_items, 32)
# generate 10 features for each session, e.g., weekday indicator.
session_obs = torch.randn(num_sessions, 10)
# generate 12 features for each session user pair, e.g., the budget of that user at the shopping day.
price_obs = torch.randn(num_sessions, num_items, 12)
# generate 16 useritemsession observable.
usersessionitem_obs = torch.randn(num_users, num_sessions, num_items, 16)

We then generate random observable tensors for users, items, sessions and price observables, the size of observables of each type (i.e., the last dimension in the shape) is arbitrarily chosen.

**Notes on Encodings** Since we will be using PyTorch to train our model, we represent their identities with *consecutive* integer values instead of the raw human-readable names of items (e.g., Dell 24-inch LCD monitor). Similarly, you would need to encode user indices and session indices as well.
Raw item names can be encoded easily with [sklearn.preprocessing.LabelEncoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html) (The [sklearn.preprocessing.OrdinalEncoder](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html) works as well).

In [5]:
item_index = torch.LongTensor(np.random.choice(num_items, size=length_of_dataset))
user_index = torch.LongTensor(np.random.choice(num_users, size=length_of_dataset))
session_index = torch.LongTensor(np.random.choice(num_sessions, size=length_of_dataset))

# assume all items are available in all sessions.
item_availability = torch.ones(num_sessions, num_items).bool()

### Step 2: Initialize the `ChoiceDataset`.
You can construct a choice set using the following code, which manage all information for you.

In [6]:
dataset = ChoiceDataset(
    # pre-specified keywords of __init__
    item_index=item_index,  # required.
    # optional:
    user_index=user_index,
    session_index=session_index,
    item_availability=item_availability,
    user_obs=user_obs,
    item_obs=item_obs,
    useritem_obs=useritem_obs,
    session_obs=session_obs,
    price_obs=price_obs,
    usersessionitem_obs=usersessionitem_obs)

## What you can do with the `ChoiceDataset`?

### `print(dataset)` and `dataset.__str__`
The command `print(dataset)` will provide a quick overview of shapes of tensors included in the object as well as where the dataset is located (i.e., host memory or GPU memory).

In [7]:
print(dataset)

ChoiceDataset(label=[], item_index=[10000], user_index=[10000], session_index=[10000], item_availability=[500, 4], user_obs=[10, 128], item_obs=[4, 64], useritem_obs=[10, 4, 32], session_obs=[500, 10], price_obs=[500, 4, 12], usersessionitem_obs=[10, 500, 4, 16], device=cpu)


# Confirm Correctness using the `x_dict` Method
The port between `ChoiceDataset` and model classes is a method called `x_dict` in the dataset. The `x_dict` method compiles all information in the dataset into a dictionary, with observable names as keys, and all values of this dictionary are tensors with shape `(length_of_dataset, num_items, *. The `*` denotes the number of corresponding observables and could be different for different observable tensors.

In [8]:
print_dict_shape(dataset.x_dict)

dict.user_obs.shape=torch.Size([10000, 4, 128])
dict.item_obs.shape=torch.Size([10000, 4, 64])
dict.useritem_obs.shape=torch.Size([10000, 4, 32])
dict.session_obs.shape=torch.Size([10000, 4, 10])
dict.price_obs.shape=torch.Size([10000, 4, 12])
dict.usersessionitem_obs.shape=torch.Size([10000, 4, 16])


In [9]:
# check the `x_dict` indeed have what we are expecting, test 10 random records.
for n in tqdm(np.random.choice(length_of_dataset, 10)):
    u = user_index[n]
    s = session_index[n]

    for i in range(num_items):
        for k in range(128):
            expected = user_obs[u, k]
            got = dataset.x_dict["user_obs"][n, i, k]
            assert expected == got

        for k in range(64):
            expected = item_obs[i, k]
            got = dataset.x_dict["item_obs"][n, i, k]
            assert expected == got

        for k in range(32):
            expected = useritem_obs[u, i, k]
            got = dataset.x_dict["useritem_obs"][n, i, k]
            assert expected == got

        for k in range(10):
            expected = session_obs[s, k]
            got = dataset.x_dict["session_obs"][n, i, k]
            assert expected == got


        for k in range(12):
            expected = price_obs[s, i, k]
            got = dataset.x_dict["price_obs"][n, i, k]
            assert expected == got

        for k in range(16):
            expected = usersessionitem_obs[u, s, i, k]
            got = dataset.x_dict["usersessionitem_obs"][n, i, k]
            assert expected == got

print("all good!")

100%|██████████| 10/10 [00:17<00:00,  1.74s/it]

all good!





# Build a CLM Model

In [10]:
from time import time
from torch_choice.model import ConditionalLogitModel
from torch_choice import run

In [11]:
model = ConditionalLogitModel(
    formula='(user_obs|item) + (item_obs|user) + (useritem_obs|constant) + (session_obs|item) + (price_obs|constant) + (usersessionitem_obs|constant) + (intercept|item)',
    dataset=dataset,
    num_users=num_users,
    num_items=num_items)

In [12]:
model

ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (user_obs[item]): Coefficient(variation=item, num_items=4, num_users=10, num_params=128, 384 trainable parameters in total, device=cpu).
    (item_obs[user]): Coefficient(variation=user, num_items=4, num_users=10, num_params=64, 640 trainable parameters in total, device=cpu).
    (useritem_obs[constant]): Coefficient(variation=constant, num_items=4, num_users=10, num_params=32, 32 trainable parameters in total, device=cpu).
    (session_obs[item]): Coefficient(variation=item, num_items=4, num_users=10, num_params=10, 30 trainable parameters in total, device=cpu).
    (price_obs[constant]): Coefficient(variation=constant, num_items=4, num_users=10, num_params=12, 12 trainable parameters in total, device=cpu).
    (usersessionitem_obs[constant]): Coefficient(variation=constant, num_items=4, num_users=10, num_params=16, 16 trainable parameters in total, device=cpu).
    (intercept[item]): Coefficient(variation=item, num_items=4, num_us

In [13]:
start_time = time()
run(model, dataset, num_epochs=10, learning_rate=0.01, model_optimizer="Adam", batch_size=-1)
print('Time taken:', time() - start_time)

GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


ConditionalLogitModel(
  (coef_dict): ModuleDict(
    (user_obs[item]): Coefficient(variation=item, num_items=4, num_users=10, num_params=128, 384 trainable parameters in total, device=cpu).
    (item_obs[user]): Coefficient(variation=user, num_items=4, num_users=10, num_params=64, 640 trainable parameters in total, device=cpu).
    (useritem_obs[constant]): Coefficient(variation=constant, num_items=4, num_users=10, num_params=32, 32 trainable parameters in total, device=cpu).
    (session_obs[item]): Coefficient(variation=item, num_items=4, num_users=10, num_params=10, 30 trainable parameters in total, device=cpu).
    (price_obs[constant]): Coefficient(variation=constant, num_items=4, num_users=10, num_params=12, 12 trainable parameters in total, device=cpu).
    (usersessionitem_obs[constant]): Coefficient(variation=constant, num_items=4, num_users=10, num_params=16, 16 trainable parameters in total, device=cpu).
    (intercept[item]): Coefficient(variation=item, num_items=4, num_us

  rank_zero_warn(
  rank_zero_warn(

  | Name  | Type                  | Params
------------------------------------------------
0 | model | ConditionalLogitModel | 1.1 K 
------------------------------------------------
1.1 K     Trainable params
0         Non-trainable params
1.1 K     Total params
0.004     Total estimated model params size (MB)
  rank_zero_warn(


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 44.40it/s, loss=7.1e+04, v_num=44] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 1/1 [00:00<00:00, 39.46it/s, loss=7.1e+04, v_num=44]
Time taken for training: 0.9634578227996826
Skip testing, no test dataset is provided.
Log-likelihood: [Training] -56230.62890625, [Validation] N/A, [Test] N/A

| Coefficient                      |   Estimation |   Std. Err. |       z-value |      Pr(>|z|) | Significance   |
|:---------------------------------|-------------:|------------:|--------------:|--------------:|:---------------|
| user_obs[item]_0                 | -0.0889893   | 139.667     |  -0.000637154 |   0.999492    |                |
| user_obs[item]_1                 | -0.0341076   | nan         | nan           | nan           |                |
| user_obs[item]_2                 | -0.0982556   | nan         | nan           | nan           |                |
| user_obs[item]_3                 |  0.0670713   |  18.9429    |   0.00354071  |   0.997175    |                |
| user_obs[item]_4                 |  0.0836471   | nan         | nan 