In [1]:
# Set PYTORCH_ENABLE_MPS_FALLBACK=1 in the kernel's environment. This cell runs
# before the cell that imports `torch`.
# NOTE(review): per PyTorch's MPS notes this is meant to let operators that are
# not implemented on the MPS backend fall back to the CPU -- confirm for the
# installed PyTorch version.
%env PYTORCH_ENABLE_MPS_FALLBACK=1

env: PYTORCH_ENABLE_MPS_FALLBACK=1


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import torch, torchvision

from torch_choice.data import ChoiceDataset
from bemb.model import LitBEMBFlex



# Using BEMB Model on the MNIST Dataset
Even though BEMB was designed for matrix factorization, it also works on more traditional classification tasks, such as recognizing the hand-written digits in the MNIST dataset.

## Step 1. Download the MNIST Dataset
The `torchvision` module provides an easy way to access the MNIST dataset of hand-written digits.

In [3]:
# Fetch both official MNIST splits into ./data (downloaded on first use).
# The first element is the training split (train=True), the second the test
# split (train=False); no transform is applied to the images.
mnist_train, mnist_test = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=None)
    for is_train in (True, False)
)

In [4]:
# Report the image and label tensor shapes of both MNIST splits, one per line.
print(
    f'{mnist_train.data.shape=:}',
    f'{mnist_train.targets.shape=:}',
    f'{mnist_test.data.shape=:}',
    f'{mnist_test.targets.shape=:}',
    sep='\n',
)

mnist_train.data.shape=torch.Size([60000, 28, 28])
mnist_train.targets.shape=torch.Size([60000])
mnist_test.data.shape=torch.Size([10000, 28, 28])
mnist_test.targets.shape=torch.Size([10000])


In [5]:
# Stack the training and test splits (training rows first) into a single
# feature matrix X and a single label vector y.
# Each 28x28 image is flattened into one row. flatten(start_dim=1) keeps the
# first (sample) dimension as-is, so the number of images is taken from the
# tensors themselves instead of being hard-coded.
X = torch.cat([mnist_train.data.flatten(start_dim=1), mnist_test.data.flatten(start_dim=1)], dim=0)
y = torch.cat([mnist_train.targets, mnist_test.targets], dim=0)

Each image is treated as a separate user, and all of its 784 pixel values are used as that user's features (`user_obs`).

In [6]:
# Row positions inside the stacked tensors: the first 60000 rows hold the
# training images and the following 10000 rows hold the test images.
train_index, test_index = torch.arange(60000 + 10000).split([60000, 10000])

In [7]:
# Sample counts of the training split, the test split, and both combined.
N_train, N_test = 60000, 10000
N = N_train + N_test

In [8]:
# Choose the device the model is moved to. Prefer Apple's MPS backend (the
# original setting of this notebook), then CUDA, and finally fall back to the
# CPU so the notebook also runs on machines without a supported accelerator.
if torch.backends.mps.is_available():
    DEVICE = 'mps'
elif torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [9]:
# Every image is modelled as its own "user": entry i has user index i, carries
# row i of X as its user observables, and its "chosen item" is the label y[i].
dataset = ChoiceDataset(user_index=torch.arange(N), item_index=y, user_obs=X)
# Alternative formulation kept from the original notebook for reference (not executed):
# dataset = ChoiceDataset(user_index=torch.zeros(N), session_index=torch.arange(N), item_index=y, session_obs=X).to(DEVICE)

# There is no separate validation split, so the test split is used twice.
# The list order is [training, validation, testing], matching the labels
# printed by fit_model.
dataset_list = [dataset[split] for split in (train_index, test_index, test_index)]
dataset_list

No `session_index` is provided, assume each choice instance is in its own session.


[ChoiceDataset(label=[], item_index=[60000], user_index=[60000], session_index=[60000], item_availability=[], user_obs=[70000, 784], device=cpu),
 ChoiceDataset(label=[], item_index=[10000], user_index=[10000], session_index=[10000], item_availability=[], user_obs=[70000, 784], device=cpu),
 ChoiceDataset(label=[], item_index=[10000], user_index=[10000], session_index=[10000], item_availability=[], user_obs=[70000, 784], device=cpu)]

## Construct the BEMB Model

In [10]:
# Build the BEMB model wrapper and move it to the selected device.
bemb = LitBEMBFlex(
    # Learning rate; feel free to experiment with other values.
    learning_rate=0.01,
    # Let the model predict item_index (here: the digit label); do not change this.
    pred_item=True,
    # Number of Monte Carlo samples used for estimating the ELBO.
    num_seeds=4,
    # Utility formula: an item-specific intercept plus an item-specific
    # coefficient applied to the user's observables (the pixel values).
    utility_formula='alpha_item + beta_item * user_obs',
    num_users=N,
    num_items=10,  # one item per digit class
    # Left disabled, as in the original notebook:
    # num_user_obs=dataset.user_obs.shape[1],
    # Neither coefficient uses an observable-dependent prior; the printed model
    # summary reports prior=N(0, I) for both.
    obs2prior_dict=dict(alpha_item=False, beta_item=False),
    # Coefficient dimensions: alpha_item is a scalar per item, while beta_item
    # has 28 * 28 = 784 entries, the same as the number of columns of user_obs.
    coef_dim_dict=dict(alpha_item=1, beta_item=28 * 28),
).to(DEVICE)

BEMB: utility formula parsed:
[{'coefficient': ['alpha_item'], 'observable': None},
 {'coefficient': ['beta_item'], 'observable': 'user_obs'}]


## Fit the BEMB Model

In [11]:
# Train the model on [training, validation, testing] datasets and rebind `bemb`
# to whatever fit_model returns. The run also reports validation metrics per
# epoch and test metrics at the end (see the output below).
bemb = bemb.fit_model(
    dataset_list,
    batch_size=len(dataset) // 20,  # one twentieth of the length of the full dataset
    num_epochs=50,
    num_workers=0,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /Users/tianyudu/Development/bemb/tutorials/mnist/lightning_logs

  | Name  | Type     | Params
-----------------------------------
0 | model | BEMBFlex | 15.7 K
-----------------------------------
15.7 K    Trainable params
0         Non-trainable params
15.7 K    Total params
0.063     Total estimated model params size (MB)


Bayesian EMBedding Model with U[user, item, session] = alpha_item + beta_item * user_obs
Total number of parameters: 15700.
With the following coefficients:
ModuleDict(
  (alpha_item): BayesianCoefficient(num_classes=10, dimension=1, prior=N(0, I))
  (beta_item): BayesianCoefficient(num_classes=10, dimension=784, prior=N(0, I))
)
[]
[Training dataset] ChoiceDataset(label=[], item_index=[60000], user_index=[60000], session_index=[60000], item_availability=[], user_obs=[70000, 784], device=cpu)
[Validation dataset] ChoiceDataset(label=[], item_index=[10000], user_index=[10000], session_index=[10000], item_availability=[], user_obs=[70000, 784], device=cpu)
[Testing dataset] ChoiceDataset(label=[], item_index=[10000], user_index=[10000], session_index=[10000], item_availability=[], user_obs=[70000, 784], device=cpu)
                                                                           

  rank_zero_warn(
  rank_zero_warn(


Epoch 49: 100%|██████████| 21/21 [00:03<00:00,  5.81it/s, loss=1.24e+06, v_num=0, val_acc=0.914, val_ll=-2.95]

  rank_zero_warn(



time taken: 187.7522497177124
Testing DataLoader 0: 100%|██████████| 29/29 [00:00<00:00, 121.75it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc                   0.914
         test_ll            -2.949439179197525
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
