## Introduction to basket modelling with SHOPPER
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/artefactory/choice-learn/blob/main/notebooks/basket_models/shopper_tutorial.ipynb)

We use a synthetic dataset to demonstrate how to use the SHOPPER model [1].

In [None]:
# Install necessary requirements

# If you run this notebook on Google Colab, or in standalone mode, you need to install the required packages.
# Uncomment the following line:

# !pip install choice-learn

# If you run the notebook within the GitHub repository, you need to run the following lines, which can be skipped otherwise:
import os
import sys

# Add the directory two levels up (presumably the repository root) to the import path
# so that the local `choice_learn` package can be imported
sys.path.append("../../")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf

from sklearn.manifold import TSNE

from choice_learn.basket_models.trip_dataset import Trip, TripDataset
from choice_learn.basket_models.shopper import Shopper

In [None]:
# List all physical GPUs detected by TensorFlow
physical_gpus = tf.config.list_physical_devices("GPU")
print(f"Available physical GPUs: {physical_gpus}")
# Select GPUs to use
selected_gpus = []  # Choose the GPUs you want to use ([] = CPU)
# Set the selected GPUs to be visible (an empty list hides every GPU)
tf.config.set_visible_devices(selected_gpus, "GPU")
# Verify the visible GPUs
visible_gpus = tf.config.get_visible_devices("GPU")
print(f"Visible GPUs: {visible_gpus}")

# Limit GPU memory growth
# NOTE(review): the loop goes over every physical GPU, not only the selected ones — confirm this is intended
if physical_gpus:
  try:
    for gpu in physical_gpus:
      # Allocate only as much GPU memory as needed for the runtime allocations
      tf.config.experimental.set_memory_growth(gpu, True)
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized;
    # the error is only reported so that the rest of the notebook can still run
    print(e)

### Dataset

First, we generate a TripDataset with an interaction matrix.

In [None]:
n_items = 9  # Total number of items, including the checkout item (item 0)

In [None]:
def create_interaction_matrix(n: int) -> np.ndarray:
    """Draw a random symmetric interaction matrix of size n x n.

    Every pair of distinct items receives one value sampled from a standard
    normal distribution, stored at both (i, j) and (j, i). The diagonal is zero.
    The sign of a value tells how the two items relate:
    - Complementary pairs (positive values)
    - Substitutable pairs (negative values)

    Parameters
    ----------
    n: int
        Size of the square matrix

    Returns
    -------
    matrix: np.ndarray
        The interaction matrix
    """
    interactions = np.zeros((n, n))

    for row in range(n):
        for col in range(row + 1):
            # One draw per cell of the lower triangle, diagonal included
            value = np.random.normal(loc=0.0, scale=1.0)

            if col == row:
                # Same item: the draw is discarded and the diagonal stays at zero
                continue

            # Mirror the value on both sides of the diagonal
            interactions[row, col] = value
            interactions[col, row] = value

    return interactions

In [None]:
# Ground-truth item-item interactions used below to generate the synthetic baskets
interaction_matrix = create_interaction_matrix(n_items)

print(interaction_matrix)

In [None]:
def plot_interaction_matrix(matrix: np.ndarray, half: bool = False) -> None:
    """Plot the interaction matrix.

    The matrix is displayed with a color map and the value
    of each cell is displayed inside the cell. The first row and the
    first column (checkout item 0) are shaded.

    Parameters
    ----------
    matrix: np.ndarray
        The interaction matrix to plot (2D array)
    half: bool, optional
        If True, only the lower half of the matrix (diagonal included) is displayed
        By default False

    Returns
    -------
    None
        The figure is displayed with plt.show(), nothing is returned
    """
    n_rows, n_cols = matrix.shape

    if half:
        # Mask: elements below the k-th diagonal are set to False, rest to True,
        # so that the strict upper triangle is hidden
        mask = np.triu(np.ones_like(matrix, dtype=bool), k=1)
        matrix = np.ma.array(matrix, mask=mask)

    plt.figure(figsize=(8, 6))
    plt.imshow(matrix, cmap='coolwarm', interpolation='none')
    plt.colorbar(label='Interaction Value')
    plt.title('Interaction Matrix')
    plt.xlabel('Item B')
    plt.ylabel('Item A')
    # imshow draws columns along the x-axis and rows along the y-axis
    plt.xticks(ticks=np.arange(n_cols), labels=np.arange(n_cols))
    plt.yticks(ticks=np.arange(n_rows), labels=np.arange(n_rows))

    # Display the value inside each cell (only the visible cells when half=True)
    for i in range(n_rows):
        for j in range(n_cols):
            if not half or j <= i:
                plt.text(j, i, f'{matrix[i, j]:.2f}', ha='center', va='center', color='black')

    # Shade the cells of the checkout item: first row, then first column
    axes = plt.gca()
    for j in range(n_cols):
        axes.add_patch(plt.Rectangle((j-0.5, -0.5), 1, 1, fill=True, color="black", alpha=0.3))
    for i in range(n_rows):
        axes.add_patch(plt.Rectangle((-0.5, i-0.5), 1, 1, fill=True, color="black", alpha=0.3))

    plt.show()

In [None]:
# Lower triangle only (the matrix is symmetric, so the upper triangle is redundant)
plot_interaction_matrix(interaction_matrix, half=True)  # 0 is the checkout item

In [None]:
# Full matrix, both triangles displayed
plot_interaction_matrix(interaction_matrix, half=False)

In [None]:
def generate_baskets(
    interaction_matrix: np.ndarray,
    assortment: np.ndarray,
    num_baskets: int
) -> list:
    """Generate baskets based on the interaction matrix.

    Each basket starts with a random available item (never the checkout item 0).
    The next item is then drawn with probabilities given by a softmax over the
    interaction values of the item added last, restricted to the available
    items that are not in the basket yet. The basket is complete as soon as
    the checkout item 0 is drawn.

    Size of the baskets: at least 2 (one item + the checkout item). When
    n_items - 4 >= 2, the size is capped at n_items - 4 (including the
    checkout item): the last item is then replaced by the checkout item.

    Parameters
    ----------
    interaction_matrix: np.ndarray
        The interaction matrix, of shape (n_items, n_items)
    assortment: np.ndarray
        The assortment of items available in the store:
        availability vector with 1 for the available items
    num_baskets: int
        The number of baskets to generate

    Returns
    -------
    baskets: list
        List of generated baskets (1D integer arrays ending with 0)

    Raises
    ------
    ValueError
        If the assortment has fewer entries than the interaction matrix has items,
        if no item other than the checkout item is available, or if a basket
        cannot be completed because no candidate item is left.
    """
    n_items = interaction_matrix.shape[0]
    # Don't generate baskets of size > n_items - 4 to enable negative sampling
    # with 2 negative samples while considering different assortments
    max_basket_size = n_items - 4

    # The availability does not depend on the basket: compute it once
    availability = np.asarray(assortment)[:n_items] == 1
    if availability.shape[0] != n_items:
        raise ValueError(
            f"assortment has {availability.shape[0]} entries but the interaction matrix has {n_items} items"
        )
    available_items = np.flatnonzero(availability)
    first_item_candidates = available_items[available_items != 0]

    baskets = []
    for _ in range(num_baskets):
        # Start with a random item (different from the checkout item)
        basket = [np.random.choice(first_item_candidates)]

        # Items that can still be added: available and not in the basket (no duplicate items)
        candidates = availability.copy()
        candidates[basket[0]] = False

        # Complete the basket with a new item until the checkout item is selected
        while True:
            if not candidates.any():
                raise ValueError("No available item left to complete the basket")

            # Get probabilities of the next item given the previous item
            # by applying the softmax function to the interaction values
            # of the row corresponding to the previous item.
            # Masking the excluded items and renormalizing samples from the same
            # distribution as redrawing until a valid item comes out.
            previous_item = basket[-1]
            logits = interaction_matrix[previous_item, candidates]
            # Subtracting the maximum leaves the softmax unchanged and avoids overflows
            weights = np.exp(logits - logits.max())
            probabilities = np.zeros(n_items)
            probabilities[candidates] = weights / weights.sum()

            next_item = np.random.choice(n_items, p=probabilities)
            basket.append(next_item)
            candidates[next_item] = False

            if next_item == 0:
                break

            if len(basket) == max_basket_size:
                # Impose the checkout item as the last item
                basket[-1] = 0
                break

        baskets.append(np.array(basket))

    return baskets


In [None]:
# Availability vectors: 1 = item available, 0 = item not available
a1 = np.ones(n_items)
a1[[3, 6]] = 0  # Items 3 and 6 are missing from assortment 1
a2 = np.ones(n_items)
a2[[2, 4]] = 0  # Items 2 and 4 are missing from assortment 2

assortments = np.array([a1, a2])

# Human-readable lists of the available items, used for display only
readable_assortment_1 = [
    f"Item {i}" for i, availability in enumerate(a1) if availability == 1
]
readable_assortment_2 = [
    f"Item {i}" for i, availability in enumerate(a2) if availability == 1
]

In [None]:
# Show the raw availability matrix, then the same information as lists of item names
print(f"Assortments are encoded as availability matrices indicating the availability (1) or not (0) of the products:\n{assortments=}\n")
print(
    "Here, the variable 'assortments' can be read as:\n",
    f"- Assortment 1 = {readable_assortment_1}\n",
    f"- Assortment 2 = {readable_assortment_2}"
)

In [None]:
num_baskets = 1000

# The baskets are shared equally between the two assortments
baskets_per_assortment = num_baskets // 2

purchases_assortment_1 = generate_baskets(
    interaction_matrix=interaction_matrix,
    assortment=assortments[0],
    num_baskets=baskets_per_assortment,
)
purchases_assortment_2 = generate_baskets(
    interaction_matrix=interaction_matrix,
    assortment=assortments[1],
    num_baskets=baskets_per_assortment,
)

In [None]:
print(f"{len(purchases_assortment_1)=}, {len(purchases_assortment_2)=}\n")

print(f"First baskets from assortment {a1}: {purchases_assortment_1[:10]}")
print(f"First baskets from assortment {a2}: {purchases_assortment_2[:10]}\n")

# Shortest and longest baskets generated with the first assortment
basket_lengths_1 = [len(basket) for basket in purchases_assortment_1]
min_length_purchases_assortment_1 = min(basket_lengths_1)
max_length_purchases_assortment_1 = max(basket_lengths_1)
print(f"Minimum and maximum lengths of arrays in baskets from assortment {a1}: {min_length_purchases_assortment_1} & {max_length_purchases_assortment_1}")

# Shortest and longest baskets generated with the second assortment
basket_lengths_2 = [len(basket) for basket in purchases_assortment_2]
min_length_purchases_assortment_2 = min(basket_lengths_2)
max_length_purchases_assortment_2 = max(basket_lengths_2)
print(f"Minimum and maximum lengths of arrays in baskets from assortment {a2}: {min_length_purchases_assortment_2} & {max_length_purchases_assortment_2}")

In [None]:
# Number of identical trips created from each generated basket
nrows = 1

# Every synthetic trip shares the same customer, week and price vector
customer = 0
week = 0
prices = np.arange(1, n_items + 1) * 1.1

# The trips are collected with flat comprehensions: the Trip objects never go
# through a NumPy array and an empty list of baskets simply gives an empty list.
# `assortment` is 0 for the first assortment and 1 for the second one
# (presumably the row index in `assortments` — confirm against Trip).
# NOTE(review): the trip ids restart from 0 in the second list, so ids are not
# unique across the two lists — confirm that ids are not expected to be unique.
trip_list_assortment_1 = [
    Trip(id=trip_id, purchases=basket, customer=customer, week=week, prices=prices, assortment=0)
    for k, basket in enumerate(purchases_assortment_1)
    for trip_id in range(k * nrows, (k + 1) * nrows)
]

trip_list_assortment_2 = [
    Trip(id=trip_id, purchases=basket, customer=customer, week=week, prices=prices, assortment=1)
    for k, basket in enumerate(purchases_assortment_2)
    for trip_id in range(k * nrows, (k + 1) * nrows)
]

We split the dataset into train and test subsets.

In [None]:
# Share of the trips of each assortment that goes to the train subset
split_ratio = 0.8

# Index of the first validation trip in each list
threshold_1 = int(len(trip_list_assortment_1) * split_ratio)
threshold_2 = int(len(trip_list_assortment_2) * split_ratio)

# Both subsets contain trips from the two assortments
trip_list_train = trip_list_assortment_1[:threshold_1] + trip_list_assortment_2[:threshold_2]
trip_list_val = trip_list_assortment_1[threshold_1:] + trip_list_assortment_2[threshold_2:]

In [None]:
# Wrap the trips into TripDataset objects; both subsets receive the same availability matrix
trip_dataset_train = TripDataset(trips=trip_list_train, assortments=assortments)
trip_dataset_val = TripDataset(trips=trip_list_val, assortments=assortments)

# Read the number of items and customers reported by each dataset
n_items_train, n_customers_train = trip_dataset_train.n_items, trip_dataset_train.n_customers
n_items_val, n_customers_val = trip_dataset_val.n_items, trip_dataset_val.n_customers

print(f"{len(trip_dataset_train)=}, {n_items_train=}, {n_customers_train=}\n")
print(f"{len(trip_dataset_val)=}, {n_items_val=}, {n_customers_val=}")

### Training Shopper models

Now we can fit several SHOPPER models with different utility formulas taking into account different effects (price elasticity, seasonality, etc.). We will use the same learning rate and number of epochs for all models.

In [None]:
# Hyperparameters
latent_sizes = {"preferences": 4, "price": 3, "season": 3}
n_negative_samples = 2
optimizer = "adam"
lr = 1e-3
epochs = 100
batch_size = 256

In [None]:
# 1st model: basic utility formula
shopper_1 = Shopper(
    item_popularity=False,
    price_effects=False,
    seasonal_effects=False,
    think_ahead=False,
    latent_sizes=latent_sizes,
    n_negative_samples=n_negative_samples,
    optimizer=optimizer,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

# 2nd model: price effects
shopper_2 = Shopper(
    item_popularity=False,
    price_effects=True,
    seasonal_effects=False,
    think_ahead=False,
    latent_sizes=latent_sizes,
    n_negative_samples=n_negative_samples,
    optimizer=optimizer,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

# 3rd model: item popularity + price effects + seasonal effects
shopper_3 = Shopper(
    item_popularity=True,
    price_effects=True,
    seasonal_effects=True,
    think_ahead=False,
    latent_sizes=latent_sizes,
    n_negative_samples=n_negative_samples,
    optimizer=optimizer,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

# Feel free to explore other models by changing the hyperparameters!

In [None]:
# Instantiate the models
shopper_1.instantiate(n_items=n_items_train, n_customers=n_customers_train)
shopper_2.instantiate(n_items=n_items_train, n_customers=n_customers_train)
shopper_3.instantiate(n_items=n_items_train, n_customers=n_customers_train)

In [None]:
# Train the models
history_1, history_2, history_3 = shopper_1.fit(trip_dataset=trip_dataset_train), shopper_2.fit(trip_dataset=trip_dataset_train), shopper_3.fit(trip_dataset=trip_dataset_train)

In [None]:
# One training-loss curve per model, all on the same axes
loss_curves = (
    (history_1, "1st model: basic utility formula"),
    (history_2, "2nd model: price effects"),
    (history_3, "3rd model: item popularity + price effects + seasonal effects"),
)
for history, curve_label in loss_curves:
    plt.plot(history["train_loss"], label=curve_label)

plt.xlabel("Epoch")
plt.ylabel("Training Loss")
plt.legend()
plt.title("Training of SHOPPER models")
plt.show()

### Visualize the embeddings

We display item popularity for the models that include this effect in their utility formula.

In [None]:
# Calculate the frequency of each item in the datasets
item_counts_train = np.zeros(n_items)
for basket in purchases_assortment_1 + purchases_assortment_2:
    for item in basket:
        item_counts_train[item] += 1

# Normalize the counts to get frequencies
item_frequencies_train = item_counts_train / item_counts_train.sum()

if shopper_1.item_popularity:
    plt.figure(figsize=(8, 4))

    # Plot ground truth item frequencies
    plt.subplot(1, 2, 1)
    plt.bar(range(len(item_frequencies_train)), item_frequencies_train)
    plt.title('Ground truth item frequencies\n(train dataset)')
    plt.xlabel('Item')
    plt.ylabel('Frequency')
    plt.xticks(ticks=range(len(item_frequencies_train)), labels=range(len(item_frequencies_train)))

    # Plot lambdas for 1st model
    plt.subplot(1, 2, 2)
    plt.bar(range(len(shopper_1.lambda_.numpy())), np.exp(shopper_1.lambda_.numpy()))
    plt.axhline(y=1, color='r', linestyle='--')
    plt.title('Item popularity for 1st model')
    plt.xlabel('Item')
    plt.ylabel('exp(lambda)')
    plt.xticks(ticks=range(len(shopper_1.lambda_.numpy())), labels=range(len(shopper_1.lambda_.numpy())))

    plt.tight_layout()

if shopper_2.item_popularity:
    plt.figure(figsize=(8, 4))

    # Plot ground truth item frequencies
    plt.subplot(1, 2, 1)
    plt.bar(range(len(item_frequencies_train)), item_frequencies_train)
    plt.title('Ground truth item frequencies\n(train dataset)')
    plt.xlabel('Item')
    plt.ylabel('Frequency')
    plt.xticks(ticks=range(len(item_frequencies_train)), labels=range(len(item_frequencies_train)))

    # Plot lambdas for 2nd model
    plt.subplot(1, 2, 2)
    plt.bar(range(len(shopper_2.lambda_.numpy())), np.exp(shopper_2.lambda_.numpy()))
    plt.axhline(y=1, color='r', linestyle='--')
    plt.title('Item popularity for 2nd model')
    plt.xlabel('Item')
    plt.ylabel('exp(lambda)')
    plt.xticks(ticks=range(len(shopper_2.lambda_.numpy())), labels=range(len(shopper_2.lambda_.numpy())))

    plt.tight_layout()

if shopper_3.item_popularity:
    plt.figure(figsize=(8, 4))

    # Plot ground truth item frequencies
    plt.subplot(1, 2, 1)
    plt.bar(range(len(item_frequencies_train)), item_frequencies_train)
    plt.title('Ground truth item frequencies\n(train dataset)')
    plt.xlabel('Item')
    plt.ylabel('Frequency')
    plt.xticks(ticks=range(len(item_frequencies_train)), labels=range(len(item_frequencies_train)))

    # Plot lambdas for 3rd model
    plt.subplot(1, 2, 2)
    plt.bar(range(len(shopper_3.lambda_.numpy())), np.exp(shopper_3.lambda_.numpy()))
    plt.axhline(y=1, color='r', linestyle='--')
    plt.title('Item popularity for 3rd model')
    plt.xlabel('Item')
    plt.ylabel('exp(lambda)')
    plt.xticks(ticks=range(len(shopper_3.lambda_.numpy())), labels=range(len(shopper_3.lambda_.numpy())))

    plt.tight_layout()

plt.show()

We compute t-SNE of alpha embeddings to plot them in 2D.

In [None]:
# Alpha embeddings (for each item)
alpha_embeddings_1 = shopper_1.alpha.numpy()
alpha_embeddings_2 = shopper_2.alpha.numpy()
alpha_embeddings_3 = shopper_3.alpha.numpy()

print(f"{alpha_embeddings_1.shape=} {alpha_embeddings_2.shape=} {alpha_embeddings_3.shape=}")  # Shape: (n_items, latent_size)

In [None]:
# t-SNE (for each item)
perplexity = 5
# early_exaggeration =
# learning_rate =
tsne = TSNE(n_components=2, perplexity=perplexity)

alpha_tsne_1 = tsne.fit_transform(alpha_embeddings_1)
alpha_tsne_2 = tsne.fit_transform(alpha_embeddings_2)
alpha_tsne_3 = tsne.fit_transform(alpha_embeddings_3)

print(f"{alpha_tsne_1.shape=} {alpha_tsne_2.shape=} {alpha_tsne_3.shape=}")  # Shape: (n_items, 2)

In [None]:
plt.figure(figsize=(12, 4))

# One panel per model, from left to right
tsne_projections = (("1st", alpha_tsne_1), ("2nd", alpha_tsne_2), ("3rd", alpha_tsne_3))
for panel, (ordinal, projection) in enumerate(tsne_projections, start=1):
    # Plot t-SNE embeddings for the current model
    plt.subplot(1, 3, panel)
    plt.scatter(projection[:, 0], projection[:, 1], label='t-SNE embeddings')
    # Write the item id next to each point
    for item_id in range(n_items_train):
        plt.annotate(item_id, (projection[item_id, 0], projection[item_id, 1]))
    plt.title(f't-SNE embeddings for {ordinal} model')
    plt.xlabel('Dimension 1')
    plt.ylabel('Dimension 2')
    plt.legend()

plt.tight_layout()
plt.show()

### Inference with SHOPPER models

We evaluate the models on the validation dataset.

In [None]:
# Second positional argument of evaluate
# (presumably the number of basket orderings sampled per trip — confirm against Shopper.evaluate)
n_permutations = 3

# Evaluate each model on the validation dataset
nll_1 = shopper_1.evaluate(trip_dataset_val, n_permutations)
nll_2 = shopper_2.evaluate(trip_dataset_val, n_permutations)
nll_3 = shopper_3.evaluate(trip_dataset_val, n_permutations)

In [None]:
print(
    "Mean negative log-likelihood on the validation set:\n",
    f"- 1st model: {nll_1:.4f}\n",
    f"- 2nd model: {nll_2:.4f}\n",
    f"- 3rd model: {nll_3:.4f}",
)

# The data and the training are random: only state the conclusion
# when the values obtained on this run actually support it
if nll_1 > nll_2 > nll_3:
    print("\nWe can see that the more complex the model, the lower the negative log-likelihood.")
else:
    print("\nOn this run, the negative log-likelihood does not strictly decrease with model complexity.")

We can also compute various utilities and probabilities. We are going to use the 3rd model for this.

In [None]:
# Item utilities
item_batch_inference = np.array([2, 0, 4])  # Items whose utility is computed
basket_inference = np.array([1, 3])  # Items already in the basket
customer_inference = 0
week_inference = 0
price_inference = 5.  # Same price used for every item of the batch
available_items_inference = np.ones(trip_dataset_train.n_items)
available_items_inference[4] = 0  # Consider that item 4 is not available during inference
assortment_inference = np.array(
    [
        item_id for item_id in trip_dataset_train.get_all_items() if available_items_inference[item_id] == 1
    ]
)

# The basket, customer, week, price and availability are repeated for each item of the batch
n_inference_items = len(item_batch_inference)
item_utilities = shopper_3.compute_batch_utility(
    item_batch=item_batch_inference,
    basket_batch=np.tile(basket_inference, (n_inference_items, 1)),
    customer_batch=np.tile(customer_inference, n_inference_items),
    week_batch=np.tile(week_inference, n_inference_items),
    price_batch=np.tile(price_inference, n_inference_items),
    available_item_batch=np.tile(available_items_inference, (n_inference_items, 1)),
)

# The message reports price_inference, the price actually passed to the model above
print(
    f"The customer n°{customer_inference} has already in this basket the items {basket_inference}.\n",
    f"He is shopping during week n°{week_inference} in a shop where the price of each selected item is {price_inference}",
    f"and whose assortment (ie the set of available items) is {assortment_inference}.\n",
    "Under these circumstances, the utility of the selected items are:\n"
)
for i, item_id in enumerate(item_batch_inference):
    if item_id == 0:
        print(f"- Item {item_id} (checkout item): {item_utilities[i]:.4f}")
    else:
        print(f"- Item {item_id}: {item_utilities[i]:.4f}")

In [None]:
# Item likelihoods
item_batch=np.array([2, 0, 4])
item_likelihoods = shopper_3.compute_item_likelihood(
    basket=basket_inference,
    available_items=available_items_inference,  # Consider all items available
    customer=customer_inference,
    week=week_inference,
    prices=prices,
)

print(
    f"The customer n°{customer_inference} has already in this basket the items {basket_inference}.\n",
    f"He is shopping during week n°{week_inference} in a shop where the prices of the items are respectively {prices}",
    f"and whose assortment (ie the set of available items) is {assortment_inference}).\n",
    f"Under these circumstances, the likelihoods that each item will be the next item he is going to add to his basket are:\n"
)
for i, item_id in enumerate(trip_dataset_train.get_all_items()):
    if item_id == 0:
        print(f"- Item {item_id} (checkout item, the customer decides to end his shopping trip): {item_likelihoods[i]:.4f}")
    else:
        print(f"- Item {item_id}: {item_likelihoods[i]:.4f}")
print(f"\nN.B.: The item likelihoods sum to {np.sum(item_likelihoods):.4f}")

In [None]:
# Ordered basket likelihoods
basket_ordered = np.array([1, 3, 0])
basket_ordered_likelihood = shopper_3.compute_ordered_basket_likelihood(
    basket=basket_ordered,
    available_items=available_items_inference,  # Consider all items available
    customer=customer_inference,
    week=week_inference,
    prices=prices,
)

print(f"Likelihood for ordered basket {basket_ordered}: {basket_ordered_likelihood:.4f}")

In [None]:
# Unordered basket likelihoods
n_permutations = 2

basket_unordered = np.array([1, 3, 0])
basket_unordered_likelihood = shopper_3.compute_unordered_basket_likelihood(
    basket=basket_unordered,
    available_items=available_items_inference,  # Consider all items available
    customer=customer_inference,
    week=week_inference,
    prices=prices,
    n_permutations=n_permutations,
)
print(f"Likelihood for unordered basket {basket_unordered}: {basket_unordered_likelihood:.4f} (with {n_permutations} permutations to approximate all possible orders)")

### References
[1] SHOPPER: A Probabilistic Model of Consumer Choice with Substitutes and Complements, Ruiz, F. J. R.; Athey, S.; Blei, D. M. (2019), Annals of Applied Statistics
(URL: https://arxiv.org/abs/1711.03560)