# Synthetic Experiments

## Sample synthetic data

In [None]:
import sys
sys.path.append("../")

import matplotlib.pyplot as plt
import numpy as np

import choice_learn
from python.data import SyntheticDataGenerator
from choice_learn.basket_models import Trip, TripDataset

## Sample purchased baskets

In [None]:
# Nest id -> ids of the items grouped in that nest (8 items spread over 4 nests).
items_nests = {
    0: [0, 1, 2],
    1: [3, 4, 5],
    2: [6],
    3: [7],
}

# One row/column per nest; entry [a][b] labels the relation between nest a and
# nest b. Only nests 0 and 1 are tagged "compl" (presumably "complementary");
# the diagonal is left empty.
nests_interactions = [
    ["", "compl", "neutral", "neutral"],
    ["compl", "", "neutral", "neutral"],
    ["neutral", "neutral", "", "neutral"],
    ["neutral", "neutral", "neutral", ""],
]

data_gen = SyntheticDataGenerator(
    items_nest=items_nests,
    nests_interactions=nests_interactions,
)

In [None]:
# Draw 1000 synthetic baskets from `data_gen`; they are reused below to build
# the training trips and the co-occurrence ground truth.
dataset = data_gen.generate_dataset(n_baskets=1000)

In [None]:
# Wrap every generated basket in a Trip: all-zero prices for the 8 items and
# assortment index 0 for every trip.
trip_list = [
    Trip(purchases=basket, prices=np.zeros((8,)), assortment=0)
    for basket in dataset
]

# A single assortment row (index 0) in which all 8 items are flagged with 1.
trip_dataset = TripDataset(trips=trip_list, available_items=np.ones((1, 8)))

## Modelling 

In [None]:
from choice_learn.basket_models import AleaCarta, Shopper

In [None]:
# Training hyperparameters, kept as notebook-level names.
latent_sizes = {"preferences": 6, "price": 3, "season": 3}
n_negative_samples = 2
optimizer = "adam"
lr = 5e-4
epochs = 50
batch_size = 32

training_kwargs = {
    "latent_sizes": latent_sizes,
    "n_negative_samples": n_negative_samples,
    "optimizer": optimizer,
    "lr": lr,
    "epochs": epochs,
    "batch_size": batch_size,
}

# Intercept, price and seasonal terms are all switched off for this experiment.
aleacarta = AleaCarta(
    item_intercept=False,
    price_effects=False,
    seasonal_effects=False,
    **training_kwargs,
)

# NOTE(review): n_stores=2 here while the Shopper model below uses n_stores=1
# on the same baskets -- confirm which value is intended.
aleacarta.instantiate(n_items=8, n_stores=2)

In [None]:
# Train AleaCarta on the synthetic trips; `history` is indexed with
# "train_loss" in the next cell to plot the loss curve.
history = aleacarta.fit(trip_dataset)

In [None]:
# Draw the training-loss curve on the current axes.
ax = plt.gca()
ax.plot(history["train_loss"])
ax.set_xlabel("Epoch")
ax.set_ylabel("Training Loss")
plt.show()

In [None]:
# Training hyperparameters (same values as for AleaCarta).
latent_sizes = {"preferences": 6, "price": 3, "season": 3}
n_negative_samples = 2
optimizer = "adam"
lr = 5e-4
epochs = 50
batch_size = 32

# Shopper trips use 9 item slots: every item id is shifted up by one and an
# extra id 0 is appended at the end of each basket (presumably the checkout
# item expected by Shopper -- confirm against the library documentation).
trip_list = [
    Trip(purchases=np.array(basket + [-1]) + 1, prices=np.zeros((9,)), assortment=0)
    for basket in dataset
]

shopper_trip_dataset = TripDataset(
    trips=trip_list,
    available_items=np.ones((1, 9)),
)

shopper_kwargs = {
    "latent_sizes": latent_sizes,
    "n_negative_samples": n_negative_samples,
    "optimizer": optimizer,
    "lr": lr,
    "epochs": epochs,
    "batch_size": batch_size,
}

# Intercept, price, seasonal and think-ahead terms are all switched off.
shopper = Shopper(
    item_intercept=False,
    price_effects=False,
    seasonal_effects=False,
    think_ahead=False,
    **shopper_kwargs,
)

shopper.instantiate(n_items=9, n_stores=1)
history = shopper.fit(shopper_trip_dataset)

# Training-loss curve of the Shopper model.
ax = plt.gca()
ax.plot(history["train_loss"])
ax.set_xlabel("Epoch")
ax.set_ylabel("Training Loss")
plt.show()

## Results

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np


def _conditional_probabilities(utilities, excluded_item, n_items):
    """Normalise exp(utilities) into probabilities over all items but one.

    The excluded item (the one already in the basket) gets zero weight, so the
    result sums to one over the remaining items.
    """
    keep = np.ones(n_items)
    keep[excluded_item] = 0
    weights = np.exp(utilities) * keep
    return weights / weights.sum()


n_items = 8
item_ids = np.arange(n_items)
# Store 0 and week 0 for every row of the batch.
zero_ids = np.zeros(n_items, dtype=int)
# NOTE(review): the price batch is (8, 6) zeros; both models were built with
# price_effects=False, so it is presumably ignored -- confirm expected shape.
zero_prices = np.zeros((n_items, 6), dtype=int)

alea_results = []
shopper_results = []
for i in range(n_items):
    # Every row of the batch is conditioned on a basket holding only item i.
    basket_batch = np.full((n_items, 1), i)

    r_1 = aleacarta.compute_batch_utility(
        item_batch=item_ids,
        basket_batch=basket_batch,
        store_batch=zero_ids,
        week_batch=zero_ids,
        price_batch=zero_prices,
    )

    # Shopper was trained on item ids shifted by one, hence the +1 offsets.
    # NOTE(review): available_item_batch has shape (9, 1) while the batch has
    # 8 rows -- confirm the shape expected by Shopper.compute_batch_utility.
    r_2 = shopper.compute_batch_utility(
        item_batch=item_ids + 1,
        basket_batch=basket_batch + 1,
        available_item_batch=np.ones((9, 1)),
        store_batch=zero_ids,
        week_batch=zero_ids,
        price_batch=zero_prices,
    )

    alea_results.append(_conditional_probabilities(r_1, i, n_items))
    shopper_results.append(_conditional_probabilities(r_2, i, n_items))

alea_results = np.stack(alea_results)
shopper_results = np.stack(shopper_results)

# Mask everything except the diagonal, so the overlay below paints only the
# (meaningless) "item given itself" cells in white.
off_diagonal = ~np.eye(n_items, dtype=bool)
mask = np.ma.masked_where(off_diagonal, alea_results)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

for ax, model, probabilities in zip(
    axes, (aleacarta, shopper), (alea_results, shopper_results)
):
    ax.set_xticks([], [])
    ax.set_yticks([], [])
    im = ax.imshow(probabilities, cmap="Spectral", alpha=0.99, vmin=0, vmax=1)
    ax.imshow(mask, cmap=mpl.colors.ListedColormap(['white']), alpha=1)
    # Use the class name: formatting the model object itself would print its
    # default repr (module path and memory address) as the title.
    ax.set_title(type(model).__name__)

# Shared colorbar placed to the right of both panels.
cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.69])
fig.colorbar(im, cax=cbar_ax)
fig.suptitle("Estimated Conditional Probabilities")

## Experiments with different assortments

In [None]:
# Empirical co-occurrence matrix: ground_truth[i, j] is the share of baskets
# containing item i (together with at least one other item) that also contain
# item j. Each basket counts at most once per ordered pair.
n_items = 8
ground_truth = np.zeros((n_items, n_items))
for basket in dataset:
    # Deduplicate and ignore ids outside the matrix, as membership tests would.
    present = [item for item in set(basket) if 0 <= item < n_items]
    for i in present:
        for j in present:
            if i != j:
                ground_truth[i, j] += 1

# Normalise each row to sum to one. Rows of items that never co-occur with
# another item are left at zero instead of being divided by zero (NaN).
row_totals = ground_truth.sum(axis=1)
has_pairs = row_totals > 0
ground_truth[has_pairs] /= row_totals[has_pairs, None]

In [None]:
# Display the row-normalised co-occurrence matrix as a heatmap.
plt.imshow(ground_truth)

In [None]:
# The nest-to-nest relations are the same for every store.
nests_interactions = [
    ["", "compl", "neutral", "neutral"],
    ["compl", "", "neutral", "neutral"],
    ["neutral", "neutral", "", "neutral"],
    ["neutral", "neutral", "neutral", ""],
]

# Store 1: items 0 and 3 are not part of any nest.
items_nests_store_1 = {0: [1, 2], 1: [4, 5], 2: [6], 3: [7]}
data_gen_1 = SyntheticDataGenerator(
    items_nest=items_nests_store_1,
    nests_interactions=nests_interactions,
)
dataset_1 = data_gen_1.generate_dataset(n_baskets=4000)

# Store 2: items 1 and 4 are not part of any nest.
items_nests_store_2 = {0: [0, 2], 1: [3, 5], 2: [6], 3: [7]}
data_gen_2 = SyntheticDataGenerator(
    items_nest=items_nests_store_2,
    nests_interactions=nests_interactions,
)
dataset_2 = data_gen_2.generate_dataset(n_baskets=4000)

# Store 3: items 2 and 5 are not part of any nest.
items_nests_store_3 = {0: [0, 1], 1: [3, 4], 2: [6], 3: [7]}
data_gen_3 = SyntheticDataGenerator(
    items_nest=items_nests_store_3,
    nests_interactions=nests_interactions,
)
dataset_3 = data_gen_3.generate_dataset(n_baskets=4000)

In [None]:
# Pool the baskets of the three stores; each trip carries the index of its
# store's row in the availability matrix below (0, 1 or 2).
heterogeneous_trip_list = [
    Trip(purchases=basket, prices=np.zeros((8,)), assortment=assortment_index)
    for assortment_index, store_baskets in enumerate((dataset_1, dataset_2, dataset_3))
    for basket in store_baskets
]

# One availability row per store; a 0 marks an item missing from that store's
# nests.
store_assortments = np.array(
    [
        [0, 1, 1, 0, 1, 1, 1, 1],
        [1, 0, 1, 1, 0, 1, 1, 1],
        [1, 1, 0, 1, 1, 0, 1, 1],
    ]
)

heterogenesous_trip_dataset = TripDataset(
    trips=heterogeneous_trip_list,
    available_items=store_assortments,
)

In [None]:
# Training hyperparameters for the multi-assortment experiment. These
# notebook-level names are read again when the Shopper model is built below.
latent_sizes = {"preferences": 6, "price": 3, "season": 3}
n_negative_samples = 1
optimizer = "adam"
lr = 5e-4
epochs = 50
batch_size = 32

alea_kwargs = {
    "latent_sizes": latent_sizes,
    "n_negative_samples": n_negative_samples,
    "optimizer": optimizer,
    "lr": lr,
    "epochs": epochs,
    "batch_size": batch_size,
}

# Intercept, price and seasonal terms are all switched off.
aleacarta = AleaCarta(
    item_intercept=False,
    price_effects=False,
    seasonal_effects=False,
    **alea_kwargs,
)

# NOTE(review): n_stores=2 here while the Shopper model below uses n_stores=1
# -- confirm which value is intended.
aleacarta.instantiate(n_items=8, n_stores=2)
history = aleacarta.fit(heterogenesous_trip_dataset)

In [None]:
# Training-loss curve of the AleaCarta fit on the multi-assortment dataset.
plt.plot(history["train_loss"])

In [None]:
# Rebuild the pooled trips in the 9-slot Shopper encoding: item ids are shifted
# up by one and an extra id 0 is appended to every basket (presumably the
# checkout item expected by Shopper -- confirm against the library docs).
heterogeneous_trip_list = [
    Trip(
        purchases=np.array(basket + [-1]) + 1,
        prices=np.zeros((9,)),
        assortment=assortment_index,
    )
    for assortment_index, store_baskets in enumerate((dataset_1, dataset_2, dataset_3))
    for basket in store_baskets
]

# Same three store rows as for AleaCarta, with a leading column of ones for
# the extra id 0.
shopper_assortments = np.array(
    [
        [1, 0, 1, 1, 0, 1, 1, 1, 1],
        [1, 1, 0, 1, 1, 0, 1, 1, 1],
        [1, 1, 1, 0, 1, 1, 0, 1, 1],
    ]
)

heterogenesous_trip_dataset = TripDataset(
    trips=heterogeneous_trip_list,
    available_items=shopper_assortments,
)

# The hyperparameters are the notebook-level values set in the AleaCarta cell
# of this section.
shopper = Shopper(
    item_intercept=False,
    price_effects=False,
    seasonal_effects=False,
    think_ahead=False,
    latent_sizes=latent_sizes,
    n_negative_samples=n_negative_samples,
    optimizer=optimizer,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)

shopper.instantiate(n_items=9, n_stores=1)
history = shopper.fit(heterogenesous_trip_dataset)

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np


def _conditional_probabilities(utilities, excluded_item, n_items):
    """Normalise exp(utilities) into probabilities over all items but one.

    The excluded item (the one already in the basket) gets zero weight, so the
    result sums to one over the remaining items.
    """
    keep = np.ones(n_items)
    keep[excluded_item] = 0
    weights = np.exp(utilities) * keep
    return weights / weights.sum()


n_items = 8
item_ids = np.arange(n_items)
# Store 0 and week 0 for every row of the batch.
zero_ids = np.zeros(n_items, dtype=int)
# NOTE(review): the price batch is (8, 6) zeros; both models were built with
# price_effects=False, so it is presumably ignored -- confirm expected shape.
zero_prices = np.zeros((n_items, 6), dtype=int)

alea_results = []
shopper_results = []
for i in range(n_items):
    # Every row of the batch is conditioned on a basket holding only item i.
    basket_batch = np.full((n_items, 1), i)

    r_1 = aleacarta.compute_batch_utility(
        item_batch=item_ids,
        basket_batch=basket_batch,
        store_batch=zero_ids,
        week_batch=zero_ids,
        price_batch=zero_prices,
    )

    # Shopper was trained on item ids shifted by one, hence the +1 offsets.
    # NOTE(review): available_item_batch has shape (9, 1) while the batch has
    # 8 rows -- confirm the shape expected by Shopper.compute_batch_utility.
    r_2 = shopper.compute_batch_utility(
        item_batch=item_ids + 1,
        basket_batch=basket_batch + 1,
        available_item_batch=np.ones((9, 1)),
        store_batch=zero_ids,
        week_batch=zero_ids,
        price_batch=zero_prices,
    )

    alea_results.append(_conditional_probabilities(r_1, i, n_items))
    shopper_results.append(_conditional_probabilities(r_2, i, n_items))

alea_results = np.stack(alea_results)
shopper_results = np.stack(shopper_results)

# Mask everything except the diagonal, so the overlay below paints only the
# (meaningless) "item given itself" cells in white.
off_diagonal = ~np.eye(n_items, dtype=bool)
mask = np.ma.masked_where(off_diagonal, alea_results)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

for ax, model, probabilities in zip(
    axes, (aleacarta, shopper), (alea_results, shopper_results)
):
    ax.set_xticks([], [])
    ax.set_yticks([], [])
    im = ax.imshow(probabilities, cmap="Spectral", alpha=0.99, vmin=0, vmax=1)
    ax.imshow(mask, cmap=mpl.colors.ListedColormap(['white']), alpha=1)
    # Use the class name: formatting the model object itself would print its
    # default repr (module path and memory address) as the title.
    ax.set_title(type(model).__name__)

# Shared colorbar placed to the right of both panels.
cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.69])
fig.colorbar(im, cax=cbar_ax)
fig.suptitle("Estimated Conditional Probabilities")

### Testing on datasets generated from different assortments

Generation of the test dataset:

In [None]:
# Nest-to-nest relations shared by both test generators.
nests_interactions = [
    ["", "compl", "neutral", "neutral"],
    ["compl", "", "neutral", "neutral"],
    ["neutral", "neutral", "", "neutral"],
    ["neutral", "neutral", "neutral", ""],
]

# Store 4: items 1 and 5 are not part of any nest.
items_nests_store_4 = {0: [0, 2], 1: [3, 4], 2: [6], 3: [7]}
data_gen_4 = SyntheticDataGenerator(
    items_nest=items_nests_store_4,
    nests_interactions=nests_interactions,
)
dataset_4 = data_gen_4.generate_dataset(n_baskets=4000)

# Full store: all 8 items are present.
items_nests_store_full = {0: [0, 1, 2], 1: [3, 4, 5], 2: [6], 3: [7]}
data_gen_full = SyntheticDataGenerator(
    items_nest=items_nests_store_full,
    nests_interactions=nests_interactions,
)
dataset_full = data_gen_full.generate_dataset(n_baskets=4000)

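Metric computation:
- Estimate the conditional probability of each held-out item given the rest of its basket
- Compute the metric values (mean negative log-likelihood and mean reciprocal rank)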

In [None]:
# Leave-one-out evaluation of AleaCarta on the store-4 baskets: each item is
# held out in turn and scored given the remaining items of its basket.
store_4_assortment = np.array([1., 0., 1., 1., 1., 0., 1., 1.])

conditional_probabilities = []
rec_ranks = []
for basket in dataset_4:
    for i, target in enumerate(basket):
        sub_basket = np.array(basket[:i] + basket[i + 1:])
        trip = Trip(
            purchases=sub_basket,
            prices=np.zeros((8,)),
            assortment=store_4_assortment.copy(),
        )
        probs = np.asarray(aleacarta.compute_item_likelihood(trip=trip))
        conditional_probabilities.append(probs[target])
        # Rank 1 must go to the most likely item, so rank on -probs. Ranking
        # the ascending order would give the best item the worst rank.
        rank = np.argsort(np.argsort(-probs))[target] + 1
        rec_ranks.append(1 / rank)

In [None]:
# The value printed as "Mean Log-Likelihood" is the mean log-probability of
# the held-out items with its sign flipped (lower is better).
neg_mean_log_prob = np.mean(np.log(conditional_probabilities)) * -1
print("Mean Log-Likelihood:", neg_mean_log_prob)
mean_reciprocal_rank = np.mean(rec_ranks)
print("Mean Reciprocal Rank:", mean_reciprocal_rank)

In [None]:
# Leave-one-out evaluation of Shopper on the store-4 baskets. Shopper uses the
# 9-slot encoding (item ids shifted by one); slot 0 is marked unavailable.
base_asst = np.array([0., 1., 0., 1., 1., 1., 0., 1., 1.])

conditional_probabilities = []
rec_ranks = []
for basket in dataset_4:
    for i, target in enumerate(basket):
        sub_basket = np.array(basket[:i] + basket[i + 1:]) + 1
        # Items already in the basket are removed from the assortment.
        asst = base_asst.copy()
        for k in sub_basket:
            asst[k] = 0.
        trip = Trip(purchases=sub_basket, prices=np.zeros((9,)), assortment=asst)
        probs = np.asarray(shopper.compute_item_likelihood(trip=trip))
        conditional_probabilities.append(probs[target + 1])
        # Rank 1 must go to the most likely item, so rank on -probs. Ranking
        # the ascending order would give the best item the worst rank.
        rank = np.argsort(np.argsort(-probs))[target + 1] + 1
        rec_ranks.append(1 / rank)

In [None]:
# The value printed as "Mean Log-Likelihood" is the mean log-probability of
# the held-out items with its sign flipped (lower is better).
neg_mean_log_prob = np.mean(np.log(conditional_probabilities)) * -1
print("Mean Log-Likelihood:", neg_mean_log_prob)
mean_reciprocal_rank = np.mean(rec_ranks)
print("Mean Reciprocal Rank:", mean_reciprocal_rank)

In [None]:
# Leave-one-out evaluation of AleaCarta on the full-assortment baskets: each
# item is held out in turn and scored given the rest of its basket.
full_assortment = np.array([1., 1., 1., 1., 1., 1., 1., 1.])

conditional_probabilities = []
rec_ranks = []
for basket in dataset_full:
    for i, target in enumerate(basket):
        sub_basket = np.array(basket[:i] + basket[i + 1:])
        trip = Trip(
            purchases=sub_basket,
            prices=np.zeros((8,)),
            assortment=full_assortment.copy(),
        )
        probs = np.asarray(aleacarta.compute_item_likelihood(trip=trip))
        conditional_probabilities.append(probs[target])
        # Rank 1 must go to the most likely item, so rank on -probs. Ranking
        # the ascending order would give the best item the worst rank.
        rank = np.argsort(np.argsort(-probs))[target] + 1
        rec_ranks.append(1 / rank)

In [None]:
# The value printed as "Mean Log-Likelihood" is the mean log-probability of
# the held-out items with its sign flipped (lower is better).
neg_mean_log_prob = np.mean(np.log(conditional_probabilities)) * -1
print("Mean Log-Likelihood:", neg_mean_log_prob)
mean_reciprocal_rank = np.mean(rec_ranks)
print("Mean Reciprocal Rank:", mean_reciprocal_rank)

In [None]:
# Leave-one-out evaluation of Shopper on the full-assortment baskets. Shopper
# uses the 9-slot encoding (item ids shifted by one); slot 0 is marked
# unavailable.
base_asst = np.array([0., 1., 1., 1., 1., 1., 1., 1., 1.])

conditional_probabilities = []
rec_ranks = []
for basket in dataset_full:
    for i, target in enumerate(basket):
        sub_basket = np.array(basket[:i] + basket[i + 1:]) + 1
        # Items already in the basket are removed from the assortment.
        asst = base_asst.copy()
        for k in sub_basket:
            asst[k] = 0.
        trip = Trip(purchases=sub_basket, prices=np.zeros((9,)), assortment=asst)
        probs = np.asarray(shopper.compute_item_likelihood(trip=trip))
        conditional_probabilities.append(probs[target + 1])
        # Rank 1 must go to the most likely item, so rank on -probs. Ranking
        # the ascending order would give the best item the worst rank.
        rank = np.argsort(np.argsort(-probs))[target + 1] + 1
        rec_ranks.append(1 / rank)

In [None]:
# The value printed as "Mean Log-Likelihood" is the mean log-probability of
# the held-out items with its sign flipped (lower is better).
neg_mean_log_prob = np.mean(np.log(conditional_probabilities)) * -1
print("Mean Log-Likelihood:", neg_mean_log_prob)
mean_reciprocal_rank = np.mean(rec_ranks)
print("Mean Reciprocal Rank:", mean_reciprocal_rank)