## II. Bakery Dataset

In [None]:
import os
import sys
import matplotlib.pyplot as plt

# Expose no CUDA devices to this process. This is set before choice_learn is
# imported below; presumably the intent is to force CPU execution — TODO confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Put the parent directory on the import path — presumably so a local checkout
# of choice_learn is picked up (TODO confirm against the repository layout).
sys.path.append("../")
# Show the working directory that the relative "../" entry is resolved against.
print(os.getcwd())

from choice_learn.basket_models import AleaCarta

In [None]:
from choice_learn.datasets.bakery import load_bakery


# Load the bakery trips and show the dataset's own printed representation.
data = load_bakery()
print(data)

# Headline counts, read straight from the dataset's attributes.
summary_line = (
    f"\nThe TripDataset 'data' contains {data.n_items} distinct items "
    f"that appear in {data.n_samples} transactions carried out at "
    f"{data.n_stores} point(s) of sale with {data.n_assortments} different assortments."
)
print(summary_line)

### II.1 AleaCarta model

In [None]:
# Training hyper-parameters; later cells pass these names to the model constructors.
latent_sizes = dict(preferences=6, price=3, season=3)
n_negative_samples = 2
optimizer, lr = "adam", 0.01
# Short schedule for this run; 1000 epochs is the alternative that was tried here.
epochs, batch_size = 15, 32



In [None]:
# Constructor arguments for AleaCarta: the item intercept, price effects and
# seasonal effects are all switched off for this run.
alea_carta_kwargs = dict(
    item_intercept=False,
    price_effects=False,
    seasonal_effects=False,
    latent_sizes=latent_sizes,
    n_negative_samples=n_negative_samples,
    optimizer=optimizer,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)
model = AleaCarta(**alea_carta_kwargs)

# Size the model from the dataset's item and store counts.
model.instantiate(n_items=data.n_items, n_stores=data.n_stores)

In [None]:
# Fit on the first 2000 trips only; no validation dataset is passed here.
train_trips = data[:2000]
history = model.fit(trip_dataset=train_trips, verbose=1)

In [None]:
# Evaluate on the first 1000 trips (a subset of the trips used for fitting above).
alea_carta_eval = model.evaluate(trip_dataset=data[:1000])
print(alea_carta_eval)


In [None]:
# Plot the per-epoch losses stored in `history` by the fit call.
fig, ax = plt.subplots()
ax.plot(history["train_loss"], label="Training loss")
# Draw the validation curve only when values were recorded, so the legend
# never lists an empty line.
if len(history["val_loss"]) > 0:
    ax.plot(history["val_loss"], label="Validation loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss")
# Each curve carries a label: legend() on unlabelled lines draws nothing and warns.
ax.legend()
plt.show()

In [None]:
import numpy as np

# Build every (item, single-item basket) pair so that one utility call can
# score all of them.
n_item = 50  # width of the availability mask; NOTE(review): presumably equals data.n_items — confirm
n_comparaison = 50  # ids 0..n_comparaison-1 serve both as target items and as baskets

# Flat pair index k = item * n_comparaison + basket:
# item_batch is 0,0,...,1,1,... while basket_batch cycles through 0..n_comparaison-1.
item_batch = np.repeat(np.arange(n_comparaison), n_comparaison)
print('item_batch', item_batch)
basket_batch = np.tile(np.arange(n_comparaison), n_comparaison).reshape(-1, 1)
print('basket_batch', basket_batch)

# Stored under its own name: rebinding `batch_size` here would overwrite the
# training batch size defined with the other hyper-parameters, and that name
# is passed to a model constructor further down.
utility_batch_size = n_comparaison**2
print('batch_size', utility_batch_size)

available_item_batch = np.ones((utility_batch_size, n_item), dtype=int)
# NOTE(review): shape is (1, utility_batch_size), unlike the other per-pair
# arrays — confirm the shape the model expects for prices.
price_batch = np.ones((1, utility_batch_size))

store_batch = np.zeros(utility_batch_size, dtype=int)
week_batch = np.zeros(utility_batch_size, dtype=int)



In [None]:
# Score all pairs in one call. The result keeps the name `compute_batch_utility`
# (same as the method) because the following cell reads it under that name.
utility_inputs = dict(
    item_batch=item_batch,
    basket_batch=basket_batch,
    price_batch=price_batch,
    available_item_batch=available_item_batch,
    store_batch=store_batch,
    week_batch=week_batch,
)
compute_batch_utility = model.compute_batch_utility(**utility_inputs)


In [None]:
import seaborn as sns
import math
# Turn the flat utility vector into a (basket, item) probability table and plot it.
#
# matrix[i, j] takes flat entry i + j * n_comparaison. With item_batch and
# basket_batch as built above, that is the utility of item j given basket {i}.
matrix = (
    np.asarray(compute_batch_utility, dtype=float)
    .reshape(n_comparaison, n_comparaison)
    .T.copy()
)

# An item cannot be proposed for the basket that already consists of it:
# a utility of -inf gives those diagonal entries probability 0.
np.fill_diagonal(matrix, -np.inf)

# Row-wise softmax over items. Subtracting the row maximum first leaves the
# result unchanged mathematically but avoids overflow in exp().
matrix -= matrix.max(axis=1, keepdims=True)
np.exp(matrix, out=matrix)
matrix /= matrix.sum(axis=1, keepdims=True)

# Sanity check: every row should sum to 1.
print(np.sum(matrix, axis=1))

fig, ax = plt.subplots(figsize=(30, 30))
sns.heatmap(matrix, annot=True, fmt='.2f', ax=ax)
ax.set_title(' P(Item| B = {basket} )')
# Rows (y axis) are the conditioning basket, columns (x axis) the candidate
# item: each row is a distribution over items.
ax.set_ylabel('basket')
ax.set_xlabel('Item')
plt.show()

### II.2 (Test pour voir si la méthode `predict` fonctionne)

### II.3 Test des fonctions du notebook basic attention et affichage de la matrice de probabilités initiale

In [None]:
import tensorflow as tf
def visualise_tripdataset_trips(dataset, n_items):
    """
    Compute row-normalised item co-occurrence frequencies over a dataset's trips.

    Despite its name, this function draws nothing: it returns the matrix and
    leaves plotting to the caller.

    Parameters
    ----------
    dataset : TripDataset
        The dataset containing trips. Only `dataset.trips` and each trip's
        `purchases` are read.
    n_items : int
        Number of unique items. Purchased item ids are used directly as row
        and column indices of an (n_items, n_items) array.

    Returns
    -------
    np.ndarray
        Array of shape (n_items, n_items). Entry [i, j] is the number of times
        items i and j (i != j) appear in the same basket, divided by the total
        of row i, i.e. the empirical P(j | i). Each row with at least one
        co-occurrence sums to 1; rows without any stay all-zero.
    """
    distribution_matrix = np.zeros((n_items, n_items))
    for trip in dataset.trips:
        basket = trip.purchases
        # Count ordered pairs of distinct item ids within the same basket.
        for i in basket:
            for j in basket:
                if i != j:
                    distribution_matrix[i, j] += 1

    row_sums = distribution_matrix.sum(axis=1, keepdims=True)
    # Normalise each row by its total; `where` skips empty rows so they remain
    # zero instead of becoming NaN.
    return np.divide(
        distribution_matrix,
        row_sums,
        out=np.zeros_like(distribution_matrix),
        where=row_sums != 0,
    )


In [None]:
# Empirical co-occurrence table computed from the raw trips. The helper
# normalises each row, so row i is a distribution over the other items j.
initial_cond_proba = visualise_tripdataset_trips(data, n_item)

fig, ax = plt.subplots(figsize=(30, 30))
sns.heatmap(initial_cond_proba, annot=True, fmt='.2f', ax=ax)
ax.set_title(' P(Item| B = {basket} )')
# Rows (y axis) are the conditioning item/basket, columns (x axis) the
# co-purchased item.
ax.set_ylabel('basket')
ax.set_xlabel('Item')
plt.show()


### II.4 Basic-Attention Model

In [None]:
from choice_learn.basket_models.basic_attention_model import AttentionBasedContextEmbedding

# Shorter schedule for the attention model; this rebinds the notebook-level `epochs`.
epochs = 6

# NOTE(review): the remaining values come from names defined in earlier cells —
# confirm `batch_size` still holds the intended training value when this runs.
attention_kwargs = dict(
    n_negative_samples=n_negative_samples,
    optimizer=optimizer,
    lr=lr,
    epochs=epochs,
    batch_size=batch_size,
)
model2 = AttentionBasedContextEmbedding(**attention_kwargs)

# Only the item count is passed to instantiate this model.
model2.instantiate(n_items=data.n_items)



In [None]:
# Fit on the first 66000 trips and validate on the following 1000.
history2 = model2.fit(
    trip_dataset=data[:66000],
    val_dataset=data[66000:67000],
    verbose=1,
)

# Plot the per-epoch losses stored in `history2`.
fig, ax = plt.subplots()
ax.plot(history2["train_loss"], label="Training loss")
# Draw the validation curve only when values were recorded.
if len(history2["val_loss"]) > 0:
    ax.plot(history2["val_loss"], label="Validation loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss")
# Each curve carries a label: legend() on unlabelled lines draws nothing and warns.
ax.legend()
plt.show()

In [None]:
# Evaluate on the first 1000 trips (a subset of the trips used for fitting above).
attention_eval = model2.evaluate(trip_dataset=data[:1000])
print(attention_eval)

In [None]:
import numpy as np

# Build every (item, single-item basket) pair so that one utility call can
# score all of them.
n_item = 50  # width of the availability mask; NOTE(review): presumably equals data.n_items — confirm
n_comparaison = 50  # ids 0..n_comparaison-1 serve both as target items and as baskets

# Flat pair index k = item * n_comparaison + basket:
# item_batch is 0,0,...,1,1,... while basket_batch cycles through 0..n_comparaison-1.
item_batch = np.repeat(np.arange(n_comparaison), n_comparaison)
print('item_batch', item_batch)
basket_batch = np.tile(np.arange(n_comparaison), n_comparaison).reshape(-1, 1)
print('basket_batch', basket_batch)

# Stored under its own name so the training hyper-parameter `batch_size`
# is not overwritten by the size of this scoring batch.
utility_batch_size = n_comparaison**2
print('batch_size', utility_batch_size)

available_item_batch = np.ones((utility_batch_size, n_item), dtype=int)
# NOTE(review): shape is (1, utility_batch_size), unlike the other per-pair
# arrays — confirm the shape the model expects for prices.
price_batch = np.ones((1, utility_batch_size))

store_batch = np.zeros(utility_batch_size, dtype=int)
week_batch = np.zeros(utility_batch_size, dtype=int)


In [None]:
# Score all pairs with the attention model in one call. The result keeps the
# name `compute_batch_utility` because the following cell reads it under that name.
utility_inputs = dict(
    item_batch=item_batch,
    basket_batch=basket_batch,
    price_batch=price_batch,
    available_item_batch=available_item_batch,
    store_batch=store_batch,
    week_batch=week_batch,
)
compute_batch_utility = model2.compute_batch_utility(**utility_inputs)


In [None]:
import seaborn as sns
import math
# Turn the flat utility vector into a (basket, item) probability table and plot it.
#
# matrix[i, j] takes flat entry i + j * n_comparaison. With item_batch and
# basket_batch as built above, that is the utility of item j given basket {i}.
matrix = (
    np.asarray(compute_batch_utility, dtype=float)
    .reshape(n_comparaison, n_comparaison)
    .T.copy()
)

# An item cannot be proposed for the basket that already consists of it:
# a utility of -inf gives those diagonal entries probability 0.
np.fill_diagonal(matrix, -np.inf)

# Row-wise softmax over items. Subtracting the row maximum first leaves the
# result unchanged mathematically but avoids overflow in exp().
matrix -= matrix.max(axis=1, keepdims=True)
np.exp(matrix, out=matrix)
matrix /= matrix.sum(axis=1, keepdims=True)

# Sanity check: every row should sum to 1.
print(np.sum(matrix, axis=1))

fig, ax = plt.subplots(figsize=(30, 30))
sns.heatmap(matrix, annot=True, fmt='.2f', ax=ax)
ax.set_title(' P(Item| B = {basket} )')
# Rows (y axis) are the conditioning basket, columns (x axis) the candidate
# item: each row is a distribution over items.
ax.set_ylabel('basket')
ax.set_xlabel('Item')
plt.show()