In [None]:
"""
Implementation of an attention-based model for item recommendation.

Wang, Shoujin, Liang Hu, Longbing Cao, Xiaoshui Huang, Defu Lian, and Wei Liu.
"Attention-based transactional context embedding for next-item recommendation."
In Proceedings of the AAAI conference on artificial intelligence, vol. 32, no. 1. 2018.
"""

In [None]:
import os
import sys
import json
from tqdm.notebook import tqdm
import numpy as np
import tensorflow as tf
from pathlib import Path
import matplotlib.pyplot as plt

sys.path.append("./../../")
from choice_learn.basket_models import TripDataset
from choice_learn.basket_models.basic_attention_model import AttentionBasedContextEmbedding
from choice_learn.basket_models.synthetic_dataset import SyntheticDataGenerator

In [None]:
# Parameters
seed = 42  # fixed so that Restart & Run All produces repeatable numbers
lr = 0.05
epochs = 40
n_baskets = 700
embedding_dim = 4
n_negative_samples = 3
# One row per assortment, one column per item. Later cells treat a 0 in
# column i as "item i is not available".
assortments_matrix = np.array([[1,1,1,1,1,1,1,1]])

n_items = assortments_matrix.shape[1]

# Seed the global NumPy and TensorFlow random generators before any data is
# generated or any model is trained.
# NOTE(review): this only makes the notebook reproducible if choice_learn draws
# from these global generators - confirm against the library.
np.random.seed(seed)
tf.random.set_seed(seed)


In [None]:
# Synthetic basket generator over 8 items grouped into 4 nests.
# Nests 0 and 1 are declared "compl" with each other; every other pair of
# distinct nests is declared "neutral" (the diagonal is left empty).
nest_members = {
    0: [0, 1, 2],
    1: [3, 4, 5],
    2: [6],
    3: [7],
}
nest_relations = [
    ["", "compl", "neutral", "neutral"],
    ["compl", "", "neutral", "neutral"],
    ["neutral", "neutral", "", "neutral"],
    ["neutral", "neutral", "neutral", ""],
]

data_gen = SyntheticDataGenerator(
    items_nest=nest_members,
    nests_interactions=nest_relations,
    proba_complementary_items=0.7,
    proba_neutral_items=0.3,
    noise_proba=0.15,
)

In [None]:
# Visualisation functions

# Function to describe the distribution of items in a synthetic dataset
def visualise_tripdataset_trips(dataset,n_items):
    """Plot a row-normalised item co-occurrence heatmap for a trip dataset.

    For every trip in ``dataset.trips``, each ordered pair of distinct item
    indices ``(i, j)`` found in ``trip.purchases`` adds one to ``M[i, j]``.
    Each row of ``M`` is then divided by its own sum. Rows belonging to items
    that never co-occur with another item are left at zero.

    Parameters
    ----------
    dataset : object
        Must expose ``trips``; each trip must expose ``purchases``, an
        iterable of integer item indices in ``[0, n_items)``.
    n_items : int
        Number of items, i.e. the side length of the heatmap.

    Returns
    -------
    None
        The figure is shown with matplotlib; nothing is returned.
    """
    M = np.zeros((n_items, n_items))
    for trip in dataset.trips:
        basket = trip.purchases
        for i in basket:
            for j in basket:
                if i != j:
                    M[i, j] += 1

    # Divide only the rows that have a non-zero total. This replaces a bare
    # `except: pass` that could silently leave raw counts in M (which is then
    # drawn on a 0..1 colour scale) and avoids 0/0 warnings and NaNs.
    row_totals = M.sum(axis=1, keepdims=True)
    M = np.divide(M, row_totals, out=np.zeros_like(M), where=row_totals > 0)

    plt.figure(figsize=(4, 3))
    plt.imshow(M, vmin=0, vmax=1, interpolation='nearest', cmap="coolwarm")
    plt.colorbar(label='P(i|j)')
    plt.title('Conditional Probability Heatmap P(i|j)')
    plt.xlabel('j')
    plt.ylabel('i')
    plt.show()

def get_model_representation(model, hist, n_items, test_dataset = None):
    """Plot a heatmap of the model's predictions next to its training loss.

    Two modes, selected by ``test_dataset``:

    * ``test_dataset is None``: the model is queried once per item with the
      single-item context ``[i]``. Row ``i`` of the heatmap is the prediction
      returned for that context. Rows of items flagged ``0`` in the
      notebook-level ``assortments_matrix[0]`` are set to zero.
    * otherwise: one context is collected per batch of
      ``test_dataset.iter_batch(1, data_method="aleacarta")`` with ``-1``
      entries removed (presumably padding). The arg-max item is taken for
      each context and ``M[predicted, j]`` counts how often context item
      ``j`` led to that prediction. Rows are normalised to sum to one; rows
      of items that were never predicted stay at zero.

    Parameters
    ----------
    model : object
        Must expose ``predict(contexts, available_items=...)`` returning one
        score row per context.
    hist : mapping
        Training history; ``hist["train_loss"]`` is plotted.
    n_items : int
        Number of items (side length of the heatmap).
    test_dataset : object, optional
        Dataset exposing ``iter_batch``. The availability vector of the last
        batch is passed to ``predict`` for all contexts.

    Raises
    ------
    ValueError
        If ``test_dataset`` is given but yields no batch.

    Notes
    -----
    With ``test_dataset=None`` the function reads the global
    ``assortments_matrix`` defined in the parameters cell.
    """
    if test_dataset is None:
        contexts = tf.constant([[i] for i in range(n_items)], dtype=tf.int32)
        available_items = assortments_matrix[0]
    else:
        contexts = []
        for batch in test_dataset.iter_batch(1, data_method = "aleacarta"):
            contexts.append(batch[1][0])
        if not contexts:
            # Without this guard the next lines fail with an obscure NameError
            # on `batch`.
            raise ValueError("test_dataset yielded no batch: nothing to represent.")
        contexts = tf.ragged.constant(
                        [row[row != -1] for row in contexts], dtype=tf.int32
                    )
        available_items = batch[-1][0]

    context_prediction = model.predict(contexts, available_items = available_items)

    if test_dataset is None:
        M = np.stack(context_prediction)
        # Blank the rows of the items that are not part of the assortment.
        unavailable = np.flatnonzero(np.asarray(available_items) == 0)
        M[unavailable, :] = 0
    else:
        predicted_items = [
            np.argmax(context_prediction[i]) for i in range(context_prediction.shape[0])
        ]
        M = np.zeros((n_items, n_items))
        for i in range(contexts.shape[0]):
            for j in contexts[i]:
                M[predicted_items[i], j] += 1
        # Normalise only the rows with a non-zero total so that items that
        # were never predicted do not trigger 0/0 warnings or NaNs.
        row_totals = M.sum(axis=1, keepdims=True)
        M = np.divide(M, row_totals, out=np.zeros_like(M), where=row_totals > 0)

    fig, axes = plt.subplots(1, 2, figsize=(7, 3))
    im1 = axes[0].imshow(
        M, vmin=0.0, vmax=1,interpolation='nearest',cmap="coolwarm")
    axes[0].set_title("Model P(i|j) on test dataset")
    plt.colorbar(im1, ax=axes[0])
    axes[1].plot(hist["train_loss"], label="Training Loss")
    axes[1].set_xlabel("Training Steps")
    axes[1].set_ylabel("Loss")
    axes[1].set_title("Training Loss History")

    plt.tight_layout()
    plt.show()

In [None]:
# Draw two independent synthetic datasets (train first, then test) in which
# every basket is generated under the full assortment.
trip_dataset_train, trip_dataset_test = (
    data_gen.generate_trip_dataset(n_baskets, assortments_matrix) for _ in range(2)
)

# Show the item co-occurrence structure of the training baskets.
visualise_tripdataset_trips(dataset=trip_dataset_train, n_items=n_items)

In [None]:
# Model 1: built from the hyper-parameters of the parameters cell and fitted
# on the full-assortment training dataset.
model1_hyperparams = dict(
    epochs=epochs,
    lr=lr,
    embedding_dim=embedding_dim,
    n_negative_samples=n_negative_samples,
)
model1 = AttentionBasedContextEmbedding(**model1_hyperparams)

# The number of items is the number of columns of the assortment matrix.
model1.instantiate(n_items=assortments_matrix.shape[1])
history1 = model1.fit(trip_dataset_train)


In [None]:
# Heatmap of model 1's single-item-context predictions plus its loss curve.
get_model_representation(model=model1, hist=history1, n_items=n_items)
print(model1)

# Evaluate on the train split first, then on the test split.
loss_train_dataset, loss_test_dataset = (
    model1.evaluate(split) for split in (trip_dataset_train, trip_dataset_test)
)
print(f"Loss of model1 on the train dataset {loss_train_dataset}")
print(f"Loss of model1 on the test dataset {loss_test_dataset}")

In [None]:
# One-at-a-time hyper-parameter sweep. Each of the three grids below is
# explored while the two other hyper-parameters keep the values set in the
# parameters cell.
embedding_dims = np.arange(1, 18, 4)
learning_rates = embedding_dims * 0.002
epochs_r = embedding_dims * 40

n_grid_points = embedding_dims.shape[0]
train_losses_emb = np.zeros(n_grid_points)
test_losses_emb = np.zeros(n_grid_points)
train_losses_lr = np.zeros(n_grid_points)
test_losses_lr = np.zeros(n_grid_points)
train_losses_epochs = np.zeros(n_grid_points)
test_losses_epochs = np.zeros(n_grid_points)


def fit_and_evaluate(**overrides):
    """Train a fresh model and return its ``(train_loss, test_loss)``.

    The model starts from the notebook defaults (``epochs``, ``lr``,
    ``embedding_dim``, ``n_negative_samples``); any keyword passed in
    ``overrides`` replaces the matching default. It is fitted on
    ``trip_dataset_train`` and evaluated on ``trip_dataset_train`` and then
    on ``trip_dataset_test``.
    """
    hyperparams = {
        "epochs": epochs,
        "lr": lr,
        "embedding_dim": embedding_dim,
        "n_negative_samples": n_negative_samples,
    }
    hyperparams.update(overrides)

    candidate = AttentionBasedContextEmbedding(**hyperparams)
    candidate.instantiate(n_items=len(assortments_matrix[0]))
    candidate.fit(trip_dataset_train)
    return candidate.evaluate(trip_dataset_train), candidate.evaluate(trip_dataset_test)


# The three sweeps are interleaved in the same order as before
# (embedding_dim, then learning rate, then epochs for each grid index).
for i in range(n_grid_points):
    # Grid search for embedding_dim
    train_losses_emb[i], test_losses_emb[i] = fit_and_evaluate(
        embedding_dim=embedding_dims[i]
    )

    # Grid search for learning_rate
    train_losses_lr[i], test_losses_lr[i] = fit_and_evaluate(lr=learning_rates[i])

    # Grid search for epochs
    train_losses_epochs[i], test_losses_epochs[i] = fit_and_evaluate(epochs=epochs_r[i])

In [None]:
# Plot params vs. loss: one panel per swept hyper-parameter, each showing the
# train and test loss measured at every grid value.
fig, axes = plt.subplots(1, 3, figsize=(12, 5))

panels = (
    (embedding_dims, train_losses_emb, test_losses_emb,
     'Embedding Dimension', 'Loss vs. Embedding Dimension'),
    (learning_rates, train_losses_lr, test_losses_lr,
     'Learning Rate', 'Loss vs. Learning Rate'),
    (epochs_r, train_losses_epochs, test_losses_epochs,
     'Epochs', 'Loss vs. Epochs'),
)
for ax, (grid_values, train_curve, test_curve, x_label, panel_title) in zip(axes, panels):
    ax.plot(grid_values, train_curve, marker='o', label='Train Loss')
    ax.plot(grid_values, test_curve, marker='s', label='Test Loss')
    ax.set_xlabel(x_label)
    ax.set_ylabel('Loss')
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()

# For each hyper-parameter keep the grid value with the lowest test loss.
best_emb_idx = np.argmin(test_losses_emb)
best_embedding_dim, best_emb_loss = embedding_dims[best_emb_idx], test_losses_emb[best_emb_idx]

best_lr_idx = np.argmin(test_losses_lr)
best_learning_rate, best_lr_loss = learning_rates[best_lr_idx], test_losses_lr[best_lr_idx]

best_epochs_idx = np.argmin(test_losses_epochs)
best_epochs, best_epochs_loss = epochs_r[best_epochs_idx], test_losses_epochs[best_epochs_idx]

print(f"Best embedding_dim: {best_embedding_dim} (test loss: {best_emb_loss:.4f})")
print(f"Best learning_rate: {best_learning_rate} (test loss: {best_lr_loss:.4f})")
print(f"Best number of epochs: {best_epochs} (test loss: {best_epochs_loss:.4f})")

In [None]:
# Restricted-assortment experiment, cf. the paper by J. Désir, V. Auriau and
# E. Malherbes: the training data mixes baskets generated under assortments
# A1, A2 and A3, the test data is generated under assortment A4.
assortment1 = np.array([[1,1,0,1,1,0,1,1]])
assortment2 = np.array([[1,0,1,0,1,1,1,1]])
assortment3 = np.array([[0,1,1,1,0,1,1,1]])
assortment4 = np.array([[1,1,0,0,1,1,1,1]])

# Number of baskets drawn per assortment in this experiment.
n_baskets = 1000

trip_dataset_1, trip_dataset_2, trip_dataset_3 = (
    data_gen.generate_trip_dataset(n_baskets, assortment)
    for assortment in (assortment1, assortment2, assortment3)
)

paper_trip_dataset_train = trip_dataset_1.concatenate(trip_dataset_2).concatenate(trip_dataset_3)
paper_trip_dataset_test = data_gen.generate_trip_dataset(n_baskets, assortment4)

print("Items distribution in the train dataset (A1,A2,A3)")
visualise_tripdataset_trips(dataset=paper_trip_dataset_train, n_items=n_items)
print("Items distribution in the test dataset (A4)")
visualise_tripdataset_trips(dataset=paper_trip_dataset_test, n_items=n_items)

In [None]:
# Instantiate and train the model 2 with the hyper-parameters selected by the
# grid search.

model2 = AttentionBasedContextEmbedding(
    epochs=best_epochs,
    lr=best_learning_rate,
    embedding_dim=best_embedding_dim,
    n_negative_samples=n_negative_samples)

model2.instantiate(n_items=n_items)

# Fit on the A1/A2/A3 dataset. The model was previously fitted on
# `trip_dataset_train` (full assortment), although it is then evaluated with
# `paper_trip_dataset_train` reported as its train set and A4 as its test set.
history2 = model2.fit(paper_trip_dataset_train)

In [None]:
# Heatmap of model 2's arg-max predictions on the A4 contexts plus its loss
# curve.
get_model_representation(
    model=model2, hist=history2, n_items=n_items, test_dataset=paper_trip_dataset_test
)
print(model2)

# Evaluate on the A1/A2/A3 dataset first, then on the A4 dataset.
loss_paper_train_dataset, loss_paper_test_dataset = (
    model2.evaluate(split) for split in (paper_trip_dataset_train, paper_trip_dataset_test)
)
print(f"Loss of model2 on the train dataset {loss_paper_train_dataset}")
print(f"Loss of model2 on the test dataset {loss_paper_test_dataset}")

In [None]:
## Test save and load methods: save model 1, load it into a fresh model 3 and
## print both losses on the same held-out dataset.

# Small full-assortment dataset used only for this comparison
eval_dataset = data_gen.generate_trip_dataset(100, assortments_matrix)

# Reference loss of the trained model
loss_eval_dataset_1 = model1.evaluate(eval_dataset)
print(f"Loss of model1 on the evaluation dataset {loss_eval_dataset_1}")

# Write model 1 to disk
model1.save_model("attn_model.json")

# Model 3 is created with the same hyper-parameters but is never instantiated
# or fitted
model3 = AttentionBasedContextEmbedding(
    n_negative_samples=n_negative_samples,
    embedding_dim=embedding_dim,
    lr=lr,
    epochs=epochs,
)

# Load the saved model into model 3 and evaluate it on the same dataset
model3.load_model("attn_model.json")
loss_eval_dataset_3 = model3.evaluate(eval_dataset)
print(f"Loss of model3 on the evaluation dataset {loss_eval_dataset_3}")

# Delete the JSON file and the four .npy files that accompany it
for saved_file in (
    "attn_model.json",
    "attn_model_empty_context_embedding.npy",
    "attn_model_wa.npy",
    "attn_model_Wi.npy",
    "attn_model_Wo.npy",
):
    os.remove(saved_file)