# Implementation of an attention-based model for item recommendation.

Wang, Shoujin, Liang Hu, Longbing Cao, Xiaoshui Huang, Defu Lian, and Wei Liu.
"Attention-based transactional context embedding for next-item recommendation."
In Proceedings of the AAAI conference on artificial intelligence, vol. 32, no. 1. 2018.

In [None]:
import os
import sys
import json
import time
from tqdm.notebook import tqdm
import numpy as np
import seaborn as sns
import tensorflow as tf
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

sys.path.append("./../../")
from choice_learn.basket_models import TripDataset
from choice_learn.basket_models.basic_attention_model import AttentionBasedContextEmbedding
from choice_learn.basket_models.synthetic_dataset import SyntheticDataGenerator

### Parameters

In [None]:
# Hyperparameters forwarded to AttentionBasedContextEmbedding for model1 and model2.
lr = 0.05
epochs = 30
# Number of baskets requested from the synthetic generator for each dataset.
n_baskets = 1000
embedding_dim = 4
n_negative_samples = 3
# A single assortment row in which all 8 items are flagged with 1.
full_assortment_matrix = np.array([[1,1,1,1,1,1,1,1]])
# The item count is read from the width of the assortment matrix.
n_items = full_assortment_matrix.shape[1]


### Synthetic Data Generator

In [None]:
# Synthetic basket generator shared by every dataset built in this notebook.
# items_nest maps a nest id to the item ids it holds (8 items over 4 nests).
# nests_interactions is a 4x4 table of "compl" / "neutral" labels with an empty
# diagonal; presumably entry [a][b] describes how nest a relates to nest b —
# verify against SyntheticDataGenerator.
data_gen = SyntheticDataGenerator(
    proba_complementary_items=0.7,
    proba_neutral_items=0.3,
    noise_proba=0.15,
    items_nest = {0:[0, 1, 2],
                   1: [3, 4, 5],
                   2: [6],
                   3: [7]},
    nests_interactions = [["", "compl", "neutral", "neutral"],
                          ["compl", "", "neutral", "neutral"],
                          ["neutral", "neutral", "", "neutral"],
                          ["neutral", "neutral", "neutral", ""]])

### Two functions to visualise distributions

* `visualise_tripdataset_trips`: computes the conditional item distribution P(i|j) observed in a TripDataset, returned as a matrix to be displayed as a heatmap
* `get_model_representation`: computes the conditional item distribution P(i|j) obtained from a model's `predict()`, returned as a matrix to be displayed as a heatmap

In [None]:
def visualise_tripdataset_trips(dataset, n_items):
    """
    Compute the row-normalised item co-occurrence matrix of a trip dataset.

    For every trip, each ordered pair of distinct items found in the same
    basket adds one to the matching cell. Each row holding at least one
    co-occurrence is then divided by its sum; the other rows stay at zero.
    Nothing is drawn here: the matrix is returned for the caller to display.

    Parameters
    ----------
    dataset : TripDataset
        The dataset containing trips.
    n_items : int
        Number of unique items.

    Returns
    -------
    np.ndarray
        Array of shape (n_items, n_items) with the normalised co-occurrences.
    """
    cooccurrences = np.zeros((n_items, n_items))
    for trip in dataset.trips:
        purchased = trip.purchases
        for first in purchased:
            for second in purchased:
                if first == second:
                    continue
                cooccurrences[first, second] += 1

    # In-place division restricted to the rows whose total is non-zero
    totals = cooccurrences.sum(axis=1, keepdims=True)
    np.divide(cooccurrences, totals, out=cooccurrences, where=totals != 0)

    return cooccurrences



def get_model_representation(model, n_items, test_dataset=None, assortment_matrix=None):
    """
    Build a row-normalised matrix summarising the model's predictions.

    Two modes are supported:

    * ``test_dataset is None``: the model is queried with the single-item
      contexts ``[[0], [1], ...]``. Row ``c`` of the result is the prediction
      returned for context ``[c]``; rows whose context item is flagged 0 in
      the first row of ``assortment_matrix`` are zeroed.
    * ``test_dataset`` given: one context is read per batch of
      ``test_dataset.iter_batch(1, data_method="aleacarta")`` (``-1`` padding
      removed). For each context the arg-max predicted item ``p`` is taken and
      cell ``[p, j]`` is incremented for every item ``j`` of that context.
      The availability vector is the one carried by the last batch.

    In both modes every row with a non-zero sum is divided by that sum.
    Nothing is plotted here: the matrix is returned for the caller to display.

    Parameters
    ----------
    model : AttentionBasedContextEmbedding
        The trained model.
    n_items : int
        Number of unique items.
    test_dataset : TripDataset, optional
        Dataset for evaluation. If None, uses single-item contexts.
    assortment_matrix : np.ndarray, optional
        Binary matrix indicating available items. Only its first row is used,
        and only when ``test_dataset`` is None. Defaults to all items available.

    Returns
    -------
    np.ndarray
        Array of shape (n_items, n_items).

    Raises
    ------
    ValueError
        If ``test_dataset`` is given but yields no batch.
    """
    if assortment_matrix is None:
        assortment_matrix = np.ones((1, n_items), dtype=int)

    if test_dataset is None:
        available_items = assortment_matrix[0]
        contexts = tf.constant([[i] for i in range(n_items)], dtype=tf.int32)
    else:
        contexts = []
        batch = None
        for batch in test_dataset.iter_batch(1, data_method="aleacarta"):
            contexts.append(batch[1][0])
        if batch is None:
            # Previously surfaced as an UnboundLocalError on `batch`
            raise ValueError("test_dataset yielded no batch; cannot build contexts.")
        contexts = tf.ragged.constant(
            [row[row != -1] for row in contexts], dtype=tf.int32
        )
        available_items = batch[-1][0]

    context_prediction = model.predict(contexts, available_items=available_items)

    if test_dataset is None:
        distribution_matrix = np.stack(context_prediction)
        for item, is_available in enumerate(available_items):
            if is_available == 0:
                distribution_matrix[item] *= 0
    else:
        # The arg-max item is only needed in this counting mode
        predicted_items = [
            np.argmax(context_prediction[i]) for i in range(context_prediction.shape[0])
        ]
        distribution_matrix = np.zeros((n_items, n_items))
        for i in range(contexts.shape[0]):
            for j in contexts[i]:
                distribution_matrix[predicted_items[i], j] += 1

    row_sums = distribution_matrix.sum(axis=1, keepdims=True)
    for row, total in enumerate(row_sums):
        if total != 0:
            distribution_matrix[row] = distribution_matrix[row] / total

    return distribution_matrix

### Generate full assortments synthetic dataset : train & test

In [None]:
# Two separately generated datasets over the full assortment: one for training, one for testing.
trip_dataset_train = data_gen.generate_trip_dataset(n_baskets,full_assortment_matrix)
trip_dataset_test = data_gen.generate_trip_dataset(n_baskets,full_assortment_matrix)

# Row-normalised co-occurrence matrix of the training trips.
distribution_matrix = visualise_tripdataset_trips(trip_dataset_train,n_items)

# NOTE(review): the matrix is normalised per row, so each non-empty row i sums to 1 —
# confirm that the 'P(i|j)' colour-bar label matches the intended conditioning.
plt.figure(figsize=(4, 3))
plt.imshow(distribution_matrix, vmin=0, vmax=1, interpolation='nearest', cmap="coolwarm")
plt.colorbar(label='P(i|j)')
plt.title('Items distribution in the train dataset (A_full)')
plt.xlabel('j')
plt.ylabel('i')
plt.tight_layout()
plt.show()

### Instantiate and train the model 1 on A_full
 -> The model uses the true NCE sampling distribution, i.e. it is aware of the item frequencies

In [None]:
# Model 1: shared hyperparameters, instantiated with use_true_nce_distribution=True.
model1 = AttentionBasedContextEmbedding(
    epochs=epochs,
    lr=lr,
    embedding_dim=embedding_dim,
    n_negative_samples=n_negative_samples)

# The item count is the width of the full assortment row (same value as n_items).
model1.instantiate(n_items=len(full_assortment_matrix[0]), use_true_nce_distribution=True)
# Keep the value returned by fit() for later inspection.
history1 = model1.fit(trip_dataset_train)


### Instantiate and train the model 2 on A_full
-> The model uses a uniform sampling distribution for NCE (1/(n_items-1))

In [None]:
# Model 2: same hyperparameters as model 1, instantiated with use_true_nce_distribution=False.
model2 = AttentionBasedContextEmbedding(
    epochs=epochs,
    lr=lr,
    embedding_dim=embedding_dim,
    n_negative_samples=n_negative_samples)

# The item count is the width of the full assortment row (same value as n_items).
model2.instantiate(n_items=len(full_assortment_matrix[0]), use_true_nce_distribution = False)
# Trained on the same dataset as model 1 so the two runs are comparable.
history2 = model2.fit(trip_dataset_train)


Model 1 is using the true NCE distribution

The following plot of the evaluation on the test dataset shows P(i|j) in the predictions for the test dataset

In [None]:
# M1: model 1 queried on single-item contexts; M2: model 1 queried on the test dataset;
# M3: empirical co-occurrence matrix of the training dataset.
M1 = get_model_representation(model1, n_items)
M2 = get_model_representation(model1, n_items, trip_dataset_test)
M3 = visualise_tripdataset_trips(trip_dataset_train, n_items)

fig, axes = plt.subplots(1, 3, figsize=(10, 4), constrained_layout = True)

im1 = axes[0].imshow(M1, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[0].set_title("Model1 evaluated on [[0], [1], ...]")

im2 = axes[1].imshow(M2, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[1].set_title("Model1 evaluated on test_dataset")

im3 = axes[2].imshow(M3, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[2].set_title("Training distribution")

# One colour bar for the three panels: they all use the same vmin/vmax and colormap.
cbar = fig.colorbar(im1, ax=axes, orientation='vertical', shrink=0.8)
cbar.set_label("Probability")

plt.show()

# Loss reported by model1.evaluate() on the train and test datasets.
loss_train_dataset = model1.evaluate(trip_dataset_train)
loss_test_dataset = model1.evaluate(trip_dataset_test)
print(f"Loss of model1 on the train dataset {loss_train_dataset}")
print(f"Loss of model1 on the test dataset {loss_test_dataset}")
print("Used loss for evaluation: NLL")

Model 2 is using a uniform sampling distribution for NCE

The following plot of the evaluation on the test dataset shows P(i|j) in the predictions for the test dataset

In [None]:
# M1: model 2 queried on single-item contexts; M2: model 2 queried on the test dataset;
# M3: empirical co-occurrence matrix of the training dataset.
M1 = get_model_representation(model2, n_items)
M2 = get_model_representation(model2, n_items, trip_dataset_test)
M3 = visualise_tripdataset_trips(trip_dataset_train, n_items)

fig, axes = plt.subplots(1, 3, figsize=(10, 4), constrained_layout = True)

im1 = axes[0].imshow(M1, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[0].set_title("Model2 evaluated on [[0], [1], ...]")

im2 = axes[1].imshow(M2, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[1].set_title("Model2 evaluated on test_dataset")

im3 = axes[2].imshow(M3, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[2].set_title("Training distribution")

# One colour bar for the three panels: they all use the same vmin/vmax and colormap.
cbar = fig.colorbar(im1, ax=axes, orientation='vertical', shrink=0.8)
cbar.set_label("Probability")

plt.show()

# Loss reported by model2.evaluate() on the train and test datasets.
loss_train_dataset = model2.evaluate(trip_dataset_train)
loss_test_dataset = model2.evaluate(trip_dataset_test)
print(f"Loss of model2 on the train dataset {loss_train_dataset}")
print(f"Loss of model2 on the test dataset {loss_test_dataset}")
print("Used loss for evaluation: NLL")

### Remarks
* The uniform sampling distribution yields a smaller loss.
* The plot of the P(i|j) distribution, calculated after evaluation on the test dataset, differs from the base model

  Maybe we should focus on P(i|baskets without {i}) instead.

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

def plot_Pi_given_not_i(model, n_items, available_items=None, max_context_size=None):
    """
    Compute the average probability P(i | context not containing i) for each item i.

    For every item ``i``, all combinations of the other items with sizes from 1
    up to ``max_context_size`` (or ``n_items - 1`` when it is None) are passed
    one by one to ``model.predict``, and the probability given to ``i`` is
    averaged over those contexts. Despite its name this function draws
    nothing: it returns the averages for the caller to plot.

    ``model.predict`` is called once per context, so the number of calls grows
    combinatorially with ``n_items`` unless ``max_context_size`` is set.

    Parameters
    ----------
    model : object
        The trained model with a .predict() method.
    n_items : int
        Number of unique items.
    available_items : array-like or None, optional
        Binary array indicating available items. If None, all items are available.
    max_context_size : int or None, optional
        Maximum context size to consider for tractability. If None, uses all sizes.

    Returns
    -------
    np.ndarray
        Array of shape (n_items,) with the averaged probabilities; an item with
        no evaluated context gets 0.
    """
    Pi_given_not_i = np.zeros(n_items)
    counts = np.zeros(n_items)

    # Both values are the same for every item and context: build them once
    avail = np.ones(n_items, dtype=np.float32) if available_items is None else available_items
    largest_context = n_items - 1 if max_context_size is None else max_context_size
    context_sizes = range(1, largest_context + 1)

    for i in range(n_items):
        context_candidates = [j for j in range(n_items) if j != i]
        for k in context_sizes:
            for context in itertools.combinations(context_candidates, k):
                context_tensor = tf.ragged.constant([list(context)], dtype=tf.int32)
                probas = model.predict(context_tensor, available_items=avail)
                Pi_given_not_i[i] += probas[0, i]
                counts[i] += 1

    # np.maximum guards the division for items that had no context at all
    return Pi_given_not_i / np.maximum(counts, 1)

def empirical_Pi_given_not_i(tripdataset, n_items, max_context_size=None):
    """
    Estimate P(i | context not containing i) from the trips of a dataset.

    Each purchase of each trip is taken in turn as the target, the rest of the
    basket being its context. For every item absent from that context one
    opportunity is counted, and one hit when the item is the target. The
    result is hits / opportunities per item (0 when there was no opportunity).

    Parameters
    ----------
    tripdataset : TripDataset
        The dataset containing trips.
    n_items : int
        Number of unique items.
    max_context_size : int or None
        If set, only consider contexts up to this size.

    Returns
    -------
    np.ndarray
        Array of shape (n_items,) with empirical P(i | context not containing i).
    """
    hits = np.zeros(n_items)
    opportunities = np.zeros(n_items)

    for trip in tripdataset.trips:
        purchases = list(trip.purchases)
        for position, target in enumerate(purchases):
            # Targets outside the item range are ignored
            if target >= n_items:
                continue
            others = purchases[:position] + purchases[position + 1:]
            if max_context_size is not None and len(others) > max_context_size:
                continue
            for item in range(n_items):
                if item in others:
                    continue
                opportunities[item] += 1
                if item == target:
                    hits[item] += 1

    return hits / np.maximum(opportunities, 1)

# Empirical estimate on the training trips, then the model-based averages for both models.
# Each plot_Pi_given_not_i call queries model.predict once per enumerated context.
pi_not_i_train_dataset = empirical_Pi_given_not_i(trip_dataset_train, n_items)
pi_not_i_model1 = plot_Pi_given_not_i(model1, n_items)
pi_not_i_model2 = plot_Pi_given_not_i(model2, n_items)

# One bar chart per source; only the first panel carries the y-axis label.
fig, axes = plt.subplots(1, 3, figsize=(10, 4))
axes[0].bar(np.arange(n_items), pi_not_i_model1)
axes[0].set_ylabel(r"Average $P(i\,|\,\mathrm{context\ not\ containing}\ i)$")
axes[0].set_title("Model 1")

axes[1].bar(np.arange(n_items), pi_not_i_model2)
axes[1].set_title("Model 2")

axes[2].bar(np.arange(n_items), pi_not_i_train_dataset)
axes[2].set_title("Train dataset")

fig.supxlabel("Item index")
plt.tight_layout()
plt.show()

### Train on A1, A2, A3 and test on A4 (cf. the paper by J. Désir, V. Auriau and E. Malherbes)


In [None]:
# Assortments definition
# Each row flags the 8 items with 1 or 0; assortment_full flags every item with 1.
assortment1 = np.array([[1,1,0,1,1,1,1,1]])
assortment2 = np.array([[1,0,1,0,1,1,1,1]])
assortment3 = np.array([[0,1,1,1,0,1,1,1]])
assortment4 = np.array([[1,1,0,0,1,1,1,1]])
assortment_full = np.array([[1,1,1,1,1,1,1,1]])

# Rebinds the notebook-level n_baskets (1000 in the parameters cell) for the cells below.
n_baskets = 500

trip_dataset_1 = data_gen.generate_trip_dataset(n_baskets,assortment1)
trip_dataset_2 = data_gen.generate_trip_dataset(n_baskets,assortment2)
trip_dataset_3 = data_gen.generate_trip_dataset(n_baskets,assortment3)


# Training set: trips generated under A1, A2 and A3, concatenated in that order.
# Test sets: ten times more baskets, generated under A4 and under the full assortment.
paper_trip_dataset_train = trip_dataset_1.concatenate(trip_dataset_2).concatenate(trip_dataset_3)
paper_trip_dataset_test_a4 = data_gen.generate_trip_dataset(10*n_baskets,assortment4)
paper_trip_dataset_test_full = data_gen.generate_trip_dataset(10*n_baskets,assortment_full)


# Empirical co-occurrence matrices of the three datasets.
M1 = visualise_tripdataset_trips(paper_trip_dataset_train,n_items)
M2 = visualise_tripdataset_trips(paper_trip_dataset_test_a4,n_items)
M3 = visualise_tripdataset_trips(paper_trip_dataset_test_full,n_items)

fig, axes = plt.subplots(1, 3, figsize=(10, 4), constrained_layout = True)

im1 = axes[0].imshow(M1, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[0].set_title("Train dataset (A1,A2,A3)")

im2 = axes[1].imshow(M2, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[1].set_title("Test dataset (A4)")

im3 = axes[2].imshow(M3, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[2].set_title("Test dataset (A_full)")

# One colour bar for the three panels: they all use the same vmin/vmax and colormap.
cbar = fig.colorbar(im1, ax=axes, orientation='vertical', shrink=0.8)
cbar.set_label("Probability")

plt.show()

### Instantiate and train model 3 on A1,A2,A3 with uniform sampling for NCE

In [None]:
# Model 3 uses its own literal hyperparameters rather than the notebook-level
# epochs / lr / embedding_dim / n_negative_samples variables.
model3 = AttentionBasedContextEmbedding(
    epochs=50,
    lr=0.05,
    embedding_dim=3,
    n_negative_samples=3)

# Instantiated with use_true_nce_distribution=False and trained on the A1+A2+A3 trips.
model3.instantiate(n_items=n_items,use_true_nce_distribution = False)
history3 = model3.fit(paper_trip_dataset_train)

In [None]:
# M1: model 3 queried on single-item contexts with the A4 availability row;
# M2: same query with every item available; M3: empirical training co-occurrences.
M1 = get_model_representation(model3, n_items, assortment_matrix = [[1,1,0,0,1,1,1,1]])
M2 = get_model_representation(model3, n_items)
M3 = visualise_tripdataset_trips(paper_trip_dataset_train,n_items)


fig, axes = plt.subplots(1, 3, figsize=(10, 4), constrained_layout = True)

# The training distribution is shown first, then the two model views.
im1 = axes[0].imshow(M3, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[0].set_title("Train dataset (A1,A2,A3)")

im2 = axes[1].imshow(M1, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[1].set_title("Model3 evaluated on A4")

im3 = axes[2].imshow(M2, vmin=0.0, vmax=1, interpolation='nearest', cmap="coolwarm")
axes[2].set_title("Model3 evaluated on Afull")


# One colour bar for the three panels: they all use the same vmin/vmax and colormap.
cbar = fig.colorbar(im1, ax=axes, orientation='vertical', shrink=0.8)
cbar.set_label("Probability")

plt.show()

# Loss reported by model3.evaluate() on the training set and on both test sets.
loss_paper_train_dataset = model3.evaluate(paper_trip_dataset_train)
loss_paper_test_dataset_a4 = model3.evaluate(paper_trip_dataset_test_a4)
loss_paper_test_dataset_afull = model3.evaluate(paper_trip_dataset_test_full)
print(f"Loss of model3 on the train dataset {loss_paper_train_dataset}")
print(f"Loss of model3 on the test dataset A4 {loss_paper_test_dataset_a4}")
# The label previously read "model2" although the value comes from model3.
print(f"Loss of model3 on the test dataset Afull {loss_paper_test_dataset_afull}")
print("Used loss for evaluation : NLL")

In [None]:
# NOTE(review): the empirical reference below is computed on trip_dataset_train (full
# assortment), whereas model3 was fitted on paper_trip_dataset_train — confirm which
# dataset the "Train dataset" panel is meant to show.
pi_not_i_train_dataset = empirical_Pi_given_not_i(trip_dataset_train, n_items)
# Model 3 averages, first with the A4 availability vector, then with every item available.
pi_not_i_model3_a4 = plot_Pi_given_not_i(model3, n_items, available_items = [1,1,0,0,1,1,1,1])
pi_not_i_model3_full = plot_Pi_given_not_i(model3, n_items)

# One bar chart per source; only the first panel carries the y-axis label.
fig, axes = plt.subplots(1, 3, figsize=(10, 4))
axes[0].bar(np.arange(n_items), pi_not_i_model3_full)
axes[0].set_ylabel(r"Average $P(i\,|\,\mathrm{context\ not\ containing}\ i)$")
axes[0].set_title("Model 3 on Afull")

axes[1].bar(np.arange(n_items), pi_not_i_model3_a4)
axes[1].set_title("Model 3 on A4")

axes[2].bar(np.arange(n_items), pi_not_i_train_dataset)
axes[2].set_title("Train dataset")

fig.supxlabel("Item index")
plt.tight_layout()
plt.show()

### Test save and load methods

In [None]:
# Create evaluation dataset
eval_dataset = data_gen.generate_trip_dataset(100, full_assortment_matrix)

# Evaluate model
loss_eval_dataset_1 = model1.evaluate(eval_dataset)
print(f"Loss of model1 on the evaluation dataset {loss_eval_dataset_1}")

# Save model
model1.save_model("attn_model.json")

# Create a model 3 without instantiating
# (this rebinds `model3`: the model trained on A1+A2+A3 is no longer reachable by that name)
model3 = AttentionBasedContextEmbedding(
    epochs=epochs,
    lr=lr,
    embedding_dim=embedding_dim,
    n_negative_samples=n_negative_samples)

# Load first model and compare results on evaluation dataset
model3.load_model("attn_model.json")
loss_eval_dataset_3 = model3.evaluate(eval_dataset)
print(f"Loss of model3 on the evaluation dataset {loss_eval_dataset_3}")
# Clean-up of the files left on disk — presumably the JSON description and the .npy
# weight files written by save_model; verify against its implementation.
os.remove("attn_model.json")
os.remove("attn_model_empty_context_embedding.npy")
os.remove("attn_model_wa.npy")
os.remove("attn_model_Wi.npy")
os.remove("attn_model_Wo.npy")

# Why is True NCE not as good as we predicted?

In [None]:
def calculate_hit_rate(predictions, targets, k=10):
    """
    Calculate Hit Rate @ K.

    A target counts as a hit when it is among the ``k`` highest-scored items
    of its prediction row.

    Parameters
    ----------
    predictions : array-like
        One row of item scores per target; ``predictions[i]`` is ranked for
        ``targets[i]``.
    targets : sequence
        Target item index for each prediction row.
    k : int
        Number of top-ranked items considered.

    Returns
    -------
    float
        Fraction of targets found in their top-k list. 0.0 when ``targets`` is
        empty or ``k`` is not positive.
    """
    n_targets = len(targets)
    # k <= 0 must mean "no item retained": a raw `[-0:]` slice would keep them all
    if n_targets == 0 or k <= 0:
        return 0.0

    hit_count = sum(
        target in np.argsort(predictions[i])[-k:] for i, target in enumerate(targets)
    )
    return hit_count / n_targets

def calculate_mrr(predictions, targets, k=10):
    """
    Calculate Mean Reciprocal Rank @ K.

    For each target, the reciprocal of its 1-based rank within the ``k``
    highest-scored items is accumulated; a target outside the top-k adds 0.

    Parameters
    ----------
    predictions : array-like
        One row of item scores per target; ``predictions[i]`` is ranked for
        ``targets[i]``.
    targets : sequence
        Target item index for each prediction row.
    k : int
        Number of top-ranked items considered.

    Returns
    -------
    float
        Mean reciprocal rank over all targets. 0.0 when ``targets`` is empty
        or ``k`` is not positive.
    """
    n_targets = len(targets)
    # k <= 0 must mean "no item retained": a raw `[-0:]` slice would keep them all
    if n_targets == 0 or k <= 0:
        return 0.0

    mrr_sum = 0.0
    for i, target in enumerate(targets):
        top_k_items = np.argsort(predictions[i])[-k:][::-1]  # Sort descending
        positions = np.flatnonzero(top_k_items == target)
        if positions.size:
            mrr_sum += 1.0 / (positions[0] + 1)
    return mrr_sum / n_targets

def calculate_ndcg(predictions, targets, k=10):
    """
    Calculate Normalized Discounted Cumulative Gain @ K.

    With a single relevant item per row the ideal DCG is 1, so each target
    found at 1-based rank ``r`` within the top-k adds ``1 / log2(r + 1)``;
    a target outside the top-k adds 0.

    Parameters
    ----------
    predictions : array-like
        One row of item scores per target; ``predictions[i]`` is ranked for
        ``targets[i]``.
    targets : sequence
        Target item index for each prediction row.
    k : int
        Number of top-ranked items considered.

    Returns
    -------
    float
        Mean NDCG over all targets. 0.0 when ``targets`` is empty or ``k`` is
        not positive.
    """
    n_targets = len(targets)
    # k <= 0 must mean "no item retained": a raw `[-0:]` slice would keep them all
    if n_targets == 0 or k <= 0:
        return 0.0

    idcg = 1.0  # Perfect ranking for binary relevance
    ndcg_sum = 0.0
    for i, target in enumerate(targets):
        top_k_items = np.argsort(predictions[i])[-k:][::-1]  # Sort descending
        positions = np.flatnonzero(top_k_items == target)
        if positions.size:
            rank = positions[0] + 1
            dcg = 1.0 / np.log2(rank + 1)
            ndcg_sum += dcg / idcg
    return ndcg_sum / n_targets

def evaluate_model_comprehensive(model, test_dataset, k_values=(1, 5, 10, 20)):
    """
    Comprehensive evaluation of the model.

    The test dataset is read in batches of 100 with
    ``iter_batch(..., shuffle=False, data_method="aleacarta")``; for each batch
    the ``-1`` padding is stripped from the contexts and ``model.predict`` is
    called. Hit rate, MRR and NDCG are then computed for every value of
    ``k_values`` over all collected predictions.

    Parameters
    ----------
    model : object
        Trained model exposing ``predict(contexts, available_items=...)``.
    test_dataset : TripDataset
        Dataset to evaluate on.
    k_values : iterable of int
        Cut-offs at which the metrics are computed.

    Returns
    -------
    tuple
        ``(results, all_predictions, all_targets)`` where ``results`` maps
        ``'hit_rate@k'``, ``'mrr@k'`` and ``'ndcg@k'`` to their values, and the
        two arrays hold every prediction row and every target.
    """
    results = {}
    all_predictions = []
    all_targets = []

    print("Generating predictions...")
    for batch in test_dataset.iter_batch(batch_size=100, shuffle=False, data_method="aleacarta"):
        target_items, context_items, _, _, _, _, available_items = batch

        # Convert to proper format for prediction
        context_ragged = tf.ragged.constant([row[row != -1] for row in context_items], dtype=tf.int32)
        # Keyword form, as in the other predict() calls of this notebook
        predictions = model.predict(context_ragged, available_items=available_items)

        all_predictions.extend(predictions)
        all_targets.extend(target_items)

    all_predictions = np.array(all_predictions)
    all_targets = np.array(all_targets)

    print(f"Evaluating {len(all_targets)} predictions...")

    # Calculate metrics for different k values
    for k in k_values:
        results[f'hit_rate@{k}'] = calculate_hit_rate(all_predictions, all_targets, k)
        results[f'mrr@{k}'] = calculate_mrr(all_predictions, all_targets, k)
        results[f'ndcg@{k}'] = calculate_ndcg(all_predictions, all_targets, k)

    return results, all_predictions, all_targets

In [None]:
def create_synthetic_dataset(n_items=50, n_baskets_train=4000, n_baskets_test=500):
    """
    Create synthetic train and test datasets for evaluation.

    Five nests of ten consecutive items each are declared (items 0 to 49) and
    a single all-ones assortment of width ``n_items`` is used for both
    datasets, which are generated one after the other by the same generator.

    NOTE(review): the nests always cover items 0-49 whatever ``n_items`` is;
    only the assortment width follows ``n_items`` — confirm that values other
    than 50 are supported by the generator.

    Parameters
    ----------
    n_items : int
        Width of the all-ones assortment matrix.
    n_baskets_train : int
        Number of baskets requested for the training dataset.
    n_baskets_test : int
        Number of baskets requested for the test dataset.

    Returns
    -------
    tuple
        ``(train_dataset, test_dataset)``.
    """
    # Item nests (product categories), ten consecutive item ids per nest:
    # 0 Electronics, 1 Clothing, 2 Food, 3 Books, 4 Sports
    items_nest = {nest: list(range(10 * nest, 10 * nest + 10)) for nest in range(5)}

    # Nest interactions (complementary / neutral), rows and columns in nest order
    nests_interactions = [
        ["", "neutral", "neutral", "compl", "neutral"],
        ["neutral", "", "compl", "neutral", "compl"],
        ["neutral", "compl", "", "neutral", "neutral"],
        ["compl", "neutral", "neutral", "", "neutral"],
        ["neutral", "compl", "neutral", "neutral", ""],
    ]

    generator = SyntheticDataGenerator(
        items_nest=items_nest,
        nests_interactions=nests_interactions,
        proba_complementary_items=0.7,
        proba_neutral_items=0.3,
        noise_proba=0.1,
        plant_seed=42,
    )

    # One assortment in which every item is available
    assortments_matrix = np.ones((1, n_items), dtype=int)

    print("Generating training dataset...")
    train_dataset = generator.generate_trip_dataset(
        n_baskets=n_baskets_train,
        assortments_matrix=assortments_matrix,
        len_basket=None,
    )

    print("Generating test dataset...")
    test_dataset = generator.generate_trip_dataset(
        n_baskets=n_baskets_test,
        assortments_matrix=assortments_matrix,
        len_basket=None,
    )

    print(f"Train dataset: {len(train_dataset)} trips")
    print(f"Test dataset: {len(test_dataset)} trips")
    print(f"Max basket length: {train_dataset.max_length}")
    print(f"Number of items: {train_dataset.n_items}")

    return train_dataset, test_dataset

# Create datasets
# Default arguments: n_items=50, 4000 training baskets, 500 test baskets.
train_data, test_data = create_synthetic_dataset()


In [None]:
def train_and_evaluate_model(train_dataset, test_dataset, use_true_nce=True, model_name="model"):
    """
    Train one model configuration and score it on the test dataset.

    The model is built from a fixed hyperparameter dictionary, instantiated
    with ``train_dataset.n_items`` items, fitted on ``train_dataset`` and then
    passed to ``evaluate_model_comprehensive``. Wall-clock durations of the
    fit and of the evaluation are measured with ``time.time()``.

    Parameters
    ----------
    train_dataset : TripDataset
        Dataset used for fitting.
    test_dataset : TripDataset
        Dataset used for the ranking metrics.
    use_true_nce : bool
        Forwarded as ``use_true_nce_distribution`` to ``model.instantiate``.
    model_name : str
        Label used in the printed output.

    Returns
    -------
    dict
        Keys ``'model'``, ``'history'``, ``'results'``, ``'predictions'``,
        ``'targets'``, ``'config'``, ``'training_time'`` and ``'eval_time'``.
    """
    banner = '=' * 60
    print(f"\n{banner}")
    print(f"Training {model_name} (use_true_nce_distribution={use_true_nce})")
    print(f"{banner}")

    # Fixed hyperparameters, all forwarded to the model constructor
    config = dict(
        epochs=40,
        lr=0.001,
        embedding_dim=64,
        n_negative_samples=10,
        batch_size=64,
        optimizer='Adam',
    )
    print(f"Config: {config}")

    model = AttentionBasedContextEmbedding(**config)
    model.instantiate(
        n_items=train_dataset.n_items,
        use_true_nce_distribution=use_true_nce,
    )
    print(f"Model instantiated with {train_dataset.n_items} items")

    # Timed training
    fit_started = time.time()
    history = model.fit(train_dataset)
    training_time = time.time() - fit_started

    # Timed evaluation
    eval_started = time.time()
    results, predictions, targets = evaluate_model_comprehensive(model, test_dataset)
    eval_time = time.time() - eval_started

    print(f"\nResults for {model_name}:")
    for metric_name, score in results.items():
        print(f"  {metric_name}: {score:.4f}")

    return {
        'model': model,
        'history': history,
        'results': results,
        'predictions': predictions,
        'targets': targets,
        'config': config,
        'training_time': training_time,
        'eval_time': eval_time,
    }

# Notebook progress marker: printed when this cell has finished running.
print("Training function defined!")


In [None]:
# Train model with true NCE distribution
results_true_nce = train_and_evaluate_model(
    train_data, test_data, 
    use_true_nce=True, 
    model_name="Model_TRUE_NCE"
)

# Train model with uniform NCE distribution  
# (same datasets and same fixed hyperparameters; only use_true_nce differs)
results_false_nce = train_and_evaluate_model(
    train_data, test_data, 
    use_true_nce=False, 
    model_name="Model_UNIFORM_NCE"
)

print("\n" + "="*80)
print("TRAINING COMPLETED FOR BOTH MODELS")
print("="*80)


In [None]:
def compare_results(results_true, results_false):
    """
    Print and return a side-by-side comparison of two evaluation runs.

    Parameters
    ----------
    results_true : dict
        Run obtained with the true NCE distribution; must hold a ``'results'``
        mapping with the hit_rate/mrr/ndcg metrics at K = 1, 5, 10, 20 and a
        ``'training_time'`` entry.
    results_false : dict
        Run obtained with the uniform NCE distribution, same layout.

    Returns
    -------
    pd.DataFrame
        One row per metric with columns ``Metric``, ``True_NCE``,
        ``Uniform_NCE`` and ``Improvement_%`` (relative gain of the true-NCE
        run in percent, 0 when the uniform value is not positive).
    """
    separator = "=" * 40
    print("\n" + separator)
    print("DETAILED RESULTS COMPARISON")
    print(separator)

    # hit_rate@1 ... hit_rate@20, then mrr@..., then ndcg@...
    metric_names = [
        f"{family}@{k}" for family in ("hit_rate", "mrr", "ndcg") for k in (1, 5, 10, 20)
    ]

    rows = []
    for name in metric_names:
        true_val = results_true['results'][name]
        false_val = results_false['results'][name]
        if false_val > 0:
            improvement = (true_val - false_val) / false_val * 100
        else:
            improvement = 0
        rows.append({
            'Metric': name,
            'True_NCE': true_val,
            'Uniform_NCE': false_val,
            'Improvement_%': improvement,
        })

    comparison_df = pd.DataFrame(rows)

    print("\nMetrics Comparison:")
    print(comparison_df.round(4))

    # Training time comparison
    true_time = results_true['training_time']
    uniform_time = results_false['training_time']
    print("\nTraining Time Comparison:")
    print(f"True NCE: {true_time:.2f} seconds")
    print(f"Uniform NCE: {uniform_time:.2f} seconds")
    print(f"Time difference: {true_time - uniform_time:.2f} seconds")

    return comparison_df

# Metric-by-metric table (also printed by the function) for the two trained runs.
comparison_df = compare_results(results_true_nce, results_false_nce)

In [None]:
# Create comprehensive metrics comparison plots
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

k_values = [1, 5, 10, 20]

# Per-K scores of both runs, one pair of lists per metric family
hit_rates_true = [results_true_nce['results'][f'hit_rate@{k}'] for k in k_values]
hit_rates_false = [results_false_nce['results'][f'hit_rate@{k}'] for k in k_values]
mrr_true = [results_true_nce['results'][f'mrr@{k}'] for k in k_values]
mrr_false = [results_false_nce['results'][f'mrr@{k}'] for k in k_values]
ndcg_true = [results_true_nce['results'][f'ndcg@{k}'] for k in k_values]
ndcg_false = [results_false_nce['results'][f'ndcg@{k}'] for k in k_values]

# Hit Rate, MRR and NDCG panels share the same grouped-bar layout
for ax, true_scores, uniform_scores, y_label, title in (
    (axes[0,0], hit_rates_true, hit_rates_false, 'Hit Rate @ K', 'Hit Rate Comparison'),
    (axes[0,1], mrr_true, mrr_false, 'MRR @ K', 'Mean Reciprocal Rank Comparison'),
    (axes[1,0], ndcg_true, ndcg_false, 'NDCG @ K', 'NDCG Comparison'),
):
    ax.bar(np.arange(len(k_values)) - 0.2, true_scores, 0.4,
           label='True NCE', color='blue', alpha=0.7)
    ax.bar(np.arange(len(k_values)) + 0.2, uniform_scores, 0.4,
           label='Uniform NCE', color='red', alpha=0.7)
    ax.set_xlabel('K Value')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.set_xticks(range(len(k_values)))
    ax.set_xticklabels(k_values)
    ax.legend()
    ax.grid(True, alpha=0.3)

# Improvement percentage of the hit rate (0 when the uniform value is not positive)
improvements = [
    ((hr_true - hr_false) / hr_false * 100) if hr_false > 0 else 0
    for hr_true, hr_false in zip(hit_rates_true, hit_rates_false)
]

axes[1,1].bar(range(len(k_values)), improvements, color='green', alpha=0.7)
axes[1,1].set_xlabel('K Value')
axes[1,1].set_ylabel('Improvement (%)')
axes[1,1].set_title('Hit Rate Improvement: True NCE vs Uniform NCE')
axes[1,1].set_xticks(range(len(k_values)))
axes[1,1].set_xticklabels(k_values)
axes[1,1].grid(True, alpha=0.3)
axes[1,1].axhline(y=0, color='black', linestyle='--', alpha=0.5)

plt.tight_layout()
plt.show()

In [None]:
def analyze_model_embeddings(model_true, model_false, train_dataset):
    """
    Compare the embedding matrices learned by two models.

    The ``Wi`` and ``Wo`` attributes of both models are converted to NumPy,
    their mean row norms and mean pairwise cosine similarities are printed,
    and the four cosine-similarity matrices are drawn on a 2x2 grid.

    Parameters
    ----------
    model_true : object
        Model trained with the true NCE distribution; exposes ``Wi`` and ``Wo``.
    model_false : object
        Model trained with the uniform NCE distribution; exposes ``Wi`` and ``Wo``.
    train_dataset : TripDataset
        Not used by the function body.

    Returns
    -------
    dict
        The four embedding matrices under ``'Wi_true'``, ``'Wo_true'``,
        ``'Wi_false'``, ``'Wo_false'`` and, under ``'similarities'``, the
        cosine-similarity matrices with the same four keys.
    """

    def mean_row_norm(weights):
        return np.mean(np.linalg.norm(weights, axis=1))

    def mean_pairwise(similarity):
        # Strict upper triangle: each pair once, self-similarity excluded
        return np.mean(similarity[np.triu_indices(len(similarity), k=1)])

    print("\n" + "="*80)
    print("MODEL INTERPRETABILITY ANALYSIS")
    print("="*80)

    # Embedding matrices as NumPy arrays
    Wi_true, Wo_true = model_true.Wi.numpy(), model_true.Wo.numpy()
    Wi_false, Wo_false = model_false.Wi.numpy(), model_false.Wo.numpy()

    print(f"Input embedding dimensions: {Wi_true.shape}")
    print(f"Output embedding dimensions: {Wo_true.shape}")

    print("\nEmbedding Magnitude Analysis:")
    print(f"True NCE - Input embeddings mean norm: {mean_row_norm(Wi_true):.4f}")
    print(f"True NCE - Output embeddings mean norm: {mean_row_norm(Wo_true):.4f}")
    print(f"Uniform NCE - Input embeddings mean norm: {mean_row_norm(Wi_false):.4f}")
    print(f"Uniform NCE - Output embeddings mean norm: {mean_row_norm(Wo_false):.4f}")

    from sklearn.metrics.pairwise import cosine_similarity

    # Pairwise cosine similarities between the rows of each matrix
    sim_Wi_true = cosine_similarity(Wi_true)
    sim_Wo_true = cosine_similarity(Wo_true)
    sim_Wi_false = cosine_similarity(Wi_false)
    sim_Wo_false = cosine_similarity(Wo_false)

    print("\nEmbedding Similarity Analysis:")
    print(f"True NCE - Input embeddings mean similarity: {mean_pairwise(sim_Wi_true):.4f}")
    print(f"True NCE - Output embeddings mean similarity: {mean_pairwise(sim_Wo_true):.4f}")
    print(f"Uniform NCE - Input embeddings mean similarity: {mean_pairwise(sim_Wi_false):.4f}")
    print(f"Uniform NCE - Output embeddings mean similarity: {mean_pairwise(sim_Wo_false):.4f}")

    # One heatmap per similarity matrix: true NCE on the top row, uniform NCE below
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    panels = (
        (axes[0,0], sim_Wi_true, 'True NCE - Input Embeddings Similarity'),
        (axes[0,1], sim_Wo_true, 'True NCE - Output Embeddings Similarity'),
        (axes[1,0], sim_Wi_false, 'Uniform NCE - Input Embeddings Similarity'),
        (axes[1,1], sim_Wo_false, 'Uniform NCE - Output Embeddings Similarity'),
    )
    for ax, similarity, title in panels:
        image = ax.imshow(similarity, cmap='viridis', vmin=-1, vmax=1)
        ax.set_title(title)
        ax.set_xlabel('Item ID')
        ax.set_ylabel('Item ID')
        plt.colorbar(image, ax=ax)

    plt.tight_layout()
    plt.show()

    return {
        'Wi_true': Wi_true, 'Wo_true': Wo_true,
        'Wi_false': Wi_false, 'Wo_false': Wo_false,
        'similarities': {
            'Wi_true': sim_Wi_true, 'Wo_true': sim_Wo_true,
            'Wi_false': sim_Wi_false, 'Wo_false': sim_Wo_false,
        },
    }

# Compare the embeddings of the two models trained above; train_data is passed
# to satisfy the signature.
embedding_analysis = analyze_model_embeddings(
    results_true_nce['model'], 
    results_false_nce['model'], 
    train_data
)

In [None]:
def generate_final_report(results_true, results_false, significance_results):
    """
    Print the final comparison report of the two runs.

    Parameters
    ----------
    results_true : dict
        Run obtained with the true NCE distribution; must hold a ``'results'``
        mapping with ``'hit_rate@10'``, ``'mrr@10'`` and ``'ndcg@10'`` and a
        ``'training_time'`` entry.
    results_false : dict
        Run obtained with the uniform NCE distribution, same layout.
    significance_results : dict
        Must hold ``significance_results['paired_t_test']['p_value']``.

    Returns
    -------
    None
        The report is only printed.
    """
    print("\n" + "="*100)
    print("FINAL EVALUATION REPORT: ATTENTION-BASED MODEL COMPARISON")
    print("="*100)

    print("\n📊 EXECUTIVE SUMMARY")
    print("-" * 50)

    # Best performing model
    hr10_true = results_true['results']['hit_rate@10']
    hr10_false = results_false['results']['hit_rate@10']
    best_model = "True NCE Distribution" if hr10_true > hr10_false else "Uniform NCE Distribution"

    # Relative gain over the weaker run; a zero baseline used to raise ZeroDivisionError
    baseline = min(hr10_true, hr10_false)
    gap = abs(hr10_true - hr10_false)
    if baseline != 0:
        improvement = gap / baseline * 100
    elif gap > 0:
        improvement = float("inf")
    else:
        improvement = 0.0

    p_value = significance_results['paired_t_test']['p_value']

    print(f"Best performing model: {best_model}")
    print(f"Performance improvement: {improvement:.2f}%")
    print(f"Statistical significance: {'Yes' if p_value < 0.05 else 'No'}")

    print("\n📈 KEY METRICS COMPARISON")
    print("-" * 50)

    metrics_summary = [
        ("Hit Rate@10", hr10_true, hr10_false),
        ("MRR@10", results_true['results']['mrr@10'], results_false['results']['mrr@10']),
        ("NDCG@10", results_true['results']['ndcg@10'], results_false['results']['ndcg@10'])
    ]

    for metric, true_val, false_val in metrics_summary:
        better = "TRUE NCE" if true_val > false_val else "UNIFORM NCE"
        diff = abs(true_val - false_val)
        print(f"{metric:12} | True NCE: {true_val:.4f} | Uniform NCE: {false_val:.4f} | Best: {better} (+{diff:.4f})")

    print("\n⏱️  EFFICIENCY COMPARISON")
    print("-" * 50)
    print(f"Training time (True NCE):    {results_true['training_time']:.2f} seconds")
    print(f"Training time (Uniform NCE): {results_false['training_time']:.2f} seconds")
    print(f"Time overhead: {results_true['training_time'] - results_false['training_time']:.2f} seconds")

    print("\n🔬 STATISTICAL ANALYSIS")
    print("-" * 50)
    print(f"P-value (paired t-test): {p_value:.6f}")

    print("\n" + "="*100)

# Generate final report
# NOTE(review): `significance_results` is not assigned anywhere in the visible part of
# this notebook; unless another cell defines it (a dict holding
# ['paired_t_test']['p_value']), this call raises NameError — confirm.
generate_final_report(results_true_nce, results_false_nce, significance_results)