# In [None]:
# 1. Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 2. Read the cleaned transactions and parse the day-first date strings
df = pd.read_csv('../data/Groceries_cleaned_dataset2.csv')
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

# One basket per (member, date): the list of item descriptions bought together
basket_groups = df.groupby(['Member_number', 'Date'])['itemDescription']
baskets = basket_groups.apply(list).reset_index()

# 3. Rebuild the item <-> index mappings from the data itself.
# Indices follow first-appearance order in the CSV.
# NOTE(review): these presumably have to match the mapping used when the
# model was trained -- confirm against the training code.
all_items = df['itemDescription'].unique()
num_items = len(all_items)
item2idx = dict(zip(all_items, range(num_items)))
idx2item = dict(enumerate(all_items))

# 4. Load your trained model (make sure this matches your training code)
import tensorflow as tf

class ItemItemModel(tf.keras.Model):
    """Score item pairs with a single shared embedding table.

    Each pair of item indices is scored as the sigmoid of the dot product of
    the two items' embedding vectors.

    NOTE(review): weights are restored from a file further down, so this
    definition presumably has to stay in sync with the one used at training
    time -- confirm against the training code before changing it.
    """

    def __init__(self, num_items: int, embedding_dim: int = 32) -> None:
        """Create the shared embedding layer.

        Args:
            num_items: Number of rows in the embedding table (one per item).
            embedding_dim: Length of each item's embedding vector.
        """
        super().__init__()
        # One table serves both sides of a pair; He-normal initialisation and
        # an L2 penalty (1e-6) on the embedding weights.
        self.item_emb = tf.keras.layers.Embedding(num_items, embedding_dim,
                                                  embeddings_initializer='he_normal',
                                                  embeddings_regularizer=tf.keras.regularizers.l2(1e-6))

    def call(self, inputs):
        """Return the sigmoid of the embedding dot product for each pair.

        Args:
            inputs: Mapping with keys ``'item1'`` and ``'item2'`` holding item
                indices. Assumes both are 1-D and of equal length, so that the
                looked-up embeddings are (batch, embedding_dim) and the
                reduction over axis 1 is the dot product -- TODO confirm.

        Returns:
            The sigmoid-squashed dot product, one value per input pair under
            the shape assumption above.
        """
        # Both items are looked up in the same embedding table.
        emb1 = self.item_emb(inputs['item1'])
        emb2 = self.item_emb(inputs['item2'])
        # Element-wise product summed over axis 1.
        dot = tf.reduce_sum(emb1 * emb2, axis=1)
        return tf.nn.sigmoid(dot)

model = ItemItemModel(num_items)
model.compile(loss='binary_crossentropy', optimizer='adam')

# A subclassed Keras model owns no variables until it has been called once,
# and Keras refuses to load HDF5 weights into a model in that state. Push a
# single dummy pair through the model so the embedding table is created
# before the weights are restored.
_build_idx = np.zeros((1,), dtype=np.int32)
model({"item1": _build_idx, "item2": _build_idx})

model.load_weights('model_weights.h5')  # Adjust if you saved with a different filename

# 5. Define hybrid recommender
def hybrid_recommend(basket, top_k=5):
    """Recommend the items that best match the contents of a basket.

    Every item not already in the basket is scored by the mean model output
    over all (candidate, basket item) pairs, and the highest-scoring
    candidates are returned.

    Reads the module-level ``model``, ``item2idx``, ``idx2item`` and
    ``num_items``.

    Args:
        basket: Iterable of item names. Names missing from ``item2idx`` are
            ignored; repeated names are kept and therefore weigh more in the
            mean.
        top_k: Maximum number of recommendations to return.

    Returns:
        List of item names ordered from best to worst score, at most
        ``top_k`` long. Empty when no basket item is known or when every
        known item is already in the basket.
    """
    basket_idxs = [item2idx[name] for name in basket if name in item2idx]
    if not basket_idxs:
        return []

    # Set membership keeps candidate filtering linear in num_items.
    in_basket = set(basket_idxs)
    candidates = [i for i in range(num_items) if i not in in_basket]
    if not candidates:
        return []

    # Score every (candidate, basket item) pair in ONE forward pass instead
    # of one model call per candidate. Rows are laid out candidate-major:
    # candidate c occupies positions [c * n_basket, (c + 1) * n_basket).
    n_basket = len(basket_idxs)
    pair_input = {"item1": np.repeat(candidates, n_basket),
                  "item2": np.tile(basket_idxs, len(candidates))}
    pair_scores = model(pair_input).numpy().reshape(len(candidates), n_basket)
    mean_scores = pair_scores.mean(axis=1)

    # A stable sort on the negated scores gives descending order while
    # keeping tied candidates in ascending index order.
    best = np.argsort(-mean_scores, kind="stable")[:top_k]
    return [idx2item[candidates[j]] for j in best]

# 6. Define metrics
def precision_at_k(actual, predicted, k=5):
    """Return the share of the top-``k`` slots occupied by relevant items.

    Args:
        actual: Iterable of relevant (ground-truth) items.
        predicted: Ranked sequence of recommended items, best first.
        k: Cutoff. The denominator is always ``k``, even when fewer than
            ``k`` predictions are supplied.

    Returns:
        Number of distinct relevant items among ``predicted[:k]`` divided by
        ``k``, as a float. Returns 0.0 when ``k`` is not positive.
    """
    # k == 0 would divide by zero, and a negative k would slice from the end
    # of the list and produce a negative score; neither is a meaningful
    # cutoff, so report zero precision.
    if k <= 0:
        return 0.0
    hits = set(actual) & set(predicted[:k])
    return len(hits) / float(k)

def recall_at_k(actual, predicted, k=5):
    """Return the share of relevant items found in the top-``k`` predictions.

    Args:
        actual: Iterable of relevant (ground-truth) items.
        predicted: Ranked sequence of recommended items, best first.
        k: Cutoff applied to ``predicted``.

    Returns:
        Number of distinct relevant items among ``predicted[:k]`` divided by
        the number of distinct relevant items, or 0 when ``actual`` is empty.
    """
    relevant = set(actual)
    retrieved = set(predicted[:k])
    if not relevant:
        return 0
    found = relevant.intersection(retrieved)
    return len(found) / float(len(relevant))

# 7. Build leave-last-out samples: everything but the final item of a basket
#    is the input, the final item is the target to predict.
test_samples = []
for items in baskets['itemDescription']:
    # A basket needs at least one input item plus one target.
    if len(items) < 2:
        continue
    *input_items, target_item = items
    test_samples.append((input_items, target_item))

# 8. Score every test sample at a single cutoff and report the averages
top_k = 5
precisions, recalls = [], []

for input_items, target in test_samples:
    preds = hybrid_recommend(input_items, top_k=top_k)
    relevant = [target]  # each sample has exactly one held-out item
    precisions.append(precision_at_k(relevant, preds, k=top_k))
    recalls.append(recall_at_k(relevant, preds, k=top_k))

print(f"Precision@{top_k}: {np.mean(precisions):.4f}")
print(f"Recall@{top_k}: {np.mean(recalls):.4f}")

# 9. Plot Precision & Recall at different k
ks = [1, 3, 5, 10]
precision_scores = []
recall_scores = []

# Rank each test basket once, at the largest cutoff. hybrid_recommend returns
# the first top_k entries of a fixed ranking and both metrics only inspect
# predicted[:k], so every smaller cutoff can reuse these lists instead of
# re-running the model for each (k, metric) combination.
max_k = max(ks)
ranked_preds = [hybrid_recommend(i, top_k=max_k) for i, _ in test_samples]
targets = [[t] for _, t in test_samples]

for k in ks:
    p = [precision_at_k(actual, preds, k) for actual, preds in zip(targets, ranked_preds)]
    r = [recall_at_k(actual, preds, k) for actual, preds in zip(targets, ranked_preds)]
    precision_scores.append(np.mean(p))
    recall_scores.append(np.mean(r))

plt.figure(figsize=(8, 5))
plt.plot(ks, precision_scores, marker='o', label='Precision')
plt.plot(ks, recall_scores, marker='x', label='Recall')
plt.xlabel('k')
plt.ylabel('Score')
plt.title('Precision & Recall at k')
plt.legend()
plt.grid(True)
plt.show()
