## **Get the data and take a subset**

In [1]:
# Import packages
import pandas as pd
import numpy as np
import io
import matplotlib.pyplot as plt
import seaborn as sns
import math
from scipy.sparse import csr_matrix
#
from contextlib import redirect_stdout


In [2]:
# Set display option to show full content of columns
pd.options.display.max_colwidth = None

# Turn off scientific notation for pandas DataFrames
pd.options.display.float_format = '{:.2f}'.format

In [3]:
# Take the dataframes
# file_reviews = '/content/drive/MyDrive/Recommendation Engine BP/Home_and_Kitchen_filtered.csv'
file_reviews = '../data/Home_and_Kitchen_filtered.csv'
df_reviews = pd.read_csv(file_reviews)

file_items = '../data/meta_Home_and_Kitchen_filtered.csv'
df_items = pd.read_csv(file_items)

  df_reviews = pd.read_csv(file_reviews)


In [4]:
# Merge reviews with items using left_join
df_combined = df_reviews.merge(df_items, left_on='asin', right_on='asin', how = 'left')

# Convert 'unixReviewTime' to datetime format
df_combined['unixReviewTime'] = pd.to_datetime(df_combined['unixReviewTime'], unit='s')

In [5]:
# Take the subset to analys
df_subset = df_combined[['overall', 'reviewerID', 'asin', 'unixReviewTime']]
df_subset.drop_duplicates(inplace=True)

In [6]:
df_subset.head()

Unnamed: 0,overall,reviewerID,asin,unixReviewTime
0,5.0,A8LUWTIPU9CZB,560467893,2015-11-05
1,3.0,A3B6GKQQ1JJ167,560467893,2015-05-07
2,5.0,A3MCTN65BU7XRA,681795107,2014-01-22
3,1.0,A7JVZFSXVY9RL,681795107,2013-10-30
4,1.0,A2RQ7VLAK1SHPU,681795107,2013-09-20


In [7]:
df_subset.shape

(6646597, 4)

## **Codes for bpr**

In [14]:
import functions.interaction_matrix as interaction_matrix
from functions.bpr import BPRRecommender

In [15]:
# --- Example to test ---
user_id = 'A8LUWTIPU9CZB'
cutoff_time = timestamp=pd.Timestamp('2016-11-05')


In [16]:
# Initialize matrix builder and knn
builder = interaction_matrix.InteractionMatrixBuilder(min_users=5, cutoff_time=cutoff_time)
matrix = builder.build(df_subset)


In [11]:
# Initialize BPR model with hyperparameters
# Adjust these values when tuning: factors, lr, reg, epochs, batch_size
bpr = BPRRecommender(factors=64, lr=0.01, reg=0.01, epochs=5, batch_size=1024)

# Train the model using the interaction matrix from InteractionMatrixBuilder
bpr.fit(
    matrix=matrix,
    item_map=builder.item_map,
    items=builder.items,
    user_map=builder.user_map,
    users=builder.users
)

Training on Apple MPS (GPU)
Epoch 1/5 — BPR loss: 0.6931
Epoch 2/5 — BPR loss: 0.6931
Epoch 3/5 — BPR loss: 0.6931


KeyboardInterrupt: 

In [None]:
# Get the user's purchase history before the cutoff date
user_history = df_subset[
    (df_subset['reviewerID'] == user_id) &
    (df_subset['unixReviewTime'] < cutoff_time)
]['asin'].tolist()

print(f"User {user_id} has {len(user_history)} purchases before cutoff")

# Generate top 10 recommendations
recommendations = bpr.recommend(user_id=user_id, user_history=user_history, top_n=10)
print(f"\nTop 10 BPR recommendations:")
for i, item in enumerate(recommendations, 1):
    print(f"  {i}. {item}")

In [None]:
# Evaluate BPR recommendations against actual purchases after the cutoff date
from functions.rs_evaluation import (
    calculate_precision_at_k,
    calculate_recall_at_k,
    calculate_hit_at_k,
    calculate_ndcg_at_k
)

# Get the user's actual purchases after the cutoff date (ground truth)
actual_purchases = df_subset[
    (df_subset['reviewerID'] == user_id) &
    (df_subset['unixReviewTime'] >= cutoff_time)
]['asin'].tolist()

k = 10
print(f"Actual purchases after cutoff: {len(actual_purchases)}")
print(f"\nBPR Evaluation @{k}:")
print(f"  Precision@{k}: {calculate_precision_at_k(recommendations, actual_purchases, k):.4f}")
print(f"  Recall@{k}:    {calculate_recall_at_k(recommendations, actual_purchases, k):.4f}")
print(f"  Hit@{k}:       {calculate_hit_at_k(recommendations, actual_purchases, k)}")
print(f"  NDCG@{k}:      {calculate_ndcg_at_k(recommendations, actual_purchases, k):.4f}")

## **Codes for bpr (implicit package)**

In [10]:
from functions.bpr_implicit_package import BPRImplicitRecommender

In [17]:
# Initialize BPR model (implicit package) with hyperparameters
# Adjust these values when tuning: factors, lr, reg, epochs
bpr_implicit = BPRImplicitRecommender(factors=64, lr=0.01, reg=0.01, epochs=10)

# Train the model using the same interaction matrix from InteractionMatrixBuilder
bpr_implicit.fit(
    matrix=matrix,
    item_map=builder.item_map,
    items=builder.items,
    user_map=builder.user_map,
    users=builder.users
)

100%|██████████| 10/10 [00:11<00:00,  1.13s/it, train_auc=50.00%, skipped=0.06%]


<functions.bpr_implicit_package.BPRImplicitRecommender at 0x33a3a2c10>

In [18]:
# Get the user's purchase history before the cutoff date
user_history = df_subset[
    (df_subset['reviewerID'] == user_id) &
    (df_subset['unixReviewTime'] < cutoff_time)
]['asin'].tolist()

print(f"User {user_id} has {len(user_history)} purchases before cutoff")

# Generate top 10 recommendations
recommendations_implicit = bpr_implicit.recommend(user_id=user_id, user_history=user_history, top_n=10)
print(f"\nTop 10 BPR (implicit) recommendations:")
for i, item in enumerate(recommendations_implicit, 1):
    print(f"  {i}. {item}")

User A8LUWTIPU9CZB has 5 purchases before cutoff

Top 10 BPR (implicit) recommendations:
  1. B00AYR9AQO
  2. B000YJDIOM
  3. B008YRQ898
  4. B011NG8WVS
  5. B000GBLV42
  6. B000VBC0YM
  7. B000AKRTWM
  8. B00169V55I
  9. B00V3PU6QE
  10. B00MUA5KDK


In [20]:
# Evaluate BPR (implicit) recommendations against actual purchases after the cutoff date
# Evaluate BPR recommendations against actual purchases after the cutoff date
from functions.rs_evaluation import (
    calculate_precision_at_k,
    calculate_recall_at_k,
    calculate_hit_at_k,
    calculate_ndcg_at_k
)

actual_purchases = df_subset[
    (df_subset['reviewerID'] == user_id) &
    (df_subset['unixReviewTime'] >= cutoff_time)
]['asin'].tolist()

k = 10
print(f"Actual purchases after cutoff: {len(actual_purchases)}")
print(f"\nBPR (implicit) Evaluation @{k}:")
print(f"  Precision@{k}: {calculate_precision_at_k(recommendations_implicit, actual_purchases, k):.4f}")
print(f"  Recall@{k}:    {calculate_recall_at_k(recommendations_implicit, actual_purchases, k):.4f}")
print(f"  Hit@{k}:       {calculate_hit_at_k(recommendations_implicit, actual_purchases, k)}")
print(f"  NDCG@{k}:      {calculate_ndcg_at_k(recommendations_implicit, actual_purchases, k):.4f}")

Actual purchases after cutoff: 4

BPR (implicit) Evaluation @10:
  Precision@10: 0.0000
  Recall@10:    0.0000
  Hit@10:       0
  NDCG@10:      0.0000
