In [1]:
# Import packages
import pandas as pd
import numpy as np
import io
import matplotlib.pyplot as plt
import seaborn as sns
import math
from scipy.sparse import csr_matrix
#
from contextlib import redirect_stdout

In [2]:
# Set display option to show full content of columns
pd.options.display.max_colwidth = None

# Turn off scientific notation for pandas DataFrames
pd.options.display.float_format = '{:.2f}'.format

In [3]:
# Take the dataframes
# file_reviews = '/content/drive/MyDrive/Recommendation Engine BP/Home_and_Kitchen_filtered.csv'
file_reviews = '../data/Home_and_Kitchen_filtered.csv'
df_reviews = pd.read_csv(file_reviews)

file_items = '../data/meta_Home_and_Kitchen_filtered.csv'
df_items = pd.read_csv(file_items)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [4]:
# Merge reviews with items using left_join
df_combined = df_reviews.merge(df_items, left_on='asin', right_on='asin', how = 'left')

# Convert 'unixReviewTime' to datetime format
df_combined['unixReviewTime'] = pd.to_datetime(df_combined['unixReviewTime'], unit='s')

In [5]:
# Take the subset to analys
df_subset = df_combined[['overall', 'reviewerID', 'asin', 'unixReviewTime']]
df_subset.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_subset.drop_duplicates(inplace=True)


### Testing IALS model

In [8]:
import sys
from pathlib import Path
sys.path.append(str(Path().resolve().parent))
import functions.interaction_matrix as interaction_matrix
from functions.ials_implicit_package import IALSImplicitRecommender

In [9]:
# --- Example to test ---
user_id = 'A8LUWTIPU9CZB'
cutoff_time = timestamp=pd.Timestamp('2016-11-05')

In [10]:
# Initialize matrix builder and knn
builder = interaction_matrix.InteractionMatrixBuilder(min_users=5, cutoff_time=cutoff_time)
matrix = builder.build(df_subset)

In [13]:
# Initialize IALS model with hyperparameters
# Adjust these values when tuning: factors, regularization, alpha, epochs
ials = IALSImplicitRecommender(factors=64, regularization=0.01, alpha=40, epochs=5)

# Train the model using the interaction matrix from InteractionMatrixBuilder
ials.fit(
    matrix=matrix,
    item_map=builder.item_map,
    items=builder.items,
    user_map=builder.user_map,
    users=builder.users
)

100%|██████████| 5/5 [00:27<00:00,  5.57s/it]


<functions.ials_implicit_package.IALSImplicitRecommender at 0x7fdc86655850>

In [15]:
# Get the user's purchase history before the cutoff date
user_history = df_subset[
    (df_subset['reviewerID'] == user_id) &
    (df_subset['unixReviewTime'] < cutoff_time)
]['asin'].tolist()

print(f"User {user_id} has {len(user_history)} purchases before cutoff")

# Generate top 10 recommendations
recommendations = ials.recommend(user_id=user_id, user_history=user_history, top_n=10)
print(f"\nTop 10 IALS recommendations:")
for i, item in enumerate(recommendations, 1):
    print(f"  {i}. {item}")

User A8LUWTIPU9CZB has 5 purchases before cutoff

Top 10 IALS recommendations:
  1. B007WQ9YNE
  2. B00AYULZEQ
  3. B0060M5TJY
  4. B000VENLF6
  5. B01EJ2GM1K
  6. B000079XW2
  7. B002S52ZDU
  8. B000X7GF40
  9. B007ZF638G
  10. B00T6TKYPC


In [16]:
# Evaluate IALS recommendations against actual purchases after the cutoff date
from functions.rs_evaluation import (
    calculate_precision_at_k,
    calculate_recall_at_k,
    calculate_hit_at_k,
    calculate_ndcg_at_k
)

# Get the user's actual purchases after the cutoff date (ground truth)
actual_purchases = df_subset[
    (df_subset['reviewerID'] == user_id) &
    (df_subset['unixReviewTime'] >= cutoff_time)
]['asin'].tolist()

k = 10
print(f"Actual purchases after cutoff: {len(actual_purchases)}")
print(f"\nIALS Evaluation @{k}:")
print(f"  Precision@{k}: {calculate_precision_at_k(recommendations, actual_purchases, k):.4f}")
print(f"  Recall@{k}:    {calculate_recall_at_k(recommendations, actual_purchases, k):.4f}")
print(f"  Hit@{k}:       {calculate_hit_at_k(recommendations, actual_purchases, k)}")
print(f"  NDCG@{k}:      {calculate_ndcg_at_k(recommendations, actual_purchases, k):.4f}")

Actual purchases after cutoff: 4

IALS Evaluation @10:
  Precision@10: 0.0000
  Recall@10:    0.0000
  Hit@10:       0
  NDCG@10:      0.0000
