<a href="https://colab.research.google.com/github/gerardgoh/bt4222/blob/main/RAG_recommender_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import torch.nn as nn
import torch.optim as optim
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelEncoder
from sentence_transformers import SentenceTransformer
import pickle

# Fetch the NLTK resources this notebook relies on: WordNet (used by
# WordNetLemmatizer) and the English stopword list (used by stopwords.words).
# 'punkt' / 'punkt_tab' are presumably the tokenizer data behind
# nltk.word_tokenize -- TODO confirm which of the two the installed NLTK needs.
nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [2]:
# Colab-only: mount Google Drive under /content/drive/ so the later cells can
# read the data, embeddings and model files stored beneath MyDrive.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [3]:
import os
import getpass
# reviews_path='/content/drive/MyDrive/bt4222data/Reviews Data Cleaned/cleaned_reviews.csv'
# product_metadata_path='/content/drive/Meta Data Cleaned/meta_data_cleaned.csv.zip'
# base_path = '/content/drive/MyDrive/bt4222data'

# FILE PATHS
# Everything lives under one root so only `base_path` needs to change when the
# data folder moves.
base_path = '/content/drive/MyDrive/amazon_data'
reviews_path = os.path.join(base_path, 'cleaned_reviews.csv')
product_metadata_path = os.path.join(base_path, 'meta_data_cleaned.csv.zip')

# SECRETS
# Never commit API keys to a notebook. The key is read from the OPENAI_API_KEY
# environment variable, falling back to an interactive hidden prompt.
# NOTE(review): a real key was previously hardcoded here and is in the
# repository history -- it must be revoked and rotated.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")

# DATA
product_metadata = pd.read_csv(product_metadata_path)
reviews_data = pd.read_csv(reviews_path)
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, which
# can execute code; only load archives produced by this project.
review_embeddings = np.load(os.path.join(base_path, "embeddings", 'review_embeddings.npz'), allow_pickle=True)

# User Input
This class handles the processing and preparation of user queries for the recommendation system. It takes a user ID, query text, query type (text-based or item-based), and reviews data as inputs.

In [102]:
class UserInput:
  """Holds one user request and the derived values the pipeline needs.

  Args:
    userID: identifier typed by the user (may be an empty string for guests).
    query: free-text search string (query_type 1) or a product ASIN (query_type 2).
    query_type: 1 = query-based recommendations, 2 = item-based recommendations.
    reviews_data: DataFrame of reviews; must contain a 'reviewerID' column.

  Attributes:
    has_reviews: True when `userID` appears in reviews_data['reviewerID'].
    processed_query: cleaned query text for query_type 1, otherwise None.
    query_embedding: sentence embedding of `processed_query` for query_type 1,
      otherwise None.
  """

  # Sentence-transformers checkpoint used to embed text queries.
  EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

  # Encoders already loaded in this kernel, keyed by checkpoint name. Loading a
  # SentenceTransformer is expensive, so it is done once and shared by all
  # instances instead of once per query.
  _embedding_models = {}

  def __init__(self, userID, query, query_type, reviews_data):
    self.userID = userID
    self.query = query

    # type 1 = query-based recommendations, type 2 = item-based recommendations
    self.query_type = query_type
    self.reviews_data = reviews_data

    self.has_reviews = userID in reviews_data['reviewerID'].values # will be valid for NeuMF recommendations

    # Initialize NLP tools
    self.stop_words = set(stopwords.words('english'))
    self.lemmatizer = WordNetLemmatizer()

    # Process and embed query for content-based recommendations
    self.processed_query = self.preprocess_query(query) if query_type == 1 else None
    self.query_embedding = self.embed_query() if query_type == 1 else None

  def preprocess_query(self, query):
    """
    Normalise a free-text query and return it as a single string.

    1. convert to lowercase
    2. remove special characters
    3. remove stopwords
    4. lemmatization
    """
    # Convert to lower case
    query = query.lower()

    # Remove special characters, then collapse the whitespace that leaves behind
    query = re.sub(r'[^\w\s]', ' ', query)
    query = re.sub(r'\s+', ' ', query).strip()

    # Remove stopwords
    tokens = nltk.word_tokenize(query)
    filtered_tokens = [word for word in tokens if word not in self.stop_words]

    # Lemmatization
    lemmatized_tokens = [self.lemmatizer.lemmatize(word) for word in filtered_tokens]

    return ' '.join(lemmatized_tokens)

  @classmethod
  def _get_embedding_model(cls):
    """Return the shared encoder for EMBEDDING_MODEL_NAME, loading it on first use."""
    model_name = cls.EMBEDDING_MODEL_NAME
    model = cls._embedding_models.get(model_name)
    if model is None:
      model = SentenceTransformer(model_name)
      cls._embedding_models[model_name] = model
    return model

  def embed_query(self):
    """Encode `self.processed_query` with the shared sentence encoder and return the result."""
    return self._get_embedding_model().encode(self.processed_query)

  def is_reviewer(self):
    """Return True when the user ID was found in the reviews data."""
    return self.has_reviews

Import the Other Models

In [103]:
# # Step 3: Create a proper Python package
# init_file = os.path.join(models_path, '__init__.py')
# if not os.path.exists(init_file):
#     with open(init_file, 'w') as f:
#         pass  # Create an empty file
#     print("Created __init__.py file")

# Step 4: Import modules using importlib
import importlib.util
import sys

models_path = '/content/drive/MyDrive/amazon_data/models'


def _load_module_from_file(module_name, file_path):
    """Import the Python source file at `file_path` under the name `module_name`.

    The module is registered in sys.modules before it is executed, and removed
    again if execution fails so a half-initialised module is never left behind.

    Raises:
        ImportError: if no import spec/loader can be built for `file_path`.
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {module_name!r} from {file_path!r}")

    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        sys.modules.pop(module_name, None)
        raise
    return module


# Import recommendation_models.py
rec_model_path = os.path.join(models_path, 'recommendation_models.py')
recommendation_models = _load_module_from_file("recommendation_models", rec_model_path)

# Access the classes
RAGRecommender = recommendation_models.Recommender
NCF = recommendation_models.NCF
NeuMFPlusPlus = recommendation_models.NeuMFPlusPlus


# Import related_fallback_model.py
fallback_model_path = os.path.join(models_path, 'related_fallback_model.py')
related_fallback_model = _load_module_from_file("related_fallback_model", fallback_model_path)

# Access the class
RelatedFallbackModel = related_fallback_model.RelatedFallbackModel

print("Successfully imported modules")

Successfully imported modules


# Recommender Class
This class serves as the main orchestrator for the recommendation system. Depending on the user input and query type, it generates recommendations through one of the following pipelines:

Query type = 1:
- generate content-based recommendations
- if user has reviews, also generates NeuMF Sbert recommendations

Query type = 2:
- if item has reviews, calls ibcf model
- if item has no reviews, call related fallback model

In [104]:
class Recommender:
  """Routes a user request to the appropriate recommendation model(s).

  Query type 1 (text search): content-based recommendations, plus NeuMF
  recommendations when the user has reviews.
  Query type 2 (item-based): item-based recommendations when the item appears in
  the reviews data, otherwise the related-items fallback model.

  Results are stored on the instance as lists (`cb_recommendations`,
  `neumf_recommendations`, `ibcf_recommendations`, `fallback_recommendations`).
  The constructor reads the notebook globals `base_path`, `product_metadata`
  and `reviews_data`.
  """

  # Values accepted by get_neumf_recommendations / get_recommendations.
  NEUMF_TYPES = ('neumf', 'neumf_sbert')

  def __init__(self, input):
    # `input` is the UserInput-like object describing the request. The name
    # shadows the builtin inside this method only; it is kept for compatibility.
    self.has_reviews = input.has_reviews
    self.query = input.query
    self.userID = input.userID
    self.query_type = input.query_type
    self.query_embedding = input.query_embedding
    self.cb_recommendations = []
    self.neumf_recommendations = []
    self.ibcf_recommendations = []
    self.fallback_recommendations = []
    self.metadata_embeddings_path = os.path.join(base_path, "embeddings", 'metadata_embeddings.npz')
    self.metadata_df = product_metadata
    self.reviews_df = reviews_data

    self.recommender = RAGRecommender(
            metadata_df=self.metadata_df,
            reviews_df=self.reviews_df,
            metadata_embeddings_path=self.metadata_embeddings_path,
            query_embeddings=self.query_embedding,
            has_reviews = self.has_reviews,
            query_type = self.query_type,
            item_embeddings_path = os.path.join(base_path, "embeddings", 'item_embeddings.pkl'),
            sbert_embeddings_path = os.path.join(base_path, "embeddings", 'item_sbert_embeddings.npy'),

            ncf_model_path = os.path.join(base_path, "models", 'ncf_binary_model.pt'), ## baseline model ##
            ncf_encoders_path = os.path.join(base_path, "models", 'encoders.pkl'), ## encoder for the baseline model ##
            sbert_model_path = os.path.join(base_path, "models", 'neumf_sbert_model.pt'),###
            sbert_encoders_path = os.path.join(base_path, "models", 'sbert_encoders.pkl'), ##
        )

    self.fallback_recommender = RelatedFallbackModel(product_metadata)

  def get_cb_recommendations(self):
    """Fetch, store and return the content-based recommendations."""
    self.cb_recommendations = self.recommender.get_cb_recommendations()
    return self.cb_recommendations

  def get_neumf_recommendations(self, type, user_id):
    """Fetch, store and return NeuMF recommendations for `user_id`.

    Args:
      type: 'neumf' (baseline model) or 'neumf_sbert'.
      user_id: the user to recommend for.

    Raises:
      ValueError: if `type` is not one of NEUMF_TYPES. Previously any other
        value silently ran the SBERT model, which hid typos.
    """
    if type == 'neumf':
      self.neumf_recommendations = self.recommender.get_neumf_recommendations(user_id)
    elif type == 'neumf_sbert':
      self.neumf_recommendations = self.recommender.get_sbert_neumf_recommendations(user_id)
    else:
      raise ValueError(f"type must be one of {self.NEUMF_TYPES}, got {type!r}")
    return self.neumf_recommendations

  def get_item_based_recommendations(self, item_id):
    """Fetch, store and return item-based recommendations for `item_id`."""
    self.ibcf_recommendations = self.recommender.get_item_based_recommendations(item_id)
    return self.ibcf_recommendations

  def get_fallback_recommendations(self, item_id):
    """Fetch, store and return fallback recommendations; empty unless query_type is 2."""
    if self.query_type == 2:
      self.fallback_recommendations = self.fallback_recommender.get_recommendations(item_id)
    else:
      self.fallback_recommendations = []
    return self.fallback_recommendations

  def get_recommendations(self, neumf_type='neumf_sbert'):
    """Run the pipeline for this request and fill the recommendation lists.

    Args:
      neumf_type: which NeuMF variant to use for users with reviews. Pass
        'neumf' to run the baseline model (this replaces the old
        "uncomment to run the baseline" copy of this method).
    """
    if self.query_type == 1:
      self.get_cb_recommendations() # updates the corresponding attributes with a list of recommended product asins
      if self.has_reviews:
        self.get_neumf_recommendations(neumf_type, self.userID)

    else:
      item_id = self.query
      item_has_reviews = item_id in self.reviews_df['asin'].values
      if item_has_reviews:
        self.get_item_based_recommendations(item_id)
      else:
        self.get_fallback_recommendations(item_id)

    print(f"CB Recommendations: {self.cb_recommendations}")
    print(f"NeuMF Recommendations: {self.neumf_recommendations}")
    print(f"IBCF Recommendations: {self.ibcf_recommendations}")
    print(f"Fallback Recommendations: {self.fallback_recommendations}")

# Prompt Generator
This class gathers all the relevant reviews and metadata of the recommended products and formats them into a prompt to be fed into the LLM.

### Review Retrieval Logic

For the recommendations by the content-based model:
- Retrieve reviews that are semantically relevant to the user’s query.
- From these relevant reviews, select those with high combined helpfulness scores.

For the recommendations by the other models (NeuMF Sbert, ibcf, related fallback):
- Retrieve reviews by directly selecting those with high combined helpfulness scores, without semantic filtering.

Logic for combined score can be found under reviews preprocessing file.

### Business Rationale

Since content-based recommendations are directly linked to the user’s search query, it is important that the retrieved reviews are aligned with the specific needs expressed in the query. By prioritizing semantic relevance first, we ensure that the explanations generated by the LLM adequately capture and address the user’s intent.

On the other hand, recommendations by the other models are not contingent on the user's query (they are driven by factors like historical user interaction patterns instead). In this case, reviews that have been marked as helpful by other users are sufficient indicators of quality and relevance. High helpfulness scores signal collective user agreement on the review’s usefulness, making them appropriate inputs to generate trustworthy explanations.

In [110]:
from urllib.parse import quote_from_bytes
class PromptGenerator:
  """Collects reviews and product details for the recommended items and formats
  them into the prompt sent to the LLM.

  Args:
    recommender: object exposing the four recommendation lists
      (`cb_recommendations`, `neumf_recommendations`, `ibcf_recommendations`,
      `fallback_recommendations`), read once at construction time.
    input: the UserInput-like request (query, query_type, reviews_data,
      has_reviews, query_embedding, is_reviewer()).

  The class also reads the notebook globals `product_metadata` and
  `review_embeddings`.
  """

  # Product fields copied from the metadata into the prompt.
  _DETAIL_FIELDS = ('title', 'price', 'description')

  def __init__(self, recommender, input):

    # RECOMMENDATIONS
    self.cb_recommendations = recommender.cb_recommendations
    self.neumf_recommendations = recommender.neumf_recommendations
    self.ibcf_recommendations = recommender.ibcf_recommendations
    self.fallback_recommendations = recommender.fallback_recommendations

    self.reviews_per_product = 4

    # INPUT DATA
    self.query_type = input.query_type
    self.query = input.query
    self.reviews_data = input.reviews_data
    self.has_reviews = input.has_reviews
    self.input = input # to retrieve query text (for prompt) and embedded query (for review retrieval)

    # Reviews are stored in the following format:
    #   KEY: Product ID
    #   VALUE: List of review dictionaries (rating, combined_score, text)
    self.cb_reviews = {}
    self.neumf_reviews = {}
    self.ibcf_reviews = {}
    self.fallback_reviews = {}

    # Recommendations are stored in the following format:
    #   KEY: Product ID
    #   VALUE: Dictionary of product details (title, price, description)
    self.recommendations = {}

  # ---------------------------------------------------------------- helpers
  def _select_balanced_reviews(self, ranked_reviews, negative_quota):
    """Pick a mix of positive and negative reviews from an already-ranked list.

    Takes up to max(2, reviews_per_product // 2) reviews rated >= 4.0 and up to
    max(1, negative_quota) reviews rated <= 2.0, then tops up with the remaining
    reviews (in ranked order) until `reviews_per_product` is reached.
    """
    positive_reviews = [r for r in ranked_reviews if r['rating'] >= 4.0]
    negative_reviews = [r for r in ranked_reviews if r['rating'] <= 2.0]

    selected_reviews = []
    selected_reviews.extend(positive_reviews[:max(2, self.reviews_per_product // 2)])
    selected_reviews.extend(negative_reviews[:max(1, negative_quota)])

    remaining_slots = self.reviews_per_product - len(selected_reviews)
    if remaining_slots > 0:
      remaining_reviews = [r for r in ranked_reviews if r not in selected_reviews]
      selected_reviews.extend(remaining_reviews[:remaining_slots])

    return selected_reviews

  def _lookup_product(self, asin):
    """Return {'title', 'price', 'description'} for `asin` as plain scalars.

    Uses the first matching metadata row; every field is None when the ASIN is
    not present in `product_metadata`.
    """
    matches = product_metadata[product_metadata['asin'] == asin]
    if matches.empty:
      return {field: None for field in self._DETAIL_FIELDS}
    first_row = matches.iloc[0]
    return {field: first_row.get(field) for field in self._DETAIL_FIELDS}

  # THE RETRIEVAL FOR CBF
  def retrieve_cb_reviews(self):
    """
    For each content-based recommendation, extract relevant reviews based on:
    - Semantic similarity with query (top 10 by cosine similarity)
    - High combined score
    Stores the result in self.cb_reviews and returns it
    (dict: asin -> list of {'rating', 'combined_score', 'text'}).

    Reviews that have no entry in `review_embeddings` cannot be scored against
    the query and are skipped.
    """
    relevant_reviews = {}

    if not self.cb_recommendations:
      self.cb_reviews = relevant_reviews
      return relevant_reviews

    # An .npz archive re-reads an array from disk on every key access, so pull
    # the three arrays out once instead of once per review row.
    embedded_reviewers = review_embeddings['reviewer_ids']
    embedded_asins = review_embeddings['asins']
    embedded_vectors = review_embeddings['embeddings']
    query_vector = np.asarray(self.input.query_embedding).reshape(1, -1)

    for asin in self.cb_recommendations:
      # retrieve all product reviews
      product_reviews = self.reviews_data[self.reviews_data['asin'] == asin]

      if len(product_reviews) == 0:
        relevant_reviews[asin] = [] # an empty list
        continue

      # Positions of this product's embeddings; computed once per product.
      asin_positions = np.flatnonzero(embedded_asins == asin)
      asin_reviewers = embedded_reviewers[asin_positions]

      # Calculate cosine similarity score between query and review embeddings
      similarities = []
      for _, row in product_reviews.iterrows():
        hits = asin_positions[asin_reviewers == row['reviewerID']]
        if hits.size == 0:
          continue  # no stored embedding for this (reviewer, product) pair
        review_vector = embedded_vectors[hits[0]]
        similarity = cosine_similarity(
            query_vector,
            review_vector.reshape(1, -1)
        )[0][0]
        similarities.append({
            'similarity': similarity,
            'combined_score': row['combined_score'],
            'rating': row['overall'],
            'text': str(row['clean_summary']) + " - " + str(row['truncated_review'])
        })

      # Sort by similarity first
      sorted_by_similarity = sorted(similarities, key=lambda x: x['similarity'], reverse=True)
      top_similar = sorted_by_similarity[:10]

      # Then sort by combined helpfulness score
      final_selection = sorted(top_similar, key=lambda x: x['combined_score'], reverse=True)

      # Sample a balance of positive and negative reviews
      selected_reviews = self._select_balanced_reviews(
          final_selection, self.reviews_per_product // 2)

      # Sort selected reviews by similarity for presentation
      selected_reviews = sorted(selected_reviews, key=lambda x: x['similarity'], reverse=True)

      # Drop the internal similarity score from what is exposed to the prompt
      relevant_reviews[asin] = [
          {
              'rating': review['rating'],
              'combined_score': review['combined_score'],
              'text': review['text']
          }
          for review in selected_reviews
      ]

    self.cb_reviews = relevant_reviews  # Store in class attribute
    return relevant_reviews


  # THE RETRIEVAL FOR NEUMF, IBCF & FALLBACK MODEL
  def retrieve_by_score(self, model):
    """
    For each product recommended by `model`, extract reviews based on high
    combined score (no semantic filtering).

    `model` must be 'neumf', 'fallback' or 'ibcf'; any other value returns {}.
    The result is stored in the matching *_reviews attribute and returned.
    """
    # Get the appropriate recommendations based on model type
    if model == 'neumf':
        recommendations = self.neumf_recommendations
    elif model == 'fallback':
        recommendations = self.fallback_recommendations
    elif model == 'ibcf':
        recommendations = self.ibcf_recommendations
    else:
        return {}  # Return empty dict if model type is invalid

    reviews = {}

    for asin in recommendations:
        product_reviews = self.reviews_data[self.reviews_data['asin'] == asin]

        if len(product_reviews) == 0:
            reviews[asin] = []
            continue

        review_scores = [
            {
                'combined_score': row['combined_score'],
                'rating': row['overall'],
                'text': str(row['clean_summary']) + " - " + str(row['truncated_review'])
            }
            for _, row in product_reviews.iterrows()
        ]

        # Sort by combined helpfulness score
        sorted_reviews = sorted(review_scores, key=lambda x: x['combined_score'], reverse=True)

        # Sample a balance of positive and negative reviews
        reviews[asin] = self._select_balanced_reviews(
            sorted_reviews, self.reviews_per_product // 4)

    # Update the appropriate class attribute based on model type
    if model == 'neumf':
        self.neumf_reviews = reviews
    elif model == 'fallback':
        self.fallback_reviews = reviews
    elif model == 'ibcf':
        self.ibcf_reviews = reviews

    return reviews

  def retrieve_product_details(self):
    """
    Retrieve product details for each recommended product and updates self.recommendations

    Each value is a dict of scalar 'title', 'price' and 'description' (None when
    the product is missing from the metadata), so the fields can be interpolated
    into the prompt directly.
    """
    all_product_ids = (
        list(self.cb_recommendations)
        + list(self.neumf_recommendations)
        + list(self.ibcf_recommendations)
        + list(self.fallback_recommendations)
    )

    product_details = {}
    for asin in all_product_ids:
      if asin not in product_details:  # a product may be recommended by several models
        product_details[asin] = self._lookup_product(asin)

    self.recommendations = product_details
    return self.recommendations

  # THE PROMPT FOR QUERY-BASED RECOMMENDATIONS
  def augment_type1_prompt(self):
    """Build the prompt for a text query: content-based products with their
    reviews, followed by the NeuMF "You May Also Like" section when the user is
    a reviewer and NeuMF recommendations exist."""
    cb_reviews = self.cb_reviews
    recommendations = self.recommendations
    query = self.input.query
    cb_asins = self.cb_recommendations

    prompt = f"""
    You are a helpful shopping assistant that provides personalized product recommendations and explanations.
    The user is currently searching for: "{query}"

    Below are two types of recommendations:
    - Content-Based Recommendations (based on the user's query)
    - You May Also Like (based on the user's past preferences)

    =========================

    # CONTENT-BASED RECOMMENDATIONS:
    """

    for index, asin in enumerate(cb_asins, 1):
      product_info = recommendations.get(asin, {})
      prompt += f"""
        PRODUCT {index}: {product_info.get('title')}
        PRICE: ${product_info.get('price')}
        DESCRIPTION: {product_info.get('description')}

        REVIEWS:
      """

      product_reviews = cb_reviews.get(asin, [])

      if not product_reviews:
          prompt += "No reviews available for this product.\n"
      else:
          for j, review in enumerate(product_reviews, 1):
              prompt += f"""
                REVIEW {j} (Rating: {review.get('rating')}/5):
                {review.get('text')}
              """

    # concatenate extra prompt for neumf recommendations
    if self.input.is_reviewer():
      neumf_reviews = self.neumf_reviews
      neumf_asins = self.neumf_recommendations

      if neumf_asins:
          prompt += "\n\nYOU MAY ALSO LIKE:\n"

          for i, asin in enumerate(neumf_asins, 1):
              product_info = recommendations.get(asin, {})

              prompt += f"""
              PRODUCT {i}: {product_info.get('title')}
              PRICE: ${product_info.get('price')}
              DESCRIPTION: {product_info.get('description')}

              REVIEWS:
              """

              product_reviews = neumf_reviews.get(asin, [])

              if not product_reviews:
                  prompt += "No reviews available for this product.\n"
              else:
                  for j, review in enumerate(product_reviews, 1):
                      prompt += f"""
                        REVIEW {j} (Rating: {review.get('rating')}/5):
                        {review.get('text')}
                      """

    prompt += """
      INSTRUCTIONS TO ASSISTANT:
      1. For Content-Based Recommendations: explain how each product specifically addresses the user's query.
      2. For You May Also Like: explain why these products might interest the user based on their general preferences.
      3. Summarize the main strengths and potential drawbacks based on the reviews provided.
      4. Keep explanations honest, conversational, and friendly.
      5. Do not invent facts beyond the provided reviews and metadata.
    """

    return prompt

  def augment_type2_prompt(self):

    """
    Generates a prompt for item-based recommendations (type 2 queries)

    The reference item is looked up in the metadata; when it is missing, its
    ASIN is used in place of the title instead of raising.
    """

    # Get the item ID from the query
    item_id = self.query
    recommendations = self.recommendations

    # Get the item's details
    reference_item = self._lookup_product(item_id)
    item_title = reference_item['title'] if reference_item['title'] is not None else item_id
    item_price = reference_item['price']
    item_description = reference_item['description']

    # Determine which recommendations to use based on whether item has reviews
    if self.ibcf_reviews:
        rec_type = "Similar Products"
        rec_asins = self.ibcf_recommendations
        reviews = self.ibcf_reviews
        explanation_basis = "items that similar customers rate similarly to"
    else:
        rec_type = "You Might Also Like"
        rec_asins = self.fallback_recommendations
        reviews = self.fallback_reviews
        explanation_basis = "products that are complementary or frequently purchased together"

    prompt = f"""
    You are a helpful shopping assistant that provides personalized product recommendations and explanations.

    The user is currently viewing: "{item_title}"

    Below are recommendations for {rec_type} to consider:

    =========================

    # REFERENCE PRODUCT:
    PRODUCT: {item_title}
    PRICE: ${item_price}
    DESCRIPTION: {item_description}

    # RECOMMENDED PRODUCTS:
    """

    # Add details for each recommended product
    for index, asin in enumerate(rec_asins, 1):
        product_info = recommendations.get(asin, {})
        prompt += f"""
        PRODUCT {index}: {product_info.get('title')}
        PRICE: ${product_info.get('price')}
        DESCRIPTION: {product_info.get('description')}

        REVIEWS:
        """

        # Get reviews for this product
        product_reviews = reviews.get(asin, [])

        if not product_reviews:
            prompt += "No reviews available for this product.\n"
        else:
            for j, review in enumerate(product_reviews, 1):
                prompt += f"""
                REVIEW {j} (Rating: {review.get('rating')}/5):
                {review.get('text')}
                """

    prompt += f"""
    INSTRUCTIONS TO ASSISTANT:
    1. Explain why these products might interest the user based on their interest in "{item_title}".
    2. Highlight how each recommended product compares to the reference product.
    3. Summarize the main strengths and potential drawbacks of each recommendation based on the reviews provided.
    4. Note any complementary relationships between products if they exist.
    5. Keep explanations honest, conversational, and friendly.
    6. Do not invent facts beyond the provided reviews and metadata.

    Note: These recommendations are based on {explanation_basis} "{item_title}".
    """

    return prompt

  def generate_prompt(self):
    """
    Main method to generate the appropriate prompt based on query type
    """
    # First, retrieve all necessary data
    self.retrieve_product_details()
    if self.query_type == 1:
        # For type 1 (text-based search)
        self.retrieve_cb_reviews()
        if self.has_reviews:
            self.retrieve_by_score('neumf')
        return self.augment_type1_prompt()

    # For type 2 (item-based)
    if self.ibcf_recommendations:
        self.retrieve_by_score('ibcf')
    else:
        self.retrieve_by_score('fallback')
    return self.augment_type2_prompt()

# Explainer
This class handles the interaction with OpenAI's API to generate natural language explanations for recommendations. It receives the structured prompt from PromptGenerator, sends it to the GPT model (defaulting to gpt-4o), and returns the generated explanation.

In [106]:
from openai import OpenAI

class Explainer:
    """Sends a prepared prompt to an OpenAI chat model and returns its reply.

    Args:
        prompt: the full user prompt to send.
        api_key: OpenAI API key used to build the client.
        model_name: chat model to call (defaults to "gpt-4o").
    """

    def __init__(self, prompt, api_key, model_name="gpt-4o"):
        self.prompt, self.model_name, self.api_key = prompt, model_name, api_key
        self.client = OpenAI(api_key=self.api_key)

    def generate_explanation(self):
        """Return the model's reply text, or None (after printing the error) if the call fails."""
        try:
            request = {
                "model": self.model_name,
                "messages": [
                    {"role": "system", "content": "You are a helpful shopping assistant."},
                    {"role": "user", "content": self.prompt},
                ],
                "temperature": 0.7,
                "max_tokens": 1500,
            }
            completion = self.client.chat.completions.create(**request)
            first_choice = completion.choices[0]
            return first_choice.message.content
        except Exception as e:
            # Best effort: report the failure and let the caller handle None.
            print(f"Error generating explanation: {e}")
            return None

In [107]:
import builtins

def _ask_query_type():
    """Prompt until the user enters 1 or 2 and return the choice as an int."""
    while True:
        query_type_input = builtins.input("Enter Query Type (1 for text search, 2 for item-based): ").strip()
        try:
            query_type = int(query_type_input)
        except ValueError:
            print("Invalid input. Please enter a number (1 or 2).")
            continue
        if query_type in (1, 2):
            return query_type
        print("Invalid query type. Please enter 1 or 2.")


def _ask_query(query_type):
    """Prompt for the search text (type 1) or product ASIN (type 2) until it is non-empty."""
    if query_type == 1:
        question = "What are you looking for? (e.g., 'wireless bluetooth headphones'): "
    else:
        question = "Enter Product ID (ASIN) you're interested in: "

    while True:
        query = builtins.input(question).strip()
        if query:
            return query
        # An empty query cannot be embedded or looked up meaningfully.
        print("Query cannot be empty. Please try again.")


def _run_single_query():
    """Collect one request from the terminal, run the pipeline and print the explanation."""
    # Get user inputs
    print("\n====== Amazon Product Recommendation System ======\n")

    userID = builtins.input("Enter User ID (or press Enter for guest mode): ").strip()
    query_type = _ask_query_type()
    query = _ask_query(query_type)

    print(f"\nProcessing request for user: {userID if userID else 'Guest'}")
    print(f"Query Type: {query_type} ({'Text Search' if query_type == 1 else 'Item-Based'})")
    print(f"Query: {query}")
    print("\nGenerating recommendations...\n")

    # Initialize Input object
    user_input = UserInput(userID, query, query_type, reviews_data)

    # Create Recommender instance and get recommendations
    recommender = Recommender(user_input)
    recommender.get_recommendations() # the relevant recommendation arrays will be filled

    # Generate prompt and explanation
    prompt_generator = PromptGenerator(recommender, user_input)
    prompt = prompt_generator.generate_prompt()

    # Generate explanation using LLM
    print("Generating personalized explanation...")
    explainer = Explainer(prompt, openai_api_key)
    explanation = explainer.generate_explanation()

    # Display the result
    if explanation:
        print("\n====== PERSONALIZED RECOMMENDATION EXPLANATION ======\n")
        print(explanation)
        print("\n=====================================================\n")
    else:
        print("Failed to generate explanation.")


def main():
    """
    Main function to run the recommendation system from terminal input.

    For each query this function will:
    1. Ask for user ID, query type, and query text from terminal
    2. Initialize the Input object with user inputs
    3. Create Recommender instance and generate recommendations
    4. Generate prompts and explanations through PromptGenerator and Explainer
    5. Display the final personalized recommendations to the user

    It then asks whether to run another query and repeats until the answer is
    not 'y'. A loop is used (rather than main() calling itself) so the call
    stack does not grow with every additional query.
    """
    while True:
        _run_single_query()

        # Ask if user wants to continue
        continue_response = builtins.input("\nWould you like to make another query? (y/n): ").strip().lower()
        if continue_response != 'y':
            print("Thank you for using the Amazon Product Recommendation System!")
            break

# Testing the Pipeline

## Testing on content-based model
userID: type a random ID that does not exist in reviews_data (a non-reviewer)

query_type:  1

query: wireless bluetooth headphones

In [111]:
# Interactive run: answer the prompts with a user ID that is not in
# reviews_data, query type 1, and a free-text query.
main()



Enter User ID (or press Enter for guest mode): not_reviewer
Enter Query Type (1 for text search, 2 for item-based): 1
What are you looking for? (e.g., 'wireless bluetooth headphones'): wireless bluetooth headphones

Processing request for user: not_reviewer
Query Type: 1 (Text Search)
Query: wireless bluetooth headphones

Generating recommendations...

Loading model from /content/drive/MyDrive/amazon_data/models/ncf_binary_model.pt
Creating NCF model with: n_users=192403, n_items=63001, factors=64, mlp_layers=[128, 64, 32]
Successfully loaded model parameters
CB Recommendations: ['B00KL0BX9Q', 'B00H3BAIWY', 'B0010TKJ9C', 'B0090ACBPI', 'B00BI1YBYQ']
NeuMF Recommendations: []
IBCF Recommendations: []
Fallback Recommendations: []
Generating personalized explanation...


Sure! Here are some wireless Bluetooth headphones that match your search, along with a brief explanation of each:

### Content-Based Recommendations:

1. **Electronic4sale New Wireless Sports Bluetooth Headphones**  
   

## Testing on content-based + NeuMF_Sbert model
userID: AO94DHGC771SJ (an existing reviewer)

query_type: 1

query: wireless bluetooth headphones

In [112]:
# Interactive run: answer the prompts with an existing reviewer ID, query
# type 1, and a free-text query.
main()



Enter User ID (or press Enter for guest mode): AO94DHGC771SJ
Enter Query Type (1 for text search, 2 for item-based): 1
What are you looking for? (e.g., 'wireless bluetooth headphones'): wireless bluetooth headphones

Processing request for user: AO94DHGC771SJ
Query Type: 1 (Text Search)
Query: wireless bluetooth headphones

Generating recommendations...

Loading model from /content/drive/MyDrive/amazon_data/models/ncf_binary_model.pt
Creating NCF model with: n_users=192403, n_items=63001, factors=64, mlp_layers=[128, 64, 32]
Successfully loaded model parameters
CB Recommendations: ['B00KL0BX9Q', 'B00H3BAIWY', 'B0010TKJ9C', 'B0090ACBPI', 'B00BI1YBYQ']
NeuMF Recommendations: ['B000OGX5AM', 'B003FVVMS0', 'B004EBUXHQ', 'B000OG6I6A', 'B002ZIMEMW']
IBCF Recommendations: []
Fallback Recommendations: []
Generating personalized explanation...


### Content-Based Recommendations:

1. **Electronic4sale New Wireless Sports Bluetooth Headphones**
   - **Price:** $8.99
   - **Description:** These 

## Testing on item-based recommendations

userID: can be random

query_type: 2

item_asin: 0528881469 (Rand McNally 528881469 7-inch Intelliroute TND 700 Truck GPS)

In [113]:
# Interactive run: answer the prompts with any user ID, query type 2, and the
# ASIN of an item that has reviews.
main()



Enter User ID (or press Enter for guest mode): not_reviewer
Enter Query Type (1 for text search, 2 for item-based): 2
Enter Product ID (ASIN) you're interested in: 0528881469

Processing request for user: not_reviewer
Query Type: 2 (Item-Based)
Query: 0528881469

Generating recommendations...

Loading model from /content/drive/MyDrive/amazon_data/models/ncf_binary_model.pt
Creating NCF model with: n_users=192403, n_items=63001, factors=64, mlp_layers=[128, 64, 32]
Successfully loaded model parameters
CB Recommendations: []
NeuMF Recommendations: []
IBCF Recommendations: ['B004IZN3P2', 'B000P3XKFE', 'B0055MQVPG', 'B003ZBZ64Q', 'B004LAYP7O']
Fallback Recommendations: []
Generating personalized explanation...


Certainly! Let's explore the recommended products and see how they might pique your interest, especially given your current focus on the Rand McNally Truck GPS.

### Product 1: Targus AC Charger for Apple iPad
- **Interest Connection**: Although this is a charger, not a GPS, it m

## Testing on related fallback model

userID: can be random

query_type: 2

item_asin: 0594012015 (this item has related products, but they are not electronics, so the fallback list is empty and the explanation relies on the LLM alone)

In [114]:
# Interactive run: answer the prompts with any user ID, query type 2, and the
# ASIN of an item that exercises the related-items fallback path.
main()



Enter User ID (or press Enter for guest mode): not_reviewer
Enter Query Type (1 for text search, 2 for item-based): 2
Enter Product ID (ASIN) you're interested in: 0594012015

Processing request for user: not_reviewer
Query Type: 2 (Item-Based)
Query: 0594012015

Generating recommendations...

Loading model from /content/drive/MyDrive/amazon_data/models/ncf_binary_model.pt
Creating NCF model with: n_users=192403, n_items=63001, factors=64, mlp_layers=[128, 64, 32]
Successfully loaded model parameters
CB Recommendations: []
NeuMF Recommendations: []
IBCF Recommendations: []
Fallback Recommendations: []
Generating personalized explanation...


Sure, I'd be happy to help with some recommendations based on your interest in the "Barnes & Noble NOOK Power Kit in Carbon BNADPN31." Let's see what might catch your eye and why they could be a good fit for you.

### Recommended Products

1. **NOOK Simple Touch Power Adapter (B0032UP32K)**
   - **Why It Might Interest You:** Since you're looking