In [2]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_books_data(file_path="Amazon_Books_Scraping/Books_df.csv"):
    """Read the books catalogue CSV at ``file_path`` into a DataFrame."""
    books = pd.read_csv(file_path)
    return books

def get_book_text(row):
    """
    Build the one-line description of a book that is fed to the encoder.

    ``row`` is any mapping-like record (e.g. a DataFrame row) exposing the
    keys 'Title', 'Author', 'Main Genre', 'Sub Genre', 'Type' and 'Price'.
    Extend the fragments below to fold more fields into the description.
    """
    headline = f"{row['Title']} by {row['Author']}."
    genres = f"Genre: {row['Main Genre']}, {row['Sub Genre']}."
    details = f"Type: {row['Type']}. Price: {row['Price']}."
    # Fragments are separated by a single space, giving one sentence run.
    return f"{headline} {genres} {details}"

def compute_embeddings(df, model):
    """
    Encode every book in ``df`` and return the result of ``model.encode``.

    Each row is turned into text with ``get_book_text`` and the texts are
    handed to the model in the DataFrame's row order, with
    ``convert_to_tensor=True``.
    """
    descriptions = df.apply(get_book_text, axis=1)
    return model.encode(descriptions.tolist(), convert_to_tensor=True)

def get_user_reading_history(df, read_titles):
    """
    Return the rows of ``df`` whose 'Title' appears in ``read_titles``.

    ``read_titles`` is a list of titles the user has already read; rows are
    matched on exact title equality and keep their original index labels.
    """
    already_read = df['Title'].isin(read_titles)
    return df.loc[already_read]

def recommend_books_from_history(user_history_df, candidate_df, model, candidate_embeddings, top_k=5):
    """
    Recommend books based on the user's reading history.

    1. Compute embeddings for the books the user has read.
    2. Aggregate these embeddings by taking their mean.
    3. Compute cosine similarities between the aggregated embedding and all candidate books.
    4. Exclude the books already read and return the top recommendations.

    Args:
        user_history_df: DataFrame of books the user has read; must contain
            the columns used by ``get_book_text`` and at least one row.
        candidate_df: DataFrame of books to rank. It is not modified.
        model: Object exposing ``encode(texts, convert_to_tensor=True)``.
        candidate_embeddings: Embeddings for ``candidate_df``, one per row and
            in the same row order (scores are assigned positionally).
        top_k: Maximum number of recommendations to return.

    Returns:
        A DataFrame of at most ``top_k`` unread candidates, sorted by
        descending ``score``, with the columns 'Title', 'Author',
        'Main Genre', 'Sub Genre', 'Type', 'Price', 'Rating',
        'No. of People rated' and 'score'.

    Raises:
        ValueError: If ``user_history_df`` has no rows.
    """
    # A zero-row history cannot be averaged into a profile, and a row-wise
    # apply() on an empty frame returns a DataFrame (no .tolist()), so fail
    # early with an explicit message before doing any model work.
    if len(user_history_df) == 0:
        raise ValueError(
            "user_history_df is empty: at least one read book is required "
            "to build recommendations."
        )

    # Compute embeddings for user's reading history texts
    user_texts = user_history_df.apply(get_book_text, axis=1).tolist()
    user_embeddings = model.encode(user_texts, convert_to_tensor=True)

    # Aggregate embeddings into a single user-profile vector (mean over books)
    aggregated_user_embedding = user_embeddings.mean(dim=0)

    # cos_sim returns a 2-D score matrix; row 0 holds the scores of the single
    # aggregated embedding against every candidate.
    cosine_scores = util.cos_sim(aggregated_user_embedding, candidate_embeddings)[0]

    # Add the similarity scores to a copy so the caller's DataFrame is untouched
    candidate_df = candidate_df.copy()
    candidate_df['score'] = cosine_scores.cpu().numpy()

    # Exclude books that are in the user's reading history (matched by title)
    recommended_df = candidate_df[~candidate_df['Title'].isin(user_history_df['Title'])]

    # Sort by similarity score in descending order and return the top recommendations
    recommended_df = recommended_df.sort_values(by='score', ascending=False).head(top_k)
    return recommended_df[['Title', 'Author', 'Main Genre', 'Sub Genre', 'Type', 'Price', 'Rating', 'No. of People rated', 'score']]

if __name__ == "__main__":
    # Read the catalogue of books from disk.
    books_df = load_books_data(file_path="Amazon_Books_Scraping/Books_df.csv")

    # Pre-trained SentenceTransformer checkpoint; needs no API key.
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # Encode every book once so the catalogue can be scored against a user profile.
    candidate_embeddings = compute_embeddings(df=books_df, model=model)

    # Demo reading history: pretend the user has read the first two titles
    # in the dataset, then pull the matching rows.
    user_read_titles = books_df['Title'].iloc[:2].tolist()
    user_history_df = get_user_reading_history(df=books_df, read_titles=user_read_titles)

    # Rank the catalogue against that history and keep the five best matches.
    recommendations = recommend_books_from_history(
        user_history_df=user_history_df,
        candidate_df=books_df,
        model=model,
        candidate_embeddings=candidate_embeddings,
        top_k=5,
    )

    print("Recommended Books Based on Your Reading History:")
    print(recommendations)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Recommended Books Based on Your Reading History:
                                                  Title  \
2667       The Best of Sherlock Holmes (Set of 2 Books)   
3637  The Adventures of Sherlock Holmes (AmazonClass...   
847   The Memoirs of Sherlock Holmes : Illustrated A...   
3581                                             Mythos   
3568                                               Dune   

                      Author                 Main Genre  \
2667  Sir Arthur Conan Doyle  Crime, Thriller & Mystery   
3637      Arthur Conan Doyle       Literature & Fiction   
847       Arthur Conan Doyle           Children's Books   
3581             Stephen Fry       Literature & Fiction   
3568           Frank Herbert       Literature & Fiction   

                            Sub Genre                   Type    Price  Rating  \
2667           Thrillers and Suspense              Paperback  ₹349.00     4.6   
3637        Crime, Thriller & Mystery         Kindle Edition  ₹129.15     4.5  