In [None]:
import pandas as pd
# Load the books table. The path is relative, so it resolves against the
# kernel's current working directory.
df = pd.read_csv("../datasets/books_data.csv")
# Bare last expression: the notebook renders the frame as this cell's output.
df

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import ChatHuggingFace
import faiss
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): none of the cells visible here read those variables explicitly;
# presumably they hold Hugging Face credentials - confirm.
load_dotenv()

In [45]:
# Build one text value per book in the form "<FormattedBookID> <Description>".
# The ID is cast to str and missing descriptions become "", so a book without a
# description still yields "<ID> ". similar_recommendations() later reads the ID
# back from the first whitespace-separated token of each retrieved chunk.
df["tagged_desc"] = df["FormattedBookID"].astype(str) + " " + df["Description"].fillna("")


In [None]:
# Display the frame again to check that the "tagged_desc" column was added.
df

In [None]:
# Inspect the tagged description column on its own.
df["tagged_desc"]

In [48]:
# Write one "<FormattedBookID> <Description>" line per book for TextLoader to
# read back. Plain writes are used instead of Series.to_csv(sep="\n") because the
# CSV writer wraps any value containing a double quote or a line break in quotes
# (doubling inner quotes). That puts a '"' in front of the book ID and lets
# embedded line breaks start new lines that carry no ID at all.
with open("tagged_desc.txt", "w", encoding="utf-8") as tagged_file:
    for tagged_text in df["tagged_desc"]:
        # Collapse embedded line breaks so every book occupies exactly one line.
        tagged_file.write(" ".join(str(tagged_text).splitlines()) + "\n")

In [None]:
# Load the tagged-description file and turn it into one Document per book line.
raw_docs = TextLoader("tagged_desc.txt", encoding="utf-8").load()
text_splitters = CharacterTextSplitter(chunk_size=1500, chunk_overlap=0, separator="\n")

# split_documents() splits on "\n" and then packs neighbouring pieces back
# together while they still fit in chunk_size, so several short book lines could
# end up in a single Document and only the first ID in it could be parsed out
# later. Handing the splitter one line at a time keeps Documents and books 1:1.
documents = []
for raw_doc in raw_docs:
    book_lines = [line for line in raw_doc.page_content.split("\n") if line.strip()]
    documents.extend(
        text_splitters.create_documents(
            book_lines,
            # Keep the loader's metadata (e.g. the source path) on every Document.
            metadatas=[raw_doc.metadata] * len(book_lines),
        )
    )

In [None]:
# Spot-check the first Document produced by the splitter.
documents[0]

In [51]:
import os
from langchain_huggingface import HuggingFaceEmbeddings
# Embed every Document with a sentence-transformers model and index the vectors
# in an in-memory FAISS store.
import torch

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on machines without CUDA instead of failing at model load.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
    model_kwargs={"device": embedding_device},
)
db_books = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [None]:
# Smoke-test the index: fetch the five nearest chunks for a sample query and
# print a short preview of each hit.
query = "A book to know about Harry Potter"
docs = db_books.similarity_search(query, k=5)

for doc in docs:
    # A single print with sep="\n" emits the same four lines per hit.
    print(
        f"\nDocuments ID: {doc.id}",
        f"Metadata: {doc.metadata}",
        f"Content: {doc.page_content[:100]}",
        "-" * 50,
        sep="\n",
    )
    

In [57]:
def similar_recommendations(query: str, top_k: int = 10) -> pd.DataFrame:
    """Return up to ``top_k`` rows of ``df`` for the books closest to ``query``.

    Runs a similarity search against the notebook-level ``db_books`` vector
    store, reads the book ID from the first whitespace-separated token of each
    hit, and looks those IDs up in the notebook-level ``df`` through its
    ``FormattedBookID`` column.

    Args:
        query: Free-text description of what the reader is looking for.
        top_k: Maximum number of rows to return. It is also the number of hits
            requested from the vector store. Values below 1 yield an empty frame.

    Returns:
        The matching rows of ``df`` in the order the vector store returned the
        hits. Hits whose first token is not ``BK`` followed by digits are
        reported on stdout and skipped, so fewer than ``top_k`` rows may come back.
    """
    if top_k < 1:
        return df.head(0)

    # Ask for top_k hits; a fixed k would silently cap the result below top_k.
    hits = db_books.similarity_search(query, k=top_k)

    # Map each valid book ID to the rank of its first appearance among the hits.
    rank_by_id = {}
    for hit in hits:
        tokens = hit.page_content.split()
        # Lines that went through a CSV writer may carry a wrapping double quote.
        book_id = tokens[0].strip('"') if tokens else ""

        if book_id.startswith("BK") and book_id[2:].isdigit():
            rank_by_id.setdefault(book_id, len(rank_by_id))
        else:
            print(f"Skipping document with invalid or missing BookID: {hit.id}")

    matches = df[df["FormattedBookID"].isin(list(rank_by_id))]
    if matches.empty:
        return matches

    # Reorder positionally (safe with duplicate index labels) by hit rank; the
    # stable sort keeps df order among rows that share a book ID.
    hit_order = matches["FormattedBookID"].map(rank_by_id).to_numpy().argsort(kind="stable")
    return matches.iloc[hit_order].head(top_k)



In [None]:
# Example call; top_k is left at its default of 10.
similar_recommendations("Harry Potter")