In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [None]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this provides the OpenAI API key used by
# OpenAIEmbeddings further down — confirm against the project's .env file.
load_dotenv()

In [None]:
import pandas as pd

In [None]:
# Read new books data
books = pd.read_csv("books_cleaned.csv")

# Fail early, with a readable message, if the columns that the later cells
# rely on are missing from the CSV.
required_columns = {"isbn13", "tagged_description"}
missing_columns = required_columns - set(books.columns)
assert not missing_columns, (
    f"books_cleaned.csv is missing columns: {sorted(missing_columns)}"
)

# Preview a few rows instead of dumping the whole frame into the output.
books.head()

In [None]:
# Preview the column that gets embedded below. Later cells parse the first
# whitespace-separated token of each entry as the integer isbn13.
books["tagged_description"]

We will save the `tagged_description` column to a text file so that it can be loaded with `TextLoader` in the next step.

In [None]:
# Write one tagged description per line as plain text.
# The CSV writer is avoided on purpose: a newline is not a valid CSV delimiter
# (newer Python versions raise ValueError for it), and CSV quoting would wrap
# some rows in double quotes and keep embedded newlines, which breaks the
# "one record per line" layout the line-based splitter below depends on.
tagged_descriptions = (
    books["tagged_description"]
    .dropna()                               # a missing description has no ISBN to parse later
    .astype(str)
    .str.replace(r"\s+", " ", regex=True)   # collapse embedded newlines/tabs into single spaces
    .str.strip()
)
with open("tagged_description.txt", "w", encoding="utf-8") as description_file:
    description_file.writelines(f"{line}\n" for line in tagged_descriptions if line)

In [None]:
# Load the saved descriptions and turn every line into its own document.
# The encoding is given explicitly so the read does not depend on the
# platform's default encoding.
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()
text_splitter = CharacterTextSplitter(
    separator="\n",
    # Newer langchain_text_splitters releases reject chunk_size <= 0.
    # A chunk size of 1 is still smaller than any real line, so lines are
    # never merged and each one ends up as a separate chunk (the splitter
    # logs a "chunk longer than specified" warning per line; that is expected).
    chunk_size=1,
    chunk_overlap=0,
)
documents = text_splitter.split_documents(raw_documents)

In [None]:
# Inspect the first split document to confirm it holds a single description.
documents[0]

In [None]:
# Embed every description document and index it in a Chroma vector store.
embedding_function = OpenAIEmbeddings()
db_books = Chroma.from_documents(documents, embedding=embedding_function)

In [None]:
# Run example query: fetch the ten stored descriptions closest to the query text.
query = "A book about AI"
docs = db_books.similarity_search(
    query,
    k=10,
)
docs

Nice, we just got some recommendations for the kind of book we are looking for.
However, we don't want to recommend a book by its description; we want its title.
The "isbn13" at the start of each description is what helps us find the title.

In [None]:
# Check the title for the first recommended book from above.
# A record may start with a double quote when the text file was written with
# CSV quoting, so strip it before parsing the leading ISBN — the same parsing
# that get_book_recommendations uses.
first_isbn13 = int(docs[0].page_content.strip('"').split()[0])
books[books["isbn13"] == first_isbn13]

In [None]:
# Now make a function to get recommendations for a given query
def get_book_recommendations(query, top_k: int = 10, candidate_k: int = 50) -> pd.DataFrame:
    """
    Get book recommendations based on a query.

    Args:
        query: Free-text description of the kind of book wanted.
        top_k: Maximum number of rows to return.
        candidate_k: Number of documents requested from the vector store
            before de-duplication; raised to ``top_k`` when smaller.

    Returns:
        The rows of ``books`` whose ``isbn13`` appears at the start of a
        retrieved document, ordered as the vector store returned them and
        limited to ``top_k`` rows.

    Raises:
        ValueError: If a retrieved document does not start with an integer.
    """
    recs = db_books.similarity_search(query, k=max(top_k, candidate_k, 1))

    # Each document starts with the isbn13 (optionally preceded by a CSV quote).
    # dict.fromkeys drops repeated ISBNs while keeping the retrieval order.
    ranked_isbns = list(
        dict.fromkeys(int(rec.page_content.strip('"').split()[0]) for rec in recs)
    )
    rank_of = {isbn: position for position, isbn in enumerate(ranked_isbns)}

    matches = books[books["isbn13"].isin(ranked_isbns)]
    # isin() returns rows in the frame's own order; re-sort by retrieval rank so
    # that the head() below really keeps the best matches.
    ranked = matches.sort_values(
        "isbn13", key=lambda column: column.map(rank_of), kind="stable"
    )
    return ranked.head(max(top_k, 0))

In [None]:
# Try the helper with a natural-language query; the returned DataFrame is shown as the cell output.
get_book_recommendations("A book teaches children about nature")