In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [2]:
import pandas as pd

books = pd.read_csv("preprocessed_book_data.csv")

In [3]:
from langchain.docstore.document import Document

documents = [
    Document(
        page_content=f"Title: {row['title']}, Description: {row['description']}",
        metadata={"title": row["title"], "categories": row["categories"]}
    )
    for _, row in books.iterrows()
]

In [4]:
documents[0]

Document(metadata={'title': 'Gilead', 'categories': 'Fiction'}, page_content='Title: Gilead, Description: A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest

In [None]:
from dotenv import load_dotenv
load_dotenv()

In [6]:
import os
embedding_model = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))

In [7]:
db_books = Chroma.from_documents(documents, embedding_model)

In [8]:
query = "I want a book about magic, adventure, and mystery."
retrieved_books = db_books.similarity_search(query, k=5)  # Get top 5 matches

for book in retrieved_books:
    print(book.page_content)  # Display recommended books


Title: The Books of Magic, Description: Timothy Hunter is taken on a tour of the magical realms by a quartet of fallen mystics.
Title: A Sudden Wild Magic, Description: A breathtakingly original, funny adventure of wars, worlds and otherworlds by one of the greatest fantasy writers in Britain
Title: Riddle-master, Description: A collection of the complete and long out-of-print Riddle-Master trilogy, a modern-day fantasy classic, offers a new introduction by the author and brings back the magical adventures of a young prince discovering his identity in a uniquely realized land. Original.
Title: Tales of Magick, Description: "Kick Ass! And Live to Tell About It! Some people think the Awakened sit around dreaming about Ascension. Wrong. Dead wrong. When reality itself is on the brink of destruction, you don't contemplate your navel. You fight -- and sometimes die -- for the sake of the future. Tales of Magick takes Mage from the realm of abstraction and kicks it screaming into high gear. 

In [9]:
retrieved_books

[Document(id='3b77c2a4-984e-4307-a956-157dab53b29a', metadata={'categories': 'Comics & Graphic Novels', 'title': 'The Books of Magic'}, page_content='Title: The Books of Magic, Description: Timothy Hunter is taken on a tour of the magical realms by a quartet of fallen mystics.'),
 Document(id='79d18c14-b191-43d2-b3d2-d6621aebadf8', metadata={'title': 'A Sudden Wild Magic'}, page_content='Title: A Sudden Wild Magic, Description: A breathtakingly original, funny adventure of wars, worlds and otherworlds by one of the greatest fantasy writers in Britain'),
 Document(id='b3a5bf45-4f3e-4d23-9b47-5143676fd8b8', metadata={'categories': 'Fiction', 'title': 'Riddle-master'}, page_content='Title: Riddle-master, Description: A collection of the complete and long out-of-print Riddle-Master trilogy, a modern-day fantasy classic, offers a new introduction by the author and brings back the magical adventures of a young prince discovering his identity in a uniquely realized land. Original.'),
 Documen

In [13]:
retrieved_books[0].metadata['title']

'The Books of Magic'

In [15]:
books[books['title'] == retrieved_books[0].metadata['title']]

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,tagged_description
5504,9781563890826,1563890828,The Books of Magic,The Books of Magic,Neil Gaiman,Comics & Graphic Novels,http://books.google.com/books/content?id=hXh2U...,Timothy Hunter is taken on a tour of the magic...,1993.0,4.08,200.0,13358.0,9781563890826 Timothy Hunter is taken on a tou...


In [38]:
def retrieve_books(
    query: str,
    top_k: int = 10,
) -> pd.DataFrame:
    
    retrieved_books = db_books.similarity_search(query, k=20)

    books_list = []

    for i in range(0, len(retrieved_books)):
        books_list.append(retrieved_books[i].metadata['title'])

    return books[books['title'].isin(books_list)]

In [39]:
retrieve_books("I want a book about magic, adventure, and mystery.")

Unnamed: 0,isbn13,isbn10,title,subtitle,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,tagged_description
391,9780061094156,61094153,Imajica II,Imajica II: The Reconciliation,Clive Barker,Fiction,http://books.google.com/books/content?id=DZVKS...,The magical tale of ill-fated lovers lost amon...,1995.0,4.42,544.0,2538.0,9780061094156 The magical tale of ill-fated lo...
881,9780141311883,141311886,The Blue Sword,The Blue Sword,Robin McKinley,,http://books.google.com/books/content?id=nDW2H...,"Harry, bored with her sheltered life in the re...",2001.0,4.23,272.0,286.0,"9780141311883 Harry, bored with her sheltered ..."
931,9780142407226,142407224,The Tough Guide to Fantasyland,The Tough Guide to Fantasyland,Diana Wynne Jones,Juvenile Nonfiction,http://books.google.com/books/content?id=v5jxA...,A unique guide to fantasy literature helps rea...,2006.0,3.94,234.0,3897.0,9780142407226 A unique guide to fantasy litera...
995,9780143039938,143039938,The Book of Imaginary Beings,The Book of Imaginary Beings,Jorge Luis Borges;Margarita Guerrero;Andrew Hu...,Fiction,http://books.google.com/books/content?id=FuNQP...,A whimsical compendium of mythological creatur...,2006.0,4.09,236.0,4809.0,9780143039938 A whimsical compendium of mythol...
1786,9780345482402,345482409,Enchantment,Enchantment,Orson Scott Card,Fiction,http://books.google.com/books/content?id=TfC7w...,Follows one man from ninth-century Russia to p...,2005.0,3.9,422.0,23134.0,9780345482402 Follows one man from ninth-centu...
1803,9780345492869,345492862,The Voyage of the Jerle Shannara Trilogy,The Voyage of the Jerle Shannara Trilogy,Terry Brooks,Fiction,http://books.google.com/books/content?id=v7mGR...,Chronicles the adventures of the heroes of the...,2006.0,4.26,1248.0,3030.0,9780345492869 Chronicles the adventures of the...
2348,9780425100417,425100413,The Regatta Mystery and Other Stories,The Regatta Mystery and Other Stories,Agatha Christie,Fiction,http://books.google.com/books/content?id=36hHP...,Nine tales of mystery and murder are marked by...,1984.0,3.95,214.0,5701.0,9780425100417 Nine tales of mystery and murder...
2449,9780435233396,435233394,His Dark Materials,His Dark Materials,Nicholas Wright;Philip Pullman,Children's plays,http://books.google.com/books/content?id=VlBnz...,"From the novels by Philip Pullman, this brilli...",2005.0,4.04,320.0,93.0,9780435233396 From the novels by Philip Pullma...
2474,9780439411875,439411874,"The Girl, the Dragon, and the Wild Magic","The Girl, the Dragon, and the Wild Magic",Dave Luckett,Juvenile Fiction,http://books.google.com/books/content?id=1wpcG...,"Failing out of magic school, Rhianna meets a w...",2003.0,3.86,119.0,450.0,"9780439411875 Failing out of magic school, Rhi..."
2533,9780439994798,439994799,His Dark Materials,His Dark Materials,Philip Pullman,"Children's stories, English",http://books.google.com/books/content?id=E9ZkG...,The trilogy follows the coming of age of two c...,2001.0,4.26,1296.0,304.0,9780439994798 The trilogy follows the coming o...
