In [104]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings 
from langchain_chroma import Chroma
from sklearn.metrics.pairwise import cosine_similarity

In [58]:
import pandas as pd

books = pd.read_csv("books_cleaned.csv")

In [59]:
books["tagged_description"]

0       9780002005883 A NOVEL THAT READERS and critics...
1       9780002261982 A new 'Christie for Christmas' -...
2       9780006178736 A memorable, mesmerizing heroine...
3       9780006280897 Lewis' work on the nature of lov...
4       9780006280934 "In The Problem of Pain, C.S. Le...
                              ...                        
5192    9788172235222 On A Train Journey Home To North...
5193    9788173031014 This book tells the tale of a ma...
5194    9788179921623 Wisdom to Create a Life of Passi...
5195    9788185300535 This collection of the timeless ...
5196    9789027712059 Since the three volume edition o...
Name: tagged_description, Length: 5197, dtype: object

In [60]:
books["tagged_description"].to_csv("tagged_description.txt",
                                   sep = "\n",
                                   index = False,
                                   header = False)

In [61]:
raw_documents = TextLoader("tagged_description.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1168, which is longer than the specified 0
Created a chunk of size 1214, which is longer than the specified 0
Created a chunk of size 373, which is longer than the specified 0
Created a chunk of size 309, which is longer than the specified 0
Created a chunk of size 483, which is longer than the specified 0
Created a chunk of size 482, which is longer than the specified 0
Created a chunk of size 960, which is longer than the specified 0
Created a chunk of size 188, which is longer than the specified 0
Created a chunk of size 843, which is longer than the specified 0
Created a chunk of size 296, which is longer than the specified 0
Created a chunk of size 197, which is longer than the specified 0
Created a chunk of size 881, which is longer than the specified 0
Created a chunk of size 1088, which is longer than the specified 0
Created a chunk of size 1189, which is longer than the specified 0
Created a chunk of size 304, which is longer than the specified 0
Create

In [83]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gi

In [66]:
huggingface_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [67]:
db_books = Chroma.from_documents(
    documents,
    embedding=huggingface_embeddings
)

In [68]:
query = 'A book that contains harry potter'
docs = db_books.similarity_search(query, k = 5)
docs

[Document(id='bd27b025-66ab-4ae3-91da-aed10571432e', metadata={'source': 'tagged_description.txt'}, page_content="9780972393614 A guide to J.K. Rowling's first four Harry Potter novels analyzes mysterious elements, themes, and puzzles hidden throughout the works and speculates about the plots and endings of future volumes."),
 Document(id='cb6cf3f4-5b48-4b39-a2f2-320bfdd317ee', metadata={'source': 'tagged_description.txt'}, page_content="9780972393614 A guide to J.K. Rowling's first four Harry Potter novels analyzes mysterious elements, themes, and puzzles hidden throughout the works and speculates about the plots and endings of future volumes."),
 Document(id='7f2724da-72f7-43e2-9e4d-e67de119a1d3', metadata={'source': 'tagged_description.txt'}, page_content="9780972393614 A guide to J.K. Rowling's first four Harry Potter novels analyzes mysterious elements, themes, and puzzles hidden throughout the works and speculates about the plots and endings of future volumes."),
 Document(id='b7

In [69]:
books[books["isbn13"] == int(docs[0].page_content.split()[0].strip())]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
4290,9780972393614,972393617,Ultimate Unofficial Guide to the Mysteries of ...,Galadriel Waters;Astre Mithrandir,Literary Criticism,http://books.google.com/books/content?id=73QFJ...,A guide to J.K. Rowling's first four Harry Pot...,2003.0,4.05,412.0,2725.0,Ultimate Unofficial Guide to the Mysteries of ...,9780972393614 A guide to J.K. Rowling's first ...


In [None]:
def retrieve_semantic_recommendations(query: str, top_k: int = 20) -> pd.DataFrame:
    
    recs = db_books.similarity_search(query, top_k * 3)
    
   
    books_list = set()

    for rec in recs:
        isbn = rec.page_content.strip('"').split()[0]
        
        books_list.add(int(isbn))
        
        if len(books_list) >= top_k:
            break
    
    unique_books = books[books["isbn13"].isin(books_list)]
    
    if len(unique_books) < top_k:
        print(f"Found only {len(unique_books)} unique books out of {top_k} requested.")
    
    return unique_books

recommendations = retrieve_semantic_recommendations("A book to teach children about nature", top_k=20)
recommendations

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
59,9780007151240,0007151241,The Family Way,Tony Parsons,Parenthood,http://books.google.com/books/content?id=dJEIx...,It should be the most natural thing in the wor...,2005.0,3.51,400.0,2095.0,The Family Way,9780007151240 It should be the most natural th...
324,9780060959036,0060959037,Prodigal Summer,Barbara Kingsolver,Fiction,http://books.google.com/books/content?id=06IwG...,Barbara Kingsolver's fifth novel is a hymn to ...,2001.0,4.0,444.0,85440.0,Prodigal Summer: A Novel,9780060959036 Barbara Kingsolver's fifth novel...
404,9780064402453,0064402452,Racso and the Rats of NIMH,Jane Leslie Conly,Juvenile Fiction,http://books.google.com/books/content?id=MgoNv...,"‘Racso, a brash and boastful little rodent, is...",1988.0,3.76,288.0,3231.0,Racso and the Rats of NIMH,"9780064402453 ‘Racso, a brash and boastful lit..."
406,9780064403870,0064403874,"R-T, Margaret, and the Rats of NIMH",Jane Leslie Conly,Juvenile Fiction,http://books.google.com/books/content?id=WTHHH...,"When Margaret and her younger brother, Artie, ...",1991.0,3.52,272.0,631.0,"R-T, Margaret, and the Rats of NIMH",9780064403870 When Margaret and her younger br...
407,9780064404419,0064404412,The Rainbow People,Laurence Yep,Juvenile Fiction,http://books.google.com/books/content?id=5AHwq...,"""Culled from 69 stories collected in a [1930s]...",1992.0,3.75,208.0,202.0,The Rainbow People,"9780064404419 ""Culled from 69 stories collecte..."
416,9780064406925,006440692X,Winter on the Farm,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=IvlKH...,The Little House books tell the story of a lit...,1997.0,4.13,32.0,400.0,Winter on the Farm,9780064406925 The Little House books tell the ...
429,9780064434980,0064434982,The Deer in the Wood,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=V7YDW...,Even the youngest child can enjoy a special ad...,1999.0,4.17,32.0,302.0,The Deer in the Wood,9780064434980 Even the youngest child can enjo...
991,9780192862099,019286209X,The Origins of Life,John Maynard Smith;Eörs Szathmáry,Science,http://books.google.com/books/content?id=nHDbB...,'I can recommend this book as a thoroughly int...,2000.0,4.11,192.0,41.0,The Origins of Life: From the Birth of Life to...,9780192862099 'I can recommend this book as a ...
1639,9780374422080,0374422087,Everything on a Waffle,Polly Horvath,Juvenile Fiction,http://books.google.com/books/content?id=NimVJ...,This Newbery Honor Book tells the story of 11 ...,2004.0,3.71,150.0,9631.0,Everything on a Waffle,9780374422080 This Newbery Honor Book tells th...
1642,9780374522599,0374522596,The Control of Nature,John McPhee,Nature,http://books.google.com/books/content?id=p1qKQ...,The Control of Nature is John McPhee's bestsel...,1990.0,4.24,288.0,3365.0,The Control of Nature,9780374522599 The Control of Nature is John Mc...


In [116]:
recommendations_with_scores = retrieve_semantic_recommendations("A book to teach children about nature", top_k=6)
print(recommendations_with_scores)

             isbn13      isbn10                                title  \
324   9780060959036  0060959037                      Prodigal Summer   
1642  9780374522599  0374522596                The Control of Nature   
3747  9780786808069  0786808063  Baby Einstein: Neighborhood Animals   
3748  9780786808373  0786808373                 Baby Einstein: Birds   
3749  9780786808380  0786808381                Baby Einstein: Babies   
3750  9780786808397  078680839X                  Baby Einstein: Dogs   

                                authors        categories  \
324                  Barbara Kingsolver           Fiction   
1642                        John McPhee            Nature   
3747  Marilyn Singer;Julie Aigner-Clark  Juvenile Fiction   
3748                 Julie Aigner-Clark  Juvenile Fiction   
3749                 Julie Aigner-Clark  Juvenile Fiction   
3750                 Julie Aigner-Clark  Juvenile Fiction   

                                              thumbnail  \
324   ht