In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter

In [3]:
from dotenv import load_dotenv

load_dotenv()

True

In [4]:
import pandas as pd

books = pd.read_csv("books_cleaned.csv")

In [5]:
books["tagged_description"]

0       9780002005883 A NOVEL THAT READERS and critics...
1       9780002261982 A new 'Christie for Christmas' -...
2       9780006178736 A memorable, mesmerizing heroine...
3       9780006280897 Lewis' work on the nature of lov...
4       9780006280934 "In The Problem of Pain, C.S. Le...
                              ...                        
5192    9788172235222 On A Train Journey Home To North...
5193    9788173031014 This book tells the tale of a ma...
5194    9788179921623 Wisdom to Create a Life of Passi...
5195    9788185300535 This collection of the timeless ...
5196    9789027712059 Since the three volume edition o...
Name: tagged_description, Length: 5197, dtype: object

In [6]:
books["tagged_description"].to_csv("tagged_description.txt",
                                   sep = "\n",
                                   index = False,
                                   header = False)

In [7]:
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1168, which is longer than the specified 0
Created a chunk of size 1214, which is longer than the specified 0
Created a chunk of size 373, which is longer than the specified 0
Created a chunk of size 309, which is longer than the specified 0
Created a chunk of size 483, which is longer than the specified 0
Created a chunk of size 482, which is longer than the specified 0
Created a chunk of size 960, which is longer than the specified 0
Created a chunk of size 188, which is longer than the specified 0
Created a chunk of size 843, which is longer than the specified 0
Created a chunk of size 296, which is longer than the specified 0
Created a chunk of size 197, which is longer than the specified 0
Created a chunk of size 881, which is longer than the specified 0
Created a chunk of size 1088, which is longer than the specified 0
Created a chunk of size 1189, which is longer than the specified 0
Created a chunk of size 304, which is longer than the specified 0
Create

In [13]:
documents[10]

Document(metadata={'source': 'tagged_description.txt'}, page_content='"9780006483892 Available in the U.S. for the first time, this is the second volume in the exceptional Legends of the Riftwar series from ""New York Times""-bestselling authors Feist and Rosenberg."')

In [None]:
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Chroma

# Load a Sentence Transformer model
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Generate embeddings for the text chunks
embeddings = embedding_model.encode([doc.page_content for doc in documents])

In [22]:
from langchain.embeddings.base import Embeddings

# Custom embedding class for Sentence Transformers
class SentenceTransformerEmbeddings(Embeddings):
    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts).tolist()

    def embed_query(self, text):
        return self.model.encode(text).tolist()
# Initialize the custom embedding function
embedding_function = SentenceTransformerEmbeddings()

In [26]:
# Create Chroma vector store
db_books = Chroma.from_documents(documents, 
    embedding=embedding_function
)

In [42]:
# Perform a similarity search
query = "Find a book about forgiveness and redemption."
similar_docs = db_books.similarity_search(query, k = 5)

In [43]:
similar_docs

[Document(metadata={'source': 'tagged_description.txt'}, page_content='9780830825707 James K. Beilby and Paul R. Eddy edit a collection of essays on four views of atonement: the healing view, the Christus victor view, the kaleidoscopic view and the penal substitutionary view. This is a book that will help Christians understand the issues, grasp the differences and proceed toward a clearer articulation of their understanding of the atonement.'),
 Document(metadata={'source': 'tagged_description.txt'}, page_content='9780785263715 Miller reveals how the inability to find redemption leads to chaotic relationships, self-hatred, the accumulation of meaningless material possessions, and a lack of inner peace.'),
 Document(metadata={'source': 'tagged_description.txt'}, page_content="9780060577865 Exploring how the past influences the present, the author discusses how to understand each individual's unique coping style and how to tap into the extraordinary quality of human spirit that will crea

In [49]:
## inorder to get the books name and title and authors, sicne these are description recomendation 
get_isbn_from_recomentdation = similar_docs[0].page_content.split()[0]
get_isbn_from_recomentdation

'9780830825707'

In [52]:
books[books['isbn13'] == int(similar_docs[0].page_content.split()[0].strip())]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
4092,9780830825707,830825703,The Nature of the Atonement,James K. Beilby;Paul R. Eddy,Religion,http://books.google.com/books/content?id=ctikD...,James K. Beilby and Paul R. Eddy edit a collec...,2006.0,3.76,208.0,189.0,The Nature of the Atonement: Four Views,9780830825707 James K. Beilby and Paul R. Eddy...


In [68]:
def get_semantic_recomendations(query: str, top_k: int)-> pd.DataFrame:
    similar_docs = db_books.similarity_search(query, top_k)
    isbn_list = []

    for book in range(0, len(similar_docs)):
        isbn_list += [int(similar_docs[book].page_content.strip('"').split()[0])]
    return books[books['isbn13'].isin(isbn_list)].head(top_k)

In [69]:
get_semantic_recomendations(query, 5)

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
159,9780060577865,006057786X,Bad Childhood---Good Life,Laura Schlessinger,Self-Help,http://books.google.com/books/content?id=2sepd...,"Exploring how the past influences the present,...",2006.0,3.92,272.0,502.0,Bad Childhood---Good Life: How to Blossom and ...,9780060577865 Exploring how the past influence...
761,9780141185910,0141185910,Go Tell it on the Mountain,James Baldwin,Fiction,http://books.google.com/books/content?id=eyg78...,"The story of the guilt, bitterness and spiritu...",2001.0,4.01,256.0,33558.0,Go Tell it on the Mountain,"9780141185910 The story of the guilt, bitterne..."
3176,9780687002825,0687002826,Exclusion and Embrace,Miroslav Volf,Religion,http://books.google.com/books/content?id=Cqto7...,Life at the end of the twentieth century prese...,1996.0,4.27,306.0,1895.0,Exclusion and Embrace: A Theological Explorati...,9780687002825 Life at the end of the twentieth...
3720,9780785263715,0785263713,Searching for God Knows What,Donald Miller,Religion,http://books.google.com/books/content?id=Hw4gW...,Miller reveals how the inability to find redem...,2004.0,3.92,246.0,22864.0,Searching for God Knows What,9780785263715 Miller reveals how the inability...
4092,9780830825707,0830825703,The Nature of the Atonement,James K. Beilby;Paul R. Eddy,Religion,http://books.google.com/books/content?id=ctikD...,James K. Beilby and Paul R. Eddy edit a collec...,2006.0,3.76,208.0,189.0,The Nature of the Atonement: Four Views,9780830825707 James K. Beilby and Paul R. Eddy...
