In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

class CustomTextSplitter(CharacterTextSplitter):
    def __init__(self, delimiter='/', **kwargs):
        super().__init__(**kwargs)
        self.delimiter = delimiter

    def split_text(self, text: str):
        return text.split(self.delimiter)

In [2]:

file_path = './DB_Book_and_Famous.txt'
loader = TextLoader(file_path)

splitter = CustomTextSplitter(delimiter='\n\n')

documents = loader.load()
print(documents)
print(len(documents))

# for document in documents:
#   print(document.page_content)

document = splitter.split_documents(documents)
print(len(document))
for chunk in document:
  print(chunk)

[Document(page_content="Source of Insight: Albert Einstein, Physicist.\nContent of insight: Life is like riding a bicycle. To keep your balance, you must keep moving..\nUseful insight in: Motivation.\nThat insight could be helpful in the following way:\nUse this insight to remind yourself that continuous effort and progress are essential in overcoming challenges and maintaining stability in life.  \n\n\nSource of Insight: Maya Angelou, Poet.\nContent of insight: You will face many defeats in life, but never let yourself be defeated..\nUseful insight in: Resilience.\nThat insight could be helpful in the following way:\nApply this insight by viewing setbacks as opportunities to learn and grow, maintaining a positive mindset, and persevering through difficult times. \n\n\nSource of Insight: The Power of Now by Eckhart Tolle.\nContent of insight: Realize deeply that the present moment is all you ever have..\nUseful insight in: Mindfulness.\nThat insight could be helpful in the following wa

In [4]:
for chunk in document:
  print(chunk)

page_content='Source of Insight: Albert Einstein, Physicist.\nContent of insight: Life is like riding a bicycle. To keep your balance, you must keep moving..\nUseful insight in: Motivation.\nThat insight could be helpful in the following way:\nUse this insight to remind yourself that continuous effort and progress are essential in overcoming challenges and maintaining stability in life.  ' metadata={'source': './DB_Book_and_Famous.txt'}
page_content='\nSource of Insight: Maya Angelou, Poet.\nContent of insight: You will face many defeats in life, but never let yourself be defeated..\nUseful insight in: Resilience.\nThat insight could be helpful in the following way:\nApply this insight by viewing setbacks as opportunities to learn and grow, maintaining a positive mindset, and persevering through difficult times. ' metadata={'source': './DB_Book_and_Famous.txt'}
page_content='\nSource of Insight: The Power of Now by Eckhart Tolle.\nContent of insight: Realize deeply that the present mom

In [8]:
from langchain_openai import OpenAIEmbeddings

embed_model = OpenAIEmbeddings(model='text-embedding-3-large')

In [9]:
from langchain_community.vectorstores import FAISS 

vector_index = FAISS.from_documents(document, embed_model)

In [10]:
store_path = './db/faiss_index'
vector_index.save_local(store_path)

In [11]:
query = "I am so tired."

In [14]:
retriever = vector_index.as_retriever(
  search_kwargs={'k': 3}
)

retrieved_docs = retriever.invoke(query)

In [42]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    docs, scores = zip(*vector_index.similarity_search_with_score_by_vector(query))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score

    return docs

In [None]:
vector_index.similarity_search_with_score_by_vector

In [39]:
retrieved_documents = retriever.invoke(query)

In [43]:
for doc in retrieved_documents:
  print(doc.metadata)
  print(doc)

{'source': './DB_Book_and_Famous.txt', 'score': 0.50826865}
page_content='\nSource of Insight: Pride and Prejudice by Jane Austen.\nContent of insight: I declare after all there is no enjoyment like reading! How much sooner one tires of anything than of a book!.\nUseful insight in: Reading.\nThat insight could be helpful in the following way:\nEmbrace reading as a source of joy and enrichment, regularly engaging with books to broaden your perspective and knowledge.  ' metadata={'source': './DB_Book_and_Famous.txt', 'score': 0.50826865}
{'source': './DB_Book_and_Famous.txt', 'score': 0.5259526}
page_content='Source of Insight: Albert Einstein, Physicist.\nContent of insight: Life is like riding a bicycle. To keep your balance, you must keep moving..\nUseful insight in: Motivation.\nThat insight could be helpful in the following way:\nUse this insight to remind yourself that continuous effort and progress are essential in overcoming challenges and maintaining stability in life.  ' metada

In [48]:
retriever_threshold = vector_index.as_retriever(
  search_type='similarity_score_threshold',
  search_kwargs={'score_threshold': 0.5}
)

retrieved_docs_threshold = retriever_threshold.invoke(query)

In [15]:
print(type(retrieved_docs))

<class 'list'>


In [49]:
for doc in retrieved_docs_threshold:
  print(doc)
  print(doc.page_content)
  print(doc.metadata)

page_content='\nSource of Insight: Pride and Prejudice by Jane Austen.\nContent of insight: I declare after all there is no enjoyment like reading! How much sooner one tires of anything than of a book!.\nUseful insight in: Reading.\nThat insight could be helpful in the following way:\nEmbrace reading as a source of joy and enrichment, regularly engaging with books to broaden your perspective and knowledge.  ' metadata={'source': './DB_Book_and_Famous.txt', 'score': 0.50826865}

Source of Insight: Pride and Prejudice by Jane Austen.
Content of insight: I declare after all there is no enjoyment like reading! How much sooner one tires of anything than of a book!.
Useful insight in: Reading.
That insight could be helpful in the following way:
Embrace reading as a source of joy and enrichment, regularly engaging with books to broaden your perspective and knowledge.  
{'source': './DB_Book_and_Famous.txt', 'score': 0.50826865}
page_content='Source of Insight: Albert Einstein, Physicist.\nCon

In [16]:
for doc in retrieved_docs:
  print(doc)

page_content='\nSource of Insight: Pride and Prejudice by Jane Austen.\nContent of insight: I declare after all there is no enjoyment like reading! How much sooner one tires of anything than of a book!.\nUseful insight in: Reading.\nThat insight could be helpful in the following way:\nEmbrace reading as a source of joy and enrichment, regularly engaging with books to broaden your perspective and knowledge.  ' metadata={'source': './DB_Book_and_Famous.txt'}
page_content='Source of Insight: Albert Einstein, Physicist.\nContent of insight: Life is like riding a bicycle. To keep your balance, you must keep moving..\nUseful insight in: Motivation.\nThat insight could be helpful in the following way:\nUse this insight to remind yourself that continuous effort and progress are essential in overcoming challenges and maintaining stability in life.  ' metadata={'source': './DB_Book_and_Famous.txt'}
page_content="\nSource of Insight: Nelson Mandela, Political Leader.\nContent of insight: It alway