VectorStore- FAISS

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain.docstore.document import Document

In [2]:
loader = TextLoader('speech.txt')
doc = loader.load()

In [3]:
text_splitter = CharacterTextSplitter(chunk_size = 256, chunk_overlap = 32)
chunked_doc = text_splitter.split_documents(doc)

Created a chunk of size 470, which is longer than the specified 256
Created a chunk of size 347, which is longer than the specified 256
Created a chunk of size 668, which is longer than the specified 256
Created a chunk of size 982, which is longer than the specified 256
Created a chunk of size 789, which is longer than the specified 256


In [4]:
embedding = OllamaEmbeddings()
db = FAISS.from_documents(chunked_doc, embedding)
db

<langchain_community.vectorstores.faiss.FAISS at 0x1ffd1b1c090>

In [5]:
query = "How does the speaker describe the desired outcome of the war?"
search_result = db.similarity_search(query)
search_result

[Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken from our hearts.'),
 Document(metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thi

In [6]:
search_result[0].page_content

'It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken from our hearts.'

As a retriever

In [7]:
retriever = db.as_retriever()
retriever_result = retriever.invoke(query)
retriever_result

[Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken from our hearts.'),
 Document(metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thi

Similarity Search with Score

In [8]:
distance = db.similarity_search_with_score(query)
distance

[(Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken from our hearts.'),
  15155.236),
 (Document(metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It 

In [9]:
query_embedding = embedding.embed_query(query)
query_embedding

[1.905942678451538,
 -3.0848188400268555,
 1.9266877174377441,
 1.4802498817443848,
 -0.906783938407898,
 0.5994235873222351,
 1.5922367572784424,
 -0.562498927116394,
 0.8656339645385742,
 -1.970470666885376,
 1.2984124422073364,
 -1.9644593000411987,
 -0.6610937118530273,
 1.5142890214920044,
 -0.5127217173576355,
 -2.042038917541504,
 -0.3550319969654083,
 0.12661026418209076,
 0.7278351783752441,
 -1.8765541315078735,
 -0.6742256283760071,
 -1.0370304584503174,
 2.1645638942718506,
 -2.02512526512146,
 0.7461797595024109,
 -0.9956343173980713,
 0.4128318428993225,
 -1.6351494789123535,
 0.16330350935459137,
 -0.8801084756851196,
 2.095625162124634,
 -2.84267258644104,
 -2.6858737468719482,
 3.762274980545044,
 2.291747570037842,
 -5.131372451782227,
 -1.0398008823394775,
 1.3561785221099854,
 -1.2168262004852295,
 -1.3907657861709595,
 -0.5272400379180908,
 -2.0885183811187744,
 2.146793842315674,
 0.35255667567253113,
 0.9356279373168945,
 0.5666354894638062,
 -0.2919834852218628,

In [10]:
search_by_embedding = db.similarity_search_by_vector(query_embedding)
search_by_embedding

[Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken from our hearts.'),
 Document(metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It is a fearful thi

In [11]:
distance_by_embedding = db.similarity_search_with_score_by_vector(query_embedding)
distance_by_embedding

[(Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken from our hearts.'),
  15155.236),
 (Document(metadata={'source': 'speech.txt'}, page_content='It is a distressing and oppressive duty, gentlemen of the Congress, which I have performed in thus addressing you. There are, it may be, many months of fiery trial and sacrifice ahead of us. It 

Add Doc into FaissDB

In [12]:
doc_to_add = ["A transformer is a deep learning architecture developed by researchers at Google and based on the multi-head attention mechanism, proposed in a 2017 paper [Attention Is All You Need].",
              "BERT, GPT are both based on transformer architecture"]
doc_to_add = [Document(page_content=doc, metadata={}) for doc in doc_to_add]
db.add_documents(doc_to_add)

['d1c528fc-cf5b-465c-a1c0-f26f7a3de56c',
 '4e9d9e99-4621-40cc-9ee3-6463bfdebc50']

In [13]:
query2 = "What technique is GPT based on?"
search_result2 = db.similarity_search(query2)
search_result2

[Document(metadata={}, page_content='A transformer is a deep learning architecture developed by researchers at Google and based on the multi-head attention mechanism, proposed in a 2017 paper [Attention Is All You Need].'),
 Document(metadata={}, page_content='BERT, GPT are both based on transformer architecture'),
 Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to 

In [14]:
distance2 = db.similarity_search_with_score(query2)
distance2

[(Document(metadata={}, page_content='A transformer is a deep learning architecture developed by researchers at Google and based on the multi-head attention mechanism, proposed in a 2017 paper [Attention Is All You Need].'),
  15013.222),
 (Document(metadata={}, page_content='BERT, GPT are both based on transformer architecture'),
  16415.64),
 (Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for

Saving and Loading VectorDB

In [15]:
db.save_local("speech_faissdb")

In [16]:
db1 = FAISS.load_local("speech_faissdb", embedding, allow_dangerous_deserialization = True)

In [17]:
search_result1 = db1.similarity_search(query)
search_result1

[Document(metadata={}, page_content='A transformer is a deep learning architecture developed by researchers at Google and based on the multi-head attention mechanism, proposed in a 2017 paper [Attention Is All You Need].'),
 Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to believe that this is spoken from our hearts.'),
 Document(metadata={'source': 'speech.txt'}, 

In [18]:
search_result2 = db1.similarity_search(query2)
search_result2

[Document(metadata={}, page_content='A transformer is a deep learning architecture developed by researchers at Google and based on the multi-head attention mechanism, proposed in a 2017 paper [Attention Is All You Need].'),
 Document(metadata={}, page_content='BERT, GPT are both based on transformer architecture'),
 Document(metadata={'source': 'speech.txt'}, page_content='It will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness because we act without animus, not in enmity toward a people or with the desire to bring any injury or disadvantage upon them, but only in armed opposition to an irresponsible government which has thrown aside all considerations of humanity and of right and is running amuck. We are, let me say again, the sincere friends of the German people, and shall desire nothing so much as the early reestablishment of intimate relations of mutual advantage between us—however hard it may be for them, for the time being, to 