## Faiss

In [23]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader

# Load your speech
loader = TextLoader("Speech.txt")
documents = loader.load()

# Use Recursive splitter with forced small chunk size
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=30,
    chunk_overlap=10,
    separators=["\n\n", "\n", ".", "!", "?", " ", ""],  # Include char-level fallback
)

docs = text_splitter.split_documents(documents)

# Print chunks
for i, doc in enumerate(docs):
    print(f"\nChunk {i+1}: {repr(doc.page_content)}")



Chunk 1: 'My parents impressed on me the'

Chunk 2: 'on me the value of that you'

Chunk 3: 'that you work hard for what'

Chunk 4: 'for what you want in life'

Chunk 5: '.'

Chunk 6: 'That your word is your bond'

Chunk 7: 'your bond and you do what you'

Chunk 8: 'what you say and keep your'

Chunk 9: 'keep your promise'

Chunk 10: '.'

Chunk 11: 'That you treat people with'

Chunk 12: 'with respect'

Chunk 13: '.'

Chunk 14: 'Show the values and morals in'

Chunk 15: 'morals in in the daily life'

Chunk 16: '.'

Chunk 17: 'That is the lesson that we'

Chunk 18: 'that we continue to pass on'

Chunk 19: 'pass on to our son'

Chunk 20: '.'

Chunk 21: 'We need to pass those lessons'

Chunk 22: 'lessons on to the many'

Chunk 23: 'the many generations to'

Chunk 24: 'to follow'

Chunk 25: '.'

Chunk 26: '[Cheering] Because we want'

Chunk 27: 'we want our children in these'

Chunk 28: 'in these nations to know that'

Chunk 29: 'know that the only limit to'

Chunk 30: 'limit to your achi

In [24]:
docs

[Document(metadata={'source': 'Speech.txt'}, page_content='My parents impressed on me the'),
 Document(metadata={'source': 'Speech.txt'}, page_content='on me the value of that you'),
 Document(metadata={'source': 'Speech.txt'}, page_content='that you work hard for what'),
 Document(metadata={'source': 'Speech.txt'}, page_content='for what you want in life'),
 Document(metadata={'source': 'Speech.txt'}, page_content='.'),
 Document(metadata={'source': 'Speech.txt'}, page_content='That your word is your bond'),
 Document(metadata={'source': 'Speech.txt'}, page_content='your bond and you do what you'),
 Document(metadata={'source': 'Speech.txt'}, page_content='what you say and keep your'),
 Document(metadata={'source': 'Speech.txt'}, page_content='keep your promise'),
 Document(metadata={'source': 'Speech.txt'}, page_content='.'),
 Document(metadata={'source': 'Speech.txt'}, page_content='That you treat people with'),
 Document(metadata={'source': 'Speech.txt'}, page_content='with respect

In [25]:
from langchain_community.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="nomic-embed-text")


In [26]:
from langchain.vectorstores import FAISS
db = FAISS.from_documents(docs, embeddings)

In [34]:
query = "What hung up for the monuments ?"
docs=db.similarity_search(query)
docs

[Document(id='d5924ef4-5751-4363-965f-8121a447e3e9', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='13abdfc3-3b88-44c9-8013-6948c2ba89c3', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='20a4080e-a44f-4600-8869-48fd8165a93e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='b209e602-76f1-43e6-b0ff-188fcb846b8e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;')]

In [28]:
retriever = db.as_retriever()
retriever.invoke({"query": query})

[Document(id='d5924ef4-5751-4363-965f-8121a447e3e9', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='13abdfc3-3b88-44c9-8013-6948c2ba89c3', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='20a4080e-a44f-4600-8869-48fd8165a93e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='b209e602-76f1-43e6-b0ff-188fcb846b8e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;')]

# Similarity Search with Scores , one of good methods in FAISS

In [29]:
docs_with_score = db.similarity_search_with_score(query)
docs_with_score

[(Document(id='d5924ef4-5751-4363-965f-8121a447e3e9', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
  167.34337),
 (Document(id='13abdfc3-3b88-44c9-8013-6948c2ba89c3', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
  167.34337),
 (Document(id='20a4080e-a44f-4600-8869-48fd8165a93e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
  167.34337),
 (Document(id='b209e602-76f1-43e6-b0ff-188fcb846b8e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
  167.34337)]

In [30]:
embedding_vector = embeddings.embed_query(query)
print(f"Embedding vector for query: {embedding_vector}")

Embedding vector for query: [1.17205810546875, 1.142122507095337, -4.341152667999268, -1.7983425855636597, 0.6705060005187988, 1.302890419960022, 0.0012621202040463686, 0.6011227369308472, 0.3053220510482788, 0.546068012714386, -0.9560771584510803, 0.4757692813873291, 0.960134744644165, -0.28709977865219116, 0.6079282760620117, -1.1685179471969604, 0.4097599983215332, -0.0730801373720169, -0.05072418972849846, 0.6032628417015076, 0.2823358178138733, -0.6929592490196228, -0.6936702728271484, 0.6055915355682373, -0.16918008029460907, 1.3742448091506958, 1.0249452590942383, -0.38615682721138, -0.3200676143169403, 0.9695039987564087, 0.1495586633682251, -0.6352800130844116, -1.063223958015442, -0.4771924614906311, 0.6998236775398254, -1.0063878297805786, 1.5329149961471558, 0.8393277525901794, 1.7349153757095337, -0.6875868439674377, 0.9125422239303589, -0.2728612720966339, -2.16859769821167, -0.06265021115541458, 0.5411943793296814, -0.9247835874557495, 0.6270304322242737, 1.1929461956024

In [31]:
db.save_local("faiss_index")

In [36]:
new_db = FAISS.load_local("faiss_index", embeddings,allow_dangerous_deserialization=True)
new_docs = new_db.similarity_search(query, k=5)


In [37]:
new_docs

[Document(id='d5924ef4-5751-4363-965f-8121a447e3e9', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='13abdfc3-3b88-44c9-8013-6948c2ba89c3', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='20a4080e-a44f-4600-8869-48fd8165a93e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='b209e602-76f1-43e6-b0ff-188fcb846b8e', metadata={'source': 'Speech.txt'}, page_content='hung up for monuments;'),
 Document(id='1cf0d8cf-0c62-4003-91ab-bd64f0db63ec', metadata={'source': 'Speech.txt'}, page_content='Our bruised arms hung up')]