### Faiss 

Facebook AI Similarity Search (Faiss) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.

In [34]:
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import CharacterTextSplitter



In [35]:
# Load the raw speech text and split it into chunks for indexing.
# Explicit UTF-8 so the read does not depend on the platform's default encoding.
loader = TextLoader('speech.txt', encoding='utf-8')
documents = loader.load()

# NOTE(review): the recorded output of the following print cell shows a single,
# un-split Document far longer than chunk_size. Presumably this is because
# CharacterTextSplitter splits on "\n\n" by default while speech.txt only
# contains single newlines — confirm, and pass separator="\n" (or use a
# recursive splitter) if real chunking is wanted.
text_splitter = CharacterTextSplitter(chunk_size=50, chunk_overlap=10)
docs = text_splitter.split_documents(documents)


In [36]:
# Inspect the chunks produced by the splitter. A bare trailing expression is
# used (instead of print) to match how every other cell displays its result.
docs

[Document(metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]


In [37]:
# Embedding client backed by the Ollama 'llama3' model.
embeddings = OllamaEmbeddings(model='llama3')

# Embed every chunk in `docs` and build a FAISS vector store from them.
db = FAISS.from_documents(documents=docs, embedding=embeddings)

# Display the store object as the cell result.
db

<langchain_community.vectorstores.faiss.FAISS at 0x109b11d60>

In [38]:
# Querying: embed the question and fetch the most similar chunks from the index.
query = "Who is Head coach of the Indian men's cricket team"

# Stored under its own name so the split chunks in `docs` (the input used to
# build the index) are not overwritten; the index-building cell stays re-runnable.
search_results = db.similarity_search(query)

search_results

[Document(id='b4dd19de-572e-4b8c-8179-42d446e0c4b8', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]

In [39]:
# Wrap the vector store in a retriever and run the same query through it.
retriever = db.as_retriever()

# Kept under its own name so the split chunks in `docs` are not overwritten.
retrieved_docs = retriever.invoke(query)

retrieved_docs

[Document(id='b4dd19de-572e-4b8c-8179-42d446e0c4b8', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]

In [40]:
# Same search as before, but each hit comes back as a (Document, score) tuple,
# as the recorded output shows.
# NOTE(review): with the default FAISS setup the score is presumably an L2
# distance (lower = more similar), not a normalised similarity — confirm
# against the LangChain FAISS documentation before interpreting the number.
db.similarity_search_with_score(query)

[(Document(id='b4dd19de-572e-4b8c-8179-42d446e0c4b8', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India"),
  np.float32(24852.42))]

In [41]:
# Turn the query string into its raw embedding (a list of floats), so the
# index can be searched directly by vector in the following cell.
embedding_vector = embeddings.embed_query(text=query)

# Displays every component of the vector; the output is long.
embedding_vector

[-1.7840536832809448,
 -2.187621831893921,
 -0.9355307817459106,
 -2.3821873664855957,
 -2.375674247741699,
 -0.5695218443870544,
 -6.7540154457092285,
 -1.8104602098464966,
 -5.920034885406494,
 -0.255293607711792,
 -0.02162480168044567,
 -1.5600641965866089,
 -1.5079197883605957,
 -2.7171831130981445,
 1.2763298749923706,
 6.294151306152344,
 1.7978885173797607,
 -0.4034716486930847,
 -1.6913889646530151,
 0.6896819472312927,
 -4.009430885314941,
 1.9746308326721191,
 -0.9710885286331177,
 1.8375864028930664,
 -0.7572808265686035,
 -1.767125129699707,
 0.7720951437950134,
 0.5227797031402588,
 6.006777763366699,
 4.183549404144287,
 -0.36852771043777466,
 1.857982873916626,
 -0.8827803134918213,
 0.5898957848548889,
 1.4498302936553955,
 1.6618837118148804,
 -4.689548015594482,
 -2.7509801387786865,
 2.834022283554077,
 2.009028911590576,
 -1.1588637828826904,
 -1.206780195236206,
 -0.175625741481781,
 -0.05599083751440048,
 0.7645946741104126,
 1.9293739795684814,
 2.953233957290649

In [42]:
# Search with the pre-computed query embedding instead of the raw text.
# The result is a plain list of Documents (no scores are attached), hence the
# name `docs_by_vector` rather than `docs_score`.
docs_by_vector = db.similarity_search_by_vector(embedding_vector)
docs_by_vector

[Document(id='b4dd19de-572e-4b8c-8179-42d446e0c4b8', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]

In [43]:
## Saving and loading
# Persist the vector store to the local 'faiss_index' folder so it can be
# reloaded later without re-embedding the documents.
db.save_local(folder_path='faiss_index')

In [45]:
# load_local is a classmethod, so call it on FAISS itself: reloading a saved
# index should not depend on an already-built `db` instance.
# SECURITY: allow_dangerous_deserialization=True permits unpickling the saved
# docstore; only load index folders you created yourself or otherwise trust.
new_db = FAISS.load_local('faiss_index', embeddings, allow_dangerous_deserialization=True)

In [46]:
# Sanity check: run the same query against the reloaded index; the recorded
# output matches the result obtained from the original `db`.
new_db.similarity_search(query)

[Document(id='b4dd19de-572e-4b8c-8179-42d446e0c4b8', metadata={'source': 'speech.txt'}, page_content="Gautam Gambhir (born 14 October 1981) is the Head coach of the Indian men's cricket team. \nHe is also a former international cricketer, former politician, and philanthropist. \nHe played for India in all formats of the game between 2003 and 2016. \nHe was a member of the 17th Lok Sabha from 2019 to 2024 representing East Delhi constituency from the Bharatiya Janata Party. \nHe received the Padma Shri from the Government of India in 2019, the fourth highest civilian award in India")]