# FAISS

In [6]:
from langchain_community.document_loaders import TextLoader # type: ignore
from langchain_community.vectorstores import FAISS # type: ignore
from langchain_huggingface.embeddings import HuggingFaceEmbeddings # type: ignore
from langchain_text_splitters import CharacterTextSplitter # type: ignore
from tqdm.notebook import tqdm as notebook_tqdm

In [7]:
loader=TextLoader("data/result.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
docs = text_splitter.split_documents(documents)

Created a chunk of size 1212, which is longer than the specified 1000
Created a chunk of size 2034, which is longer than the specified 1000
Created a chunk of size 2104, which is longer than the specified 1000


In [8]:
len(docs),docs

(7,
 [Document(metadata={'source': 'data/result.txt'}, page_content='WikipediaThe Free Encyclopedia\nSearch Wikipedia\nSearch\nDonate\nCreate account\nLog in\n\nMain Page\nTalk\nRead\nView source\nView history\n\nTools\nAppearance hide\nText\n\nSmall\n\nStandard\n\nLarge\nWidth\n\nStandard\n\nWide\nColor (beta)\n\nAutomatic\n\nLight'),
  Document(metadata={'source': 'data/result.txt'}, page_content='Dark\nWelcome to Wikipedia,\nthe free encyclopedia that anyone can edit.\n116,430 active editors6,937,620 articles in English\nFrom today\'s featured article\nMeghan Trainor\nMeghan Trainor\nTitle is the debut major-label studio album by American singer-songwriter Meghan Trainor (pictured), released on January 9, 2015. Initially a songwriter for other artists in 2013, Trainor signed with Epic Records the following year and began recording material she co-wrote with Kevin Kadish. They drew influence from retro-styled music as they were tired of chasing radio trends. Title includes "All About

In [9]:
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

2025-03-22 17:05:44.642945: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

In [10]:
db = FAISS.from_documents(docs, embeddings)
db

<langchain_community.vectorstores.faiss.FAISS at 0x180733ac0>

In [11]:
### querying
query="what does this article says about aircraft?"
query_docs = db.similarity_search(query)

In [12]:
query_docs[0].page_content

"Aircraft involved in the Iran Air Flight 277 accident\nAircraft involved in the Iran Air Flight 277 accident\n1797 – War of the First Coalition: The siege of Kehl by Habsburg and Württembergian forces ended when French troops withdrew from their fortifications.\n1917 – First World War: Troops of the British Empire defeated Ottoman forces at the Battle of Rafa on the Sinai–Palestine border.\n1972 – The Los Angeles Lakers of the National Basketball Association lost to the Milwaukee Bucks, ending a 33-game winning streak, the longest in major American professional team sports.\n1975 – The Great Storm spawned the first of 45 tornadoes over a three-day period in the Southeastern United States.\n2011 – In poor weather conditions, Iran Air Flight 277 (aircraft pictured) crashed near Urmia Airport, Iran, killing 78 of the 105 people on board.\nT. W. Robertson (b. 1829)Carrie Chapman Catt (b. 1859)Farhan Akhtar (b. 1974)Lei Jieqiong (d. 2011)\nMore anniversaries: January 8January 9January 10\n

In [13]:
retriver=db.as_retriever()
new_docs = retriver.invoke(query)

In [14]:
new_docs[0].page_content

"Aircraft involved in the Iran Air Flight 277 accident\nAircraft involved in the Iran Air Flight 277 accident\n1797 – War of the First Coalition: The siege of Kehl by Habsburg and Württembergian forces ended when French troops withdrew from their fortifications.\n1917 – First World War: Troops of the British Empire defeated Ottoman forces at the Battle of Rafa on the Sinai–Palestine border.\n1972 – The Los Angeles Lakers of the National Basketball Association lost to the Milwaukee Bucks, ending a 33-game winning streak, the longest in major American professional team sports.\n1975 – The Great Storm spawned the first of 45 tornadoes over a three-day period in the Southeastern United States.\n2011 – In poor weather conditions, Iran Air Flight 277 (aircraft pictured) crashed near Urmia Airport, Iran, killing 78 of the 105 people on board.\nT. W. Robertson (b. 1829)Carrie Chapman Catt (b. 1859)Farhan Akhtar (b. 1974)Lei Jieqiong (d. 2011)\nMore anniversaries: January 8January 9January 10\n

# Similiarity with score

In [15]:
docs_and_scores = db.similarity_search_with_score(query)
docs_and_scores

[(Document(metadata={'source': 'data/result.txt'}, page_content="Aircraft involved in the Iran Air Flight 277 accident\nAircraft involved in the Iran Air Flight 277 accident\n1797 – War of the First Coalition: The siege of Kehl by Habsburg and Württembergian forces ended when French troops withdrew from their fortifications.\n1917 – First World War: Troops of the British Empire defeated Ottoman forces at the Battle of Rafa on the Sinai–Palestine border.\n1972 – The Los Angeles Lakers of the National Basketball Association lost to the Milwaukee Bucks, ending a 33-game winning streak, the longest in major American professional team sports.\n1975 – The Great Storm spawned the first of 45 tornadoes over a three-day period in the Southeastern United States.\n2011 – In poor weather conditions, Iran Air Flight 277 (aircraft pictured) crashed near Urmia Airport, Iran, killing 78 of the 105 people on board.\nT. W. Robertson (b. 1829)Carrie Chapman Catt (b. 1859)Farhan Akhtar (b. 1974)Lei Jieqio

In [18]:
query_embedding = embeddings.embed_query(query)
len(query_embedding)

384

In [19]:
docs_query_w_vector=db.similarity_search_by_vector(query_embedding)
docs_query_w_vector

[Document(metadata={'source': 'data/result.txt'}, page_content="Aircraft involved in the Iran Air Flight 277 accident\nAircraft involved in the Iran Air Flight 277 accident\n1797 – War of the First Coalition: The siege of Kehl by Habsburg and Württembergian forces ended when French troops withdrew from their fortifications.\n1917 – First World War: Troops of the British Empire defeated Ottoman forces at the Battle of Rafa on the Sinai–Palestine border.\n1972 – The Los Angeles Lakers of the National Basketball Association lost to the Milwaukee Bucks, ending a 33-game winning streak, the longest in major American professional team sports.\n1975 – The Great Storm spawned the first of 45 tornadoes over a three-day period in the Southeastern United States.\n2011 – In poor weather conditions, Iran Air Flight 277 (aircraft pictured) crashed near Urmia Airport, Iran, killing 78 of the 105 people on board.\nT. W. Robertson (b. 1829)Carrie Chapman Catt (b. 1859)Farhan Akhtar (b. 1974)Lei Jieqion

# Saving And Loading

In [20]:
db.save_local("faiss_index")

In [21]:
new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
docs = new_db.similarity_search(query)

In [22]:
docs

[Document(metadata={'source': 'data/result.txt'}, page_content="Aircraft involved in the Iran Air Flight 277 accident\nAircraft involved in the Iran Air Flight 277 accident\n1797 – War of the First Coalition: The siege of Kehl by Habsburg and Württembergian forces ended when French troops withdrew from their fortifications.\n1917 – First World War: Troops of the British Empire defeated Ottoman forces at the Battle of Rafa on the Sinai–Palestine border.\n1972 – The Los Angeles Lakers of the National Basketball Association lost to the Milwaukee Bucks, ending a 33-game winning streak, the longest in major American professional team sports.\n1975 – The Great Storm spawned the first of 45 tornadoes over a three-day period in the Southeastern United States.\n2011 – In poor weather conditions, Iran Air Flight 277 (aircraft pictured) crashed near Urmia Airport, Iran, killing 78 of the 105 people on board.\nT. W. Robertson (b. 1829)Carrie Chapman Catt (b. 1859)Farhan Akhtar (b. 1974)Lei Jieqion

In [23]:
id(db)

6450002624

In [24]:
id(new_db)

6488037648