In [27]:
from langchain_cohere import CohereEmbeddings
from langchain_classic.vectorstores import FAISS
from langchain_classic.schema import Document

In [28]:
# Sample corpus: one short profile per IPL franchise.
# Every document shares the same "source" tag and carries a distinct "team_id".
doc_mi = Document(
    page_content=(
        "\n"
        "    Mumbai Indians is one of the most successful teams in IPL history.\n"
        "    The team is known for strong all-round performances and winning multiple championships.\n"
        "    Rohit Sharma has been a key player and former captain who led the team to several IPL titles.\n"
        "    "
    ),
    metadata=dict(source="ipl_teams", team_id=1),
)

doc_csk = Document(
    page_content=(
        "\n"
        "    Chennai Super Kings is one of the most consistent teams in the IPL.\n"
        "    They are famous for their calm leadership and loyal fan base.\n"
        "    MS Dhoni is a legendary player who captained the team to multiple IPL victories.\n"
        "    "
    ),
    metadata=dict(source="ipl_teams", team_id=2),
)

doc_rcb = Document(
    page_content=(
        "\n"
        "    Royal Challengers Bangalore is known for its explosive batting lineup.\n"
        "    The team has reached the IPL finals several times but is yet to win a title.\n"
        "    Virat Kohli is one of the most important players and has been the face of the franchise for many years.\n"
        "    "
    ),
    metadata=dict(source="ipl_teams", team_id=3),
)

doc_kkr = Document(
    page_content=(
        "\n"
        "    Kolkata Knight Riders is a competitive IPL team with multiple championship wins.\n"
        "    They are known for their aggressive gameplay and strong bowling attack.\n"
        "    Andre Russell is a star all-rounder who can change the game with both bat and ball.\n"
        "    "
    ),
    metadata=dict(source="ipl_teams", team_id=4),
)

doc_rr = Document(
    page_content=(
        "\n"
        "    Rajasthan Royals won the inaugural IPL season in 2008.\n"
        "    The team focuses on nurturing young talent and playing smart cricket.\n"
        "    Sanju Samson is the captain and a key batsman who leads the team's batting lineup.\n"
        "    "
    ),
    metadata=dict(source="ipl_teams", team_id=5),
)

# Order here determines insertion order into the vector store.
docs = [
    doc_mi,
    doc_csk,
    doc_rcb,
    doc_kkr,
    doc_rr,
]

In [29]:
# Build the FAISS vector store: each document in `docs` is embedded with the
# Cohere "multilingual-22-12" model and the resulting vectors are indexed.
vector_store = FAISS.from_documents(
    documents=docs,
    embedding=CohereEmbeddings(model="multilingual-22-12"),
)

In [30]:
# `index.ntotal` is the number of vectors held by the underlying FAISS index;
# note that `vector_store_index` therefore stores a count, not the index object.
vector_store_index = vector_store.index.ntotal
print("Number of documents in the vector store: {}".format(vector_store_index))

Number of documents in the vector store: 5


In [31]:
# Display every document in the vector store together with its docstore ID.
# The docstore holds only text and metadata; the embedding vectors live in the
# FAISS index itself (`vector_store.index`), so they are not printed here. To
# inspect them, read them back from the index as NumPy arrays
# (e.g. `vector_store.index.reconstruct(0)` — assumes the default flat index).
#
# Iterate the public index-position -> docstore-ID mapping and resolve each ID
# through `docstore.search` rather than reaching into the private
# `docstore._dict` attribute, which is an implementation detail.
for doc_id in vector_store.index_to_docstore_id.values():
    doc = vector_store.docstore.search(doc_id)
    print(f"Document ID: {doc_id}")
    print(f"Document Content: {doc.page_content}")
    print(f"Document Metadata: {doc.metadata}")
    print("---")

Document ID: 7b3c12c3-7b81-4ff1-b4e8-ab3a0fcbeb70
Document Content: 
    Mumbai Indians is one of the most successful teams in IPL history.
    The team is known for strong all-round performances and winning multiple championships.
    Rohit Sharma has been a key player and former captain who led the team to several IPL titles.
    
Document Metadata: {'source': 'ipl_teams', 'team_id': 1}
---
Document ID: 2686d85b-793f-4aaf-9e8d-7c91b6d33fa4
Document Content: 
    Chennai Super Kings is one of the most consistent teams in the IPL.
    They are famous for their calm leadership and loyal fan base.
    MS Dhoni is a legendary player who captained the team to multiple IPL victories.
    
Document Metadata: {'source': 'ipl_teams', 'team_id': 2}
---
Document ID: 21382bd2-63b8-4d09-afb2-bc266dfb5245
Document Content: 
    Royal Challengers Bangalore is known for its explosive batting lineup.
    The team has reached the IPL finals several times but is yet to win a title.
    Virat Kohli is on

In [32]:
# Similarity search: embed the question and return the 2 stored documents
# whose vectors are closest to it.
vector_store.similarity_search(
    query="Which team has won the most IPL titles?",
    k=2,
)

[Document(id='7b3c12c3-7b81-4ff1-b4e8-ab3a0fcbeb70', metadata={'source': 'ipl_teams', 'team_id': 1}, page_content='\n    Mumbai Indians is one of the most successful teams in IPL history.\n    The team is known for strong all-round performances and winning multiple championships.\n    Rohit Sharma has been a key player and former captain who led the team to several IPL titles.\n    '),
 Document(id='2686d85b-793f-4aaf-9e8d-7c91b6d33fa4', metadata={'source': 'ipl_teams', 'team_id': 2}, page_content='\n    Chennai Super Kings is one of the most consistent teams in the IPL.\n    They are famous for their calm leadership and loyal fan base.\n    MS Dhoni is a legendary player who captained the team to multiple IPL victories.\n    ')]

In [33]:
# Same query as the plain similarity search, but each hit is returned as a
# (Document, score) tuple. The score is a distance, not a similarity:
# a LOWER value means a closer match, so results come back in ascending order.
vector_store.similarity_search_with_score(
    query="Which team has won the most IPL titles?",
    k=2,
)

[(Document(id='7b3c12c3-7b81-4ff1-b4e8-ab3a0fcbeb70', metadata={'source': 'ipl_teams', 'team_id': 1}, page_content='\n    Mumbai Indians is one of the most successful teams in IPL history.\n    The team is known for strong all-round performances and winning multiple championships.\n    Rohit Sharma has been a key player and former captain who led the team to several IPL titles.\n    '),
  np.float32(32.4788)),
 (Document(id='2686d85b-793f-4aaf-9e8d-7c91b6d33fa4', metadata={'source': 'ipl_teams', 'team_id': 2}, page_content='\n    Chennai Super Kings is one of the most consistent teams in the IPL.\n    They are famous for their calm leadership and loyal fan base.\n    MS Dhoni is a legendary player who captained the team to multiple IPL victories.\n    '),
  np.float32(38.256607))]

In [34]:
# Append raw strings to the existing store; `add_texts` embeds them and
# returns the list of IDs generated for the new entries.
new_texts = [
    "FAISS supports fast nearest neighbor search",
]

vector_store.add_texts(texts=new_texts)

['e799f0c7-74cd-4284-b0f9-a613e8932cc8']

In [35]:
# Append Document objects to the existing store; the call returns the IDs
# generated for them (a fresh ID on every run).
vector_store.add_documents(
    documents=[Document(page_content="New document added")],
)

['6dc1a94b-ea60-491c-82bc-1fabaa449880']

In [36]:
# Persist the vector store to disk in a directory named "faiss_index"
# (relative to the notebook's working directory).
vector_store.save_local(folder_path="faiss_index")

In [37]:
# Reload the vector store from the local "faiss_index" directory, using the
# same embedding model that built the index so query vectors stay comparable.
# NOTE: `allow_dangerous_deserialization=True` permits unpickling the saved
# docstore. Pickle can execute arbitrary code, so only enable this for index
# folders you created yourself (as here) — never for files from an untrusted source.
vector_store = FAISS.load_local(
    folder_path="faiss_index",
    embeddings=CohereEmbeddings(model="multilingual-22-12"),
    allow_dangerous_deserialization=True,
)


In [None]:
# Deleting a document from the vector store using its ID.
# Document IDs are generated afresh each time a document is added, so an ID
# pasted from a previous run's output no longer exists after Restart & Run All
# and `delete` raises ValueError for unknown IDs. Resolve the ID(s) at run time
# by matching on the document's content instead.
# Note: every stored document with this exact content is removed.
vector_store.delete(
    ids=[
        doc_id
        for doc_id in vector_store.index_to_docstore_id.values()
        if vector_store.docstore.search(doc_id).page_content == "New document added"
    ]
)

True