Maximal Marginal Relevance (MMR) is an information-retrieval re-ranking algorithm designed to reduce redundancy among the retrieved results while maintaining high relevance to the query.

In [7]:
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

In [8]:
# Small sample corpus: one Document per sentence. Several sentences share a
# topic (Langchain / LLM), so a plain similarity search would return
# near-duplicates for a Langchain query.
docs = [
    Document(page_content=sentence)
    for sentence in (
        "Langchain makes it easy to work with LLM",
        "Langchain is used to build LLM based Applications",
        "Chroma is used to store and search document embeddings",
        "embeddings are the vector representations of the text",
        "MMR helps you get diverse results when doing similarity search",
        "Langchain Supports Chroma, FAISS, Pinecone and more",
    )
]

In [9]:
from langchain_community.vectorstores import FAISS

# Step 1: initialize the embedding model.
# The model is named explicitly rather than relying on the library's implicit
# default, so the embeddings (and the retrieval results shown below) do not
# silently change if that default is ever updated. The name used here is the
# model that a bare HuggingFaceEmbeddings() resolves to at the time of writing.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Step 2: embed the sample documents and index them in a FAISS vector store.
vector_store = FAISS.from_documents(
    documents=docs,
    embedding=embedding_model,
)

In [10]:
# Step 3: wrap the vector store in a retriever that uses MMR instead of the
# default plain similarity search.
retriever = vector_store.as_retriever(
    search_type="mmr",  # select Maximal Marginal Relevance
    search_kwargs=dict(
        k=3,              # number of top results to return
        lambda_mult=0.5,  # relevance-diversity balance
    ),
)

In [11]:
# Question to ask against the sample corpus.
query = "what is langchain?"

# Run the query through the MMR retriever; the next cell iterates over
# `results` and prints each returned document's page_content.
results = retriever.invoke(query)

In [12]:
# Print each retrieved document under a 1-based result header.
for rank, hit in enumerate(results, start=1):
    header = f"\n----Result {rank} -----"
    body = f"Content \n {hit.page_content}..."
    print(header)
    print(body)


----Result 1 -----
Content 
 Langchain is used to build LLM based Applications...

----Result 2 -----
Content 
 embeddings are the vector representations of the text...

----Result 3 -----
Content 
 Langchain Supports Chroma, FAISS, Pinecone and more...
