In [1]:
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

from dotenv import load_dotenv



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Small in-memory corpus for the retrieval demo: each sentence below is
# wrapped in its own Document, in the order listed.
docs = [
    Document(page_content=text)
    for text in (
        "LangChain makes it easy to work with LLMs.",
        "LangChain is used to build LLM based applications.",
        "Chroma is used to store and search document embeddings.",
        "Embedding are vector representations of text.",
        "MMR helps you get diverse results when doing similarity search.",
        "LangChain support Chroma, FAISS, Pinecone, and more. ",
    )
]

In [3]:
from langchain_community.vectorstores import FAISS

# Step 1: embedding model. This loads the "BAAI/bge-small-en-v1.5" checkpoint
# through HuggingFaceEmbeddings (a Hugging Face model, not an OpenAI one).
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Step 2: build the FAISS vector store from the sample documents, using the
# embedding model above.
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)

In [7]:
# Expose the vector store as a retriever that uses MMR (search_type="mmr")
# rather than a plain similarity search.
#
# search_kwargs:
#   k           - how many top relevant documents to return.
#   lambda_mult - relevance/diversity balance, ranging from 0 to 1:
#                   1 -> behaves like a normal similarity search,
#                   0 -> gives very diverse results,
#                 so a value somewhere in between is the practical choice.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=3, lambda_mult=0.5),
)
 

In [8]:
# Question to run against the indexed sample documents.
query = "What is langChain?"

# Ask the MMR retriever for matches; the next cell iterates over `results`.
results = retriever.invoke(query)

In [9]:
# Show each retrieved document under a 1-based "Result N" banner.
# Note: with lambda_mult=1 you would expect all of the LangChain-related docs here.
for i, doc in enumerate(results):
    # One print call; sep="\n" puts the banner and the text on separate lines.
    print(f"\n--- Result {i+1} ---", doc.page_content, sep="\n")


--- Result 1 ---
LangChain is used to build LLM based applications.

--- Result 2 ---
LangChain support Chroma, FAISS, Pinecone, and more. 

--- Result 3 ---
Embedding are vector representations of text.
