<a href="https://colab.research.google.com/github/iamanujkumar/Natural-Language-Processing/blob/main/Multi_query_retriever_retriever.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEndpointEmbeddings

In [28]:
# Sample corpus for the retrieval demo: every sentence below is wrapped in its
# own Document. The first seven sentences are about AI; the remaining five are
# about energy, agriculture and climate.
docs = [
    Document(page_content=sentence)
    for sentence in (
        "Artificial intelligence is transforming industries worldwide.",
        "Machine learning models are being applied in healthcare, finance, and education.",
        "Deep learning techniques have enabled breakthroughs in computer vision and natural language processing.",
        "Despite progress, challenges like bias, transparency, and data privacy remain critical.",
        "AI-driven automation is reshaping the future of work and productivity.",
        "Governments are creating policies to ensure ethical and responsible AI development.",
        "Collaboration between academia, industry, and policymakers is essential for sustainable AI growth.",
        "Renewable energy technologies are gaining momentum to combat climate change.",
        "Solar and wind power are becoming cost-effective alternatives to fossil fuels.",
        "Electric vehicles are reducing dependence on non-renewable energy sources.",
        "Sustainable agriculture practices help conserve water and maintain soil health.",
        "Global warming continues to pose risks to ecosystems and human societies.",
    )
]

In [11]:
from google.colab import userdata
import os

# Copy the Hugging Face token from Colab's userdata store into the process
# environment (presumably where the Hugging Face client looks for it -- confirm).
os.environ.update(
    HUGGINGFACEHUB_API_TOKEN=userdata.get("HUGGINGFACEHUB_API_TOKEN")
)

# Embeddings client pointed at the all-MiniLM-L6-v2 sentence-transformers repo.
embedding_model = HuggingFaceEndpointEmbeddings(repo_id="sentence-transformers/all-MiniLM-L6-v2")

In [29]:
# Embed the sample corpus and index it in a Chroma collection.
# The corpus defined earlier in this notebook is named `docs`; the previous
# `documents` name is not defined anywhere here, so a fresh
# Restart & Run All failed with NameError on this cell.
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    collection_name="my_collection",
)

In [30]:
# Retriever over the vector store using the 'similarity' search type with k=5.
similarity_retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=5))

In [61]:
from langchain.retrievers import MultiQueryRetriever
from langchain_huggingface import HuggingFaceEndpoint

In [65]:
from langchain_groq import ChatGroq
from langchain.retrievers import MultiQueryRetriever
from google.colab import userdata

# Build a MultiQueryRetriever on top of a k=5 retriever over the vector store.
# The LLM it is given is a Groq chat model; the Groq API key is read from
# Colab's userdata store rather than being written into the notebook.
multiquery_retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(search_kwargs=dict(k=5)),
    llm=ChatGroq(
        groq_api_key=userdata.get("GROQ_API_KEY"),
        model="llama-3.1-8b-instant",
        temperature=0,
    ),
)

In [66]:
# Question sent to both retrievers in the cells that follow.
query = (
    "What are the current applications and challenges "
    "of artificial intelligence?"
)

In [67]:
# Run the query through the multi-query retriever; the bare name on the last
# line makes the notebook display the returned documents.
result = multiquery_retriever.invoke(input=query)
result

[Document(metadata={}, page_content='Machine learning models are being applied in healthcare, finance, and education.'),
 Document(metadata={}, page_content='Deep learning techniques have enabled breakthroughs in computer vision and natural language processing.'),
 Document(metadata={}, page_content='AI-driven automation is reshaping the future of work and productivity.'),
 Document(metadata={}, page_content='Artificial intelligence is transforming industries worldwide.'),
 Document(metadata={}, page_content='Governments are creating policies to ensure ethical and responsible AI development.'),
 Document(metadata={}, page_content='Collaboration between academia, industry, and policymakers is essential for sustainable AI growth.')]

In [68]:
# Second retriever over the same vector store, this time with the 'mmr'
# search type, k=2 and lambda_mult=0.5.
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs=dict(k=2, lambda_mult=0.5))

In [69]:
# Run the same query through the MMR retriever (this rebinds `result`) and
# display what it returns.
result = retriever.invoke(input=query)
result

[Document(metadata={}, page_content='Artificial intelligence is transforming industries worldwide.'),
 Document(metadata={}, page_content='Despite progress, challenges like bias, transparency, and data privacy remain critical.')]