<a href="https://colab.research.google.com/github/iamanujkumar/Natural-Language-Processing/blob/main/Contextual_compression_retriever.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEndpointEmbeddings

In [88]:
docs = [
    Document(page_content="""Artificial intelligence is transforming industries worldwide.
    Many organizations are adopting machine learning to improve efficiency.
    However, critics argue that AI can lead to job displacement if not managed responsibly.
    Despite this, AI offers opportunities in healthcare for early disease detection and personalized treatment plans."""),

    Document(page_content="""Climate change is one of the biggest challenges of our time.
    Renewable energy sources such as solar and wind power are becoming more affordable.
    Governments are setting targets to reduce carbon emissions, but progress is uneven across countries.
    AI technologies are also being applied to optimize energy grids and forecast weather patterns more accurately."""),

    Document(page_content="""In the field of natural language processing, deep learning models like transformers
    have achieved state-of-the-art performance in tasks such as translation, summarization, and question answering.
    These models require massive datasets and computational power, raising concerns about energy consumption.
    Ongoing research is exploring more efficient architectures to balance accuracy and sustainability."""),

    Document(page_content="""The history of computing dates back to early mechanical calculators.
    Charles Babbage is often called the father of the computer for designing the Analytical Engine.
    Over time, computers evolved from vacuum tubes to transistors and now to modern microprocessors.
    Today’s research in quantum computing promises to revolutionize fields requiring massive parallel processing.""")
]

In [89]:
from google.colab import userdata
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = userdata.get("HUGGINGFACEHUB_API_TOKEN")

embedding_model = HuggingFaceEndpointEmbeddings(
    repo_id="sentence-transformers/all-MiniLM-L6-v2",
)

In [90]:
vectorstore = Chroma.from_documents(
    documents = documents,
    embedding = embedding_model,
    collection_name='my_collection'
)

In [91]:
similarity_retriever = vectorstore.as_retriever(
    search_type='similarity',
    search_kwargs={"k": 5}
)

In [92]:
from langchain.retrievers import MultiQueryRetriever
from langchain_huggingface import HuggingFaceEndpoint

In [93]:
from langchain_groq import ChatGroq
from langchain.retrievers import MultiQueryRetriever
from google.colab import userdata

multiquery_retriever = MultiQueryRetriever.from_llm(
    retriever = vectorstore.as_retriever(search_kwargs={"k":5}),
    llm = ChatGroq(
    groq_api_key=userdata.get("GROQ_API_KEY"),
    model="llama-3.1-8b-instant",
    temperature=0,
    )
)

In [94]:
base_retriever = vectorstore.as_retriever(
    search_type='similarity',
    search_kwargs={"k": 5}
)

In [95]:
from langchain.retrievers.document_compressors import LLMChainExtractor
llm = ChatGroq(
    groq_api_key=userdata.get("GROQ_API_KEY"),
    model="llama-3.1-8b-instant",
    temperature=0,
    )
compressor = LLMChainExtractor.from_llm(llm)

In [96]:
from langchain.retrievers import ContextualCompressionRetriever

compression_retriever = ContextualCompressionRetriever(
    base_retriever = base_retriever,
    base_compressor = compressor
)

In [97]:
query = "How is artificial intelligence being used in healthcare and energy optimization?"

In [103]:
result = compression_retriever.invoke(query)
print(result)

[Document(metadata={}, page_content='Artificial intelligence is transforming industries worldwide.'), Document(metadata={}, page_content='Artificial intelligence is transforming industries worldwide.'), Document(metadata={}, page_content='Artificial intelligence is transforming industries worldwide.'), Document(metadata={}, page_content='Machine learning models are being applied in healthcare, finance, and education.'), Document(metadata={}, page_content='Machine learning models are being applied in healthcare, finance, and education.')]
