#### Retriever and Chain with LangChain

In [None]:
## Import necessary modules
## Modules for document compressors, retrievers, and language model integration
from langchain.retrievers.document_compressors import (
    LLMChainFilter, LLMListwiseRerank, LLMChainExtractor, EmbeddingsFilter, DocumentCompressorPipeline
)
import uuid
from langchain.chains.question_answering import load_qa_chain
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.storage import InMemoryByteStore, InMemoryStore
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_transformers import EmbeddingsRedundantFilter, LongContextReorder
from langchain.retrievers import ContextualCompressionRetriever, ParentDocumentRetriever
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.retrievers.multi_query import MultiQueryRetriever
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment.
load_dotenv()

# Read the key once from the environment. Writing the value back into
# os.environ (as the earlier version did) is a no-op when the key exists and
# raises an opaque "TypeError: str expected, not NoneType" when it does not,
# so fail early with an actionable message instead.
gemini_api_key = os.getenv("GEMINI_API_KEY")
if not gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

class DocumentTool:
    """Small question-answering helper over local text/PDF documents.

    The intended call order is: ``load_documents`` -> ``split_documents`` ->
    ``create_vectorstore`` -> ``setup_retriever`` -> ``ask_question``.

    Attributes are plain references to the objects passed in:
        llm: chat model used for context extraction and answering.
        embeddings_model: embedding model used to build the FAISS index.
        vectorstore: the FAISS index, or ``None`` until ``create_vectorstore``
            has been called.
    """

    def __init__(self, llm, embedding_model):
        """Store the models and start with no vectorstore.

        Args:
            llm: language model passed to the compressor and the QA chain.
            embedding_model: embeddings object passed to ``FAISS.from_documents``.
        """
        self.llm = llm
        # Use the constructor argument. The previous code read a module-level
        # name `embeddings_model` here, which only worked when the calling
        # script happened to define a global with that exact name.
        self.embeddings_model = embedding_model
        self.vectorstore = None

    def load_documents(self, file_paths):
        """Load every file in ``file_paths`` and return the combined documents.

        Args:
            file_paths: iterable of path strings ending in ``.txt`` or ``.pdf``
                (the extension check is case-insensitive).

        Returns:
            A list with the documents produced by each loader, in input order.

        Raises:
            ValueError: if a path has any other extension.
        """
        raw_documents = []
        for path in file_paths:
            suffix = path.lower()
            if suffix.endswith(".txt"):
                loader = TextLoader(path)
            elif suffix.endswith(".pdf"):
                loader = PyPDFLoader(path)
            else:
                raise ValueError("Unsupported file format. Use .txt or .pdf")
            raw_documents.extend(loader.load())
        return raw_documents

    def split_documents(self, raw_documents):
        """Split documents into chunks of at most ~1000 characters.

        Args:
            raw_documents: documents as returned by ``load_documents``.

        Returns:
            The list of chunked documents from the text splitter.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=20,
            # Separators are tried in order, so the coarser paragraph break
            # must come first; with "\n" listed first, "\n\n" was never used.
            separators=["\n\n", "\n"],
        )
        return splitter.split_documents(raw_documents)

    def create_vectorstore(self, docs):
        """Build a FAISS index from ``docs``, keep it on the instance, return it."""
        self.vectorstore = FAISS.from_documents(docs, self.embeddings_model)
        return self.vectorstore

    def setup_retriever(self):
        """Return a compression retriever on top of the FAISS retriever.

        Returns:
            A ``ContextualCompressionRetriever`` whose compressor is an
            ``LLMChainExtractor`` built from ``self.llm``.

        Raises:
            ValueError: if ``create_vectorstore`` has not been called yet.
        """
        if self.vectorstore is None:
            raise ValueError("Vectorstore is not initialized. Call `create_vectorstore` first.")
        retriever = self.vectorstore.as_retriever()
        compressor = LLMChainExtractor.from_llm(self.llm)
        return ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)

    def ask_question(self, query, retriever):
        """Retrieve documents for ``query`` and answer it with a "stuff" QA chain.

        Args:
            query: the question text.
            retriever: a retriever exposing ``invoke``, e.g. the one returned
                by ``setup_retriever``.

        Returns:
            Whatever the QA chain's ``run`` call returns for the question.
        """
        compressed_docs = retriever.invoke(query)
        qa_chain = load_qa_chain(self.llm, chain_type="stuff")
        return qa_chain.run({"input_documents": compressed_docs, "question": query})


## Step 1: Initialize the language model (LLM)
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",  ## Model version
    api_key=gemini_api_key,    ## API key for authentication
    temperature=0.2            ## Control response randomness
)


## Step 2: Set up the embeddings model used to build the vectorstore
embeddings_model = GoogleGenerativeAIEmbeddings(
    google_api_key=gemini_api_key,
    model="models/text-embedding-004"  ## Embedding model version
)

## Step 3: Create the tool instance
tool = DocumentTool(llm=llm, embedding_model=embeddings_model)

## Step 4: Load documents (only .txt and .pdf paths are accepted by the tool)
file_paths = ["../files/example.txt", "../files/cocking.pdf"]  # Add your file paths here
raw_documents = tool.load_documents(file_paths)
# print(raw_documents)

## Step 5: Split documents into chunks
docs = tool.split_documents(raw_documents)
# print(docs)

## Step 6: Create the vectorstore from the chunks
vectorstore = tool.create_vectorstore(docs)
# print(vectorstore)

## Step 7: Set up the retriever (requires the vectorstore from step 6)
retriever = tool.setup_retriever()
print(retriever)

## Step 8: Ask a question and print the answer
query = "How to cook Kabuli Pulao?"
answer = tool.ask_question(query, retriever)
print("Answer:", answer)


base_compressor=LLMChainExtractor(llm_chain=PromptTemplate(input_variables=['context', 'question'], input_types={}, output_parser=NoOutputParser(), partial_variables={}, template='Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return NO_OUTPUT. \n\nRemember, *DO NOT* edit the extracted parts of the context.\n\n> Question: {question}\n> Context:\n>>>\n{context}\n>>>\nExtracted relevant parts:')
| ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), temperature=0.2, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x0000025C307F2A80>, default_metadata=())
| NoOutputParser(), get_input=<function default_get_input at 0x0000025C0DCF2020>) base_retriever=VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAIS

SyntaxError: invalid syntax (2606509741.py, line 1)