In [None]:
# FINAL WORKING CODE
import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

In [None]:
# 1. Enhanced Document Processing
def load_documents(pdf_folder="/kaggle/input/rag-test"):
    """Load every PDF in *pdf_folder* and split the text into retrieval chunks.

    Args:
        pdf_folder: Directory scanned (non-recursively) for ``.pdf`` files.
            Defaults to the Kaggle dataset path used by this notebook.

    Returns:
        A list of chunked documents produced by the text splitter, or an
        empty list when no PDF could be read.

    Raises:
        OSError: If *pdf_folder* does not exist or cannot be listed.
    """
    documents = []

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # chunk order (and therefore the index built from it) reproducible.
    for file in sorted(os.listdir(pdf_folder)):
        if not file.lower().endswith(".pdf"):
            continue
        try:
            loader = PyPDFLoader(os.path.join(pdf_folder, file))
            # load() returns the pages unsplit. load_and_split() would chunk
            # them with the default splitter first, and the splitter below
            # would then chunk those chunks a second time.
            documents.extend(loader.load())
        except Exception as e:
            # Best effort: one unreadable PDF must not abort the whole load.
            print(f"Error loading {file}: {str(e)}")

    if not documents:
        return []

    # Separators tuned for legal text. The trailing " " and "" fallbacks let
    # the splitter still honour chunk_size when a passage contains none of
    # the structural separators.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        separators=["\n\n", "\n", "। ", "§", "(a)", "(b)", " ", ""],
    )
    return text_splitter.split_documents(documents)

# 2. Fixed Pipeline Configuration
def create_qa_chain():
    """Build the map-reduce RetrievalQA chain over the loaded PDF chunks.

    The chain uses ``google/flan-t5-base`` for generation and a FAISS index
    built with ``sentence-transformers/all-mpnet-base-v2`` embeddings for
    retrieval (top 3 chunks per query).

    Returns:
        A ``RetrievalQA`` chain that also returns its source documents.

    Raises:
        ValueError: If ``load_documents()`` yields no chunks to index.
    """
    # Local import: the file's import cell does not bring in torch directly.
    import torch

    # Fail fast, before the model is loaded, if there is nothing to index.
    chunks = load_documents()
    if not chunks:
        raise ValueError(
            "No PDF text could be loaded; cannot build the vector index."
        )

    model_name = "google/flan-t5-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # 0 selects the first CUDA GPU, -1 selects the CPU. pipeline() places the
    # model on this device itself, so no separate .to(...) call is needed and
    # the code no longer crashes on CPU-only runtimes.
    device = 0 if torch.cuda.is_available() else -1

    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        device=device,
        max_length=512,
        do_sample=True,
        temperature=0.7,
        top_k=40,
        truncation=True
    )

    # Map step: answer the question against each retrieved chunk.
    question_prompt = PromptTemplate(
        template="Context:\n{context}\n\nQuestion: {question}\nAnswer in simple Hindi/English:",
        input_variables=["context", "question"]
    )

    # Reduce step: merge the per-chunk answers into one final answer.
    combine_prompt = PromptTemplate(
        template="Combine these answers clearly:\n{summaries}\n\nFinal Question: {question}\nDetailed Answer:",
        input_variables=["summaries", "question"]
    )

    vectorstore = FAISS.from_documents(
        chunks,
        HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    )

    return RetrievalQA.from_chain_type(
        llm=HuggingFacePipeline(pipeline=pipe),
        chain_type="map_reduce",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        chain_type_kwargs={
            "question_prompt": question_prompt,
            "combine_prompt": combine_prompt,
            "combine_document_variable_name": "summaries"
        },
        return_source_documents=True
    )

# 3. Smarter Answer Formatting
def farmer_friendly_answer(result):
    """Turn a raw QA-chain result into a short, plain-language answer.

    Args:
        result: Mapping produced by the QA chain; the generated text is read
            from its ``'result'`` key.

    Returns:
        The generated answer with legal terms swapped for simpler words, or
        a fixed helpline message when the answer is missing or shorter than
        20 characters.
    """
    # Local import keeps this block self-contained; the file's import cell
    # does not bring in ``re``.
    import re

    # A missing or None result is treated the same as an empty answer.
    answer = (result.get('result') or "").strip()

    # Very short outputs are treated as "no useful answer".
    if len(answer) < 20:
        return "Please visit your nearest Krishi Seva Kendra for details or Call KISAN CALL CENTER Toll Free No.1800-180-1551"

    # (technical term, simple singular, simple plural)
    simple_terms = (
        ("sponsor", "company", "companies"),
        ("agreement", "contract", "contracts"),
        ("permanent structure", "permanent building", "permanent buildings"),
        ("temporary modification", "temporary change", "temporary changes"),
    )

    for term, singular, plural in simple_terms:
        # Whole-word match so "disagreement" or "Sponsorship" are left alone;
        # the optional trailing "s" lets plurals map to a correct plural.
        pattern = re.compile(rf"\b({re.escape(term)})(s?)\b", re.IGNORECASE)
        answer = pattern.sub(
            # Defaults bind the current loop values (avoids late binding).
            lambda m, one=singular, many=plural: _match_case(
                m.group(1), many if m.group(2) else one
            ),
            answer,
        )

    return answer


def _match_case(original, replacement):
    """Give *replacement* the same capitalisation pattern as *original*."""
    if original.isupper():
        return replacement.upper()
    if original[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement

# Query Interface
def ask_farmer(query):
    """Answer *query* from the indexed PDFs and print the result.

    Prints the question, the simplified answer and the source file/page of
    each retrieved chunk. Nothing is returned.

    The QA chain is built on the first call and reused afterwards, because
    ``create_qa_chain()`` reloads the model and re-indexes every PDF. Run
    ``del ask_farmer._qa_chain`` to force a rebuild (for example after
    adding new PDFs).

    Args:
        query: The question to ask, as plain text.
    """
    qa = getattr(ask_farmer, "_qa_chain", None)
    if qa is None:
        qa = create_qa_chain()
        ask_farmer._qa_chain = qa

    result = qa({"query": query})

    print("\n🌾" + "="*50 + "🌾")
    print(f"Question: {query}")
    print("🌱" + "-"*50 + "🌱")
    print("Answer:", farmer_friendly_answer(result))
    print("\nRelevant Sources:")
    for doc in result['source_documents']:
        # Metadata keys are read defensively so a chunk without 'source' or
        # 'page' does not abort the printout.
        source = os.path.basename(doc.metadata.get('source', 'unknown'))
        page = doc.metadata.get('page')
        # 'page' is printed 1-based when present.
        page_label = page + 1 if isinstance(page, int) else "?"
        print(f"• {source} (Page {page_label})")
    print("🌾" + "="*50 + "🌾\n")

# Example Usage
if __name__ == "__main__":
    # Demo run: ask each sample question in turn.
    for demo_question in (
        "Can companies build permanent buildings on my land?",
        "What is the farming agreement act?",
    ):
        ask_farmer(demo_question)

2025-04-24 10:22:37.636752: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745490157.830610      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745490157.892165      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# Follow-up question asked from its own notebook cell; ask_farmer prints the
# answer and its source pages rather than returning them.
ask_farmer("What is the complete process outlined for resolving disputes between farmers and sponsors?")