## RAG SETUP - NyayaSarthi

## Install Libraries

In [None]:
!pip install langchain-community langchain-openai faiss-cpu tiktoken pypdf ragas sentence_transformers

## Import Required Packages

In [None]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_openai import AzureChatOpenAI
from ragas.metrics import LLMContextRecall
from ragas import EvaluationDataset
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall
from ragas.metrics import LLMContextPrecisionWithoutReference
from ragas.metrics import ResponseRelevancy
from ragas.metrics import Faithfulness
from ragas import evaluate
import os

## Set Azure OpenAI Key & Endpoint

In [None]:
# Azure OpenAI credentials, exposed through environment variables.
# Both values are blank here; presumably they are meant to be filled in
# before the LLM cells are run.
os.environ.update(
    {
        "AZURE_OPENAI_API_KEY": "",
        "AZURE_OPENAI_ENDPOINT": "",
    }
)

## Set up the LLM with required parameters such as temperature, top_p, max_tokens, etc.

In [None]:
# Chat model shared by the RAG chain and the ragas evaluator.
# Settings are collected first so they can be read (and tweaked) in one place.
llm_params = dict(
    openai_api_version="2025-01-01-preview",
    azure_deployment="gpt-4.1-mini",  # Replace with your deployment name
    temperature=0.1,
    top_p=1,
    max_tokens=2000,
)
llm = AzureChatOpenAI(**llm_params)

In [None]:
# Send a trivial system + human exchange to the model.
# The call is left as the cell's final expression so the notebook displays
# the returned message.
system_turn = ("system", "You are a helpful assistant")
human_turn = ("human", "what is capital of india?")
messages = [system_turn, human_turn]
llm.invoke(messages)

## Build the FAISS Vector Index by loading the PDFs, chunking, and embedding them

### Upload the 3 PDFs (into the `data/` folder)
### 1. BNS.pdf
### 2. BNSS.pdf
### 3. BNSA.pdf

In [None]:
# Directory where the FAISS index is persisted; it is reused on later runs
# when it already exists.
vectorstore_path = "faiss_index"

# Building, loading and querying the index all use the same embedding model,
# so it is created once here instead of separately in each branch.
embed_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

if os.path.exists(vectorstore_path):
    print("🔄 Loading existing FAISS index...")
    # NOTE(security): load_local deserializes pickled data, which is why the
    # explicit opt-in flag is required. Only load an index directory that
    # this notebook (or another trusted source) produced.
    db = FAISS.load_local(
        vectorstore_path,
        embeddings=embed_model,
        allow_dangerous_deserialization=True,
    )
else:
    print("🆕 Creating new FAISS index from PDFs...")
    loader = DirectoryLoader("data/", glob="**/*.pdf", loader_cls=PyPDFLoader)
    docs = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
    chunks = splitter.split_documents(docs)

    # Fail early with a clear message: with nothing to embed (no PDFs under
    # data/, or PDFs without extractable text) the index build would
    # otherwise fail deep inside FAISS with an unhelpful error.
    if not chunks:
        raise ValueError(
            "No text chunks were produced from the PDFs under 'data/'. "
            "Upload the PDF files before building the index."
        )

    db = FAISS.from_documents(chunks, embed_model)

    db.save_local(vectorstore_path)

## Retrieval-Augmented QA

In [None]:
from langchain.prompts import PromptTemplate

# Retrieve the 4 most similar chunks for each question.
retriever = db.as_retriever(search_kwargs={"k": 4})

# Answer prompt: {context} receives the rendered chunks, {input} the question.
prompt = ChatPromptTemplate.from_template("""
You are a legal assistant. Use the below excerpts from law documents to answer the question.
Mention source and page number after each answer fragment.

{context}

Question: {input}

Helpful Answer with citations:
""")

# The answer prompt asks for source and page citations, but the stuff chain's
# default document prompt renders only each chunk's text, so the model would
# never see that metadata. Render it explicitly alongside the text.
# NOTE(review): assumes every indexed chunk carries `source` and `page`
# metadata (as documents produced by PyPDFLoader do) — confirm if the index
# was built another way. `page` is presumably zero-based; verify before
# presenting it to users as a printed page number.
document_prompt = PromptTemplate.from_template(
    "Source: {source} | Page: {page}\n{page_content}"
)

stuff_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
    document_prompt=document_prompt,
)
rag_chain = create_retrieval_chain(retriever, stuff_chain)

## Ask Question & Get Answer

In [None]:
# Run one sample question through the RAG chain and print the generated answer.
theft_question = "What is the section no that deals with  theft and punishment duration for the same  under the new law BNS?"
response = rag_chain.invoke({"input": theft_question})
answer_text = response["answer"]
print("\n📘 Answer:\n", answer_text)

## Evaluate the Results using a pre-curated Synthetic Golden Dataset

In [None]:
# Golden evaluation set: questions and their reference answers, paired by
# position (EVAL_QUESTIONS[i] is answered by EVAL_ANSWERS[i]).
# NOTE(review): several strings below contain spelling mistakes (e.g.
# "donwry", "punishemnt", "fidnapping"). They are left untouched because they
# are passed to the retriever and the evaluator verbatim and changing them
# changes the evaluation inputs — consider correcting them deliberately.
EVAL_QUESTIONS = [
    "What are the section that deal with bribery?",
    "What is the punishemnt for kidnapping or abducting in order to murder or for ransom?",
    "What is the section that deals with culpable homicide?",
    "What is the section that deals with donwry?",
]

EVAL_ANSWERS = [
    "Section 170 deals with bribery, diefining it is a giving or accepting gratification to induce someone to exercise electoral rights.Section 173 addresses punishment for bribery, stating it shall be punished for fine only.",
    "For fidnapping or abducting in order to murder the punishment is imprisonment for life or rigorous imprisonment for 10 years and fine.For kidnapping for ransome the punishment is death or imprisonment for life and fine. Both offenses are cognizable and tried by the cour of session",
    "Section 100 deals with culpable homicide.Section 101 defines murder, while section 105 addresses punishment for culpable homicide not amounting to murder, Section 102 covers coulpable homicide by causing the death of a person other than the intended victim",
    "Section 80 deals with dowry death. It defines dowry death as the death of a woman with seven years of marriage caused by burns, bodily injury or under abnormal circumstances where she was subjected to cruelty or harassement related to downry demands",
]

# zip() stops at the shorter list, which would silently drop evaluation
# samples if the two lists ever get out of sync.
if len(EVAL_QUESTIONS) != len(EVAL_ANSWERS):
    raise ValueError(
        f"EVAL_QUESTIONS has {len(EVAL_QUESTIONS)} entries but "
        f"EVAL_ANSWERS has {len(EVAL_ANSWERS)}; they must be the same length."
    )

# One dict per question, in the shape handed to the ragas dataset later on.
dataset = []

for query, reference in zip(EVAL_QUESTIONS, EVAL_ANSWERS):
    response = rag_chain.invoke({"input": query})
    # Use the documents the chain actually answered from instead of issuing a
    # second, separate retrieval for the same query.
    relevant_docs = response["context"]
    dataset.append(
        {
            "user_input": query,
            "retrieved_contexts": [rdoc.page_content for rdoc in relevant_docs],
            # NOTE(review): duplicates "reference" below; kept so the sample
            # dicts keep their original keys.
            "reference_answer": reference,
            "response": response["answer"],
            "reference": reference,
        }
    )

In [None]:
# Convert the list of per-question sample dicts into a ragas EvaluationDataset.
evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
# Wrap the LangChain chat model so it can be passed to ragas as the evaluator LLM.
evaluator_llm = LangchainLLMWrapper(llm)

In [None]:
# Metrics to compute over the golden dataset, in the order they are reported.
eval_metrics = [
    LLMContextRecall(),
    Faithfulness(),
    ResponseRelevancy(),
    LLMContextPrecisionWithoutReference(),
]

# Run the evaluation with the wrapped chat model and the embedding model
# that was used for the FAISS index.
result = evaluate(
    dataset=evaluation_dataset,
    metrics=eval_metrics,
    llm=evaluator_llm,
    embeddings=embed_model,
)

In [None]:
# Show the evaluation result (presumably the aggregate score for each metric).
print(result)