# **Installing Libraries**

In [None]:
!pip install langchain langchain-community langchain-core transformers sentence_transformers langchain-huggingface pypdf chromadb



# **Hugging Face Embedding Library**

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Name the sentence-transformers model explicitly instead of relying on the library
# default: vectors persisted in the vector store are only comparable with query
# vectors produced by the same model, so the choice should be visible and stable.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Shared embedding function used both for indexing and for querying.
hg_embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Tunable ingestion parameters.
PDF_PATH = 'Notice Dipen Gajra.pdf'
# Chunks must be large enough to carry a complete thought. The previous 20-character
# chunks produced fragments such as 'ricityAct.2003', which left the LLM with no usable
# context. Sizes are measured in characters.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 150

# Load the PDF; the loader returns one Document per page.
loader_pdf = PyPDFLoader(PDF_PATH)
documents = loader_pdf.load()

# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Split your docs into texts
texts_pdf = text_splitter.split_documents(documents)

# Embeddings: reuse the model created in the embedding cell above instead of loading
# a second identical copy into memory. The name is kept for backward compatibility.
embeddings = hg_embeddings

# **Setting up Chroma DB**

In [None]:
# Import Chroma from langchain_community directly; the `langchain.vectorstores`
# path is a deprecated re-export of the same class.
from langchain_community.vectorstores import Chroma

# Directory (relative to the notebook's working directory) where the Chroma
# collection is persisted on disk.
persist_directory = 'docs/chroma_rag/'

In [None]:
# Build the vector store: embed every chunk in `texts_pdf` with `hg_embeddings` and
# store it in the "pdf_data" collection under `persist_directory`.
# NOTE(review): re-running this cell against an already-populated directory presumably
# adds the chunks again rather than replacing them — confirm, and clear the directory
# whenever the chunking or the embedding model changes.
langchain_chroma = Chroma.from_documents(
    documents=texts_pdf,
    collection_name="pdf_data",
    embedding=hg_embeddings,
    persist_directory=persist_directory
)

In [None]:
# Retrieval sanity check: ask the vector store for the 5 chunks most similar to the
# question, before any LLM is involved.
question = "What is the notice about?"
docs_pdf = langchain_chroma.similarity_search(question,k=5)

In [None]:
# Show the retrieved Documents (metadata and page_content) to judge chunk quality.
docs_pdf

[Document(metadata={'page': 0, 'total_pages': 4, 'creator': 'PyPDF', 'creationdate': '', 'title': 'Notice Dipen Gajra.docx', 'producer': 'Skia/PDF m121 Google Docs Renderer', 'page_label': '1', 'source': 'Notice Dipen Gajra.pdf'}, page_content='NOTICE'),
 Document(metadata={'creationdate': '', 'creator': 'PyPDF', 'total_pages': 4, 'source': 'Notice Dipen Gajra.pdf', 'title': 'Notice Dipen Gajra.docx', 'producer': 'Skia/PDF m121 Google Docs Renderer', 'page': 2, 'page_label': '3'}, page_content='.2020undersection176'),
 Document(metadata={'total_pages': 4, 'page_label': '1', 'creator': 'PyPDF', 'source': 'Notice Dipen Gajra.pdf', 'page': 0, 'title': 'Notice Dipen Gajra.docx', 'creationdate': '', 'producer': 'Skia/PDF m121 Google Docs Renderer'}, page_content='ricityAct.2003'),
 Document(metadata={'producer': 'Skia/PDF m121 Google Docs Renderer', 'creator': 'PyPDF', 'title': 'Notice Dipen Gajra.docx', 'page_label': '1', 'page': 0, 'source': 'Notice Dipen Gajra.pdf', 'creationdate': '', '

# **RAG (Retrieval-Augmented Generation)**

In [None]:
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed
import os
import warnings
warnings.filterwarnings('ignore')

# Credentials must never be written into the notebook. If a token is needed (e.g. for a
# gated model), export HUGGINGFACEHUB_API_TOKEN in the environment before starting the
# kernel; it is only read here. An unset or empty variable becomes None (no token passed).
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or None

# The pipeline samples (do_sample=True), so fix the RNG seed to make runs repeatable.
SEED = 42
set_seed(SEED)

# Load the model and tokenizer
# model_id = "HuggingFaceH4/zephyr-7b-beta"
# model_id = "TheBloke/phi-2-GGUF"
# model_id = "tiiuae/Falcon3-1B-Instruct"
model_id = "Qwen/Qwen3-0.6B-Base"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)

# Create a text generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.1,
    top_k=50,
    top_p=0.95,
    eos_token_id=tokenizer.eos_token_id,
    # Return only the newly generated tokens. Without this the pipeline echoes the
    # whole prompt, so the chain's 'result' began with the instructions and context
    # instead of the answer.
    return_full_text=False,
)

# Use HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipe)


# Retriever that feeds the 5 most similar chunks into the prompt's {context} slot.
retriever_docs = langchain_chroma.as_retriever(search_kwargs={"k":5})
qs="What is the document about?"
# NOTE(review): instruction 3 below says "in the format" without naming a format —
# presumably some text was lost; confirm the intended citation format with the author.
template = """

You are a highly skilled Legal Expert specializing in the analysis of legal documents. Your task is to provide a precise and accurate answer based *only* on the legal documents provided to you as context.

### Instructions:
1.  **Analyze the context:** Carefully read the provided legal text to understand the key facts, parties involved, legal claims, and demands as per Indian Constitution.
2.  **Answer the question:** Address the user's question directly and comprehensively.
3.  **Cite your sources:** Use citations in the format for every piece of information taken directly from the document.
4.  **Maintain Professionalism:** Your response must be objective, legally focused, and free of any speculative or external information.

### Legal Document Context:
{context}

### User Question:
{question}

### Your Expert Response:

"""

PROMPT = PromptTemplate(input_variables=["context","question"], template=template)
# "stuff" chain: all retrieved chunks are concatenated into a single prompt.
qa_with_sources = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",chain_type_kwargs = {"prompt": PROMPT}, retriever=retriever_docs, return_source_documents=True)
# `.invoke` replaces the deprecated direct call on the chain; the returned dict is the same.
llm_response = qa_with_sources.invoke({"query": qs})

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.19G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Device set to use cpu


In [None]:
# Print the full response dict returned by the chain (it includes 'query' and 'result').
print(llm_response)

{'query': 'What is the document about?', 'result': "\n\nYou are a highly skilled Legal Expert specializing in the analysis of legal documents. Your task is to provide a precise and accurate answer based *only* on the legal documents provided to you as context.\n\n### Instructions:\n1.  **Analyze the context:** Carefully read the provided legal text to understand the key facts, parties involved, legal claims, and demands as per Indian Constitution.\n2.  **Answer the question:** Address the user's question directly and comprehensively.\n3.  **Cite your sources:** Use citations in the format for every piece of information taken directly from the document.\n4.  **Maintain Professionalism:** Your response must be objective, legally focused, and free of any speculative or external information.\n\n### Legal Document Context:\n.2020undersection176\n\nricityAct.2003\n\nisteredbelowdocument\n\nwdocumentno.2458/201\n\nAStateGovernmentUnd\n\n### User Question:\nWhat is the document about?\n\n### Y

In [None]:
from IPython.display import display

# Show the answer string as-is; escape sequences such as \n are not rendered here.
display(llm_response['result'])

"\n\nYou are a highly skilled Legal Expert specializing in the analysis of legal documents. Your task is to provide a precise and accurate answer based *only* on the legal documents provided to you as context.\n\n### Instructions:\n1.  **Analyze the context:** Carefully read the provided legal text to understand the key facts, parties involved, legal claims, and demands as per Indian Constitution.\n2.  **Answer the question:** Address the user's question directly and comprehensively.\n3.  **Cite your sources:** Use citations in the format for every piece of information taken directly from the document.\n4.  **Maintain Professionalism:** Your response must be objective, legally focused, and free of any speculative or external information.\n\n### Legal Document Context:\n.2020undersection176\n\nricityAct.2003\n\nisteredbelowdocument\n\nwdocumentno.2458/201\n\nAStateGovernmentUnd\n\n### User Question:\nWhat is the document about?\n\n### Your Expert Response:\n\nThe document in question is

In [None]:
from IPython.display import display, Markdown

# Render the answer as Markdown so headings, lists and bold text are formatted.
display(Markdown(llm_response['result']))



You are a highly skilled Legal Expert specializing in the analysis of legal documents. Your task is to provide a precise and accurate answer based *only* on the legal documents provided to you as context.

### Instructions:
1.  **Analyze the context:** Carefully read the provided legal text to understand the key facts, parties involved, legal claims, and demands as per Indian Constitution.
2.  **Answer the question:** Address the user's question directly and comprehensively.
3.  **Cite your sources:** Use citations in the format for every piece of information taken directly from the document.
4.  **Maintain Professionalism:** Your response must be objective, legally focused, and free of any speculative or external information.

### Legal Document Context:
.2020undersection176

ricityAct.2003

isteredbelowdocument

wdocumentno.2458/201

AStateGovernmentUnd

### User Question:
What is the document about?

### Your Expert Response:

The document in question is a **State Government Undated** document, specifically **Document No. 2458/201**. This document is part of the **2003 Act** under **Section 176** of the **Indian Constitution**. 

### Key Points:
1. **Document Type:** The document is a **State Government Undated** document, which means it was issued by the state government without a specific date.
2. **Document Number:** The document is identified by **Document No. 2458/201**.
3. **Context:** The document is part of the **2003 Act**, which is a significant piece of legislation in India. The **2003 Act** is a comprehensive piece of legislation that governs various aspects of Indian society, including civil, criminal, and administrative law.
4. **Section 176:** The document is related to **Section 176** of the **Indian Constitution**, which deals with the **State Government** and its powers and functions. This section outlines the responsibilities and authority of the state government in various areas of governance.

### Citation:
- **Document No. 2458/201**  
- **2003 Act**  
- **Indian Constitution**  

### Conclusion:
The document in question is a State Government Undated document, Document No. 2458/201, issued under the **2003 Act** and related to **Section 176** of the **Indian Constitution**. It provides details about the state government's functions and responsibilities in governance.