In [5]:
!pip install langchain_community
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Change path for Kaggle (upload your constitution.pdf to /kaggle/input)
loader = PyPDFLoader("/kaggle/input/indian-constitution-2024/Indian_Constitution_2024.pdf")
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
chunks = splitter.split_documents(docs)

print(f"Loaded {len(chunks)} chunks from the Constitution.")


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Loaded 1453 chunks from the Constitution.


In [8]:
# Step 1: Install required packages
!pip install faiss-cpu sentence-transformers

# Step 2: Your existing code
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Use small, offline embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create FAISS index
vectorstore = FAISS.from_documents(chunks, embeddings)
vectorstore.save_local("/kaggle/working/faiss_index")
print("FAISS index created and saved at /kaggle/working/faiss_index/")


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting faiss-cpu
  Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none

In [9]:
# Reload the persisted index so later cells can run without rebuilding it.
index_dir = "/kaggle/working/faiss_index"

# The same embedding model name is used here as when the index was built.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in;
# presumably the saved index contains pickled data, so only load index
# directories you created yourself - confirm against the LangChain docs.
vectorstore = FAISS.load_local(
    index_dir,
    embeddings,
    allow_dangerous_deserialization=True,
)
print("FAISS vector store loaded successfully!")

FAISS vector store loaded successfully!


In [10]:
from transformers import pipeline

# Hugging Face model id used for answer generation.
GENERATION_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

# Text-generation pipeline used by the question-answering helper.
# device_map="auto" leaves device placement to the library; the recorded run
# reported "Device set to use cuda:0".
qa_model = pipeline(
    "text-generation",
    model=GENERATION_MODEL_NAME,
    device_map="auto",
)

# Retriever over the FAISS vector store, created with its default settings.
retriever = vectorstore.as_retriever()


config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/816M [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

Device set to use cuda:0


In [14]:
def ask_constitution(question, num_docs=3, max_new_tokens=1000, temperature=0.6):
    """Answer a question about the Constitution using retrieved context.

    Retrieves passages for ``question`` through the module-level ``retriever``,
    places up to ``num_docs`` of them into a prompt, and asks the module-level
    ``qa_model`` pipeline to generate an answer.

    Args:
        question: The user's question as a string.
        num_docs: Maximum number of retrieved documents placed in the prompt.
        max_new_tokens: Generation length limit forwarded to ``qa_model``.
        temperature: Sampling temperature forwarded to ``qa_model``.

    Returns:
        The generated answer text with surrounding whitespace removed.
    """
    # Retrieve relevant passages and keep only the first `num_docs` of them.
    docs = retriever.invoke(question)
    context = "\n".join(doc.page_content for doc in docs[:num_docs])

    # Build the prompt; the trailing "Answer:" cues the model to start answering.
    prompt = f"""
You are an expert on the Constitution of India. 
Using the context below, provide a comprehensive answer that includes:
1. A short definition or explanation.
2. What the section or article states or means in simple terms.
3. The exact constitutional text or quotation, if available.

Context:
{context}

Question: {question}
Answer:
"""

    # Request only the completion. Searching the combined text for "Answer:"
    # is unreliable because the retrieved context or the question may contain
    # that marker themselves.
    outputs = qa_model(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        return_full_text=False,
    )
    generated = outputs[0]["generated_text"]

    # Defensive: if the prompt was echoed back anyway, drop exactly that prefix.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]

    return generated.strip()


In [15]:
# Example query: ask about the Preamble and show the generated answer.
question = "What is the Preamble of the Constitution?"
answer = ask_constitution(question)

# Print the question and the answer on separate lines.
print(f"\n Question: {question}")
print(f"Answer: {answer}")


 Question: What is the Preamble of the Constitution?
Answer: 1. Short Definition: The Preamble is a brief introduction or statement that provides an overview of the purpose and principles of a constitution or a piece of legislation.
2. Explanation: The Preamble of the Constitution of India is a short statement that explains the objective and philosophy of the Constitution. It sets the tone for the rest of the Constitution and reflects the aspirations and values of the people of India. The Preamble establishes India as a sovereign, socialist, secular, democratic, and republican country. It also declares that the Constitution is meant to ensure justice, liberty, equality, and fraternity to all its citizens.
3. Constitutional Text: "WE, THE PEOPLE OF INDIA, having solemnly resolved to constitute India into a SOVEREIGN SOCIALIST SECULAR DEMOCRATIC REPUBLIC and to secure to all its citizens: JUSTICE, social, economic and political; LIBERTY of thought, expression, belief, faith and worship;