<a href="https://colab.research.google.com/github/deepakb26/IPC-RAG/blob/main/IPC_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Init

In [None]:
# Uncomment on a fresh environment (e.g. Colab) to install the dependencies:
# %pip install -q langchain
# %pip install -q langchain-community
# %pip install -q sentence_transformers
# %pip install -q chromadb

In [None]:
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

# HuggingFace setup: token, sample corpus, and vector store


In [None]:

# HuggingFace API token: never hard-code a secret in the notebook. An earlier
# revision embedded a literal token here (it must be treated as leaked and
# revoked) and stored it under the key "huggingface_api.txt", which is not a
# variable name the HuggingFace Hub integrations look up.
# Supply the token from outside the notebook instead: export it before
# starting the kernel, or set it interactively with getpass.getpass().
HF_TOKEN_ENV_VAR = "HUGGINGFACEHUB_API_TOKEN"
if not os.environ.get(HF_TOKEN_ENV_VAR):
    # The cells visible in this notebook run their models locally, so a
    # missing token is reported instead of being treated as fatal.
    print(f"Note: {HF_TOKEN_ENV_VAR} is not set; hosted HuggingFace Hub calls will be unavailable.")

# Demo corpus: a handful of IPC section headings, one per line
# (deliberately tiny: headings only, no statutory text).
ipc_section_headings = (
    "Section 302: Punishment for murder",
    "Section 307: Attempt to murder",
    "Section 354: Assault or criminal force to woman with intent to outrage her modesty",
    "Section 378: Theft",
    "Section 420: Cheating and dishonestly inducing delivery of property",
)
# The leading and trailing newline are part of the corpus text.
ipc_sections = "\n" + "\n".join(ipc_section_headings) + "\n"

# Persist the corpus to a text file in the working directory.
with open("ipc_sections.txt", "w") as corpus_file:
    corpus_file.write(ipc_sections)

# Load the document
loader = TextLoader("ipc_sections.txt")
documents = loader.load()

# Split into one chunk per IPC section.
# The corpus is a single short block of newline-separated headings, so running
# the splitter over the loaded document with chunk_size=1000 left every
# section in the same chunk and the retriever handed the whole corpus to the
# LLM regardless of the query. Each non-empty line becomes its own document.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
section_lines = []
section_metadatas = []
for document in documents:
    for raw_line in document.page_content.splitlines():
        section_line = raw_line.strip()
        if section_line:
            section_lines.append(section_line)
            # Copy so each chunk owns its metadata dict.
            section_metadatas.append(dict(document.metadata))
texts = text_splitter.create_documents(section_lines, metadatas=section_metadatas)

# Create embeddings (checkpoint named explicitly rather than left to the
# wrapper's default, so the notebook is reproducible across library versions)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Create vector store.
# Stable ids are passed so that re-running this cell is expected to overwrite
# the same entries instead of appending duplicates
# (NOTE(review): relies on the store upserting by id; confirm for the installed version).
section_ids = [f"ipc-section-{index}" for index in range(len(texts))]
db = Chroma.from_documents(texts, embeddings, ids=section_ids)

# Create retriever: return only the best-matching sections, not most of the corpus.
RETRIEVER_TOP_K = 2
retriever = db.as_retriever(search_kwargs={"k": RETRIEVER_TOP_K})




  embeddings = HuggingFaceEmbeddings()


# DistilGPT-2

In [None]:
model_name = "distilgpt2"  # You can change this to other models like "gpt2-medium" if you have more computational resources
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a text-generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Budget for generated tokens only. `max_length` also counts the prompt
    # (which here contains the retrieved context) and produced the
    # "Truncation was not explicitly activated" warning.
    max_new_tokens=256,
    # Explicit so temperature/top_p apply whatever the checkpoint's default
    # generation settings are.
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
    # Return only the continuation; otherwise the whole prompt is echoed back
    # as part of the "answer" (as seen in this notebook's recorded output).
    return_full_text=False,
    # GPT-2 style tokenizers define no pad token; name EOS explicitly instead
    # of relying on the runtime fallback and its warning.
    pad_token_id=tokenizer.eos_token_id,
)

# Create LangChain wrapper for the pipeline
local_llm = HuggingFacePipeline(pipeline=pipe)

# Create QA chain ("stuff" = retrieved documents are inserted into one prompt)
qa = RetrievalQA.from_chain_type(llm=local_llm, chain_type="stuff", retriever=retriever)

def suggest_ipc_sections(incident_description):
    """Ask the retrieval-QA chain which IPC sections fit an incident.

    Args:
        incident_description: Free-text description of the incident.

    Returns:
        The chain's answer, i.e. the value stored under its "result" key.
    """
    query = f"Given the following incident, suggest relevant IPC sections and provide a brief explanation: {incident_description}"
    # `run` is deprecated on LangChain chains; `invoke` takes the inputs as a
    # dict and returns a dict of outputs.
    return qa.invoke({"query": query})["result"]

# Demo: run one burglary-style incident through the chain and show the answer.
incident = "A person broke into a house at night and stole valuable items."
suggestion = suggest_ipc_sections(incident)

# Single print call; emits the same two lines as separate prints would.
print(f"Incident: {incident}\nSuggestion: {suggestion}")


  local_llm = HuggingFacePipeline(pipeline=pipe)
  result = qa.run(query)
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Incident: A person broke into a house at night and stole valuable items.
Suggestion: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Section 302: Punishment for murder
Section 307: Attempt to murder
Section 354: Assault or criminal force to woman with intent to outrage her modesty
Section 378: Theft
Section 420: Cheating and dishonestly inducing delivery of property

Question: Given the following incident, suggest relevant IPC sections and provide a brief explanation: A person broke into a house at night and stole valuable items.
Helpful Answer: You must report any crime in your jurisdiction if it is committed by someone who may be responsible for crimes against humanity (a) without evidence prior to being charged as such. Please refer only briefly to those people suspected; however these are not individuals identified either due process issues like sexual assault occurri

In [None]:
# Custom prompt for the QA chain. It has two placeholders:
#   {context}  - text inserted by the chain in place of the generic context block
#   {question} - the incident description supplied by the caller
prompt_template = """
You are a legal assistant helping to identify relevant Indian Penal Code (IPC) sections for a given incident.
Use the following IPC sections to suggest the most relevant ones for the incident:

{context}

Incident: {question}

Suggest the most relevant IPC section(s) for this incident and provide a brief explanation for why they apply:
"""

# Wrap the raw template, declaring the placeholder names it uses.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Rebuild the QA chain around the custom prompt. Source documents are returned
# next to the answer so the retrieved sections can be displayed.
# Note: this rebinds the name `qa` defined earlier in the notebook.
qa_chain_settings = dict(
    llm=local_llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT},
)
qa = RetrievalQA.from_chain_type(**qa_chain_settings)

def suggest_ipc_sections(incident_description):
    """Run the prompt-customised QA chain on an incident description.

    Args:
        incident_description: Free-text description of the incident; passed to
            the chain unchanged as its "query" input.

    Returns:
        A ``(answer, source_documents)`` tuple taken from the chain output's
        "result" and "source_documents" keys.
    """
    # Calling the chain object directly is deprecated; `invoke` is the
    # supported entry point and returns the same output dict.
    result = qa.invoke({"query": incident_description})
    return result["result"], result["source_documents"]

# Demo: same incident as before, now also listing what the retriever returned.
incident = "A person broke into a house at night and stole valuable items."
suggestion, source_docs = suggest_ipc_sections(incident)

# Header lines in one call (identical text to three separate prints).
print(f"Incident: {incident}\nSuggestion: {suggestion}\n\nRetrieved IPC Sections:")
# One line per retrieved document; prints nothing when the list is empty.
for retrieved_doc in source_docs:
    print(retrieved_doc.page_content)



  result = qa({"query": incident_description})
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Incident: A person broke into a house at night and stole valuable items.
Suggestion: 
You are a legal assistant helping to identify relevant Indian Penal Code (IPC) sections for a given incident. 
Use the following IPC sections to suggest the most relevant ones for the incident:

Section 302: Punishment for murder
Section 307: Attempt to murder
Section 354: Assault or criminal force to woman with intent to outrage her modesty
Section 378: Theft
Section 420: Cheating and dishonestly inducing delivery of property

Incident: A person broke into a house at night and stole valuable items.

Suggest the most relevant IPC section(s) for this incident and provide a brief explanation for why they apply:
1) For example, when an occupant is seen wearing sunglasses while intoxicated in Delhi but has not been arrested due to his lack of experience under Section 377;
2), he was also observed on CCTV running from one apartment building near Kannada Road as well - despite being drunk all day long witho

# AI4Bharat (IndicBERT embeddings)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the "ai4bharat/indic-bert" checkpoint and its tokenizer.
# (The model id is AI4Bharat's indic-bert; an earlier comment called it MuRIL.)
# These assignments rebind `model_name`, `tokenizer` and `model`, which were
# used for the DistilGPT-2 setup earlier in the notebook.
model_name = "ai4bharat/indic-bert"
tokenizer, model = (
    AutoTokenizer.from_pretrained(model_name),
    AutoModel.from_pretrained(model_name),
)

# Expanded IPC sections database: section number -> "heading - statutory text".
# Long texts are wrapped with implicit string concatenation; each fragment
# carries its own trailing space so the joined value is a single sentence.
# Note: this rebinds `ipc_sections`, which earlier held the plain-text corpus.
ipc_sections = {
    "302": (
        "Punishment for murder - "
        "Whoever commits murder shall be punished with death, "
        "or imprisonment for life, and shall also be liable to fine."
    ),
    "307": (
        "Attempt to murder - "
        "Whoever does any act with such intention or knowledge, "
        "and under such circumstances that, if he by that act caused death, "
        "he would be guilty of murder, "
        "shall be punished with imprisonment of either description "
        "for a term which may extend to ten years, "
        "and shall also be liable to fine."
    ),
    "354": (
        "Assault or criminal force to woman with intent to outrage her modesty - "
        "Whoever assaults or uses criminal force to any woman, "
        "intending to outrage or knowing it to be likely that he will thereby outrage her modesty, "
        "shall be punished with imprisonment of either description "
        "for a term which may extend to two years, "
        "or with fine, or with both."
    ),
    "378": (
        "Theft - "
        "Whoever, intending to take dishonestly any moveable property "
        "out of the possession of any person without that person's consent, "
        "moves that property in order to such taking, "
        "is said to commit theft."
    ),
    "420": (
        "Cheating and dishonestly inducing delivery of property - "
        "Whoever cheats and thereby dishonestly induces the person deceived "
        "to deliver any property to any person, "
        "or to make, alter or destroy the whole or any part of a valuable security, "
        "or anything which is signed or sealed, "
        "and which is capable of being converted into a valuable security, "
        "shall be punished with imprisonment of either description "
        "for a term which may extend to seven years, "
        "and shall also be liable to fine."
    ),
}


config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/5.65M [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/135M [00:00<?, ?B/s]

In [None]:
# IPC sections database: section number -> "heading - statutory text".
# NOTE(review): this repeats the dictionary defined in the previous code cell;
# keeping a single definition would avoid the two copies drifting apart.
ipc_sections = dict(
    [
        (
            "302",
            "Punishment for murder - Whoever commits murder shall be punished with death, or imprisonment for life, and shall also be liable to fine.",
        ),
        (
            "307",
            "Attempt to murder - Whoever does any act with such intention or knowledge, and under such circumstances that, if he by that act caused death, he would be guilty of murder, shall be punished with imprisonment of either description for a term which may extend to ten years, and shall also be liable to fine.",
        ),
        (
            "354",
            "Assault or criminal force to woman with intent to outrage her modesty - Whoever assaults or uses criminal force to any woman, intending to outrage or knowing it to be likely that he will thereby outrage her modesty, shall be punished with imprisonment of either description for a term which may extend to two years, or with fine, or with both.",
        ),
        (
            "378",
            "Theft - Whoever, intending to take dishonestly any moveable property out of the possession of any person without that person's consent, moves that property in order to such taking, is said to commit theft.",
        ),
        (
            "420",
            "Cheating and dishonestly inducing delivery of property - Whoever cheats and thereby dishonestly induces the person deceived to deliver any property to any person, or to make, alter or destroy the whole or any part of a valuable security, or anything which is signed or sealed, and which is capable of being converted into a valuable security, shall be punished with imprisonment of either description for a term which may extend to seven years, and shall also be liable to fine.",
        ),
    ]
)

In [None]:

# Function to get embeddings
def get_embedding(text):
    """Embed text by mean-pooling the model's last hidden state.

    Uses the module-level ``tokenizer`` and ``model``. The mean is taken only
    over real tokens: positions the tokenizer marks as padding (attention mask
    0) are excluded. For a single string nothing is padded, so the result is
    the plain mean over its tokens; for a padded batch the pad positions no
    longer distort the vectors.

    Args:
        text: Text to embed; forwarded to the tokenizer unchanged. Input is
            truncated to 512 tokens.

    Returns:
        A NumPy array with size-1 dimensions squeezed out, so a single text
        yields a 1-D vector.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():  # inference only, no gradients needed
        outputs = model(**inputs)
    hidden_states = outputs.last_hidden_state
    # assumes the tokenizer output includes "attention_mask" (requested padding
    # implies one) - TODO confirm for custom tokenizers
    token_mask = inputs["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
    # Masked mean over dim 1; the clamp guards against division by zero.
    token_counts = token_mask.sum(dim=1).clamp(min=1.0)
    pooled = (hidden_states * token_mask).sum(dim=1) / token_counts
    return pooled.squeeze().numpy()

# Pre-compute one embedding per IPC section, keyed by section number.
# Keys and values of a dict iterate in the same order, so zip pairs each
# section number with the embedding of its own text.
section_embeddings = dict(zip(ipc_sections, map(get_embedding, ipc_sections.values())))

def suggest_ipc_sections(incident_description, top_n=2):
    """Rank IPC sections by embedding similarity to an incident description.

    Embeds the incident with ``get_embedding``, scores it against every entry
    of ``section_embeddings`` with cosine similarity, and formats the
    ``top_n`` highest-scoring sections as a text report.

    Args:
        incident_description: Free-text description of the incident.
        top_n: How many of the best-scoring sections to include.

    Returns:
        A multi-line string: the incident, then for each selected section its
        number, its text from ``ipc_sections`` and its score to 4 decimals.
    """
    incident_embedding = get_embedding(incident_description)

    # Score every known section against the incident.
    similarities = {}
    for section, emb in section_embeddings.items():
        similarities[section] = cosine_similarity([incident_embedding], [emb])[0][0]

    # Highest score first; the sort is stable, so ties keep dictionary order.
    ranked_sections = sorted(similarities, key=similarities.get, reverse=True)

    # Assemble the report piece by piece and join once at the end.
    report_parts = [f"Incident: {incident_description}\n\nRelevant IPC Sections:\n\n"]
    for section in ranked_sections[:top_n]:
        report_parts.append(f"Section {section}: {ipc_sections[section]}\n")
        report_parts.append(f"Relevance Score: {similarities[section]:.4f}\n\n")

    return "".join(report_parts)


In [None]:

# Demo: a counterfeit-currency complaint scored against the section embeddings.
incident = (
    "A local shopkeeper complained that a customer paid for "
    "expensive items using counterfeit currency."
)
suggestion = suggest_ipc_sections(incident)
# print() rather than a bare expression so the report's newlines are rendered.
print(suggestion)

Incident: A local shopkeeper complained that a customer paid for expensive items using counterfeit currency.

Relevant IPC Sections:

Section 378: Theft - Whoever, intending to take dishonestly any moveable property out of the possession of any person without that person's consent, moves that property in order to such taking, is said to commit theft.
Relevance Score: 0.8175

Section 420: Cheating and dishonestly inducing delivery of property - Whoever cheats and thereby dishonestly induces the person deceived to deliver any property to any person, or to make, alter or destroy the whole or any part of a valuable security, or anything which is signed or sealed, and which is capable of being converted into a valuable security, shall be punished with imprisonment of either description for a term which may extend to seven years, and shall also be liable to fine.
Relevance Score: 0.8098


