## Setup

In [4]:
#Basic PDF & Document Libraries
!pip install PyPDF2 pymupdf -q

#Transformers, PyTorch & ML Libraries
!pip install torch transformers sentence-transformers scikit-learn -q

#LangChain & LangChain Extensions
!pip install langchain faiss-cpu langchain-openai -q
!pip install -U langchain-community -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m81.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m449.6/449.6 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.7/64.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does n

## Imports

In [5]:
#OS & System Utilities
import os
from getpass import getpass
import re
import random
import zipfile
import json
from collections import Counter


#Google Colab / Drive
from google.colab import drive
from google.colab import files


#PDF & Document Handling
from PyPDF2 import PdfReader
from langchain.document_loaders import PyMuPDFLoader
from langchain.schema import Document


#Data Handling
import pandas as pd


#PyTorch & Transformers
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification


#scikit-learn
from sklearn.preprocessing import LabelEncoder


#LangChain Core & Chat Models
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.schema import StrOutputParser, Document
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain.memory import ConversationBufferMemory
from langchain.agents import initialize_agent, AgentType
from langchain.tools import tool
from langchain.embeddings.base import Embeddings
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI


#Gradio
import gradio as gr

## Google Drive

In [6]:
drive.mount('/content/drive')

Mounted at /content/drive


## Law Books Splitting

In [7]:
#PDF Path
folder_path = "/content/drive/MyDrive/LegalMind"
lawbook_paths = [
    f"{folder_path}/Code Of Criminal Procedure 1898.pdf",
    f"{folder_path}/Pakistan Penal Code.pdf",
    f"{folder_path}/Qanun-e-Shahadat Order 1984.pdf"
]


#Split by Numbered Sections
def split_into_documents(text, book_name):
    """Split a law book's text into one Document per numbered section.

    A new section starts at every line that begins (after optional leading
    whitespace) with digits, a dot and a whitespace character, e.g.
    ``"82. Where warrant may be executed"``. Lines before the first such
    header are collected into a chunk whose ``section`` metadata is ``None``.

    Args:
        text: Full text of the book, lines separated by ``"\\n"``.
        book_name: Stored under the ``"book"`` metadata key of every chunk.

    Returns:
        list of Document, in reading order. Each has stripped ``page_content``
        and metadata ``{"book": book_name, "section": <number as str or None>}``.
        Chunks that are empty after stripping are skipped, so empty or
        whitespace-only input yields an empty list.
    """
    section_header_pattern = re.compile(r'^\s*(\d+)\.\s')
    documents = []

    def flush(chunk_lines, section_number):
        # Emit the accumulated lines as one Document, unless nothing but
        # whitespace was collected (e.g. blank lines before the first header).
        content = "\n".join(chunk_lines).strip()
        if content:
            documents.append(Document(
                page_content=content,
                metadata={"book": book_name, "section": section_number}
            ))

    chunk_lines = []
    section_number = None

    for line in text.split('\n'):
        match = section_header_pattern.match(line)
        if match:
            # A header closes the previous chunk and opens a new one.
            flush(chunk_lines, section_number)
            section_number = match.group(1)
            chunk_lines = [line]
        else:
            chunk_lines.append(line)

    # Save last chunk
    flush(chunk_lines, section_number)

    return documents


#Process All Books
all_chunks = []

for path in lawbook_paths:
    # Book name = PDF file name without the ".pdf" extension.
    book_name = os.path.basename(path).replace(".pdf", "")
    print(f"Processing: {book_name}")

    reader = PdfReader(path)
    # `or ""` guards against pages for which extract_text() yields nothing.
    text = "\n".join(page.extract_text() or "" for page in reader.pages)

    docs = split_into_documents(text, book_name)
    all_chunks.extend(docs)

    print(f"{len(docs)} chunks created for {book_name}\n")

#Save lawbook_chunks to a JSON file
lawbook_raw_data = [{"content": doc.page_content, "metadata": doc.metadata} for doc in all_chunks]
# Write with an explicit UTF-8 encoding: the file is read back with
# encoding="utf-8" later in the notebook, and the judgments file is written
# the same way.
with open("lawbook_chunks.json", "w", encoding="utf-8") as f:
    json.dump(lawbook_raw_data, f)

Processing: Code Of Criminal Procedure 1898
568 chunks created for Code Of Criminal Procedure 1898

Processing: Pakistan Penal Code
517 chunks created for Pakistan Penal Code

Processing: Qanun-e-Shahadat Order 1984
260 chunks created for Qanun-e-Shahadat Order 1984



In [8]:
sample_chunks = random.sample(all_chunks, 10)

for i, chunk in enumerate(sample_chunks):
    print(f"\nSample Chunk #{i+1}")
    print("-" * 50)
    print(f"Book: {chunk.metadata['book']}")
    print(f"Section: {chunk.metadata['section']}")
    print("Content Preview:")
    print(chunk.page_content, "...\n")


Sample Chunk #1
--------------------------------------------------
Book: Code Of Criminal Procedure 1898
Section: 363
Content Preview:
363. Remarks respecting demeanour of witness: When a Sessions Judge or 
Magistrate has recorded the evidence of a wit ness he shall also record such remarks (if 
any) as he thinks material; respecting the demeanour of such witness whilst under 
examination.  364. Examination how recorded:  (1) Whenever the accused is examined, by any 
Magistrate or by any Cour t other than a High Court, the whole of such examination 
including every question put to him and every ans wer given by him, shall be recorded in 
full, in the language in which he is  examined, or, if that is not  practicable, in the language 
of the Court or in English; and such record sh all be shown or read, to  him, or if he does 
not understand the language in which it is written shall be interpreted to him in language 
which he understands, and he shaft be at liber ty to explain or add to

## Judgments Splitting

In [9]:
def clean_text(text):
    """Lower-case judgment text, drop "_" and "-", and remove blank lines.

    Every remaining line is stripped of surrounding whitespace and the lines
    are re-joined with a single newline.

    NOTE: a later cell of this notebook (the verdict prediction pipeline)
    defines another function with the same name; once that cell has run, it
    replaces this one in the kernel namespace.
    """
    lowered = text.lower()
    for unwanted in ("_", "-"):
        lowered = lowered.replace(unwanted, "")

    kept = []
    for raw_line in lowered.splitlines():
        trimmed = raw_line.strip()
        if trimmed:
            kept.append(trimmed)
    return "\n".join(kept)

def is_new_judgment_page(lines):
    """Heuristically decide whether a page opens a new judgment.

    Only the first 12 lines are inspected (lower-cased and stripped). The page
    counts as a new judgment when those lines, joined by spaces, mention a
    "judgment sheet" (also spelled "judgement"), "peshawar high court", or one
    of the listed case-type abbreviations/phrases, or when the very first line
    looks like "<2-4 letters>. no. <digits>".

    Args:
        lines: list of str, the lines of one page.

    Returns:
        bool. Always False for an empty list.
    """
    if not lines:
        return False

    head = [entry.lower().strip() for entry in lines[:12]]
    header_text = " ".join(head)

    if re.search(r"judg[e]?ment\s+sheet", header_text):
        return True
    if re.search(r"peshawar high court", header_text):
        return True
    # NOTE(review): alternatives ending in "\." are followed by "\b", so they
    # only match when a word character comes right after the dot - confirm
    # that this is intended.
    if re.search(r"\b(cr\.a|crl\.a|wp no|w\.p\.|writ petition|cr\.r|cr\.revision|c\.p\.|c\.a\.|c\.r\.|civil appeal|civil revision|criminal appeal|criminal misc|crl\.misc|misc\. appl)\b", header_text):
        return True
    # Case-number style first line, e.g. "abc. no. 12".
    return re.match(r"^[a-z]{2,4}\.\s?no\.\s?\d{1,4}", head[0]) is not None

def extract_court_name(text):
    """Guess the court from the first three lines of a judgment.

    The text is stripped, its first three lines are lower-cased, and the
    result is searched for a city name. "lahore" is checked before
    "peshawar"; if neither occurs, "Unknown" is returned.
    """
    header = "\n".join(text.strip().split("\n")[:3]).lower()

    # Matching on the city alone also covers the full "<city> high court" form.
    for city, court in (
        ("lahore", "Lahore High Court"),
        ("peshawar", "Peshawar High Court"),
    ):
        if city in header:
            return court
    return "Unknown"

def split_judgments_by_common_headers(pdf_path):
    """Group the pages of a multi-judgment PDF into one Document per judgment.

    Pages are loaded with PyMuPDFLoader and scanned in order. A page that
    `is_new_judgment_page` recognises as a header page closes the group
    collected so far and starts a new one; every other page is added to the
    current group. The very first page always starts the first group.

    Each group's page texts are joined with a blank line, passed through
    `clean_text`, and stored with metadata ``{"source": "Judgment <n>",
    "court": <extract_court_name result>}``, where <n> counts from 1.

    Args:
        pdf_path: Path of the PDF to split.

    Returns:
        list of Document, one per detected judgment (empty if the PDF yields
        no pages).
    """
    judgments = []

    def emit(page_group):
        # Turn one group of consecutive pages into a judgment Document.
        body = clean_text("\n\n".join(p.page_content for p in page_group))
        judgments.append(Document(
            page_content=body,
            metadata={
                "source": f"Judgment {len(judgments) + 1}",
                "court": extract_court_name(body)
            }
        ))

    pending = []
    for page in PyMuPDFLoader(pdf_path).load():
        page_lines = page.page_content.strip().splitlines()
        starts_new = is_new_judgment_page(page_lines)
        if starts_new and pending:
            emit(pending)
            pending = [page]
        else:
            pending.append(page)

    # Flush the trailing group.
    if pending:
        emit(pending)

    return judgments


#Split the judgments PDF and save the resulting chunks to a JSON file
pdf_path = "/content/drive/MyDrive/LegalMind/Judgments.pdf"

judgments = split_judgments_by_common_headers(pdf_path)

# One plain dict (text + metadata) per judgment, so the chunks can be
# reloaded from disk without re-parsing the PDF.
judgment_raw_data = [{"content": doc.page_content, "metadata": doc.metadata} for doc in judgments]
with open("judgments_chunks.json", "w", encoding="utf-8") as f:
    json.dump(judgment_raw_data, f)


In [10]:
total_judgments = len(judgments)

court_counts = Counter(doc.metadata.get("court", "Unknown") for doc in judgments)

print(f"Total judgments: {total_judgments}\n")
print("Judgments per court:")
for court, count in court_counts.items():
    print(f"{court}: {count}")

Total judgments: 1010

Judgments per court:
Peshawar High Court: 368
Unknown: 571
Lahore High Court: 71


In [11]:
for i, doc in enumerate(judgments[:3], 1):
    print(f"\nJudgment #{i}")
    print("-" * 50)
    print(f"Source: {doc.metadata.get('source', 'Unknown')}")
    print(f"Court: {doc.metadata.get('court', 'Unknown')}")
    print("Full Content:")
    print(doc.page_content)
    print("\n" + "="*50)


Judgment #1
--------------------------------------------------
Source: Judgment 1
Court: Peshawar High Court
Full Content:
judement sheet
peshawar high court, abbottabad bench.
judicial department
cr.a no.323a of 2019
judga,ient
date of hearing
... ...21.04.2020
appellant...(the state) by raja muhammad zubair, additional advocate
general ....
respondent ... (mir umar son of sain muhammad)
v
ahmad ali, j:
the state through
advocate general, khyber pakhtunkhwa has called in
question the acquittal of accused/respondent, in case
f.i.r no. 182 dated \7.04.2015 under section 302
ppc of ps shinkiari, district mansehra, vide
impugned judgment dated 11.03.2019 of the learned
additional sessions judgeiii, mansehra by filing
instant appeal under section 417 (2a) cr.p.c.
2.
brief facts of the case are that the
complainant, mst. bibi iratima wife of said khan,
reported the matter to thc local policc on 17 '042015
at 15.30 hours, u,ho reached the spot in village khara
battangi cum makra miana that 

## InLegalBERTEmbedder: Custom Mean-Pooling Embedder for Legal Texts

In [12]:
class InLegalBERTEmbedder:
    """Text embedder that mean-pools the last hidden state of a Hugging Face encoder.

    Args:
        model_name: Model id or path handed to ``AutoTokenizer`` / ``AutoModel``.
        device: Torch device string; defaults to "cuda" when available, else "cpu".
    """

    def __init__(self, model_name="law-ai/InLegalBERT", device=None):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name).to(self.device)
        # Inference only: make the evaluation mode explicit.
        self.model.eval()

    @staticmethod
    def _mean_pool(token_embeddings, attention_mask):
        """Average token embeddings over the positions where the mask is 1."""
        mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        summed = torch.sum(token_embeddings * mask, 1)
        # Clamp so an all-zero mask cannot cause a division by zero.
        counts = torch.clamp(mask.sum(1), min=1e-9)
        return summed / counts

    def embed_documents(self, texts, batch_size=16):
        """Embed each text as one mean-pooled vector.

        Texts are tokenized and run through the model in batches of
        ``batch_size``; padded positions are excluded from the average via the
        attention mask. Inputs longer than 512 tokens are truncated, so only
        the beginning of a long text contributes to its vector.

        Args:
            texts: Iterable of str.
            batch_size: Number of texts per forward pass (>= 1). Use 1 to
                process texts strictly one at a time.

        Returns:
            list of 1-D NumPy arrays, one per input text, in input order.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be >= 1")

        texts = list(texts)
        embeddings = []
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = self.tokenizer(
                batch, padding=True, truncation=True, return_tensors="pt", max_length=512
            ).to(self.device)
            with torch.no_grad():
                model_output = self.model(**inputs)

            #Mean Pooling
            pooled = self._mean_pool(model_output.last_hidden_state, inputs["attention_mask"])
            # Iterating the 2-D array yields one 1-D vector per text.
            embeddings.extend(pooled.cpu().numpy())
        return embeddings

## LangChain Wrapper for InLegalBERT Embeddings

In [13]:
class InLegalBERTLangChain(Embeddings):
    """LangChain ``Embeddings`` adapter around ``InLegalBERTEmbedder``.

    Args:
        model_name: Forwarded to ``InLegalBERTEmbedder``.
        device: Forwarded to ``InLegalBERTEmbedder``; ``None`` lets it pick
            the device itself.

    NOTE(review): vectors are returned exactly as the wrapped embedder
    produces them (NumPy arrays in this notebook), not as plain float lists.
    """

    def __init__(self, model_name="law-ai/InLegalBERT", device=None):
        self.model = InLegalBERTEmbedder(model_name=model_name, device=device)

    def embed_documents(self, texts):
        """Return one embedding per text, in input order."""
        return self.model.embed_documents(texts)

    def embed_query(self, text):
        """Return the embedding of a single query string."""
        return self.model.embed_documents([text])[0]

## Vector Stores (FAISS)

In [14]:
#Load both JSON files and convert their records to LangChain Documents
def _load_chunk_file(json_path):
    """Read a chunk JSON file; return (raw records, Documents built from them)."""
    with open(json_path, "r", encoding="utf-8") as f:
        records = json.load(f)
    docs = [
        Document(page_content=record["content"], metadata=record["metadata"])
        for record in records
    ]
    return records, docs


def _build_store(docs, save_dir, done_message):
    """Embed `docs` into a FAISS store, save it under `save_dir`, report, return it."""
    store = FAISS.from_documents(docs, embeddings)
    store.save_local(save_dir)
    print(done_message)
    return store


judgment_raw_data, judgment_docs = _load_chunk_file("/content/judgments_chunks.json")
lawbook_raw_data, lawbook_docs = _load_chunk_file("/content/lawbook_chunks.json")

print(f"Loaded {len(judgment_docs)} judgment chunks")
print(f"Loaded {len(lawbook_docs)} lawbook chunks")

#Initialize embedding model
embeddings = InLegalBERTLangChain()

#Create and Save FAISS vector stores
lawbook_store = _build_store(
    lawbook_docs, "/content/lawbook_store", "Lawbook vector store saved successfully!"
)
judgment_store = _build_store(
    judgment_docs, "/content/judgment_store", "Judgment vector store saved successfully!"
)

print("All vector stores created and saved!")


Loaded 1010 judgment chunks
Loaded 1345 lawbook chunks


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/516 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/671 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/534M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/534M [00:00<?, ?B/s]

Lawbook vector store saved successfully!
Judgment vector store saved successfully!
All vector stores created and saved!


## Load Vector Stores

In [15]:
# Fresh embedding wrapper; it is handed to load_local together with each
# saved index folder.
embeddings = InLegalBERTLangChain()

# SECURITY NOTE(review): allow_dangerous_deserialization=True opts in to
# loading pickled data from the index folder (see the LangChain FAISS docs).
# Only point these calls at folders this notebook wrote itself.
lawbook_store = FAISS.load_local("/content/lawbook_store", embeddings, allow_dangerous_deserialization=True)
judgment_store = FAISS.load_local("/content/judgment_store", embeddings, allow_dangerous_deserialization=True)

print("FAISS vector stores loaded successfully!")


FAISS vector stores loaded successfully!


## OpenAI LLM Initialization

In [16]:
os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


Enter your OpenAI API key: ··········


## RAG Law Book

## Retrieval

In [17]:
retriever_law = lawbook_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [18]:
retriever_law

VectorStoreRetriever(tags=['FAISS', 'InLegalBERTLangChain'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7b101a1de1b0>, search_kwargs={'k': 3})

In [19]:
sections = retriever_law.invoke('Explain the offence defined in Section 375 PPC and its punishment.')
sections

[Document(id='0b79fb35-7953-42d2-978e-b587a3988ced', metadata={'book': 'Code Of Criminal Procedure 1898', 'section': '82'}, page_content='82. Where warrant may be executed:  A warrant of arrest may be executed at any place \nin Pakistan.  \n [Explanation : In this section,  "warrant of arrest" includes a warrant of arrest issued under \nthis Code as in force in Azad Jammu and Kashmir]  \nExplan. added by Code of Criminal Procedure (Amendment) Act. Vlll of 1993.'),
 Document(id='cdcea7a7-1316-41da-8978-0d0c650e4fdb', metadata={'book': 'Pakistan Penal Code', 'section': '101'}, page_content='101. When such right extends to causing any harm other than death:  \nIf the offence be not of any of the descriptions  enumerated in the last  preceding section, the \nright of private defence of th e body dose not extend to the voluntary causing of death to the \nassailant, but dose extend, under the restrictions mentioned in Section 99 to the voluntary \ncausing to the assailant of a ny harm other 

In [20]:
sections[0].page_content

'82. Where warrant may be executed:  A warrant of arrest may be executed at any place \nin Pakistan.  \n [Explanation : In this section,  "warrant of arrest" includes a warrant of arrest issued under \nthis Code as in force in Azad Jammu and Kashmir]  \nExplan. added by Code of Criminal Procedure (Amendment) Act. Vlll of 1993.'

## Augmentation

In [21]:
prompt_law = PromptTemplate(
    input_variables=["context", "query"],
    template="""
You are a highly competent legal assistant specializing in Pakistani criminal law.

Use the following sections from Pakistan law books to answer the legal query.

For each section, write a separate explanation — even if the section is only partially relevant. Use this format:

---

Section [Section Number] – [Title or Short Description]
From: [Book Name]

[Explain how this section is relevant to the query.]

---

If a section is completely irrelevant, ignore it entirely and do not mention it in the answer.

Respond in formal legal language only.

Context:
{context}

Query:
{query}
"""
)

In [22]:
query = "What is the procedure for issuing a warrant of arrest in Pakistan?"
retrieved_sections = retriever_law.invoke(query)
retrieved_sections

[Document(id='0b79fb35-7953-42d2-978e-b587a3988ced', metadata={'book': 'Code Of Criminal Procedure 1898', 'section': '82'}, page_content='82. Where warrant may be executed:  A warrant of arrest may be executed at any place \nin Pakistan.  \n [Explanation : In this section,  "warrant of arrest" includes a warrant of arrest issued under \nthis Code as in force in Azad Jammu and Kashmir]  \nExplan. added by Code of Criminal Procedure (Amendment) Act. Vlll of 1993.'),
 Document(id='45b3a168-76dc-412b-a2b8-c384dedd8a80', metadata={'book': 'Code Of Criminal Procedure 1898', 'section': '186'}, page_content="186. Power to issue summons or warrant for offence committed beyond local jurisdiction. \nMagistrate's procedure on arrest."),
 Document(id='b5aeaabf-83ee-4c68-a490-230339216a2d', metadata={'book': 'Code Of Criminal Procedure 1898', 'section': '85'}, page_content='85.   Procedure on arrest of per son against whom warrant issued.')]

In [23]:
if not isinstance(retrieved_sections, list):
    retrieved_sections = [retrieved_sections]

context_text = "\n\n---\n\n".join([
    f"Section from {doc.metadata.get('book', 'Unknown Book')} (Section {doc.metadata.get('section', 'N/A')}):\n{doc.page_content}"
    for doc in retrieved_sections
])

In [24]:
final_prompt_law = prompt_law.invoke({"context": context_text, "query": query})
final_prompt_law

StringPromptValue(text='\nYou are a highly competent legal assistant specializing in Pakistani criminal law.\n\nUse the following sections from Pakistan law books to answer the legal query.\n\nFor each section, write a separate explanation — even if the section is only partially relevant. Use this format:\n\n---\n\nSection [Section Number] – [Title or Short Description]\nFrom: [Book Name]\n\n[Explain how this section is relevant to the query.]\n\n---\n\nIf a section is completely irrelevant, ignore it entirely and do not mention it in the answer.\n\nRespond in formal legal language only.\n\nContext:\nSection from Code Of Criminal Procedure 1898 (Section 82):\n82. Where warrant may be executed:  A warrant of arrest may be executed at any place \nin Pakistan.  \n [Explanation : In this section,  "warrant of arrest" includes a warrant of arrest issued under \nthis Code as in force in Azad Jammu and Kashmir]  \nExplan. added by Code of Criminal Procedure (Amendment) Act. Vlll of 1993.\n\n-

## Generation

In [25]:
answer = llm.invoke(final_prompt_law)
print(answer.content)

---

Section 82 – Where warrant may be executed
From: Code Of Criminal Procedure 1898

Section 82 of the Code of Criminal Procedure 1898 states that a warrant of arrest may be executed at any place in Pakistan. This means that the warrant can be enforced throughout the country, regardless of where the alleged offender is located. It is important to note that this section also clarifies that the term "warrant of arrest" includes warrants issued under this Code as in force in Azad Jammu and Kashmir.

---

Section 85 – Procedure on arrest of person against whom warrant issued
From: Code Of Criminal Procedure 1898

Section 85 of the Code of Criminal Procedure 1898 outlines the procedure to be followed upon the arrest of a person against whom a warrant has been issued. This section details the steps that must be taken by the arresting officer, including informing the arrested individual of the grounds for their arrest, producing them before the court without unnecessary delay, and ensuring 

## Chain

In [26]:
def format_docs(docs):
    """Render retrieved law-book chunks as the context block for the prompt.

    Each document becomes::

        Section <section>[ – <title>]

        From: <book>

        <stripped page content>

    and the entries are separated by a ``---`` line surrounded by blank lines.

    The title suffix is added only when the metadata actually carries a
    non-empty ``title`` (the chunks built in this notebook only store ``book``
    and ``section``), so no "No Title" placeholder is fed to the LLM. A
    missing or ``None`` section (e.g. text preceding the first numbered
    section) is shown as "N/A"; a missing book as "Unknown Book".

    Args:
        docs: Iterable of objects with ``metadata`` (dict) and ``page_content`` (str).

    Returns:
        str, empty when ``docs`` is empty.
    """
    entries = []
    for doc in docs:
        section = doc.metadata.get('section') or 'N/A'
        title = doc.metadata.get('title')
        book = doc.metadata.get('book') or 'Unknown Book'

        heading = f"Section {section} – {title}" if title else f"Section {section}"
        entries.append(
            f"{heading}\n\n"
            f"From: {book}\n\n"
            f"{doc.page_content.strip()}"
        )
    return "\n\n---\n\n".join(entries)

parallel_chain_law = RunnableParallel({
    "context": retriever_law | RunnableLambda(format_docs),
    "query": RunnablePassthrough()
})

parser = StrOutputParser()

main_chain_law = parallel_chain_law | prompt_law | llm | parser

In [27]:
response = main_chain_law.invoke("What are the legal procedures for search and seizure during a criminal investigation under Pakistani law?")
print(response)

---

Section 5 – Trial of offences under Penal Code
From: Code Of Criminal Procedure 1898

This section outlines that all offences under the Pakistan Penal Code shall be investigated, enquired into, tried, and otherwise dealt with according to the provisions contained within the Code of Criminal Procedure. This means that the legal procedures for search and seizure during a criminal investigation under Pakistani law would be governed by the provisions of the Code of Criminal Procedure when dealing with offences under the Penal Code.

---

Section 58 – Pursuit of offenders into other jurisdiction
From: Code Of Criminal Procedure 1898

This section allows a police officer to pursue and arrest, without a warrant, any person authorized to be arrested under the Code of Criminal Procedure into any place in Pakistan. This provision enables law enforcement authorities to conduct search and seizure operations during a criminal investigation by pursuing offenders into different jurisdictions wit

## RAG Judgment

## Retriever

In [28]:
retriever_judgment = judgment_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
retriever_judgment

VectorStoreRetriever(tags=['FAISS', 'InLegalBERTLangChain'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7b10d5445a60>, search_kwargs={'k': 3})

In [29]:
# Retrieval smoke test on the judgment store.
# NOTE(review): despite its name, `prompt_judgment` holds the list of retrieved
# Documents here; a later cell rebinds the same name to a ChatPromptTemplate,
# so re-running this cell afterwards leaves the chain cells with a list
# instead of a prompt.
prompt_judgment = retriever_judgment.invoke('Explain the offence defined in Section 375 PPC and its punishment.')
prompt_judgment

[Document(id='182184f7-617e-4bdb-8a0f-dfc98e2fe689', metadata={'source': 'Judgment 588', 'court': 'Unknown'}, page_content="writ petition no.9549/2021\n(4)\nrelating to the commission of a cognizable offence and to\nregister a case thereon on the ground that he is not satisfied\nwith the reasonableness or credibility of the information. in\nother words, 'reasonableness' or 'credibility' of the said\ninformation is not a condition precedent for the registration of\na criminal case. a comparison of the present section 154 of\nthe code with those of the earlier codes indicates that the\nlegislature had intentionally thought it appropriate to employ\nonly the words 'every information' without qualifying the said\nwords. an overall reading of all the codes makes it clear that\nsine qua non for recording a first information report is that\nthere should be an information and that information must\ndisclose the commission of a cognizable offence.\n6.\nsection 156 of the code confers the power 

In [30]:
prompt_judgment[0].page_content

"writ petition no.9549/2021\n(4)\nrelating to the commission of a cognizable offence and to\nregister a case thereon on the ground that he is not satisfied\nwith the reasonableness or credibility of the information. in\nother words, 'reasonableness' or 'credibility' of the said\ninformation is not a condition precedent for the registration of\na criminal case. a comparison of the present section 154 of\nthe code with those of the earlier codes indicates that the\nlegislature had intentionally thought it appropriate to employ\nonly the words 'every information' without qualifying the said\nwords. an overall reading of all the codes makes it clear that\nsine qua non for recording a first information report is that\nthere should be an information and that information must\ndisclose the commission of a cognizable offence.\n6.\nsection 156 of the code confers the power upon a\npolice officer to investigate a cognizable offence whereas\nsection 157 lays down the manner, in which that investi

## Augmentation

In [31]:
prompt_judgment = ChatPromptTemplate.from_template("""
You are a legal assistant trained in Pakistani law. Given the following legal judgment text, extract and summarize the key legal information in a structured manner.

Format the output like this:

Case Type: [e.g. Writ Petition, Criminal Appeal]
Court: [e.g. Lahore High Court, Supreme Court of Pakistan]
Parties Involved:
- Petitioner(s): [Names if available]
- Respondent(s): [Names if available]

Main Legal Issues:
[Summarize the core legal issues raised in the case.]

Petitioner’s Arguments:
[Summarize what the petitioner argued.]

Respondent’s Arguments:
[Summarize what the respondent argued.]

Relevant Laws and Sections:
[List all mentioned laws/sections (e.g., Section 154 CrPC, Article 10A of the Constitution).]

Court’s Reasoning and Observations:
[Summarize how the court analyzed the matter, referring to any case law, principles, or interpretations.]

Final Decision / Order:
[Summarize what the court ordered — dismissed, allowed, directions issued, etc.]

---
Summary of the Judgment:
[Provide a concise summary of the entire judgment in no more than 10 lines.]

Judgment Text:
{context}
""")


In [32]:
query = "Summarize the judgment regarding registration of FIR under section 154 CrPC"
retrieved_judgments = retriever_judgment.invoke(query)
retrieved_judgments

[Document(id='0e7f7f95-87ee-48b5-a037-9f7dac42d321', metadata={'source': 'Judgment 234', 'court': 'Unknown'}, page_content='a\njudgment sheet\njvsiqiak. pspabtmpiit\'\ncr. m. b.a no. 1959p/2024\nfaisal hussain\nvs\nthe state\ndate of hearins: 16.08.2024\npetitioner(s) by: mr. aman ullah pirzada, advocate\nstate by: mr. hazrat said, dag\ncomplainant by: arbab shabbir ahmad, advocate.\njudgment\n**{€rf*\nijaz anwar.j.\nthrough instant bail\napplication, accused petitioner faisal hussain son\nof iftikhar hussain seeks his release on bail in\ncase fir no.67124 dated 04.03.2024 registered\nunder section 40914191420 ppc, at police\nstation fia/cbc, peshawar.\n2.\nit is pertinent to mention here that the\nfirst bail application no. 3285p12023 filed by the\npetitioner in fir no. 258 dated 04.07,2023\n2\nregistered under section 40814191420 ppc, at\npolice station gharbi, peshawar was dismissed\nby this court on merit vide order dated\n13.09.2023. subsequently, another fir (present\none) has be

## Chain

In [33]:
def _summarize_retrieved_judgments(query):
    """Retrieve judgments for `query` and return one LLM summary per judgment.

    `prompt_judgment`, `llm` and `retriever_judgment` are looked up when the
    chain is invoked, so the objects bound to those names at call time are used.
    """
    summarizer = prompt_judgment | llm | StrOutputParser()
    summaries = []
    for doc in retriever_judgment.invoke(query):
        summaries.append(summarizer.invoke({"context": doc.page_content}))
    return summaries


judgment_summary_chain = RunnableLambda(_summarize_retrieved_judgments)

In [34]:
query = "criteria for pre-arrest bail in Pakistan"

response = judgment_summary_chain.invoke(query)

print_output = "\n\n-------\n\n".join(response)
print(print_output)

Case Type: Pre-arrest Bail Petition
Court: Not specified

Parties Involved:
- Petitioner(s): Not specified
- Respondent(s): Not specified

Main Legal Issues:
Granting pre-arrest bail, confirmation of ad-interim bail, furnishing of fresh bail bonds.

Petitioner’s Arguments:
The petitioner sought pre-arrest bail and requested confirmation of ad-interim bail.

Respondent’s Arguments:
Not specified

Relevant Laws and Sections:
- Not specified

Court’s Reasoning and Observations:
The court allowed the pre-arrest bail petition, confirmed the ad-interim bail, and required the petitioner to furnish fresh bail bonds. The court emphasized that its findings were tentative and would not prejudice the trial.

Final Decision / Order:
The pre-arrest bail petition was allowed, and the ad-interim bail was confirmed, subject to the petitioner furnishing fresh bail bonds.

Summary of the Judgment:
The court allowed the pre-arrest bail petition, confirmed the ad-interim bail, and required the petitioner t

## Legal Case Verdict Prediction Pipeline

In [47]:
# NOTE(review): this path uses the folder "Legal Mind" (with a space) while the
# other Drive paths in the notebook use "LegalMind" - confirm both exist.
model_path = "/content/drive/MyDrive/Legal Mind/best_model"

#Load model & tokenizer (local files only, nothing is fetched from the Hub)
model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
model.eval()
print("Model & tokenizer loaded successfully.")


# Labelled dataset; only its "verdict" column is used below.
with open("/content/drive/MyDrive/LegalMind/LegalMind.json", "r", encoding="utf-8") as f:
    df = pd.DataFrame(json.load(f))

# Class index -> verdict label lookup.
# NOTE(review): assumes the classifier was trained with the same label
# encoding over the same verdict values - verify against the training code.
label_encoder = LabelEncoder().fit(df["verdict"])
label_mapping = {
    encoded: verdict
    for encoded, verdict in zip(
        label_encoder.transform(label_encoder.classes_), label_encoder.classes_
    )
}


def clean_text(text):
    """Normalize a case field into lower-case text for the verdict classifier.

    The value is converted with ``str()`` and lower-cased, then the
    substitutions below are applied in order (each one sees the output of the
    previous one), and finally surrounding whitespace is stripped.

    NOTE: this definition replaces the `clean_text` defined earlier in the
    notebook for judgment splitting once this cell has run.
    NOTE(review): presumably mirrors the preprocessing used when the
    classifier was trained - confirm before changing any rule.
    """
    substitutions = (
        (r"[\n\r\t]", " "),                      # line breaks / tabs -> space
        (r"[\"']", ""),                          # drop quotes
        (r"[^a-z0-9 ,.\[\]()/\-:]", ""),         # drop every other character
        (r"\.{2,}", "."),                        # runs of dots -> one dot
        (r"\s+", " "),                           # collapse whitespace
        (r"[,/]+", ","),                         # runs of commas/slashes -> comma
        (r"(,\s*,)+", ","),                      # comma pairs split by spaces -> comma
        (r"(,\s*$)|(^\s*,)", ""),                # trailing / leading comma
        (r"\b(p\s*,\s*,\s*,\s*c)\b", "ppc"),     # "p , , , c" -> "ppc"
    )

    cleaned = str(text).lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()


def format_case(case):
    """Build the single tagged input string for the verdict classifier.

    The structured case usually comes from an LLM's JSON output, so it may
    lack keys, hold ``null`` values, give ``offence_sections`` as a bare
    string, or list sections as numbers. Such input is tolerated:

    * a missing or ``None`` text field is treated as an empty string;
    * ``offence_sections`` may be missing/``None`` (no sections), a single
      string (one section) or an iterable whose items are converted with
      ``str()`` (``None`` items are skipped).

    Every field is passed through `clean_text`.

    Args:
        case: dict with the optional keys "summary", "petitioner_argument",
            "respondent_argument", "case_type" and "offence_sections".

    Returns:
        str of the form
        "[SUMMARY] ... [PETITIONER] ... [RESPONDENT] ... [CASE TYPE] ... [SECTIONS] ...".
    """
    def field(key):
        value = case.get(key)
        return clean_text("" if value is None else value)

    raw_sections = case.get("offence_sections") or []
    if isinstance(raw_sections, str):
        # Joining a bare string would split it into single characters.
        raw_sections = [raw_sections]
    sections = clean_text(", ".join(str(item) for item in raw_sections if item is not None))

    summary = field("summary")
    pet = field("petitioner_argument")
    resp = field("respondent_argument")
    case_type = field("case_type")
    return f"[SUMMARY] {summary} [PETITIONER] {pet} [RESPONDENT] {resp} [CASE TYPE] {case_type} [SECTIONS] {sections}"


def predict_legal_verdict(case: dict) -> str:
    """Classify a structured case and return its verdict label.

    The case is flattened with `format_case`, tokenized (truncated to 512
    tokens), scored by the sequence-classification model without gradient
    tracking, and the highest-scoring class index is translated through
    `label_mapping`.

    Args:
        case: Structured case dict as expected by `format_case`.

    Returns:
        The verdict label stored in `label_mapping` for the predicted class.
    """
    encoded = tokenizer(
        format_case(case),
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    with torch.no_grad():
        logits = model(**encoded).logits
    class_index = logits.argmax(dim=1).item()
    return label_mapping[class_index]


# This rebinds the notebook-wide `llm`. Pin the model explicitly, matching the
# LLM created in the OpenAI initialization cell, instead of relying on the
# library default - chains that look `llm` up at call time otherwise silently
# depend on whatever that default is.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Prompt asking the LLM for the structured fields consumed by format_case.
prompt_extract_case = PromptTemplate.from_template("""
Extract the following fields from the legal case text:
- summary
- petitioner_argument
- respondent_argument
- offence_sections (list like ["489-F", "420"])
- case_type

Respond in JSON format.

Case Text:
{text}
""")

# Raw case text -> prompt -> LLM -> parsed JSON.
output_parser = JsonOutputParser()
extract_case_chain = prompt_extract_case | llm | output_parser

def run_pipeline(raw_text: str):
    """Run the full verdict pipeline on raw case text.

    First `extract_case_chain` turns the text into structured fields, then
    `predict_legal_verdict` classifies them. Progress, the extracted fields
    and the prediction are printed along the way.

    Args:
        raw_text: Unstructured text of a legal case.

    Returns:
        The predicted verdict label.
    """
    print("Extracting structured case data from LLM...")
    case_fields = extract_case_chain.invoke({"text": raw_text})
    print("Structured fields:", json.dumps(case_fields, indent=2))

    print("Predicting verdict using LegalBERT classifier...")
    predicted = predict_legal_verdict(case_fields)
    print("Predicted Verdict:", predicted)
    return predicted


Model & tokenizer loaded successfully.


## Tools

In [48]:
@tool
def lawbook_tool(query: str) -> str:
    """
    Use this tool when the user is asking about specific Pakistani laws, legal provisions, or statutory sections.
    It retrieves and summarizes relevant sections from digitized Pakistani law books (e.g., PPC, CrPC, PECA).

    Example queries:
    - "Explain Section 489-F about cheque dishonor."
    - "What does Section 302 of the Pakistan Penal Code say?"
    - "Give me details of cybercrime law under PECA."
    - "Is there any section in CrPC related to bail?"

    The tool returns a summarized explanation of the most relevant sections from law books.
    """
    # The docstring above is kept verbatim on purpose: it is runtime text
    # (presumably what the @tool decorator exposes as the tool description).
    # NOTE(review): it advertises PECA, but the law-book store in this notebook
    # is built from the CrPC, PPC and Qanun-e-Shahadat PDFs only - confirm.
    k = 3

    #Retrieve relevant law book content
    retrieved = retriever_law.invoke(query)

    if not retrieved:
        return "No relevant law sections found."

    #Top-k results
    top_docs = retrieved[:k]

    def describe(doc):
        # "Section <n>[ – <title>]" header, source book, then the section text.
        # A missing/None section (text before the first numbered section) is
        # shown as "N/A"; the title is appended only when the metadata has one.
        section = doc.metadata.get('section') or 'N/A'
        title = doc.metadata.get('title')
        heading = f"Section {section}" + (f" – {title}" if title else "")
        return (
            heading
            + f"\n\nFrom: {doc.metadata.get('book', 'Unknown Book')}\n\n"
            + f"{doc.page_content.strip()}"
        )

    #Format context with metadata
    formatted_context = "\n\n---\n\n".join(describe(doc) for doc in top_docs)

    #Summarize using LLM
    input_data = {"context": formatted_context, "query": query}
    response = (prompt_law | llm | StrOutputParser()).invoke(input_data)

    return response

#Judgment
@tool
def judgment_tool(query: str) -> str:
    """
    Use this tool when the user asks about how Pakistani courts have decided similar cases in the past.
    It retrieves and summarizes legal judgments, precedents, or case law from a database of actual court decisions.

    Example queries:
    - "What did the court decide in cheque dishonor cases?"
    - "Any Supreme Court ruling on Section 489-F?"
    - "Has Section 302 ever been challenged in court?"

    The tool returns a summary of top relevant legal judgments (default: 3) based on the user's query.
    """
    # Docstring kept verbatim: LangChain's @tool uses it as the tool description.
    k = 3

    hits = retriever_judgment.invoke(query)
    if not hits:
        return "No relevant judgments found."

    # One LLM summary per judgment, limited to the first k hits.
    summaries = [
        (prompt_judgment | llm | StrOutputParser()).invoke({"context": hit.page_content})
        for hit in hits[:k]
    ]
    return "\n\n-------\n\n".join(summaries)


#Predictor
@tool
def predict_verdict_from_text(text: str) -> str:
    """
    Use this tool when the user provides raw legal case text (such as FIRs, arguments, case summaries, or judgment excerpts)
    and wants to predict what the verdict might be based on the structured information in that text.

    The tool extracts structured fields such as:
    - Petitioner Argument
    - Respondent Argument
    - Offence Sections
    - Case Type
    - Case Summary

    Then it uses a trained LegalBERT-based model to predict the likely court verdict.

    Example queries:
    - "Predict the outcome of this case: [paste full case text]"
    - "Based on this FIR and arguments, what might the court decide?"
    - "Here's a summary and sections applied. Predict the result."

    The tool returns the predicted verdict such as: 'Guilty', 'Not Guilty', 'Dismissed', 'Granted', etc., depending on the case type.
    """
    # Docstring kept verbatim: LangChain's @tool uses it as the tool description.
    # Two stages: LLM field extraction, then the classifier on the extracted dict.
    return predict_legal_verdict(extract_case_chain.invoke({"text": text}))

##Agent

In [49]:
# Tools the agent may call.
tools = [judgment_tool, lawbook_tool, predict_verdict_from_text]

# Conversation buffer stored under the "chat_history" key, kept as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# OpenAI-functions agent wired to the tools, the LLM and the memory above.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    memory=memory,
    verbose=True,
)

  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
  agent = initialize_agent(


In [50]:
# Smoke test: send one question through the agent and print whatever invoke() returns.
response = agent.invoke("What are the legal consequences of cyber harassment under Pakistani law?")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `lawbook_tool` with `{'query': 'cyber harassment'}`


[0m[33;1m[1;3m---

Section 32 – Power to direct security to be taken

From: Code Of Criminal Procedure 1898

Section 32 of the Code of Criminal Procedure 1898 grants the court the power to direct security to be taken from any person for keeping the peace. In cases of cyber harassment, this section may be relevant if the court deems it necessary to ensure the safety of the victim or to prevent further harassment.

---

Section 92 – Act done in good faith for benefit of a person without consent

From: Pakistan Penal Code

Section 92 of the Pakistan Penal Code provides a defense for acts done in good faith for the benefit of a person without their consent. In cases of cyber harassment, this section may be relevant if the accused can prove that their actions were done in good faith and for the benefit of the victim or others.

---

Section 7 – Official communicat

##Gradio

In [None]:
# Chat model for the Gradio cell (temperature 0); this rebinds the global `llm`.
llm = ChatOpenAI(temperature=0)


@tool
def lawbook_tool(query: str) -> str:
    """Use this tool for legal provisions or sections (e.g., 489-F, PECA, PPC)."""
    # Thin agent-facing wrapper; retrieval and summarisation live in get_lawbook_sections.
    sections_summary = get_lawbook_sections(query)
    return sections_summary

@tool
def judgment_tool(query: str) -> str:
    """Use this tool to fetch past case judgments relevant to a legal issue."""
    # Thin agent-facing wrapper; retrieval and summarisation live in get_past_judgments.
    judgments_summary = get_past_judgments(query)
    return judgments_summary

@tool
def predict_verdict_from_text(raw_text: str) -> str:
    """Use this tool when a full legal case is given for verdict prediction."""
    # Thin agent-facing wrapper; extraction and classification live in predict_verdict.
    predicted = predict_verdict(raw_text)
    return predicted

# Tools exposed to the chat agent.
tools = [
    judgment_tool,
    lawbook_tool,
    predict_verdict_from_text,
]

# Conversation buffer stored under the "chat_history" key, kept as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# OpenAI-functions agent used by the chat tab.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    agent=AgentType.OPENAI_FUNCTIONS,
    memory=memory,
    verbose=True,
)


#Functional Blocks
def predict_verdict(raw_text):
    """Extract structured fields from raw case text and return the classifier's verdict."""
    return predict_legal_verdict(extract_case_chain.invoke({"text": raw_text}))

def get_lawbook_sections(query):
    """Retrieve law-book chunks for ``query`` and return one LLM summary per chunk.

    Only the first three retrieved documents are used. Each is rendered with its
    section, title and book metadata, summarised through ``prompt_law | llm``,
    and the summaries are joined with a ``---`` separator. Returns a fixed
    message when nothing is retrieved.
    """
    hits = retriever_law.invoke(query)
    if not hits:
        return "No relevant sections found."

    def _summarise(hit):
        # Build the context block: heading line, source book, then the chunk text.
        heading = f"Section {hit.metadata.get('section', 'N/A')} – {hit.metadata.get('title', 'No Title')}\n\n"
        source = f"From: {hit.metadata.get('book', 'Unknown Book')}\n\n"
        body = f"{hit.page_content.strip()}"
        payload = {"context": heading + source + body, "query": query}
        return (prompt_law | llm | StrOutputParser()).invoke(payload)

    return "\n\n---\n\n".join([_summarise(hit) for hit in hits[:3]])

def get_past_judgments(query):
    """Retrieve judgments for ``query`` and return one LLM summary per judgment.

    Only the first three retrieved documents are summarised (via
    ``prompt_judgment | llm``); the summaries are joined with a ``---``
    separator. Returns a fixed message when nothing is retrieved.
    """
    hits = retriever_judgment.invoke(query)
    if not hits:
        return "No relevant judgments found."

    summaries = [
        (prompt_judgment | llm | StrOutputParser()).invoke({"context": hit.page_content})
        for hit in hits[:3]
    ]
    return "\n\n---\n\n".join(summaries)



#Chat Function with Memory
def chat_fn(message, chat_history):
    """Handle one chat turn for the Gradio chatbot tab.

    Args:
        message: The user's new message.
        chat_history: List of ``(user_msg, ai_msg)`` tuples from earlier turns,
            or a falsy value on the first turn.

    Returns:
        ``(chat_history, chat_history)`` — the same updated list twice, once for
        the Chatbot component and once for the State component.

    Messages mentioning a law-related keyword go straight to ``lawbook_tool``;
    everything else goes to the agent with the prior turns prepended as text.
    Any exception is reported as an ``"Error: ..."`` reply instead of being
    raised, so the UI keeps working.

    NOTE(review): the agent is also built with ConversationBufferMemory, so
    earlier turns may reach the model twice on the agent path — confirm intended.
    """
    chat_history = chat_history or []
    try:
        #Context from chat history
        history_text = "".join(
            f"User: {user_msg}\nAI: {ai_msg}\n" for user_msg, ai_msg in chat_history
        )
        full_input = f"{history_text}User: {message}"

        #Manual routing based on keywords (substring match on the lowercased message).
        # "peca" is the statute's actual abbreviation; the earlier misspelling
        # "pecca" is kept so anything that matched before still matches.
        law_keywords = ("section", "law", "peca", "pecca", "ppc", "crpc")
        lowered = message.lower()
        if any(word in lowered for word in law_keywords):
            reply = lawbook_tool.invoke(message)
        else:
            reply = agent.invoke({"input": full_input})["output"]
    except Exception as e:
        # Best-effort UI: show the failure in the chat rather than crashing the app.
        reply = f"Error: {str(e)}"

    chat_history.append((message, reply))
    return chat_history, chat_history


#Gradio UI
# Four-tab Gradio app: verdict prediction, law-book search, past judgments, chatbot.
# Components are laid out in the order they are created, so statement order matters.
with gr.Blocks(title="LegalMind AI: Justice Meets Intelligence") as demo:
    gr.Markdown("LegalMind AI — Justice Meets Intelligence")


    # Predict Verdict tab: raw case text in, predict_verdict() result out.
    # NOTE(review): gr.Code with language="python" is used here and in the next two
    # tabs to show what appears to be plain-text output — confirm this is intended.
    with gr.Tab("Predict Verdict"):
        input_text = gr.Textbox(lines=15, label="Enter Legal Case Text")
        verdict_output = gr.Code(label="Predicted Verdict", language="python")
        verdict_btn = gr.Button("Predict Verdict")
        verdict_btn.click(predict_verdict, inputs=input_text, outputs=verdict_output)


    # Law Book tab: free-text query in, get_lawbook_sections() result out.
    with gr.Tab("Search Law Books"):
        law_query = gr.Textbox(label="Ask about a legal topic (e.g., cybercrime, 489-F, bail)", lines=3)
        law_output = gr.Code(label="Relevant Legal Sections", language="python")
        law_btn = gr.Button("Search Law Books")
        law_btn.click(get_lawbook_sections, inputs=law_query, outputs=law_output)


    # Past Judgments tab: issue description in, get_past_judgments() result out.
    with gr.Tab("Past Case Judgments"):
        case_query = gr.Textbox(label="Describe the legal issue (e.g., 'cheque dishonor under 489-F')", lines=3)
        judgment_output = gr.Code(label="Top Past Judgments", language="python")
        judgment_btn = gr.Button("Find Past Judgments")
        judgment_btn.click(get_past_judgments, inputs=case_query, outputs=judgment_output)


    # ChatBot tab: chat_fn receives (message, state) and returns the updated history
    # for both the Chatbot display and the State holder.
    with gr.Tab("Legal ChatBot"):
        chatbot = gr.Chatbot(label="LegalMind Chat", height=400)
        msg = gr.Textbox(label="Ask your legal question...")
        state = gr.State([])

        send_btn = gr.Button("Send")
        # Both the Send button and pressing Enter in the textbox trigger chat_fn.
        send_btn.click(chat_fn, inputs=[msg, state], outputs=[chatbot, state])
        msg.submit(chat_fn, inputs=[msg, state], outputs=[chatbot, state])

# Start the app; share=True is passed to request a shareable link.
demo.launch(share=True)