In [1]:
!pip install evaluate
!pip install datasets
!pip install transformers
!pip install reportlab
!pip install datetime
!pip install flask
!pip install flask-cors





In [2]:
# Import libraries
import json
import os
from datasets import load_dataset
from evaluate import load as load_metric
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    pipeline,
)
from datetime import datetime
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas

In [3]:

# ---------------------------
# ✅ Step 1: Load & Clean Dataset
# ---------------------------

print("📘 Loading cleaned dataset...")

# Load the dataset in JSONL format
dataset = load_dataset("json", data_files="data/legal_pil_dataset_fixed.jsonl")


📘 Loading cleaned dataset...


Generating train split: 0 examples [00:00, ? examples/s]

In [4]:

# ---------------------------
# 🔧 Step 2: Load GPT-2 Model & Tokenizer
# ---------------------------

model_name = "gpt2"
print("🚀 Loading GPT-2 model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# ✅ Fix padding issue
tokenizer.pad_token = tokenizer.eos_token


🚀 Loading GPT-2 model...


In [5]:
# ---------------------------
# 🔥 Step 3: Tokenize & Setup Training Data
# ---------------------------

def tokenize_function(examples):
    """Tokenize and set input labels for GPT-2 to compute loss."""
    tokenized_output = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=1024,
    )
    # ✅ Set labels for loss calculation
    tokenized_output["labels"] = tokenized_output["input_ids"].copy()
    return tokenized_output

print("✂️ Tokenizing dataset...")
tokenized_dataset = dataset.map(tokenize_function, batched=True)

✂️ Tokenizing dataset...


Map:   0%|          | 0/1477 [00:00<?, ? examples/s]

In [None]:
"""# ---------------------------
# 🔥 Step 5: Train GPT-2
# ---------------------------

print("🔥 Starting GPT-2 fine-tuning...")
training_args = TrainingArguments(
    output_dir="./gpt2_pil_trained",
    evaluation_strategy="steps",
    num_train_epochs=2,  # Reduce if needed
    per_device_train_batch_size=1,  # Reduce from 2 to 1
    save_total_limit=4,  # Reduce saved checkpoints
    save_steps=1000,  # Reduce frequency of saving
    logging_dir="./logs",
    logging_steps=500,
    fp16=True  # Enable mixed precision (only for GPUs)
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["train"],
)

trainer.train()

# ✅ Save the fine-tuned model
model.save_pretrained("./gpt2_pil_trained")
tokenizer.save_pretrained("./gpt2_pil_trained")

print("✅ Fine-tuning complete — model saved!")
"""


🔥 Starting GPT-2 fine-tuning...


  0%|          | 3/2954 [05:34<91:29:29, 111.61s/it]
 17%|█▋        | 500/2954 [1:05:24<5:18:16,  7.78s/it]
 17%|█▋        | 500/2954 [1:05:24<5:18:16,  7.78s/it]

{'loss': 0.3172, 'grad_norm': 0.7413356304168701, 'learning_rate': 4.1536899119837506e-05, 'epoch': 0.34}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A                                           
                                                      
 17%|█▋        | 500/2954 [1:46:03<5:18:16,  7.78s/it]
[A

{'eval_loss': 0.27264729142189026, 'eval_runtime': 2439.4708, 'eval_samples_per_second': 0.605, 'eval_steps_per_second': 0.076, 'epoch': 0.34}


 34%|███▍      | 1000/2954 [2:51:46<4:09:51,  7.67s/it]  
 34%|███▍      | 1000/2954 [2:51:46<4:09:51,  7.67s/it]

{'loss': 0.289, 'grad_norm': 0.5477713942527771, 'learning_rate': 3.3073798239675016e-05, 'epoch': 0.68}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A                                           
                                                       
 34%|███▍      | 1000/2954 [3:32:10<4:09:51,  7.67s/it]
[A

{'eval_loss': 0.2496897429227829, 'eval_runtime': 2424.5825, 'eval_samples_per_second': 0.609, 'eval_steps_per_second': 0.076, 'epoch': 0.68}


 51%|█████     | 1500/2954 [4:36:42<3:10:00,  7.84s/it]   
 51%|█████     | 1500/2954 [4:36:42<3:10:00,  7.84s/it]

{'loss': 0.2932, 'grad_norm': 0.6375637054443359, 'learning_rate': 2.4610697359512526e-05, 'epoch': 1.02}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A                                           
                                                       
 51%|█████     | 1500/2954 [5:16:42<3:10:00,  7.84s/it]
[A

{'eval_loss': 0.2321518212556839, 'eval_runtime': 2399.9982, 'eval_samples_per_second': 0.615, 'eval_steps_per_second': 0.077, 'epoch': 1.02}


 68%|██████▊   | 2000/2954 [6:21:31<2:04:04,  7.80s/it]   
 68%|██████▊   | 2000/2954 [6:21:31<2:04:04,  7.80s/it]

{'loss': 0.2305, 'grad_norm': 0.7528636455535889, 'learning_rate': 1.6147596479350036e-05, 'epoch': 1.35}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A                                           
                                                       
 68%|██████▊   | 2000/2954 [7:01:04<2:04:04,  7.80s/it]
[A

{'eval_loss': 0.22218036651611328, 'eval_runtime': 2373.3754, 'eval_samples_per_second': 0.622, 'eval_steps_per_second': 0.078, 'epoch': 1.35}


 85%|████████▍ | 2500/2954 [8:04:36<57:58,  7.66s/it]     
 85%|████████▍ | 2500/2954 [8:04:36<57:58,  7.66s/it]

{'loss': 0.2428, 'grad_norm': 2.480236768722534, 'learning_rate': 7.684495599187543e-06, 'epoch': 1.69}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

[A[A                                           
                                                     
 85%|████████▍ | 2500/2954 [8:45:20<57:58,  7.66s/it]
[A

{'eval_loss': 0.21552439033985138, 'eval_runtime': 2444.5586, 'eval_samples_per_second': 0.604, 'eval_steps_per_second': 0.076, 'epoch': 1.69}


100%|██████████| 2954/2954 [9:51:18<00:00,  9.42s/it]    
100%|██████████| 2954/2954 [9:51:20<00:00, 12.01s/it]


{'train_runtime': 35480.1174, 'train_samples_per_second': 0.083, 'train_steps_per_second': 0.083, 'train_loss': 0.2722556695957481, 'epoch': 2.0}
✅ Fine-tuning complete — model saved!


In [8]:


gpt2_pil_generator = pipeline("text-generation", model="./gpt2_pil_trained")


Device set to use mps:0


In [9]:

def generate_full_pil(subject, petitioner, respondent):
    """Generate the entire PIL document from scratch using GPT-2."""
    prompt = build_smart_prompt(subject)

    result = gpt2_pil_generator(
        prompt,
        max_length=1024,
        temperature=0.4,
        num_return_sequences=1,
        truncation=True,
    )

    return result[0]["generated_text"].strip()


In [13]:



def export_pil(pil_text, filename="Generated_PIL_Document"):
    """Export the PIL to text and PDF files."""
    # Save as TXT
    with open(f"{filename}.txt", "w", encoding="utf-8") as file:
        file.write(pil_text)
    print(f"✅ PIL saved as {filename}.txt")

    # Save as PDF
    pdf_file = f"{filename}.pdf"
    c = canvas.Canvas(pdf_file, pagesize=A4)
    c.setFont("Helvetica", 12)
    for i, line in enumerate(pil_text.split("\n")):
        c.drawString(40, 800 - (i * 20), line)
    c.save()
    print(f"✅ PIL saved as {pdf_file}")


In [None]:
import json
from datetime import datetime
from transformers import pipeline
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from flask import Flask, request, jsonify
from flask_cors import CORS
# 🚀 Load GPT-2 model
print("🚀 Loading GPT-2 model...")
gpt2_pil_generator = pipeline("text-generation", model="https://drive.google.com/drive/folders/1Z4A_hmJndCJpVxNjSud8ZwR2NrwIqcdG?usp=sharing")


# 🎯 Improved GPT-2 generation with retries and stronger prompts
def generate_gpt_section(prompt, section_name=""):
    """Generate sections using GPT-2 with retries and better prompt control."""
    try:
        # First attempt at generation
        result = gpt2_pil_generator(
            prompt,
            max_length=512,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.2,
            num_return_sequences=1,
            do_sample=True,
            truncation=True,
        )
        generated_text = result[0]["generated_text"].replace(prompt, "").strip()

        # Retry with a stronger prompt if generation fails or gives junk
        if not generated_text or len(generated_text) < 20:
            print(f"⚠️ {section_name} generation failed — retrying with stronger prompt...")
            refined_prompt = (
                f"{prompt}\n\nEnsure the output is legally accurate, clear, and enforceable."
            )
            refined_result = gpt2_pil_generator(
                refined_prompt,
                max_length=512,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.2,
                num_return_sequences=1,
                do_sample=True,
                truncation=True,
            )
            generated_text = refined_result[0]["generated_text"].replace(refined_prompt, "").strip()

        # Final fallback if both attempts fail
        if not generated_text or len(generated_text) < 20:
            return f"⚠️ Unable to generate {section_name}. Please consult a legal expert."

        return generated_text

    except Exception as e:
        print(f"❌ Error generating {section_name}: {e}")
        return f"⚠️ Error generating {section_name}. Please check the input."


# 🛠️ Assemble the PIL Document with enhanced structure
def generate_pil(petitioner, respondent, subject, summary):
    """Create the full PIL document with powerful Legal Grounds, Prayer, and Court Procedure generation."""

    print("⚡ Generating Legal Grounds...")
    legal_grounds_prompt = (
        f"Draft strong, persuasive legal grounds for a Public Interest Litigation (PIL) on '{subject}', "
        f"citing specific constitutional articles, environmental laws (e.g., Environment Protection Act 1986), "
        f"legal principles (e.g., Precautionary Principle, Polluter Pays Principle), and landmark cases (e.g., M.C. Mehta v. Union of India 1987)."
        f"Ensure the argument is solid, legally backed, and supports the petitioner's cause."
    )
    legal_grounds = generate_gpt_section(legal_grounds_prompt, "Legal Grounds")

    print("⚡ Generating Prayer...")
    prayer_prompt = (
        f"Draft a clear, practical, and enforceable prayer (request) for a PIL on '{subject}', "
        f"including specific directives the court can issue (e.g., impose fines, enforce environmental audits, "
        f"order cleanup operations, penalize non-compliant industries, ensure public data transparency)."
    )
    prayer = generate_gpt_section(prayer_prompt, "Prayer")

    print("⚡ Generating Court Procedure...")
    court_procedure_prompt = (
        f"Draft a legally accurate and step-by-step court procedure for filing a Public Interest Litigation (PIL) "
        f"in the Supreme Court of India on '{subject}', ensuring it includes jurisdiction, notice, evidence submission, and requests for an expedited hearing."
    )
    court_procedure = generate_gpt_section(court_procedure_prompt, "Court Procedure")

    # ✅ Fallback version of Court Procedure in case GPT fails
    if "⚠️" in court_procedure:
        court_procedure = (
            "1. File the PIL under Article 32 of the Constitution for Supreme Court jurisdiction.\n"
            "2. Serve notice to the Respondent and the Attorney General of India.\n"
            "3. Attach supporting evidence such as scientific data, expert affidavits, and media reports.\n"
            "4. Request expedited hearing citing urgent public interest."
        )

    # ✅ Assemble the final PIL document
    pil_text = f"""
IN THE HON'BLE SUPREME COURT OF INDIA

PUBLIC INTEREST LITIGATION (PIL)

Petitioner: {petitioner}
Respondent: {respondent}

Subject: {subject}

Respected Lordships,

{summary}

Legal Grounds:
{legal_grounds}

Prayer:
{prayer}

Court Procedure:
{court_procedure}

Date: {datetime.now().strftime("%A, %d %B %Y")}

Yours sincerely,
{petitioner}
    """

    return pil_text


# 📄 Export PIL to TXT and PDF
def export_pil(pil_text, filename="PIL_Document"):
    """Export the PIL to text and PDF files."""
    # Save as TXT
    with open(f"{filename}.txt", "w", encoding="utf-8") as file:
        file.write(pil_text)
    print(f"✅ PIL saved as {filename}.txt")

    # Save as PDF
    pdf_file = f"{filename}.pdf"
    c = canvas.Canvas(pdf_file, pagesize=A4)
    c.setFont("Helvetica", 12)
    for i, line in enumerate(pil_text.split("\n")):
        c.drawString(40, 800 - (i * 20), line)
    c.save()
    print(f"✅ PIL saved as {pdf_file}")

app = Flask(__name__)
CORS(app)  # Enable CORS for frontend communication

@app.route('/run_pil_generator', methods=['POST'])
    # 🎯 Interactive CLI for PIL Generation
def run_pil_generator():
    data= request.json
    subject = data.get('subject')
    petitioner = data.get('petitioner')
    respondent = data.get('respondent')
    summary = data.get('summary')
    """Run the PIL Generator interactively."""
    print("🎉 Welcome to the PIL Generator!")


    # Generate the PIL document
    print("⚡ Generating the complete PIL document...")
    generated_pil = generate_pil(petitioner, respondent, subject, summary)
    return jsonify({'pil-text':generated_pil})
# 🔥 Run the Generator
if __name__ == "__main__":
    app.run()


🚀 Loading GPT-2 model...


Device set to use mps:0


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [16/Mar/2025 17:49:13] "OPTIONS /run_pil_generator HTTP/1.1" 200 -


🎉 Welcome to the PIL Generator!
⚡ Generating the complete PIL document...
⚡ Generating Legal Grounds...
⚠️ Legal Grounds generation failed — retrying with stronger prompt...
⚡ Generating Prayer...
⚠️ Prayer generation failed — retrying with stronger prompt...
⚡ Generating Court Procedure...
⚠️ Court Procedure generation failed — retrying with stronger prompt...


127.0.0.1 - - [16/Mar/2025 17:49:14] "POST /run_pil_generator HTTP/1.1" 200 -
127.0.0.1 - - [16/Mar/2025 17:52:45] "OPTIONS /run_pil_generator HTTP/1.1" 200 -


🎉 Welcome to the PIL Generator!
⚡ Generating the complete PIL document...
⚡ Generating Legal Grounds...
⚠️ Legal Grounds generation failed — retrying with stronger prompt...


127.0.0.1 - - [16/Mar/2025 17:52:46] "POST /run_pil_generator HTTP/1.1" 200 -


⚡ Generating Prayer...
⚠️ Prayer generation failed — retrying with stronger prompt...
⚡ Generating Court Procedure...
⚠️ Court Procedure generation failed — retrying with stronger prompt...


In [2]:
pip install transformers[torch]

zsh:1: no matches found: transformers[torch]
Note: you may need to restart the kernel to use updated packages.
