In [3]:
!pip install nb-clean
!nb-clean clean Legal_text_summarizer.ipynb





In [None]:

import torch
import pandas as pd
import evaluate
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    Trainer,
    TrainingArguments,
    DataCollatorForSeq2Seq
)
from tqdm import tqdm
import os

# Disable wandb
os.environ["WANDB_DISABLED"] = "true"

In [None]:

dataset = load_dataset("billsum", split={'train': 'train', 'test': 'test'})
print(dataset)

train_dataset = dataset["train"].select(range(500))
test_dataset = dataset["test"].select(range(200))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001.parquet:   0%|          | 0.00/91.8M [00:00<?, ?B/s]

data/test-00000-of-00001.parquet:   0%|          | 0.00/15.8M [00:00<?, ?B/s]

data/ca_test-00000-of-00001.parquet:   0%|          | 0.00/6.12M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/18949 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3269 [00:00<?, ? examples/s]

Generating ca_test split:   0%|          | 0/1237 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'summary', 'title'],
        num_rows: 18949
    })
    test: Dataset({
        features: ['text', 'summary', 'title'],
        num_rows: 3269
    })
})


In [None]:

def train_and_evaluate(model_name):
    print(f"\n Training and Evaluating: {model_name}\n")
    data = load_dataset("billsum")
    train_dataset = data["train"]
    test_dataset = data["test"]

    # Load model & tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Preprocess
    def preprocess_function(examples):
        model_inputs = tokenizer(examples["text"], max_length=512, truncation=True)
        labels = tokenizer(examples["summary"], max_length=128, truncation=True)
        model_inputs["labels"] = labels["input_ids"]
        return model_inputs

    tokenized_train = train_dataset.map(preprocess_function, batched=True)
    tokenized_test = test_dataset.map(preprocess_function, batched=True)

    # Training setup
    args = TrainingArguments(
        output_dir=f"./results/{model_name}",
        per_device_train_batch_size=2, # TRAINING LIGHT
        per_device_eval_batch_size=2,
        num_train_epochs=1,
        save_strategy="no",
        logging_dir="./logs",
        report_to="none"
    )

    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model) #DataCollatorForSeq2Seq handles padding of sequences to equal lengths for batching.

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        tokenizer=tokenizer,
        data_collator=data_collator
    )

    # Train
    trainer.train()

    return model, tokenizer, test_dataset


ROUGE → measures overlap between generated and reference summaries.

BLEU → measures n-gram precision (from translation tasks).

BERTScore → uses embeddings (BERT) to measure semantic similarity.

In [None]:

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")
bertscore = evaluate.load("bertscore")

def evaluate_model(model, tokenizer, dataset, num_samples=100):
    references, predictions = [], []

    for sample in tqdm(dataset.select(range(num_samples))):
        input_text = sample["text"]
        target_text = sample["summary"]

        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True).to(model.device)
        with torch.no_grad():
            output_ids = model.generate(**inputs, max_length=256)
        pred = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        predictions.append(pred)
        references.append(target_text)

    # Compute metrics
    rouge_result = rouge.compute(predictions=predictions, references=references)
    bleu_result = bleu.compute(predictions=predictions, references=references)
    bert_result = bertscore.compute(predictions=predictions, references=references, lang="en")

    return {
        "ROUGE-1": round(rouge_result["rouge1"], 4),
        "ROUGE-2": round(rouge_result["rouge2"], 4),
        "ROUGE-L": round(rouge_result["rougeL"], 4),
        "BLEU": round(bleu_result["bleu"], 4),
        "BERTScore (F1)": round(sum(bert_result["f1"]) / len(bert_result["f1"]), 4)
    }


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:

model_t5, tokenizer_t5, test_t5 = train_and_evaluate("t5-base")


 Training and Evaluating: t5-base



config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/18949 [00:00<?, ? examples/s]

Map:   0%|          | 0/3269 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
500,2.0842
1000,1.8529
1500,1.7851
2000,1.7793
2500,1.7247
3000,1.727
3500,1.7193
4000,1.6954
4500,1.6898
5000,1.7111


In [None]:
model_flan, tokenizer_flan, test_flan = train_and_evaluate("google/flan-t5-small")



 Training and Evaluating: google/flan-t5-small



tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/18949 [00:00<?, ? examples/s]

Map:   0%|          | 0/3269 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
500,2.404
1000,2.2189
1500,2.1418
2000,2.131
2500,2.0721
3000,2.0725
3500,2.0621
4000,2.0492
4500,2.0274
5000,2.0488


In [None]:
eval_t5 = evaluate_model(model_t5, tokenizer_t5, test_t5)
eval_flan = evaluate_model(model_flan, tokenizer_flan, test_flan)

  0%|          | 0/100 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 100/100 [04:27<00:00,  2.68s/it]


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 100/100 [02:40<00:00,  1.61s/it]


In [None]:
df_eval = pd.DataFrame([eval_t5, eval_flan], index=["t5-base", "flan-t5-small"])
print("\n Evaluation Metrics Comparison")
print(df_eval)



 Evaluation Metrics Comparison
               ROUGE-1  ROUGE-2  ROUGE-L    BLEU  BERTScore (F1)
t5-base         0.4444   0.2628   0.3408  0.1247          0.8779
flan-t5-small   0.4064   0.2296   0.3197  0.0968          0.8697


In [None]:
import nltk
nltk.download('punkt')

from nltk.tokenize import sent_tokenize
from transformers import pipeline
summarizer = pipeline("summarization", model="t5-base", tokenizer="t5-base", device=0)  # use GPU if available

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
Device set to use cuda:0


In [None]:
long_text = """
The Plaintiff, a private real estate owner, initiated a lawsuit against the Defendant, a major construction company, seeking a declaration of property ownership rights, a permanent injunction against unauthorized construction, and compensation for damages.
The Plaintiff alleged that the Defendant had commenced the construction of a multi-story commercial and residential complex on a portion of land that had been historically and exclusively owned by the Plaintiff for residential purposes since 1978.
The Plaintiff claimed that despite repeated notices, formal requests, and documented communications demanding cessation of construction, the Defendant continued with work, causing structural damage to existing residential buildings, obstructing access to adjoining properties, and creating environmental hazards, including improper waste disposal, soil erosion, disruption of drainage systems, and interference with local wildlife.
The Plaintiff further asserted that the encroachment and construction had significantly depreciated the value of the property and interfered with access, thereby constituting a continuing nuisance.

In the Defendant's written statement, it denied the allegations and contended that the Plaintiff's claims were legally unsubstantiated for the contested area.
The Defendant argued that there had been prior agreements with former landowners, granting rights to develop the property, and that all construction activities were undertaken in compliance with municipal permits, zoning laws, building codes, and environmental regulations.
It further claimed that any inconvenience or damage to the Plaintiff was incidental and not actionable under the current property and tort laws.

During pre-trial proceedings, both parties submitted extensive documentary evidence.
The Plaintiff presented original sale deeds, cadastral maps, surveyor reports, municipal notices, photographs, tax receipts, utility bills, and affidavits from historical occupants to establish continuous possession, ownership rights, and the scope of the disputed property.
The Defendant submitted municipal approvals, environmental clearance certificates, structural engineering reports, architectural plans, notarized agreements with previous landowners, and affidavits from municipal officers to demonstrate lawful entitlement to develop the site.

The Court examined all submissions, focusing on the interpretation of property boundaries, the validity of prior agreements, compliance with statutory requirements regarding construction and encroachment, and environmental safeguards.
Witness testimonies were heard from licensed surveyors, municipal officials, civil engineers, environmental experts, and long-term residents.
During cross-examination, discrepancies in some historical records, maps, and surveys were revealed, but the overarching evidence established that the Plaintiff had continuous possession and ownership over the disputed land.
The Defendant's documentation confirmed procedural compliance but did not negate the Plaintiff's legal rights to ownership and injunctive relief.

Several interim applications were filed during litigation.
The Plaintiff sought temporary restraining orders to halt ongoing construction, which were initially granted by the trial court but later modified on appeal to allow limited continuation of work pending final judgment.
The Defendant requested dismissal of the case and clarification on the scope of property rights claimed by the Plaintiff.
The Court also had to manage multiple interlocutory applications, including requests for expert inspections, appointment of neutral surveyors, and mediation attempts.

The Court considered complex legal doctrines, including adverse possession, bona fide improvements, easements, equitable relief, nuisance law, statutory compliance, and the interaction between municipal permits and private property rights.
Environmental regulations were critically examined, with expert evidence on drainage impacts, soil stability, vegetation removal, tree cutting, and local ecosystem disruption.
The Plaintiff argued that unmitigated construction caused irreparable environmental, financial, and social harm, while the Defendant emphasized procedural compliance, environmental mitigation measures, and absence of intentional wrongdoing.

After thorough deliberation, the Court analyzed historical ownership, prior agreements, statutory provisions, municipal approvals, environmental reports, and expert testimony.
The judgment highlighted the need to balance the Plaintiff's property rights with the Defendant's claims of entitlement, municipal oversight, environmental safeguards, and equitable considerations for both parties.
The Court emphasized that failure to promptly address unauthorized construction could set a precedent encouraging encroachment disputes and undermine property rights enforcement.

The Court ruled in favor of the Plaintiff, granting a permanent injunction restraining further construction on the disputed area, ordering partial removal of structures erected without consent, and directing the Defendant to compensate for damages incurred.
The Court also outlined principles for resolving similar boundary disputes, emphasizing accurate land surveys, timely registration of property rights, adherence to statutory procedures, environmental compliance, and equitable relief.
Both parties were given the opportunity to present final submissions, after which the Court reserved detailed judgment covering all factual, procedural, and legal contentions.

Additionally, the Court examined implications on community welfare, neighboring property rights, public utilities, and municipal zoning plans.
Special attention was given to historical cadastral records, prior property transactions, and continuity of possession.
The judgment underscored that municipal approvals cannot override private property rights and that environmental compliance must be balanced with lawful land ownership claims.
The Court highlighted that any construction, even if procedurally approved, that encroaches on private property or causes nuisance, can be subject to permanent injunctions.

Finally, the Court directed both parties to maintain records of compliance, implement corrective actions for any ongoing environmental damage, and participate in periodic inspections by municipal authorities and court-appointed experts.
The ruling serves as a comprehensive precedent in matters involving private land disputes, construction encroachments, municipal permits, environmental obligations, and equitable remedies, reflecting the interplay between statutory law, common law principles, and procedural safeguards.
"""


In [None]:
summarizer = pipeline(
    "summarization",
    model="t5-base",
    tokenizer="t5-base",
    device=0
)


Device set to use cuda:0


In [None]:
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

from nltk.tokenize import sent_tokenize
from transformers import pipeline

def chunk_text(text, max_chars=1000):
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = ""
    for sent in sentences:
        if len(current_chunk) + len(sent) <= max_chars:
            current_chunk += " " + sent
        else:
            chunks.append(current_chunk.strip())
            current_chunk = sent
    if current_chunk:
        chunks.append(current_chunk.strip())
    return chunks

chunks = chunk_text(long_text, max_chars=1000)

chunk_summaries = []
for i, chunk in enumerate(chunks):
    print(f" Summarizing Chunk {i+1}/{len(chunks)} ")
    summary = summarizer(chunk, max_new_tokens=300)[0]['summary_text']
    chunk_summaries.append(summary)
combined_summary = " ".join(chunk_summaries)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
Your max_length is set to 200, but your input_length is only 178. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=89)


 Summarizing Chunk 1/8 


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Your max_length is set to 200, but your input_length is only 173. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=86)


 Summarizing Chunk 2/8 


Your max_length is set to 200, but your input_length is only 186. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=93)


 Summarizing Chunk 3/8 


Your max_length is set to 200, but your input_length is only 183. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=91)


 Summarizing Chunk 4/8 


Your max_length is set to 200, but your input_length is only 150. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=75)


 Summarizing Chunk 5/8 


Your max_length is set to 200, but your input_length is only 171. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=85)


 Summarizing Chunk 6/8 


Your max_length is set to 200, but your input_length is only 146. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=73)


 Summarizing Chunk 7/8 


Your max_length is set to 200, but your input_length is only 93. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=46)


 Summarizing Chunk 8/8 


In [None]:
sentences = sent_tokenize(combined_summary)
bullet_points = [f"• {s.strip()}" for s in sentences]

print("\n Bullet Point Summary:\n")
for bp in bullet_points:
    print(bp)



 Bullet Point Summary:

• plaintiff seeks declaration of property ownership rights, injunction against unauthorized construction .
• the Defendant continued with work, causing damage to existing residential buildings, he claims .
• the Defendant contends that the Plaintiff's claims are legally unsubstantiated .
• the encroachment and construction had significantly depreciated the value of the property, it says .
• both parties submitted extensive documentary evidence .
• the plaintiff presented original sale deeds, cadastral maps, surveyor reports, municipal notices .
• the Defendant submitted municipal approvals, environmental clearance certificates .
• the Defendant's documentation confirmed procedural compliance .
• the Plaintiff sought temporary restraining orders to halt ongoing construction .
• environmental regulations were critically examined .
• the plaintiff argued that unmitigated construction caused irreparable harm .
• after thorough deliberation, the Court analyzed histo

In [None]:
summary_output = summarizer(long_text, max_new_tokens=1000)[0]['summary_text']


In [None]:

!pip install bert-score -q
from bert_score import score

def bert_score_similarity(original_text, summary_text):
    P, R, F1 = score([summary_text], [original_text], lang="en", verbose=False)
    return F1.mean().item() * 100

bert_similarity = bert_score_similarity(long_text, summary_output)
print(f" BERTScore (F1 Semantic Similarity): {bert_similarity:.2f}%")


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


 BERTScore (F1 Semantic Similarity): 84.90%
