In [None]:
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    Trainer, TrainingArguments, set_seed
)
from datasets import load_dataset
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import f1_score
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import torch

## Use the original (non-normalized) variant 1 dataset

In [1]:
# Identifier of the original (non-normalized) dataset; passed to load_dataset() in the next cell.
dataset_name = "victorambrose11/lex_glue_original_TFIDF-SRT"

In [None]:
# ----- Config -----
# Short display key -> pretrained checkpoint id for every transformer to fine-tune.
model_names = dict(
    legal_BERT="nlpaueb/legal-bert-base-uncased",
    legal_longformer="nyu-mll/Legal-Longformer-LS",
    legal_Roberta="lexlms/legal-roberta-base",
)

# Hyperparameters shared by all runs in this section.
learning_rate, epochs, seed = 3e-5, 20, 5

# Seed the RNGs before any data or model work happens.
set_seed(seed)

# ----- Load Dataset -----
dataset = load_dataset(dataset_name)
# Class names come from the "label" feature of the train split.
label_list = dataset["train"].features["label"].names
num_labels = len(label_list)

# ----- Sequence Classification Metric -----
def compute_f1(pred):
    """Compute micro- and macro-averaged F1 for single-label classification.

    Args:
        pred: Object exposing ``predictions`` (per-class scores of shape
            ``(n_samples, n_classes)``, or a tuple whose first element holds
            them) and ``label_ids`` (gold class ids). This is the object that
            ``Trainer`` hands to its ``compute_metrics`` callback.

    Returns:
        dict with the keys ``"micro_f1"`` and ``"macro_f1"``.
    """
    scores = pred.predictions
    # When a model returns additional outputs, the predictions arrive as a
    # tuple with the logits first; unwrap it so argmax sees a 2-D array.
    if isinstance(scores, tuple):
        scores = scores[0]
    preds = np.argmax(scores, axis=1)
    labels = pred.label_ids
    return {
        "micro_f1": f1_score(labels, preds, average="micro"),
        "macro_f1": f1_score(labels, preds, average="macro")
    }

# Filled in by the training functions below: {model key: {"Micro F1": ..., "Macro F1": ...}}.
results = {}

# ----- Training Function -----
def train_transformer_model(model_key):
    """Fine-tune one pretrained checkpoint and record its validation F1 scores.

    Reads the module-level ``model_names``, ``dataset``, ``num_labels``,
    ``epochs``, ``learning_rate`` and ``seed``, and writes the scores (as
    percentages rounded to two decimals) into the module-level ``results``
    dict under ``model_key``.

    Args:
        model_key: Key into ``model_names`` selecting the checkpoint to train.

    Raises:
        KeyError: If ``model_key`` is not present in ``model_names``.
    """
    model_checkpoint = model_names[model_key]
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

    # Preprocessing
    def preprocess(examples):
        """Tokenize a batch of texts, padding/truncating each one to 512 tokens."""
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

    encoded_dataset = dataset.map(preprocess, batched=True)
    # Trainer expects the target column to be called "labels".
    encoded_dataset = encoded_dataset.rename_column("label", "labels")
    encoded_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

    # Evaluate and checkpoint once per epoch; the checkpoint with the best
    # validation macro-F1 is reloaded when training ends.
    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy` -- confirm against the installed version.
    training_args = TrainingArguments(
        output_dir=f"./results_{model_key}",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=1,
        num_train_epochs=epochs,
        learning_rate=learning_rate,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="macro_f1",
        seed=seed,
        logging_dir=f"./logs_{model_key}",
        logging_steps=50,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset["validation"],
        compute_metrics=compute_f1,
        tokenizer=tokenizer,
    )

    trainer.train()

    metrics = trainer.evaluate()
    # Trainer.evaluate() prefixes every key returned by compute_metrics with
    # "eval_", so the scores live under "eval_micro_f1" / "eval_macro_f1".
    results[model_key] = {
        "Micro F1": round(metrics["eval_micro_f1"] * 100, 2),
        "Macro F1": round(metrics["eval_macro_f1"] * 100, 2)
    }

# ----- Train LegalBERT, Legal-Longformer and Legal-RoBERTa -----
# Each call fine-tunes one checkpoint and stores its scores in `results`.
for model_key in ("legal_BERT", "legal_longformer", "legal_Roberta"):
    train_transformer_model(model_key)


# ----- Train TF-IDF + SVM -----
def train_svm():
    """Fit a TF-IDF + linear SVM baseline and record its validation F1 scores.

    Reads the module-level ``dataset``, ``epochs`` and ``seed``, and writes the
    scores (as percentages rounded to two decimals) into the module-level
    ``results`` dict under ``"tfidf_svm"``.
    """
    print("\n🚀 Training TF-IDF + SVM...")
    train_texts = dataset["train"]["text"]
    train_labels = dataset["train"]["label"]
    val_texts = dataset["validation"]["text"]
    val_labels = dataset["validation"]["label"]

    # The vocabulary is learned on the training split only and then reused
    # unchanged for the validation split.
    vectorizer = TfidfVectorizer(max_features=10000)
    X_train = vectorizer.fit_transform(train_texts)
    X_val = vectorizer.transform(val_texts)

    # max_iter is the solver's iteration cap, not a number of epochs; it is
    # merely scaled from `epochs` here. random_state pins the solver's RNG so
    # the baseline no longer depends on how much global NumPy random state was
    # consumed by cells that ran earlier.
    clf = LinearSVC(max_iter=epochs * 100, random_state=seed)
    clf.fit(X_train, train_labels)
    preds = clf.predict(X_val)

    results["tfidf_svm"] = {
        "Micro F1": round(f1_score(val_labels, preds, average="micro") * 100, 2),
        "Macro F1": round(f1_score(val_labels, preds, average="macro") * 100, 2)
    }

train_svm()

# ----- Print Table -----
# One row per model, one column per metric.
results_df = pd.DataFrame.from_dict(results, orient="index")
print("\n📋 Results Summary:\n")
print(results_df)

# ----- Plot Chart -----
# Grouped bars on an explicit figure/axes pair; rot=0 keeps the model names horizontal.
fig, ax = plt.subplots(figsize=(10, 6))
results_df.plot(kind="bar", ax=ax, ylim=(0, 100), rot=0)
ax.set_title("Model Comparison: Micro and Macro F1 Scores")
ax.set_ylabel("F1 Score (%)")
ax.grid(axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()

## Use the normalized variant 1 dataset

In [None]:
# Identifier of the normalized dataset; passed to load_dataset() in the next cell.
normalized_dataset_name = "victorambrose11/lex_glue_normalized_TFIDF-SRT"

In [None]:
# ----- Config -----
# Short display key -> pretrained checkpoint id for every transformer to fine-tune.
model_names = dict(
    legal_BERT="nlpaueb/legal-bert-base-uncased",
    legal_longformer="nyu-mll/Legal-Longformer-LS",
    legal_Roberta="lexlms/legal-roberta-base",
)

# Hyperparameters shared by all runs in this section.
learning_rate, epochs, seed = 3e-5, 20, 5

# Re-seed the RNGs before any data or model work in this section.
set_seed(seed)

# ----- Load Dataset -----
# From here on `dataset` refers to the normalized variant.
dataset = load_dataset(normalized_dataset_name)
# Class names come from the "label" feature of the train split.
label_list = dataset["train"].features["label"].names
num_labels = len(label_list)

# ----- Sequence Classification Metric -----
# Note: shadows the function of the same name defined in the earlier section.
def compute_f1(pred):
    """Compute micro- and macro-averaged F1 for single-label classification.

    Args:
        pred: Object exposing ``predictions`` (per-class scores of shape
            ``(n_samples, n_classes)``, or a tuple whose first element holds
            them) and ``label_ids`` (gold class ids). This is the object that
            ``Trainer`` hands to its ``compute_metrics`` callback.

    Returns:
        dict with the keys ``"micro_f1"`` and ``"macro_f1"``.
    """
    scores = pred.predictions
    # When a model returns additional outputs, the predictions arrive as a
    # tuple with the logits first; unwrap it so argmax sees a 2-D array.
    if isinstance(scores, tuple):
        scores = scores[0]
    preds = np.argmax(scores, axis=1)
    labels = pred.label_ids
    return {
        "micro_f1": f1_score(labels, preds, average="micro"),
        "macro_f1": f1_score(labels, preds, average="macro")
    }

# Start a fresh score table for this section: {model key: {"Micro F1": ..., "Macro F1": ...}}.
results = {}

# ----- Training Function -----
# Note: shadows the function of the same name defined in the earlier section.
def train_transformer_model(model_key):
    """Fine-tune one pretrained checkpoint and record its validation F1 scores.

    Reads the module-level ``model_names``, ``dataset``, ``num_labels``,
    ``epochs``, ``learning_rate`` and ``seed``, and writes the scores (as
    percentages rounded to two decimals) into the module-level ``results``
    dict under ``model_key``.

    Args:
        model_key: Key into ``model_names`` selecting the checkpoint to train.

    Raises:
        KeyError: If ``model_key`` is not present in ``model_names``.
    """
    model_checkpoint = model_names[model_key]
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

    # Preprocessing
    def preprocess(examples):
        """Tokenize a batch of texts, padding/truncating each one to 512 tokens."""
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

    encoded_dataset = dataset.map(preprocess, batched=True)
    # Trainer expects the target column to be called "labels".
    encoded_dataset = encoded_dataset.rename_column("label", "labels")
    encoded_dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

    # Evaluate and checkpoint once per epoch; the checkpoint with the best
    # validation macro-F1 is reloaded when training ends.
    # NOTE(review): output_dir / logging_dir are the same paths the earlier
    # (original-dataset) section writes to -- confirm sharing them is intended.
    # NOTE(review): newer transformers releases rename `evaluation_strategy`
    # to `eval_strategy` -- confirm against the installed version.
    training_args = TrainingArguments(
        output_dir=f"./results_{model_key}",
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=1,
        num_train_epochs=epochs,
        learning_rate=learning_rate,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model="macro_f1",
        seed=seed,
        logging_dir=f"./logs_{model_key}",
        logging_steps=50,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset["validation"],
        compute_metrics=compute_f1,
        tokenizer=tokenizer,
    )

    trainer.train()

    metrics = trainer.evaluate()
    # Trainer.evaluate() prefixes every key returned by compute_metrics with
    # "eval_", so the scores live under "eval_micro_f1" / "eval_macro_f1".
    results[model_key] = {
        "Micro F1": round(metrics["eval_micro_f1"] * 100, 2),
        "Macro F1": round(metrics["eval_macro_f1"] * 100, 2)
    }

# ----- Train LegalBERT, Legal-Longformer and Legal-RoBERTa -----
# Each call fine-tunes one checkpoint and stores its scores in `results`.
for model_key in ("legal_BERT", "legal_longformer", "legal_Roberta"):
    train_transformer_model(model_key)


# ----- Train TF-IDF + SVM -----
# Note: shadows the function of the same name defined in the earlier section.
def train_svm():
    """Fit a TF-IDF + linear SVM baseline and record its validation F1 scores.

    Reads the module-level ``dataset``, ``epochs`` and ``seed``, and writes the
    scores (as percentages rounded to two decimals) into the module-level
    ``results`` dict under ``"tfidf_svm"``.
    """
    print("\n🚀 Training TF-IDF + SVM...")
    train_texts = dataset["train"]["text"]
    train_labels = dataset["train"]["label"]
    val_texts = dataset["validation"]["text"]
    val_labels = dataset["validation"]["label"]

    # The vocabulary is learned on the training split only and then reused
    # unchanged for the validation split.
    vectorizer = TfidfVectorizer(max_features=10000)
    X_train = vectorizer.fit_transform(train_texts)
    X_val = vectorizer.transform(val_texts)

    # max_iter is the solver's iteration cap, not a number of epochs; it is
    # merely scaled from `epochs` here. random_state pins the solver's RNG so
    # the baseline no longer depends on how much global NumPy random state was
    # consumed by cells that ran earlier.
    clf = LinearSVC(max_iter=epochs * 100, random_state=seed)
    clf.fit(X_train, train_labels)
    preds = clf.predict(X_val)

    results["tfidf_svm"] = {
        "Micro F1": round(f1_score(val_labels, preds, average="micro") * 100, 2),
        "Macro F1": round(f1_score(val_labels, preds, average="macro") * 100, 2)
    }

train_svm()

# ----- Print Table -----
# One row per model, one column per metric.
results_df = pd.DataFrame.from_dict(results, orient="index")
print("\n📋 Results Summary:\n")
print(results_df)

# ----- Plot Chart -----
# Grouped bars on an explicit figure/axes pair; rot=0 keeps the model names horizontal.
fig, ax = plt.subplots(figsize=(10, 6))
results_df.plot(kind="bar", ax=ax, ylim=(0, 100), rot=0)
ax.set_title("Model Comparison: Micro and Macro F1 Scores")
ax.set_ylabel("F1 Score (%)")
ax.grid(axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()