 Install Dependencies

In [3]:
!pip install transformers datasets evaluate peft huggingface_hub accelerate



Imports and Initialization

In [1]:
import torch
import evaluate
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import LoraConfig, TaskType, get_peft_model, PeftModel
from huggingface_hub import HfApi
from huggingface_hub.utils import RepositoryNotFoundError

Setup and Parameters

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))

# Model and dataset IDs
model_name = "google/mt5-small"
local_dataset_id = "benitoals/my-txt-dataset"
hf_dataset_id = "CShorten/ML-ArXiv-Papers"

local_model_repo_id = "benitoals/my-lora"
hf_model_repo_id = "benitoals/my-lora-hf"
combined_model_repo_id = "benitoals/my-lora-local-combined"

tokenizer = AutoTokenizer.from_pretrained(model_name)
rouge = evaluate.load("rouge")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Preprocessing Function

In [4]:
def preprocess_function(examples, tokenizer, body_key, summary_key, max_input_len=512, max_target_len=256):
    inputs = examples[body_key]
    targets = examples[summary_key]

    model_inputs = tokenizer(inputs, max_length=max_input_len, truncation=True)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=max_target_len, truncation=True)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

 Load and Prepare Datasets

In [5]:
local_data = load_dataset(local_dataset_id)

# Filtering short summaries
local_data = local_data.filter(lambda x: len(x["summary"].split()) >= 50)
print("Local dataset loaded:", local_data)

hf_data = load_dataset(hf_dataset_id, split="train").shuffle(seed=42).select(range(1000))
print("Hugging Face dataset loaded:", hf_data)

Filter:   0%|          | 0/51 [00:00<?, ? examples/s]

Filter:   0%|          | 0/11 [00:00<?, ? examples/s]

Filter:   0%|          | 0/11 [00:00<?, ? examples/s]

Local dataset loaded: DatasetDict({
    train: Dataset({
        features: ['filename', 'summary', 'body'],
        num_rows: 50
    })
    validation: Dataset({
        features: ['filename', 'summary', 'body'],
        num_rows: 11
    })
    test: Dataset({
        features: ['filename', 'summary', 'body'],
        num_rows: 11
    })
})
Hugging Face dataset loaded: Dataset({
    features: ['Unnamed: 0.1', 'Unnamed: 0', 'title', 'abstract'],
    num_rows: 1000
})


LoRA Training Function

In [6]:
def train_lora(base_model, dataset, tokenizer, model_repo_id, 
               body_key="body", summary_key="summary", 
               num_epochs=4, learning_rate=1e-4, skip_if_hf_exists=True,
               freeze_base=False):
    """Fine-tunes a model using LoRA, checks HF repo to skip training if already exists,
       and optionally freezes the base model parameters (non-adapter) before training."""
    device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))

    # Check if model exists on Hugging Face
    api = HfApi()
    try:
        info = api.repo_info(model_repo_id, repo_type="model")
        print(f"\n[Skipping Training?] {model_repo_id} found on HF. Checking for adapter config...")
        
        # This will try to load the adapter config and weights.
        loaded_lora_model = PeftModel.from_pretrained(base_model, model_repo_id)
        print("\n=== LoRA Model Successfully Loaded ===")
        # print(loaded_lora_model)  # Debug info
        print(f"Found LoRA adapter in {model_repo_id}, skipping training.")
        
        loaded_lora_model.to(device)
        return loaded_lora_model
    except (RepositoryNotFoundError, ValueError, OSError) as e:
        print(f"HF repo {model_repo_id} found but no valid LoRA adapter inside (or missing adapter_config.json).")
        print(f"Proceeding with training. Error was: {e}")

    base_model.to(device)

    peft_config = LoraConfig(
        task_type=TaskType.SEQ_2_SEQ_LM,
        r=1, lora_alpha=16, lora_dropout=0.2,
        target_modules=["q", "v"]
    )
    lora_model = get_peft_model(base_model, peft_config).to(device)

    # If freeze_base is True, freeze all parameters except those related to LoRA
    if freeze_base:
        for name, param in lora_model.named_parameters():
            if "lora_" not in name:
                param.requires_grad = False
        print("Base model parameters frozen. Only LoRA adapter parameters will be updated.")

    # Handle dataset splits: if dataset is not a dict, create splits
    if isinstance(dataset, dict):
        train_ds = dataset["train"]
        eval_ds = dataset["validation"]
        test_ds = dataset["test"]
    else:
        splits = dataset.train_test_split(test_size=0.2)
        eval_test = splits["test"].train_test_split(test_size=0.5)
        train_ds = splits["train"]
        eval_ds = eval_test["train"]
        test_ds = eval_test["test"]

    # Tokenize each split separately using their own column names
    def tokenize_dataset(ds):
        return ds.map(lambda x: preprocess_function(x, tokenizer, body_key, summary_key),
                      batched=True,
                      remove_columns=ds.column_names)
    
    tokenized_train = tokenize_dataset(train_ds)
    tokenized_eval = tokenize_dataset(eval_ds)
    tokenized_test = tokenize_dataset(test_ds)
    
    # Prepare a dictionary for consistency
    tokenized_ds = {"train": tokenized_train, "validation": tokenized_eval, "test": tokenized_test}

    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=lora_model, label_pad_token_id=-100)

    def compute_metrics(eval_preds):
        preds, labels = eval_preds
        if isinstance(preds, tuple):
            preds = preds[0]
        # Fix shape issues
        if preds.ndim == 3 and preds.shape[1] == 1:
            preds = np.squeeze(preds, axis=1)
        if labels.ndim == 3 and labels.shape[1] == 1:
            labels = np.squeeze(labels, axis=1)
        preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
        labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
        preds = np.clip(preds, 0, tokenizer.vocab_size - 1)
        labels = np.clip(labels, 0, tokenizer.vocab_size - 1)
        decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
        result = evaluate.load("rouge").compute(predictions=decoded_preds, references=decoded_labels)
        if isinstance(result["rouge1"], float):
            return {k: v * 100 for k, v in result.items()}
        return {k: v.mid.fmeasure * 100 for k, v in result.items()}

    training_args = Seq2SeqTrainingArguments(
        output_dir="model_lora_temp",
        evaluation_strategy="steps",
        save_strategy="steps",
        load_best_model_at_end=True,
        learning_rate=learning_rate,
        max_grad_norm=0.1,
        eval_steps=5,
        save_steps=5,
        num_train_epochs=num_epochs,
        predict_with_generate=True,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        weight_decay=0.01,
        logging_steps=5,
        push_to_hub=True,
        hub_model_id=model_repo_id,
        hub_strategy="end",
        report_to=["tensorboard"]
    )

    trainer = CustomSeq2SeqTrainer(
        model=lora_model,
        args=training_args,
        train_dataset=tokenized_ds["train"],
        eval_dataset=tokenized_ds["validation"],
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    print(f"\n=== Start LoRA Fine-tuning on {model_repo_id} ===")
    trainer.train()
    print("=== LoRA Fine-tuning complete ===")

    # Save LoRA weights locally and push to Hugging Face
    trainer.save_model()
    lora_model.save_pretrained(training_args.output_dir)

    final_eval = trainer.evaluate(tokenized_ds["test"])
    print("Trainer Evaluate (test set):", final_eval)

    return lora_model


ROUGE Score

In [8]:
def get_rouge_scores(model, dataset, tokenizer, device, body_key="body", summary_key="summary", max_length=128, num_beams=3):
    """Evaluate a model by generating summaries and comparing with reference summaries.
       Uses bad_words_ids to prevent generation of <extra_id_0>."""
    debug = True

    rouge = evaluate.load("rouge")
    preds, refs = [], []
    
    # Get the token id for <extra_id_0>
    bad_token_id = tokenizer.convert_tokens_to_ids("<extra_id_0>")
    bad_words = [[bad_token_id]]

    for i, ex in enumerate(dataset):
        body_text = ex[body_key]
        ref_text  = ex[summary_key]
        if not body_text.strip():
            preds.append("")
            refs.append(ref_text)
            continue

        input_ids = tokenizer.encode("summarize: " + body_text, return_tensors="pt", truncation=True, max_length=512).to(device)
        outputs = model.generate(
            input_ids=input_ids,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
            no_repeat_ngram_size=3,  # Avoid repeated words
            do_sample=True,  # Use deterministic generation
            temperature=0.7,  # Ensure stable output
            top_k=50,  # Prevent degenerate outputs
            top_p=0.95, # Ensure diverse summaries
            bad_words_ids=bad_words 
        )
        pred_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        preds.append(pred_text)
        refs.append(ref_text)

        # Print a few debug examples
        if debug and i < 3:
            print(f"\n--- Debug Example {i} ---")
            print("Input (first 300 chars):", body_text[:300])
            print("Predicted Summary:", pred_text)
            print("Reference Summary:", ref_text)
            print("-" * 50)

    result = rouge.compute(predictions=preds, references=refs)
    # Convert floats to percentages if needed
    if isinstance(result["rouge1"], float):
        return {k: v * 100 for k, v in result.items()}
    return {k: v.mid.fmeasure * 100 for k, v in result.items()}


Training the Four Models

Baseline (Pretrained)

In [9]:
baseline_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
baseline_rouge = get_rouge_scores(baseline_model, local_data["test"], tokenizer, device)
print("Baseline ROUGE Scores:", baseline_rouge)


--- Debug Example 0 ---
Input (first 300 chars): Empirical Study of PLC Authentication Protocols in Industrial Control Systems Adeen Ayub Department of Computer Science Virginia Commonwealth University Richmond, United States of America ayuba2@vcu.eduHyunguk Yoo Department of Computer Science The University of New Orleans New Orleans, United State
Predicted Summary: <extra_id_0>- tionDirect.com
Reference Summary: Programmable logic controllers (PLCs) run a  con- trol logic  program that de nes how to control a physicalprocess such as a nuclear plant, power grid stations, and gas pipelines. Attackers target the control logic of a PLC to sabotage a physical process. Most PLCs employ password- based authentication mechanisms to prevent unauthorized remoteaccess to control logic. This paper presents an empirical study on proprietary authentication mechanisms in  ve industry-scale PLCs to understand the security-design practices of four popular ICS vendors, i.e., Allen-Bradley, Schneider E

LoRA on Local Dataset

In [10]:
local_lora_model = train_lora(baseline_model, local_data, tokenizer, local_model_repo_id)
local_lora_rouge = get_rouge_scores(local_lora_model, local_data["test"], tokenizer, device)
print("Local LoRA ROUGE Scores:", local_lora_rouge)


[Skipping Training?] benitoals/my-lora found on HF. Checking for adapter config...

=== LoRA Model Successfully Loaded ===
Found LoRA adapter in benitoals/my-lora, skipping training.

--- Debug Example 0 ---
Input (first 300 chars): Empirical Study of PLC Authentication Protocols in Industrial Control Systems Adeen Ayub Department of Computer Science Virginia Commonwealth University Richmond, United States of America ayuba2@vcu.eduHyunguk Yoo Department of Computer Science The University of New Orleans New Orleans, United State
Predicted Summary: <extra_id_0> PLCs (PLC)s. The PLC uses a control-logic program to control a physical process (PLC). The software uses an authentication protocol. The software is based on the PLC (PLC), which allows PLC to control the control- logic. These systems are known as PLC. However, PLC is responsible for controlling a system (PLCs) to analyze the remote control systems (ICS) and PLC systems. This PLC can be controlled by PLC-s. This paper presents an

LoRA on HF Science Dataset

In [11]:
hf_lora_model = train_lora(baseline_model, hf_data, tokenizer, hf_model_repo_id, 
                           body_key="title", summary_key="abstract")
hf_lora_rouge = get_rouge_scores(hf_lora_model, local_data["test"], tokenizer, device)
print("HF LoRA ROUGE Scores:", hf_lora_rouge)


[Skipping Training?] benitoals/my-lora-hf found on HF. Checking for adapter config...

=== LoRA Model Successfully Loaded ===
Found LoRA adapter in benitoals/my-lora-hf, skipping training.

--- Debug Example 0 ---
Input (first 300 chars): Empirical Study of PLC Authentication Protocols in Industrial Control Systems Adeen Ayub Department of Computer Science Virginia Commonwealth University Richmond, United States of America ayuba2@vcu.eduHyunguk Yoo Department of Computer Science The University of New Orleans New Orleans, United State
Predicted Summary: <extra_id_0> a - - the a- s - of a system that uses a method of authentication . using a software using the software. a. s. based algorithm. which is connected with the algorithm of the PLC. where their authentication and authentication in a computer based system. . and a the .. - and which the e- n e . the s in the implementation of these algorithms.., which allows 
Reference Summary: Programmable logic controllers (PLCs) run a  con- t

HF + Local Fine-tuning

In [12]:
combined_lora_model = train_lora(hf_lora_model, local_data, tokenizer, combined_model_repo_id, freeze_base=True)
combined_lora_rouge = get_rouge_scores(combined_lora_model, local_data["test"], tokenizer, device)
print("Combined (HF+Local) ROUGE Scores:", combined_lora_rouge)


[Skipping Training?] benitoals/my-lora-local-combined found on HF. Checking for adapter config...

=== LoRA Model Successfully Loaded ===
Found LoRA adapter in benitoals/my-lora-local-combined, skipping training.

--- Debug Example 0 ---
Input (first 300 chars): Empirical Study of PLC Authentication Protocols in Industrial Control Systems Adeen Ayub Department of Computer Science Virginia Commonwealth University Richmond, United States of America ayuba2@vcu.eduHyunguk Yoo Department of Computer Science The University of New Orleans New Orleans, United State
Predicted Summary: <extra_id_0> logic (PLCs) are used in PLCs. In industrial control systems (ICS), the PLC uses a control-logic program (PLC). The PLC is based on a physical process (PLC) using a software based PLC. They operate remotely. These systems rely on PLC-s. The software is designed to control the physical processes. The ICS uses the control logic based control- logic (ICs) to control physical control systems. In ICS, the

Interactive Model Evaluation

 Imports and Setup

In [16]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel
import evaluate

device = torch.device("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu"))

model_name = "google/mt5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
rouge = evaluate.load("rouge")



 Helper Functions

In [17]:
def generate_summary(model, tokenizer, text, device, max_length=128):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).input_ids.to(device)
    outputs = model.generate(
        input_ids=inputs,
        max_length=max_length,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=3
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

def evaluate_summary(pred, ref):
    scores = rouge.compute(predictions=[pred], references=[ref])
    return {k: round(v * 100, 2) for k, v in scores.items()}

def load_model(base_model_name, repo_id, device):
    base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name).to(device)
    if repo_id:
        try:
            model = PeftModel.from_pretrained(base_model, repo_id).to(device)
            print(f"Loaded LoRA adapter from {repo_id}")
            return model
        except Exception as e:
            print(f"Error loading LoRA adapter from {repo_id}: {e}. Using base model instead.")
            return base_model
    else:
        print("Using baseline pretrained model")
        return base_model

Load All Models

In [15]:
model_repos = {
    "Baseline (Pretrained)": None,
    "LoRA Local": "benitoals/my-lora",
    "HF Science Dataset": "benitoals/my-lora-hf",
    "HF + Local Fine-tuned": "benitoals/my-lora-local-combined"
}

models = {}

for model_label, repo_id in model_repos.items():
    print(f"\nLoading {model_label}...")
    models[model_label] = load_model(model_name, repo_id, device).eval()


Loading Baseline (Pretrained)...
Using baseline pretrained model

Loading LoRA Local...
Loaded LoRA adapter from benitoals/my-lora

Loading HF Science Dataset...
Loaded LoRA adapter from benitoals/my-lora-hf

Loading HF + Local Fine-tuned...
Loaded LoRA adapter from benitoals/my-lora-local-combined




Load Input and Reference from .txt files

In [18]:
# Load input_text from file
with open('input_text.txt', 'r', encoding='utf-8') as f:
    input_text = f.read().strip()

# Load reference_summary from file
with open('abstract.txt', 'r', encoding='utf-8') as f:
    reference_summary = f.read().strip()

print("Loaded input_text:\n", input_text[:500], "...\n")
print("Loaded abstract as reference_summary:\n", reference_summary[:300], "...")

Loaded input_text:
 I. I NTRODUCTION
Processors are becoming more and more important in the
world. Today processors can be seen almost everywhere and in
the future they will only become more abundant. There are also
many types of computers such microcontrollers, distributed
control systems (DCSs), and regular personal computers.
However, most factories use programmable logic controllers
(PLCs) for system control. While these are not very fast, they
are very reliable, being able to run 24 hours a day, seven days a
w ...

Loaded abstract as reference_summary:
 Formal Veriﬁcation of Ladder
Logic programs using NuSMV
Sam Kottler, Mehdy Khayamy,ySyed Rafay Hasan,zand Omar Elkeelanyx
Colorado College, Department of Mathematics and Computer Science, sam.kottler@coloradocollege.edu
y z xTennessee Technological University, Electrical and Computer Engineering D ...


Evaluate and Summarize

In [19]:
for model_label, model in models.items():
    print(f"\n=== {model_label} ===")
    summary = generate_summary(model, tokenizer, input_text, device)
    scores = evaluate_summary(summary, reference_summary)

    print("\nGenerated Summary:\n", summary)
    print("\nReference Abstract:\n", reference_summary)
    print("\nROUGE Scores:", scores)
    print("-" * 80)


=== Baseline (Pretrained) ===

Generated Summary:
 <extra_id_0> could interfere.

Reference Abstract:
 Formal Veriﬁcation of Ladder
Logic programs using NuSMV
Sam Kottler, Mehdy Khayamy,ySyed Rafay Hasan,zand Omar Elkeelanyx
Colorado College, Department of Mathematics and Computer Science, sam.kottler@coloradocollege.edu
y z xTennessee Technological University, Electrical and Computer Engineering Department
ymkhayamy42@students.tntech.edu,zshasan@tntech.edu,xoelkeelany@tntech.edu
Abstract —Programmable logic controllers (PLCs) are heavy-
duty computers used to control industrial systems. For many
years these systems were physically separated from any other
network making attacks extremely difﬁcult. However, these in-
creasingly connected systems have not improved much in terms of
security, leaving them vulnerable to attacks. This paper attempts
to show that ladder logic programs for PLCs can be modeled
in NuSMV and veriﬁed using computational tree logic (CTL)
speciﬁcations. This pap