In [None]:
!pip install datasets evaluate transformers accelerate tqdm peft

Collecting datasets
  Downloading datasets-3.0.2-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading peft-0.13.2-py3-none-any.wh

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset
from evaluate import load
import torch
import logging
import warnings
from tqdm import tqdm
from peft import get_peft_model, LoraConfig, TaskType
from accelerate import Accelerator
import os

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings(action="ignore")

# Root logging is written to a UTF-8 file at DEBUG level; `logger` is this module's own logger.
logging.basicConfig(
    level=logging.DEBUG,
    filename="testingLLMs_boolq.log",
    encoding="utf-8",
)
logger = logging.getLogger(__name__)

class boolq:
    """Fine-tune a seq2seq model on SuperGLUE BoolQ with LoRA adapters and score it.

    The constructor loads the BoolQ dataset and tokenizes every split.
    ``train`` attaches LoRA adapters and runs the Hugging Face ``Trainer``;
    ``compute_metric`` generates answers for the validation split and scores
    them against the gold labels.
    """

    MAX_INPUT_LENGTH = 256   # token budget for "question + passage" prompts
    MAX_TARGET_LENGTH = 8    # token budget for the "yes"/"no" answer
    IGNORE_INDEX = -100      # label value excluded from the loss and from accuracy

    def __init__(self, model, tokenizer) -> None:
        """Move the model to the available device, load BoolQ and tokenize it.

        Args:
            model: A seq2seq language model (e.g. from ``AutoModelForSeq2SeqLM``).
            tokenizer: The tokenizer matching ``model``.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.tokenizer = tokenizer
        self._lora_applied = False

        # Only fall back to EOS when the tokenizer has no pad token of its own.
        # Overwriting an existing pad token with EOS makes pad_token_id equal to
        # the EOS id, so any "mask the padding" step also hides the real EOS.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # No manual Accelerator.prepare() here: Trainer performs its own
        # device placement / distributed wrapping of the model.

        # Load dataset
        self.dataset = load_dataset("super_glue", 'boolq', trust_remote_code=True)

        self.tokenized_dataset = self.dataset.map(
            self.__preprocess_function,
            batched=True,
            remove_columns=self.dataset["train"].column_names
        )

    @staticmethod
    def _build_prompt(question, passage):
        """Return the text prompt used for both training and inference."""
        return f"answer yes or no: {question} passage: {passage}"

    @staticmethod
    def _parse_answer(text):
        """Map generated text to a BoolQ label: 1 if it starts with "yes", else 0."""
        return 1 if text.strip().lower().startswith("yes") else 0

    @staticmethod
    def _logits_to_token_ids(logits, labels=None):
        """Reduce model logits to argmax token ids.

        Passed to ``Trainer`` as ``preprocess_logits_for_metrics`` so that only
        token ids, not full-vocabulary logits, are accumulated during evaluation.
        ``labels`` is accepted because Trainer passes it, but it is not used.
        """
        if isinstance(logits, tuple):
            # Seq2seq models may return extra tensors after the logits.
            logits = logits[0]
        return logits.argmax(dim=-1)

    @staticmethod
    def _sequence_accuracy(predictions, labels, ignore_index=-100):
        """Fraction of examples whose predicted tokens match every scored label token.

        Positions where ``labels == ignore_index`` are not compared. Examples
        with no scored position are left out of the average. Returns 0.0 when
        there is nothing to score.
        """
        if isinstance(predictions, tuple):
            predictions = predictions[0]
        if isinstance(labels, tuple):
            labels = labels[0]

        pred_ids = torch.as_tensor(predictions)
        label_ids = torch.as_tensor(labels)

        scored = label_ids != ignore_index
        has_target = scored.any(dim=-1)
        if not bool(has_target.any()):
            return 0.0

        # A row is correct when every scored position matches.
        row_correct = ((pred_ids == label_ids) | ~scored).all(dim=-1)
        return row_correct[has_target].float().mean().item()

    def __preprocess_function(self, examples):
        """Tokenize a batch of BoolQ rows into model inputs and labels.

        Args:
            examples: Batched columns with ``question``, ``passage`` and ``label``.

        Returns:
            The tokenizer output for the prompts with an added ``labels`` entry
            holding the tokenized "yes"/"no" targets. Nothing is padded here;
            the data collator used in ``train`` pads each batch.
        """
        prompts = [
            self._build_prompt(question, passage)
            for question, passage in zip(examples["question"], examples["passage"])
        ]
        targets = ["yes" if label else "no" for label in examples["label"]]

        model_inputs = self.tokenizer(
            prompts,
            max_length=self.MAX_INPUT_LENGTH,
            truncation=True
        )

        # `text_target` replaces the deprecated `as_target_tokenizer()` context.
        target_encoding = self.tokenizer(
            text_target=targets,
            max_length=self.MAX_TARGET_LENGTH,
            truncation=True
        )
        model_inputs["labels"] = target_encoding["input_ids"]

        return model_inputs

    def train(self, learning_rate=5e-5):
        """Attach LoRA adapters to the model and fine-tune it on BoolQ.

        Args:
            learning_rate: Optimizer learning rate. The default keeps the value
                Trainer used implicitly before.
                NOTE(review): adapter-only training commonly needs a larger
                rate than full fine-tuning; tune this for your run.

        Returns:
            The result of ``Trainer.train()``.
        """
        if not getattr(self, "_lora_applied", False):
            peft_config = LoraConfig(
                task_type=TaskType.SEQ_2_SEQ_LM,
                r=16,  # LoRA rank
                lora_alpha=32,  # Scaling factor
                lora_dropout=0.1,  # Dropout applied to LoRA layers
                target_modules=["q", "v"]  # Apply LoRA to attention query and value projection layers
            )

            self.model.gradient_checkpointing_enable()
            # With a frozen base model, gradient checkpointing needs the embedding
            # output to require grad. This hook provides that without unfreezing
            # the base weights, which would turn LoRA into full fine-tuning and
            # make an adapter-only checkpoint incomplete.
            self.model.enable_input_require_grads()

            self.model = get_peft_model(self.model, peft_config)
            self._lora_applied = True

        data_collator = DataCollatorForSeq2Seq(
            tokenizer=self.tokenizer,
            model=self.model,
            padding=True
        )

        def compute_metrics(eval_preds):
            # Predictions are already token ids (see _logits_to_token_ids).
            token_ids, label_ids = eval_preds
            return {
                "accuracy": self._sequence_accuracy(
                    token_ids, label_ids, ignore_index=self.IGNORE_INDEX
                )
            }

        training_args = TrainingArguments(
            output_dir='./flan_t5_boolq_lora',
            evaluation_strategy="steps",
            save_strategy="steps",
            save_steps=250,
            num_train_epochs=3,
            per_device_train_batch_size=1,
            per_device_eval_batch_size=1,
            gradient_accumulation_steps=4,
            learning_rate=learning_rate,
            logging_dir='./boolq_training_logs',
            logging_steps=10,
            save_total_limit=1,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",  # Use accuracy as the metric for best model selection
            fp16=False,
            optim="adamw_torch",
            gradient_checkpointing=True,
            eval_steps=250,
            report_to="none"
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=self.tokenized_dataset["train"],
            eval_dataset=self.tokenized_dataset["validation"],
            data_collator=data_collator,
            tokenizer=self.tokenizer,
            compute_metrics=compute_metrics,
            preprocess_logits_for_metrics=self._logits_to_token_ids
        )

        return trainer.train()

    def __predict(self, batch_size=8):
        """Generate a 0/1 prediction for every validation example.

        Args:
            batch_size: Number of prompts generated per forward pass.

        Returns:
            A list of ints (1 for "yes", 0 otherwise) in validation order.
        """
        validation = self.dataset['validation']
        predictions = []

        # Switch to eval mode so dropout is off while generating, then restore
        # whatever mode the model was in.
        was_training = self.model.training
        self.model.eval()
        try:
            for start in tqdm(range(0, len(validation), batch_size), desc="Predicting"):
                batch = validation[start:start + batch_size]
                prompts = [
                    self._build_prompt(question, passage)
                    for question, passage in zip(batch["question"], batch["passage"])
                ]
                inputs = self.tokenizer(
                    prompts,
                    max_length=self.MAX_INPUT_LENGTH,
                    padding=True,  # pad to the longest prompt in this batch only
                    truncation=True,
                    return_tensors="pt"
                ).to(self.device)

                with torch.no_grad():
                    outputs = self.model.generate(
                        **inputs,
                        max_new_tokens=self.MAX_TARGET_LENGTH
                    )

                decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
                predictions.extend(self._parse_answer(text) for text in decoded)
        finally:
            self.model.train(was_training)

        return predictions

    def compute_metric(self, metric='accuracy'):
        """Score generated validation answers with an ``evaluate`` metric.

        Args:
            metric: Name of the metric to load via ``evaluate.load``.

        Returns:
            The dict produced by the metric's ``compute``.
        """
        scorer = load(metric)
        predictions = self.__predict()
        return scorer.compute(
            predictions=predictions,
            references=self.dataset['validation']['label']
        )

if __name__ == "__main__":
    # Drop any cached CUDA memory before starting.
    torch.cuda.empty_cache()

    # Configure the CUDA caching allocator through its environment variable.
    os.environ.update(PYTORCH_CUDA_ALLOC_CONF='expandable_segments:True')

    # Base checkpoint, loaded in full precision, plus its tokenizer.
    model_name = "google/flan-t5-small"
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Fine-tune on BoolQ (LoRA is applied inside train()).
    boolq_model = boolq(model=model, tokenizer=tokenizer)
    boolq_model.train()

    # Score the fine-tuned model on the validation split.
    result = boolq_model.compute_metric()
    print(f"Model accuracy: {result}")


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/18.2k [00:00<?, ?B/s]

super_glue.py:   0%|          | 0.00/30.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.12M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9427 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3270 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3245 [00:00<?, ? examples/s]

Map:   0%|          | 0/9427 [00:00<?, ? examples/s]

Map:   0%|          | 0/3270 [00:00<?, ? examples/s]

Map:   0%|          | 0/3245 [00:00<?, ? examples/s]

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss,Accuracy
250,0.7897,0.630991,0.644648
500,0.655,0.619163,0.674312
750,0.5839,0.599622,0.684404
1000,0.6347,0.611749,0.683792
1250,0.6432,0.59418,0.704893
1500,0.6198,0.575335,0.707645
1750,0.5718,0.581899,0.706422
2000,0.5024,0.650339,0.715902
2250,0.6821,0.639389,0.713456
2500,0.6216,0.628523,0.725382


Predicting: 100%|██████████| 3270/3270 [02:42<00:00, 20.18it/s]

Model accuracy: {'accuracy': 0.7370030581039755}





In [None]:
# Persist the trained model held by `boolq_model` to a local directory.
# NOTE(review): for a PEFT-wrapped model, `save_pretrained` presumably writes only the
# adapter weights, not the base model — confirm before relying on this checkpoint alone.
boolq_model.model.save_pretrained("./flan_t5_boolq_lora_saved")

In [None]:
import shutil
from google.colab import files

In [None]:
# Zip the saved-model directory under /content and hand the archive to the browser.
saved_model_zip = shutil.make_archive(
    base_name='/content/flan_t5_boolq_lora_saved',
    format='zip',
    root_dir='/content',
    base_dir='flan_t5_boolq_lora_saved',
)
files.download(saved_model_zip)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Zip the Trainer output directory under /content and hand the archive to the browser.
training_output_zip = shutil.make_archive(
    base_name='/content/flan_t5_boolq_lora',
    format='zip',
    root_dir='/content',
    base_dir='flan_t5_boolq_lora',
)
files.download(training_output_zip)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Release cached CUDA memory held by PyTorch now that training and evaluation are done.
torch.cuda.empty_cache()