In [None]:
!pip install datasets evaluate transformers accelerate tqdm peft

Collecting datasets
  Downloading datasets-3.0.2-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading peft-0.13.2-py3-none-any.wh

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset
from evaluate import load
import torch
import logging
import warnings
from tqdm import tqdm
from peft import get_peft_model, LoraConfig, TaskType
from accelerate import Accelerator
import os

# Send DEBUG-and-above log records to a UTF-8 encoded file next to the notebook.
logging.basicConfig(
    level=logging.DEBUG,
    filename="testingLLMs_copa.log",
    encoding="utf-8",
)
logger = logging.getLogger(__name__)

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

class CoPA:
    """Fine-tune (with LoRA) and evaluate a seq2seq model on SuperGLUE COPA.

    Each example is rendered as the prompt
    ``"{premise} what is the {question}: {choice1} or {choice2}?"`` and the
    model is expected to answer with the text ``"1"`` (label 0) or ``"2"``
    (any other label).

    Args:
        model: A seq2seq language model (e.g. loaded with
            ``AutoModelForSeq2SeqLM``). It is moved to CUDA when available.
        tokenizer: The tokenizer matching ``model``.

    Attributes:
        device: ``torch.device`` the model was moved to.
        dataset: The raw ``super_glue``/``copa`` dataset dict.
        tokenized_dataset: ``dataset`` mapped through the preprocessing
            function. Sequences are truncated but NOT padded; padding is
            done per batch by the data collator used in :meth:`train`.
    """

    # Truncation limits (in tokens) for prompts and answer targets.
    MAX_INPUT_LENGTH = 256
    MAX_TARGET_LENGTH = 8

    def __init__(self, model, tokenizer) -> None:
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.tokenizer = tokenizer

        # Only fall back to EOS when the tokenizer has no pad token at all.
        # Unconditionally aliasing pad to EOS makes pad_token_id equal to
        # eos_token_id, so any "mask the padding" step also masks the EOS
        # token out of the labels.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # NOTE: no Accelerator.prepare() here. Trainer sets up its own
        # accelerator, and a tokenizer is not an object prepare() acts on.

        # Load CoPA dataset
        self.dataset = load_dataset("super_glue", 'copa', trust_remote_code=True)

        self.tokenized_dataset = self.dataset.map(
            self.__preprocess_function,
            batched=True,
            remove_columns=self.dataset["train"].column_names
        )

    @staticmethod
    def _build_prompt(premise, question, choice1, choice2):
        """Render one COPA example as the text prompt fed to the model."""
        return f"{premise} what is the {question}: {choice1} or {choice2}?"

    @staticmethod
    def _exact_match_accuracy(predictions, labels):
        """Return ``{"accuracy": fraction of rows whose scored tokens all match}``.

        Args:
            predictions: Predicted token ids, shape ``(num_examples, seq_len)``
                (tensor, array or nested list).
            labels: Reference token ids of the same shape; positions equal to
                ``-100`` are ignored.

        Returns:
            A dict with a single ``"accuracy"`` float (``0.0`` for empty input).
        """
        predictions = torch.as_tensor(predictions)
        labels = torch.as_tensor(labels)

        scored = labels != -100
        # A row is correct when every scored position matches; ignored
        # positions are treated as matching.
        row_correct = ((predictions == labels) | ~scored).all(dim=-1)
        if row_correct.numel() == 0:
            return {"accuracy": 0.0}
        return {"accuracy": row_correct.float().mean().item()}

    def __preprocess_function(self, examples):
        """Tokenize a batch of COPA examples into model inputs and labels.

        Args:
            examples: A batch (dict of lists) with the keys ``premise``,
                ``question``, ``choice1``, ``choice2`` and ``label``.

        Returns:
            The tokenizer output for the prompts with an added ``"labels"``
            entry holding the token ids of the answer text.
        """
        inputs = [
            self._build_prompt(premise, question, choice1, choice2)
            for premise, question, choice1, choice2 in zip(
                examples["premise"], examples["question"], examples["choice1"], examples["choice2"]
            )
        ]

        targets = [
            "1" if label == 0 else "2"
            for label in examples["label"]
        ]

        # No padding here: the DataCollatorForSeq2Seq built in train() pads
        # each batch, so padding every row to the maximum length up front
        # only adds tokens the model has to process.
        model_inputs = self.tokenizer(
            inputs,
            max_length=self.MAX_INPUT_LENGTH,
            truncation=True
        )

        # `text_target=` replaces the deprecated as_target_tokenizer() context.
        labels = self.tokenizer(
            text_target=targets,
            max_length=self.MAX_TARGET_LENGTH,
            truncation=True
        )

        # Unpadded targets contain no pad tokens, so nothing needs to be
        # rewritten to -100 here and the EOS token stays part of the target.
        model_inputs["labels"] = labels["input_ids"]

        return model_inputs

    def train(self):
        """Fine-tune the model with LoRA adapters using ``Trainer``.

        Returns:
            Whatever ``Trainer.train()`` returns.
        """
        # Local import: only needed for the re-entry guard below.
        from peft import PeftModel

        # Enable LoRA (only once, so re-running train() does not stack a
        # second adapter wrapper on top of the first).
        if not isinstance(self.model, PeftModel):
            peft_config = LoraConfig(
                task_type=TaskType.SEQ_2_SEQ_LM,
                r=16,  # LoRA rank
                lora_alpha=32,  # Scaling factor
                lora_dropout=0.1,  # Dropout applied to LoRA layers
                target_modules=["q", "v"]  # Apply LoRA to attention query and value projection layers
            )
            self.model = get_peft_model(self.model, peft_config)

        self.model.gradient_checkpointing_enable()
        # With a frozen base model, gradient checkpointing needs the embedding
        # output to require grad. This replaces the previous loop that set
        # requires_grad=True on EVERY parameter, which unfroze the whole base
        # model and turned LoRA into full fine-tuning.
        self.model.enable_input_require_grads()

        data_collator = DataCollatorForSeq2Seq(
            tokenizer=self.tokenizer,
            model=self.model,
            padding=True
        )

        def preprocess_logits_for_metrics(logits, labels):
            # Reduce logits to token ids per batch so the evaluation loop
            # does not accumulate full-vocabulary logits for the whole set.
            if isinstance(logits, tuple):
                logits = logits[0]
            return logits.argmax(dim=-1)

        def compute_metrics(eval_preds):
            predictions, labels = eval_preds
            predictions = predictions[0] if isinstance(predictions, tuple) else predictions
            labels = labels[0] if isinstance(labels, tuple) else labels
            # Per-example exact match (teacher-forced), ignoring -100 positions.
            return CoPA._exact_match_accuracy(predictions, labels)

        training_args = TrainingArguments(
            output_dir='./flan_t5_copa_lora',
            evaluation_strategy="steps",
            save_strategy="steps",
            save_steps=50,
            num_train_epochs=50,
            per_device_train_batch_size=1,
            per_device_eval_batch_size=1,
            gradient_accumulation_steps=4,
            logging_dir='./copa_training_logs',
            logging_steps=10,
            save_total_limit=1,
            load_best_model_at_end=True,
            metric_for_best_model="accuracy",
            fp16=False,
            optim="adamw_torch",
            gradient_checkpointing=True,
            eval_steps=50,
            report_to="none"
        )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=self.tokenized_dataset["train"],
            eval_dataset=self.tokenized_dataset["validation"],
            data_collator=data_collator,
            tokenizer=self.tokenizer,
            compute_metrics=compute_metrics,
            preprocess_logits_for_metrics=preprocess_logits_for_metrics
        )

        return trainer.train()

    def __predict(self):
        """Generate an answer for every validation example.

        Returns:
            A list of predicted labels: ``0`` when the decoded output is
            exactly ``"1"``, otherwise ``1``.
        """
        # Make sure dropout (including LoRA dropout) is off while generating.
        self.model.eval()

        predictions = []
        for example in tqdm(self.dataset['validation'], desc="Predicting"):
            input_text = self._build_prompt(
                example['premise'], example['question'], example['choice1'], example['choice2']
            )
            # One example at a time, so no padding is needed.
            inputs = self.tokenizer(
                input_text,
                max_length=self.MAX_INPUT_LENGTH,
                truncation=True,
                return_tensors="pt"
            ).to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(**inputs)

            pred = self.tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
            # Anything other than the exact string "1" is counted as choice 2.
            predictions.append(0 if pred == "1" else 1)

        return predictions

    def compute_metric(self, metric='accuracy'):
        """Score generated validation predictions with an ``evaluate`` metric.

        Args:
            metric: Name passed to ``evaluate.load``.

        Returns:
            The result of the metric's ``compute`` call on the validation labels.
        """
        metric = load(metric)
        predictions = self.__predict()
        return metric.compute(
            predictions=predictions,
            references=self.dataset['validation']['label']
        )



In [None]:

# Set the allocator option before any CUDA call in this cell.
# NOTE(review): the setting is presumably read when the CUDA allocator is
# first initialised, so it may have no effect if an earlier cell already
# used the GPU in this kernel — confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Clear CUDA cache
torch.cuda.empty_cache()

model_name = "google/flan-t5-small"
model = AutoModelForSeq2SeqLM.from_pretrained(
  model_name,
  torch_dtype=torch.float32
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Build the COPA wrapper (loads and tokenizes the dataset); no training happens in this cell
copa_model = CoPA(model=model, tokenizer=tokenizer)

# Evaluate the pretrained model before any fine-tuning (baseline)
result = copa_model.compute_metric()
print(f"Model accuracy: {result}")

Map:   0%|          | 0/400 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Predicting: 100%|██████████| 100/100 [00:12<00:00,  7.80it/s]

Model accuracy: {'accuracy': 0.45}





In [None]:
# Fine-tune with LoRA via CoPA.train(); the Trainer is configured to evaluate
# and checkpoint every 50 steps.
copa_model.train()


Step,Training Loss,Validation Loss,Accuracy
50,1.2274,0.816694,0.5
100,0.8565,0.71977,0.51
150,0.802,0.714355,0.48
200,0.8629,0.695487,0.52
250,0.7269,0.710853,0.49
300,0.8784,0.753303,0.44
350,0.6782,0.928468,0.45
400,0.7061,0.714335,0.52
450,0.6957,0.887068,0.45
500,0.7086,0.744367,0.42


In [None]:
# Re-run the generation-based validation accuracy on the same `copa_model`
# after the training cell. This overwrites `result` from the baseline cell.
result = copa_model.compute_metric()
print(f"Model accuracy: {result}")