In [23]:
pip install peft



In [24]:
pip install datasets transformers torch tqdm peft



In [27]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm.auto import tqdm
from peft import LoraConfig, get_peft_model

# Function to find the correct target modules
def find_target_modules(model, target_names):
    target_modules = []
    for name, module in model.named_modules():
        for target in target_names:
            if target in name:
                target_modules.append(name)
    return target_modules

# Function to train and evaluate the model
def train_and_evaluate(r):
    print(f"\nFine-tuning with LoRA, r={r}")

    # Load the MRPC dataset
    dataset = load_dataset("glue", "mrpc")

    # Load the tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
    model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-v3-base", num_labels=2)

    # Preprocess the data
    def preprocess_function(examples):
        return tokenizer(examples['sentence1'], examples['sentence2'], truncation=True, padding=True, max_length=128)

    encoded_dataset = dataset.map(preprocess_function, batched=True)

    # Ensure the dataset has the label column
    encoded_dataset = encoded_dataset.rename_column("label", "labels")
    encoded_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    train_dataset = encoded_dataset["train"]
    eval_dataset = encoded_dataset["validation"]

    # Define a data collator that will dynamically pad the inputs
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Create DataLoaders
    train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16, collate_fn=data_collator)
    eval_dataloader = DataLoader(eval_dataset, batch_size=16, collate_fn=data_collator)

    # Apply LoRA
    target_modules = find_target_modules(model, ["attention.self.query", "attention.self.value"])
    print(f"Target Modules: {target_modules}")

    config = LoraConfig(
        r=r,
        target_modules=target_modules,
        lora_alpha=32,
        lora_dropout=0.1
    )
    model = get_peft_model(model, config)

    # Define optimizer
    optimizer = AdamW(model.parameters(), lr=2e-5)

    # Move model to GPU if available
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Training loop
    num_epochs = 3
    model.train()

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        for batch in tqdm(train_dataloader):
            optimizer.zero_grad()

            # Move batch to device
            batch = {k: v.to(device) for k, v in batch.items()}

            # Forward pass
            outputs = model(**batch)
            loss = outputs.loss

            # Backward pass
            loss.backward()
            optimizer.step()

        # Evaluation loop
        model.eval()
        eval_loss = 0
        eval_accuracy = 0
        nb_eval_steps = 0

        for batch in tqdm(eval_dataloader):
            with torch.no_grad():
                # Move batch to device
                batch = {k: v.to(device) for k, v in batch.items()}

                # Forward pass
                outputs = model(**batch)
                loss = outputs.loss
                logits = outputs.logits

                eval_loss += loss.item()

                # Compute accuracy
                predictions = torch.argmax(logits, dim=-1)
                eval_accuracy += (predictions == batch["labels"]).sum().item()
                nb_eval_steps += 1

        eval_loss = eval_loss / nb_eval_steps
        eval_accuracy = eval_accuracy / len(eval_dataset)

        print(f"Validation Loss: {eval_loss}")
        print(f"Validation Accuracy: {eval_accuracy}")

        model.train()

# Try different r factors
r_factors = [4, 16, 32]
for r in r_factors:
    train_and_evaluate(r)




Fine-tuning with LoRA, r=4


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Target Modules: ['deberta.encoder.layer.0.attention.self.query_proj', 'deberta.encoder.layer.0.attention.self.value_proj', 'deberta.encoder.layer.1.attention.self.query_proj', 'deberta.encoder.layer.1.attention.self.value_proj', 'deberta.encoder.layer.2.attention.self.query_proj', 'deberta.encoder.layer.2.attention.self.value_proj', 'deberta.encoder.layer.3.attention.self.query_proj', 'deberta.encoder.layer.3.attention.self.value_proj', 'deberta.encoder.layer.4.attention.self.query_proj', 'deberta.encoder.layer.4.attention.self.value_proj', 'deberta.encoder.layer.5.attention.self.query_proj', 'deberta.encoder.layer.5.attention.self.value_proj', 'deberta.encoder.layer.6.attention.self.query_proj', 'deberta.encoder.layer.6.attention.self.value_proj', 'deberta.encoder.layer.7.attention.self.query_proj', 'deberta.encoder.layer.7.attention.self.value_proj', 'deberta.encoder.layer.8.attention.self.query_proj', 'deberta.encoder.layer.8.attention.self.value_proj', 'deberta.encoder.layer.9.atte

  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.6365729799637427
Validation Accuracy: 0.6838235294117647
Epoch 2/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.52394676896242
Validation Accuracy: 0.6838235294117647
Epoch 3/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.5102431648052655
Validation Accuracy: 0.6911764705882353

Fine-tuning with LoRA, r=16


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

Target Modules: ['deberta.encoder.layer.0.attention.self.query_proj', 'deberta.encoder.layer.0.attention.self.value_proj', 'deberta.encoder.layer.1.attention.self.query_proj', 'deberta.encoder.layer.1.attention.self.value_proj', 'deberta.encoder.layer.2.attention.self.query_proj', 'deberta.encoder.layer.2.attention.self.value_proj', 'deberta.encoder.layer.3.attention.self.query_proj', 'deberta.encoder.layer.3.attention.self.value_proj', 'deberta.encoder.layer.4.attention.self.query_proj', 'deberta.encoder.layer.4.attention.self.value_proj', 'deberta.encoder.layer.5.attention.self.query_proj', 'deberta.encoder.layer.5.attention.self.value_proj', 'deberta.encoder.layer.6.attention.self.query_proj', 'deberta.encoder.layer.6.attention.self.value_proj', 'deberta.encoder.layer.7.attention.self.query_proj', 'deberta.encoder.layer.7.attention.self.value_proj', 'deberta.encoder.layer.8.attention.self.query_proj', 'deberta.encoder.layer.8.attention.self.value_proj', 'deberta.encoder.layer.9.atte

  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.6177108941169885
Validation Accuracy: 0.6838235294117647
Epoch 2/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.540055112196849
Validation Accuracy: 0.6838235294117647
Epoch 3/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.5193130637590702
Validation Accuracy: 0.6838235294117647

Fine-tuning with LoRA, r=32


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Target Modules: ['deberta.encoder.layer.0.attention.self.query_proj', 'deberta.encoder.layer.0.attention.self.value_proj', 'deberta.encoder.layer.1.attention.self.query_proj', 'deberta.encoder.layer.1.attention.self.value_proj', 'deberta.encoder.layer.2.attention.self.query_proj', 'deberta.encoder.layer.2.attention.self.value_proj', 'deberta.encoder.layer.3.attention.self.query_proj', 'deberta.encoder.layer.3.attention.self.value_proj', 'deberta.encoder.layer.4.attention.self.query_proj', 'deberta.encoder.layer.4.attention.self.value_proj', 'deberta.encoder.layer.5.attention.self.query_proj', 'deberta.encoder.layer.5.attention.self.value_proj', 'deberta.encoder.layer.6.attention.self.query_proj', 'deberta.encoder.layer.6.attention.self.value_proj', 'deberta.encoder.layer.7.attention.self.query_proj', 'deberta.encoder.layer.7.attention.self.value_proj', 'deberta.encoder.layer.8.attention.self.query_proj', 'deberta.encoder.layer.8.attention.self.value_proj', 'deberta.encoder.layer.9.atte

  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.6605689250505887
Validation Accuracy: 0.6838235294117647
Epoch 2/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.5415127930732874
Validation Accuracy: 0.6838235294117647
Epoch 3/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.5218032541183325
Validation Accuracy: 0.6911764705882353


In [28]:
train_and_evaluate(64)



Fine-tuning with LoRA, r=64


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Target Modules: ['deberta.encoder.layer.0.attention.self.query_proj', 'deberta.encoder.layer.0.attention.self.value_proj', 'deberta.encoder.layer.1.attention.self.query_proj', 'deberta.encoder.layer.1.attention.self.value_proj', 'deberta.encoder.layer.2.attention.self.query_proj', 'deberta.encoder.layer.2.attention.self.value_proj', 'deberta.encoder.layer.3.attention.self.query_proj', 'deberta.encoder.layer.3.attention.self.value_proj', 'deberta.encoder.layer.4.attention.self.query_proj', 'deberta.encoder.layer.4.attention.self.value_proj', 'deberta.encoder.layer.5.attention.self.query_proj', 'deberta.encoder.layer.5.attention.self.value_proj', 'deberta.encoder.layer.6.attention.self.query_proj', 'deberta.encoder.layer.6.attention.self.value_proj', 'deberta.encoder.layer.7.attention.self.query_proj', 'deberta.encoder.layer.7.attention.self.value_proj', 'deberta.encoder.layer.8.attention.self.query_proj', 'deberta.encoder.layer.8.attention.self.value_proj', 'deberta.encoder.layer.9.atte

  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.6388099331122178
Validation Accuracy: 0.6838235294117647
Epoch 2/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.5500026734975668
Validation Accuracy: 0.6838235294117647
Epoch 3/3


  0%|          | 0/230 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

Validation Loss: 0.5275630022470768
Validation Accuracy: 0.6838235294117647
