In [16]:
pip install datasets transformers torch tqdm



In [17]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding
from datasets import load_dataset
from torch.utils.data import DataLoader
from torch.optim import AdamW
from tqdm.auto import tqdm

In [18]:
# Load the MRPC dataset
dataset = load_dataset("glue", "mrpc")

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-v3-base", num_labels=2)


Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [25]:
# Preprocess the data
def preprocess_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], truncation=True, padding=True)

encoded_dataset = dataset.map(preprocess_function, batched=True)

In [26]:
# Ensure the dataset has the label column
encoded_dataset = encoded_dataset.rename_column("label", "labels")
encoded_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])  # Changed line

train_dataset = encoded_dataset["train"]
eval_dataset = encoded_dataset["validation"]

# Define a data collator that will dynamically pad the inputs
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Create DataLoaders
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=16, collate_fn=data_collator)
eval_dataloader = DataLoader(eval_dataset, batch_size=16, collate_fn=data_collator)

In [27]:
# Define optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

# Move model to GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
print(device)

# Training loop
num_epochs = 3
model.train()


print(device)

cuda
cuda


In [28]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    for batch in tqdm(train_dataloader):
        optimizer.zero_grad()

        # Move batch to device
        batch = {k: v.to(device) for k, v in batch.items()}

        # Forward pass
        outputs = model(**batch)
        loss = outputs.loss

        # Backward pass
        loss.backward()
        optimizer.step()

    # Evaluation loop
    model.eval()
    eval_loss = 0
    eval_accuracy = 0
    nb_eval_steps = 0

    for batch in tqdm(eval_dataloader):
        with torch.no_grad():
            # Move batch to device
            batch = {k: v.to(device) for k, v in batch.items()}

            # Forward pass
            outputs = model(**batch)
            loss = outputs.loss
            logits = outputs.logits

            eval_loss += loss.item()

            # Compute accuracy
            predictions = torch.argmax(logits, dim=-1)
            eval_accuracy += (predictions == batch["labels"]).sum().item()
            nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / len(eval_dataset)

    print(f"Validation Loss: {eval_loss}")
    print(f"Validation Accuracy: {eval_accuracy}")

    model.train()

Epoch 1/3


  0%|          | 0/115 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Validation Loss: 0.3614845344653496
Validation Accuracy: 0.8455882352941176
Epoch 2/3


  0%|          | 0/115 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Validation Loss: 0.29627354041888165
Validation Accuracy: 0.8946078431372549
Epoch 3/3


  0%|          | 0/115 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

Validation Loss: 0.41171327290626675
Validation Accuracy: 0.8799019607843137
