# Fine Tuning BERT For Paraphrase Classification
Fine tuning BERT for paraphrase classification using the Microsoft Research Paraphrase Classification dataset.

In [1]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import evaluate

# Check if MPS is available and set the device
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple Silicon (MPS) backend.")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using NVIDIA CUDA backend.")
else:
    device = torch.device("cpu")
    print("Using CPU backend.")

# Load the full dataset dictionary (train and validation splits)
dataset_dict = load_dataset("nyu-mll/glue", "mrpc")

# Load the model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2).to(device)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Tokenize the dataset, truncate and pad text
def encode(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, padding="max_length")

tokenized_datasets = dataset_dict.map(encode, batched=True)

# Rename the label column to labels and remove unnecessary columns
tokenized_datasets = tokenized_datasets.map(lambda examples: {"labels": examples["label"]}, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx", "label"])
tokenized_datasets.set_format("torch")

# Get the separate train and validation datasets
train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["validation"]

# Define the metric computation function
metric = evaluate.load("glue", "mrpc")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# Define Training Arguments
training_args = TrainingArguments(
    output_dir="./mrpc-results",      # Directory to save the model and results
    logging_dir='./mrpc-logs',        # Directory for logs
    num_train_epochs=3,               # Total number of training epochs
    per_device_train_batch_size=16,   # Batch size for training
    per_device_eval_batch_size=16,    # Batch size for evaluation
    
    # --- Settings for Tracking Metrics ---
    eval_strategy="epoch",            # Run evaluation at the end of each epoch
    logging_strategy="steps",         # Log metrics during training
    logging_steps=50,                 # Log training loss every 50 steps
    
    # --- Settings for Saving the Model ---
    save_strategy="epoch",            # Save a checkpoint at the end of each epoch
    load_best_model_at_end=True,      # Load the best model found during training
    metric_for_best_model="accuracy", # Use accuracy to determine the best model
)

# 3. Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# 4. Start Training
print("Starting training...")
trainer.train()
print("Training complete!")

# 5. Save the final best model
final_model_path = "./final_mrpc_model"
print(f"Saving the best model to {final_model_path}")
trainer.save_model(final_model_path)
print("Model saved successfully.")

Using Apple Silicon (MPS) backend.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

  trainer = Trainer(


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.0,,0.316176,0.0




KeyboardInterrupt: 