In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification

raw_datasets = load_dataset("glue", "mrpc", cache_dir='./cache')

In [2]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

def tokenize_function(example):
    return tokenizer(example["sentence1"], example["sentence2"], truncation=True)


tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

In [3]:
import evaluate
import numpy as np

def compute_metrics(eval_preds):
    metric = evaluate.load("glue", "mrpc")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

using trackio here instead of wandb, because trackio is local and we don't need an account

In [4]:
# Example of tracking loss during training with the Trainer
from transformers import Trainer, TrainingArguments
import trackio as wandb


# Initialize Weights & Biases for experiment tracking
wandb.init(project="transformer-fine-tuning", name="bert-mrpc-analysis")

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    eval_steps=50,
    save_steps=100,
    logging_steps=10,  # Log metrics every 10 steps
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    report_to="trackio",  # Send logs to trackio
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Train and automatically log metrics
trainer.train()

* Trackio project initialized: transformer-fine-tuning
* Trackio metrics logged to: /home/cpadwick/.cache/huggingface/trackio


* Created new run: bert-mrpc-analysis


    Found GPU0 NVIDIA GB10 which is of cuda capability 12.1.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (8.0) - (12.0)
    


* Trackio project initialized: huggingface
* Trackio metrics will be synced to Hugging Face Dataset: cgpadwick2020/trackio-dataset
* Found existing space: https://huggingface.co/spaces/cgpadwick2020/trackio
* View dashboard by going to: https://cgpadwick2020-trackio.hf.space/


* Created new run: cgpadwick2020-1762833441


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.4234,0.404081,0.838235,0.888514
2,0.2568,0.367878,0.855392,0.896309
3,0.0927,0.43105,0.872549,0.908772
4,0.0478,0.663587,0.865196,0.90566
5,0.1098,0.723618,0.865196,0.905336


* Run finished. Uploading logs to Trackio (please wait...)


TrainOutput(global_step=1150, training_loss=0.22997024494344773, metrics={'train_runtime': 132.6945, 'train_samples_per_second': 138.212, 'train_steps_per_second': 8.667, 'total_flos': 714950848507680.0, 'train_loss': 0.22997024494344773, 'epoch': 5.0})

In [5]:
from transformers import EarlyStoppingCallback


training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,
    logging_steps=10,  # Log metrics every 10 steps
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    report_to="trackio",  # Send logs to trackio
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# Train and automatically log metrics
trainer.train()

* Created new run: cgpadwick2020-1762833619


Step,Training Loss,Validation Loss,Accuracy,F1
50,0.103,1.065688,0.833333,0.872659
100,0.1376,0.541872,0.879902,0.915078
150,0.0932,0.691845,0.843137,0.886926
200,0.1119,0.905711,0.830882,0.8867
250,0.0502,0.898524,0.835784,0.880143


* Run finished. Uploading logs to Trackio (please wait...)


TrainOutput(global_step=250, training_loss=0.09743888974189759, metrics={'train_runtime': 36.5167, 'train_samples_per_second': 1004.473, 'train_steps_per_second': 62.985, 'total_flos': 154980633827520.0, 'train_loss': 0.09743888974189759, 'epoch': 1.0869565217391304})

In [6]:
from transformers import EarlyStoppingCallback


training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,
    logging_steps=10,  # Log metrics every 10 steps
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    learning_rate=1e-4,
    report_to="trackio",  # Send logs to trackio
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Train and automatically log metrics
trainer.train()

* Created new run: cgpadwick2020-1762833677


Step,Training Loss,Validation Loss,Accuracy,F1
50,0.0503,0.894451,0.848039,0.890845
100,0.1108,0.642441,0.852941,0.892086
150,0.1262,0.732666,0.845588,0.889279
200,0.1104,0.693013,0.843137,0.891525
250,0.1731,1.208443,0.828431,0.885621
300,0.0082,0.911451,0.833333,0.883959
350,0.115,0.894882,0.818627,0.866426
400,0.0266,1.115359,0.828431,0.882155
450,0.0482,0.733676,0.816176,0.859287
500,0.065,1.162766,0.823529,0.877133


* Run finished. Uploading logs to Trackio (please wait...)


TrainOutput(global_step=1150, training_loss=0.048566606368505114, metrics={'train_runtime': 239.4458, 'train_samples_per_second': 153.187, 'train_steps_per_second': 4.803, 'total_flos': 1506078514207440.0, 'train_loss': 0.048566606368505114, 'epoch': 10.0})