In [4]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification

raw_datasets = load_dataset("glue", "mrpc", cache_dir='./cache')

In [5]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

def tokenize_function(example):
    return tokenizer(example["sentence1"], example["sentence2"], truncation=True)


tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

In [14]:
import evaluate
import numpy as np

def compute_metrics(eval_preds):
    metric = evaluate.load("glue", "mrpc")
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

using trackio here instead of wandb, because trackio is local and we don't need an account

In [15]:
# Example of tracking loss during training with the Trainer
from transformers import Trainer, TrainingArguments
import trackio as wandb


# Initialize Weights & Biases for experiment tracking
wandb.init(project="transformer-fine-tuning", name="bert-mrpc-analysis")

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    eval_steps=50,
    save_steps=100,
    logging_steps=10,  # Log metrics every 10 steps
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    report_to="trackio",  # Send logs to trackio
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Train and automatically log metrics
trainer.train()

* Trackio project initialized: transformer-fine-tuning
* Trackio metrics logged to: /home/cpadwick/.cache/huggingface/trackio


* Created new run: bert-mrpc-analysis
* Trackio project initialized: huggingface
* Trackio metrics will be synced to Hugging Face Dataset: cgpadwick2020/trackio-dataset
* Found existing space: https://huggingface.co/spaces/cgpadwick2020/trackio
* View dashboard by going to: https://cgpadwick2020-trackio.hf.space/


* Created new run: cgpadwick2020-1761876321


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.1494,0.956016,0.808824,0.855019
2,0.1281,0.568662,0.845588,0.888496
3,0.0987,0.915307,0.818627,0.871972
4,0.0004,1.028838,0.833333,0.883562
5,0.0728,1.05796,0.838235,0.886986


* Run finished. Uploading logs to Trackio (please wait...)


TrainOutput(global_step=1150, training_loss=0.0701496794850201, metrics={'train_runtime': 139.3584, 'train_samples_per_second': 131.603, 'train_steps_per_second': 8.252, 'total_flos': 714950848507680.0, 'train_loss': 0.0701496794850201, 'epoch': 5.0})

In [16]:
from transformers import EarlyStoppingCallback


training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,
    logging_steps=10,  # Log metrics every 10 steps
    num_train_epochs=10,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    report_to="trackio",  # Send logs to trackio
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# Train and automatically log metrics
trainer.train()

* Trackio project initialized: huggingface
* Trackio metrics will be synced to Hugging Face Dataset: cgpadwick2020/trackio-dataset
* Found existing space: https://huggingface.co/spaces/cgpadwick2020/trackio
* View dashboard by going to: https://cgpadwick2020-trackio.hf.space/


* Created new run: cgpadwick2020-1761876635


Step,Training Loss,Validation Loss,Accuracy,F1
50,0.0001,1.332098,0.830882,0.879159
100,0.0616,1.355341,0.801471,0.858639
150,0.0186,1.046736,0.833333,0.881944
200,0.0004,1.182334,0.835784,0.886248
250,0.1247,1.433932,0.808824,0.873377
300,0.11,1.443094,0.818627,0.879085


* Run finished. Uploading logs to Trackio (please wait...)


TrainOutput(global_step=300, training_loss=0.04643597889119216, metrics={'train_runtime': 51.5522, 'train_samples_per_second': 711.512, 'train_steps_per_second': 44.615, 'total_flos': 186397738281600.0, 'train_loss': 0.04643597889119216, 'epoch': 1.3043478260869565})

In [18]:
from transformers import EarlyStoppingCallback


training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=50,
    save_steps=100,
    logging_steps=10,  # Log metrics every 10 steps
    num_train_epochs=10,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    learning_rate=1e-4,
    report_to="trackio",  # Send logs to trackio
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Train and automatically log metrics
trainer.train()

* Trackio project initialized: huggingface
* Trackio metrics will be synced to Hugging Face Dataset: cgpadwick2020/trackio-dataset
* Found existing space: https://huggingface.co/spaces/cgpadwick2020/trackio
* View dashboard by going to: https://cgpadwick2020-trackio.hf.space/


* Created new run: cgpadwick2020-1761877033


Step,Training Loss,Validation Loss,Accuracy,F1
50,0.0868,0.909971,0.818627,0.870175
100,0.0181,1.148268,0.791667,0.845173
150,0.0438,1.068863,0.845588,0.889667
200,0.0383,0.776802,0.833333,0.886288
250,0.0214,1.194211,0.833333,0.885135
300,0.0201,1.088498,0.823529,0.870504
350,0.035,1.234127,0.803922,0.858657
400,0.0373,1.296117,0.803922,0.855596
450,0.014,1.020792,0.838235,0.885813
500,0.0155,1.070799,0.835784,0.884283


Exception in thread Thread-64 (_init_client_background):
Traceback (most recent call last):
  File "/usr/lib/python3.12/threading.py", line 1073, in _bootstrap_inner
    self.run()
  File "/home/cpadwick/code/hf_course/.venv/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 788, in run_closure
    _threading_Thread_run(self)
  File "/usr/lib/python3.12/threading.py", line 1010, in run
    self._target(*self._args, **self._kwargs)
  File "/home/cpadwick/code/hf_course/.venv/lib/python3.12/site-packages/trackio/run.py", line 113, in _init_client_background
    self._batch_sender()
  File "/home/cpadwick/code/hf_course/.venv/lib/python3.12/site-packages/trackio/run.py", line 84, in _batch_sender
    self._client.predict(
  File "/home/cpadwick/code/hf_course/.venv/lib/python3.12/site-packages/gradio_client/client.py", line 505, in predict
    ).result()
      ^^^^^^^^
  File "/home/cpadwick/code/hf_course/.venv/lib/python3.12/site-packages/gradio_client/client.py", line 1610, in r

* Run finished. Uploading logs to Trackio (please wait...)


TrainOutput(global_step=1150, training_loss=0.027414699421092138, metrics={'train_runtime': 244.4884, 'train_samples_per_second': 150.028, 'train_steps_per_second': 4.704, 'total_flos': 1506078514207440.0, 'train_loss': 0.027414699421092138, 'epoch': 10.0})