<a href="https://colab.research.google.com/github/mlop-ai/mlop/blob/main/examples/transformers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1 align="center" style="font-family: Inter, sans-serif; font-style: normal; font-weight: 700; font-size: 72px">m:lop</h1>


In [None]:
# Install (or upgrade, quietly) mlop with its "full" extra plus the libraries
# used by the cells below.
%pip install -Uq "mlop[full]" datasets evaluate accelerate scikit-learn transformers
# Alternative: install mlop directly from the GitHub repository instead of the
# published package.
# %pip install "mlop[full] @ git+https://github.com/mlop-ai/mlop.git"
# Alternative for local development: prepend the parent of the current working
# directory to sys.path so that a checked-out copy of mlop located there is
# imported.
# import sys; import os; sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.dirname("__file__"))))
import mlop
# NOTE(review): MLOPCallback is not referenced by name in any visible cell.
# The import may be kept for a side effect (e.g. making report_to="mlop"
# resolvable by transformers) -- confirm before removing it.
from mlop.compat.transformers import MLOPCallback

# Presumably authenticates this session with mlop before a run is created --
# TODO confirm whether it prompts interactively or reads stored credentials.
mlop.login()

In [None]:
from datasets import load_dataset
import evaluate
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# One checkpoint name drives both the tokenizer and the classifier so the
# vocabulary and the weights always come from the same pretrained model.
CHECKPOINT = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)


def _tokenize_titles(batch):
    """Tokenize the ``question_title`` field of a batch, with truncation on."""
    return tokenizer(batch["question_title"], truncation=True)


# Load the dataset, expose the topic column under the name "labels", and
# attach the tokenizer outputs to every split in batched mode.
dataset = (
    load_dataset("yahoo_answers_topics")
    .rename_column("topic", "labels")
    .map(_tokenize_titles, batched=True)
)

# Size the classification head from the distinct label values found in the
# training split.
train_label_values = dataset["train"].unique("labels")
model = AutoModelForSequenceClassification.from_pretrained(
    CHECKPOINT, num_labels=len(train_label_values)
)

# Metric object used by compute_metrics during evaluation.
accuracy_metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level accuracy metric.

    Args:
        eval_pred: A two-item iterable. The first item holds the per-class
            scores (indexable along axis 1), the second the reference labels.

    Returns:
        Whatever ``accuracy_metric.compute`` returns for the arg-max class
        indices compared against the reference labels.
    """
    class_scores, references = eval_pred
    # Collapse the per-class scores to the index of the highest-scoring class.
    predicted_ids = np.argmax(class_scores, axis=1)
    return accuracy_metric.compute(predictions=predicted_ids, references=references)

In [None]:
from transformers import Trainer, TrainingArguments

# Training configuration, grouped by concern and expanded into
# TrainingArguments below. Keyword order has no effect on the result.
training_config = dict(
    # --- run identity and output location ---
    run_name="custom_training",
    report_to="mlop",
    output_dir=".mlop",
    overwrite_output_dir=True,
    # --- optimisation: a short, fixed-length run ---
    learning_rate=5e-5,
    max_steps=100,
    # --- cadence (all in optimizer steps) ---
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=30,
    # save_steps is kept a whole multiple of eval_steps (90 = 3 * 30), which
    # transformers requires when load_best_model_at_end is enabled.
    save_steps=90,
    # --- best-checkpoint selection ---
    load_best_model_at_end=True,
    # Presumably matches the key reported by compute_metrics -- confirm.
    metric_for_best_model="accuracy",
)

args = TrainingArguments(**training_config)

# Wire the model, the training configuration, both dataset splits and the
# metric function into a Trainer.
#
# The tokenizer is passed as `processing_class`: the older `tokenizer=`
# keyword was deprecated in transformers 4.46 and is scheduled for removal in
# v5, and this notebook installs the newest transformers release.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Run the training loop configured above.
trainer.train()
# Presumably closes the active mlop run once training has returned -- confirm
# against the mlop documentation. Not reached if train() raises.
mlop.finish()