In [1]:
pip install mlflow

Collecting mlflow
  Downloading mlflow-3.1.1-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==3.1.1 (from mlflow)
  Downloading mlflow_skinny-3.1.1-py3-none-any.whl.metadata (30 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.1.1->mlflow)
  Downloading databricks_sdk-0.57.0-py3-none-any.whl.metadata (39 kB)
Collecting fastapi<1 (from mlflow-skinny==3.1.1->mlflow)
  Downloading fastapi-0.116.0-py3-none-any.whl.metadata (28 kB)
Collecting uvicorn<1 (from mlflow-skinny==3.1.1->mlflow)
  Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)
Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)
  Downloadi

In [2]:
import mlflow
from transformers import AutoTokenizer,AutoModelForSequenceClassification,Trainer,TrainingArguments,EarlyStoppingCallback
import torch
from datasets import load_dataset
from datetime import datetime
import os
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
os.environ["HF_DATASETS_CACHE"] = "./hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "./transformers_cache"

mlflow.set_experiment("MLflow DistilRoBERTa Sentiment Analysis")

dataset = load_dataset("yelp_review_full")
train_dataset = dataset["train"].shuffle(seed=42).select(range(100000))
val_dataset = dataset["test"].shuffle(seed=42).select(range(25000))

print(f"Train size: {len(train_dataset)}")
print(f"Validation size: {len(val_dataset)}")

base_model_id = "distilroberta-base"
MAX_LENGTH = 512

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token or tokenizer.unk_token or tokenizer.cls_token

def tokenize_and_pad(sample):
    encodings = tokenizer(
        text=sample["text"],
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH
    )
    encodings["labels"] = sample["label"]
    return encodings

train_dataset = train_dataset.map(tokenize_and_pad, batched=True, remove_columns=["text"])
val_dataset = val_dataset.map(tokenize_and_pad, batched=True, remove_columns=["text"])

train_dataset.set_format("torch")
val_dataset.set_format("torch")

model = AutoModelForSequenceClassification.from_pretrained(
    base_model_id,
    num_labels=5
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = torch.tensor(logits).argmax(dim=-1).numpy()
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="weighted"),
        "precision": precision_score(labels, preds, average="weighted"),
        "recall": recall_score(labels, preds, average="weighted"),
    }

training_args = TrainingArguments(
    report_to="mlflow",
    run_name=f"distilroberta-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}",
    output_dir="outputs_distilroberta",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    optim="adamw_torch",
    fp16=torch.cuda.is_available(),
    learning_rate=2e-5,
    lr_scheduler_type="constant",
    num_train_epochs=5,
    save_total_limit=1,
    save_strategy="epoch",
    logging_steps=100,
    eval_strategy="epoch",
    warmup_steps=100,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss"
)

trainer = Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    args=training_args,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)]
)

model.config.use_cache = False

trainer.train()


2025-07-07 19:11:49.367188: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751915509.807924      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751915509.935823      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025/07/07 19:12:08 INFO mlflow.tracking.fluent: Experiment with name 'MLflow DistilRoBERTa Sentiment Analysis' does not exist. Creating a new experiment.


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/299M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/23.5M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/650000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Train size: 100000
Validation size: 25000


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/331M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.8002,0.775328,0.66356,0.662134,0.662465,0.66356
2,0.7367,0.747524,0.67644,0.676818,0.677871,0.67644
3,0.6823,0.752892,0.67744,0.675723,0.674751,0.67744




TrainOutput(global_step=4689, training_loss=0.7537587678973978, metrics={'train_runtime': 9002.4623, 'train_samples_per_second': 55.54, 'train_steps_per_second': 0.868, 'total_flos': 3.9742345728e+16, 'train_loss': 0.7537587678973978, 'epoch': 3.0})