In [2]:
import os
import math
import optuna
import wandb
from optuna.pruners import SuccessiveHalvingPruner
from optuna.exceptions import TrialPruned
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoProcessor,
    Trainer,
    TrainingArguments,
    TrainerCallback
)
from peft import get_peft_model, LoraConfig, TaskType
import torch
import numpy as np
import evaluate
from dotenv import load_dotenv
from huggingface_hub import login
from transformers import EarlyStoppingCallback

# -- 1) Setup HF login and global objects --
load_dotenv("secrets.env")
hf_token = os.getenv("HF_TOKEN")
login(token=hf_token)

MODEL_NAME = "microsoft/Florence-2-base-ft"
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Load full dataset
raw_ds = load_dataset("SimulaMet-HOST/Kvasir-VQA")['raw']

# Load metrics
bleu, meteor, rouge = map(evaluate.load, ["bleu", "meteor", "rouge"])

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # decode predictions and labels
    preds = np.argmax(logits[0], axis=-1)
    pred_texts = processor.tokenizer.batch_decode(preds, skip_special_tokens=True)
    label_ids = np.where(labels != -100, labels, processor.tokenizer.pad_token_id)
    label_texts = processor.tokenizer.batch_decode(label_ids, skip_special_tokens=True)
    pred_texts = [p.strip() for p in pred_texts]
    label_texts = [l.strip() for l in label_texts]
    return {"rougeL": rouge.compute(predictions=pred_texts, references=label_texts)["rougeL"]}

# Callback to report metrics to Optuna and prune
class OptunaPruningCallback(TrainerCallback):
    def __init__(self, trial, metric_name: str):
        self.trial = trial
        self.metric_name = metric_name

    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        score = metrics.get(self.metric_name)
        step = state.global_step
        self.trial.report(score, step)
        if self.trial.should_prune():
            raise TrialPruned(f"Trial was pruned at step {step}")
        return control

# Objective for Optuna
def objective(trial):
    # Initialize WandB
    run_name = f"hparam_trial_{trial.number}"
    wandb.init(
        project="florence2_hparam_trial",
        name=run_name,
        reinit=True
    )

    # -- 2) Sample hyperparameters --
    lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
    train_bs = trial.suggest_categorical("per_device_train_batch_size", [2, 4])
    grad_acc = trial.suggest_categorical("gradient_accumulation_steps", [1, 2, 4])
    weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
    lora_r      = trial.suggest_categorical("lora_r", [4, 8, 16])
    lora_alpha  = trial.suggest_categorical("lora_alpha", [8, 16, 32])
    lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)

    # Log hyperparameters to WandB
    wandb.config.update({
        "lr": lr,
        "train_bs": train_bs,
        "grad_acc": grad_acc,
        "weight_decay": weight_decay,
        "lora_r": lora_r,
        "lora_alpha": lora_alpha,
        "lora_dropout": lora_dropout,
    })

    # -- 3) Prepare small train/val split (2.5% coverage) --
    DATASET_COVERAGE = 0.025
    SEED = 7
    TEST_SIZE = 0.1
    
    if DATASET_COVERAGE < 1:
        dataset = raw_ds.train_test_split(test_size=DATASET_COVERAGE, seed=SEED, shuffle=True)
        dataset = dataset['test'].train_test_split(test_size=TEST_SIZE, seed=SEED)
        train_dataset, val_dataset = dataset['train'], dataset['test']
    else:
        dataset = ds.train_test_split(test_size=TEST_SIZE, seed=SEED)
        train_dataset, val_dataset = dataset['train'], dataset['test']
    
    # -- 4) Load model, processor, and apply LoRA --
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
    peft_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "lm_head", "fc2"],
        task_type=TaskType.CAUSAL_LM,
        lora_dropout=lora_dropout,
        bias="none",
        inference_mode=False
    )
    model = get_peft_model(model, peft_config)
    model = model.to(DEVICE)

    # -- 5) Collate function --
    def collate_fn(batch):
        questions = [x['question'] for x in batch]
        images = [x['image'].convert("RGB") if x['image'].mode != "RGB" else x['image'] for x in batch]
        answers = [x['answer'] for x in batch]

        inputs = processor(text=questions, images=images, return_tensors="pt", padding=True)
        labels = processor.tokenizer(answers, return_tensors="pt", padding=True).input_ids
        labels[labels == processor.tokenizer.pad_token_id] = -100
        inputs["labels"] = labels
        return inputs

    # -- 6) Compute steps --
    TRAIN_DATASET_SIZE = len(train_dataset)
    NUM_EPOCHS = 1
    effective_bs = train_bs * grad_acc
    steps_per_epoch = math.ceil(TRAIN_DATASET_SIZE / effective_bs)
    max_steps = steps_per_epoch * NUM_EPOCHS

    # -- 7) TrainingArguments with pruning --
    training_args = TrainingArguments(
        output_dir=run_name,
        per_device_train_batch_size=train_bs,
        gradient_accumulation_steps=grad_acc,
        per_device_eval_batch_size=train_bs,
        eval_accumulation_steps=1,
        max_steps=max_steps,
        learning_rate=lr,
        weight_decay=weight_decay,
        logging_steps=10,
        save_strategy="no",
        # save_steps=steps_per_epoch,
        eval_strategy="steps",
        eval_steps=steps_per_epoch,
        report_to=["wandb"],
        # load_best_model_at_end=True,
        metric_for_best_model="rougeL",
        greater_is_better=True,
        fp16=True,
        remove_unused_columns=False,
    )

    pruning_cb = OptunaPruningCallback(trial, metric_name="eval_rougeL")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=collate_fn,
        compute_metrics=compute_metrics,
        callbacks=[pruning_cb, EarlyStoppingCallback(early_stopping_patience=2)]
    )

    # -- 8) Train & evaluate --
    trainer.train()
    results = trainer.evaluate()
    wandb.finish()

    return results["eval_rougeL"]

# -- 9) Run Optuna study --
pruner = SuccessiveHalvingPruner()
study = optuna.create_study(
    direction="maximize",
    pruner=pruner,
    sampler=optuna.samplers.TPESampler(seed=42)
)
study.optimize(objective, n_trials=50)

print("Best trial:")
print(study.best_trial)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


Resolving data files:   0%|          | 0/31 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/31 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/30 [00:00<?, ?it/s]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[I 2025-05-17 08:54:30,023] A new study created in memory with name: no-name-76498728-bf5a-4c51-952f-1d81b0e27549


0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
train/grad_norm,▂▂▂▃▃▅▃▂▄▅▃▃▂▄▂▃▅▃▄▄▄▂▄█▂▅▃▁▂▁▄▄▄▅▃▁▃▆▄▄
train/learning_rate,█████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,▇▄▅▅█▅▆▅▇▆▄▄▅▄▃▃▇▅▅▆▃▄▂▂▃▄▅▂▅▁▅▅▆▅▃▄▅▄▇▃

0,1
train/epoch,0.99698
train/global_step,660.0
train/grad_norm,19.8411
train/learning_rate,0.0
train/loss,1.8912


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,1.8844,2.401741,0.284204




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▂▂▁▁▂▂▄▂▃▂▆▄▃▄▄▆▅▃▃▇▄▅█▄▁▆▅▁▂▃▅▄▄▆▃▁▄▇▅▅
train/learning_rate,█████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁
train/loss,▄▃▅▄▆▅▇▇▆▇▇▄▅▃▄▂▇▄▂▃▅▁▅▃▄▃▅▅▂▄▄▆▄▆▅▁▄▆█▃

0,1
eval/loss,2.40174
eval/rougeL,0.2842
eval/runtime,24.4172
eval/samples_per_second,6.061
eval/steps_per_second,3.031
total_flos,23362239693936.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,18.33407
train/learning_rate,0.0


[I 2025-05-17 09:00:16,182] Trial 0 finished with value: 0.28420393001275346 and parameters: {'learning_rate': 5.611516415334504e-06, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.005808361216819946, 'lora_r': 4, 'lora_alpha': 16, 'lora_dropout': 0.06370173320348284}. Best is trial 0 with value: 0.28420393001275346.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
331,2.4524,2.989171,0.117415




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▂▅█▁▄▅▂▃▆▄▇▃▅▁▃▅▃▆█▄▃▂▂▅▁▃▅▅▄▂▅▄▄
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,▅▃▅▅▄▆▆▄▅▃▄▂▅▂▅▅▃▅▃▁▅▁▃▄▄▁▇▇▃▅▅█▃

0,1
eval/loss,2.98917
eval/rougeL,0.11742
eval/runtime,17.378
eval/samples_per_second,8.516
eval/steps_per_second,2.129
total_flos,26495539862400.0
train/epoch,1.0
train/global_step,331.0
train/grad_norm,4.73576
train/learning_rate,0.0


[I 2025-05-17 09:05:23,421] Trial 1 finished with value: 0.117415161900456 and parameters: {'learning_rate': 2.310201887845294e-06, 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 1, 'weight_decay': 0.06118528947223795, 'lora_r': 16, 'lora_alpha': 16, 'lora_dropout': 0.15427033152408348}. Best is trial 0 with value: 0.28420393001275346.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 09:10:06,496] Trial 2 pruned. Trial was pruned at step 83


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▂▃▄▅▆▇██
train/global_step,▁▂▃▄▅▆▇██
train/grad_norm,▁▄▄▅▇▃█▇
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,▆█▃▆▁▁▆█

0,1
eval/loss,2.93404
eval/rougeL,0.11448
eval/runtime,17.3426
eval/samples_per_second,8.534
eval/steps_per_second,2.133
train/epoch,1.0
train/global_step,83.0
train/grad_norm,3.35293
train/learning_rate,0.0
train/loss,2.993


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,2.2905,2.915723,0.146858




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇███
train/grad_norm,▄▄▄▃▄▄▄▄▃▃▅▅▅█▃▄▆▆▇▇▄▇█▃▅▁▆▄▂▄▅▅▆▅▇▄▅▅▅▅
train/learning_rate,████▇▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▁
train/loss,▂▂▃▅█▄▇▃▅▃▅▂▇▃▂▅▆▁▇▅▁▄▅▁▂▄▄▅▂▅▆▆▆▄▂▅▄▇█▃

0,1
eval/loss,2.91572
eval/rougeL,0.14686
eval/runtime,23.6244
eval/samples_per_second,6.265
eval/steps_per_second,3.132
total_flos,23459936509152.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,5.29441
train/learning_rate,0.0


[I 2025-05-17 09:15:56,483] Trial 3 finished with value: 0.14685844649079938 and parameters: {'learning_rate': 1.1715937392307063e-06, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.054671027934327966, 'lora_r': 8, 'lora_alpha': 8, 'lora_dropout': 0.27656227050693505}. Best is trial 0 with value: 0.28420393001275346.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
331,2.2824,2.896617,0.146858




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▂▅▄▂▃▁▂▄▅▆▄▂▅▂▂▅▄▃█▄▄▄▄▃▁▁▅▃▄▃▄▄▄
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,▅▃▄▆▄▅▆▅▅▄▄▂▅▂▆▆▄▆▄▂▆▂▃▄▄▁▆▇▃▅▅█▃

0,1
eval/loss,2.89662
eval/rougeL,0.14686
eval/runtime,23.5076
eval/samples_per_second,6.296
eval/steps_per_second,3.148
total_flos,23655330139584.0
train/epoch,1.0
train/global_step,331.0
train/grad_norm,4.96036
train/learning_rate,0.0


[I 2025-05-17 09:21:36,568] Trial 4 finished with value: 0.14685844649079938 and parameters: {'learning_rate': 1.5030900645056814e-06, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 2, 'weight_decay': 0.08287375091519295, 'lora_r': 16, 'lora_alpha': 16, 'lora_dropout': 0.2960660809801552}. Best is trial 0 with value: 0.28420393001275346.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,1.1767,1.365281,0.477004




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁▂▁▂▃▃▄▂▃▄▃▅▅▄▇▅▄▇█▆▁▆▃▂▇▃▄█▄▃▂▁▂█▅▅▄▂▅█
train/learning_rate,████▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁
train/loss,█▅▅▆▅▅▅▅▆▅▃▄▄▅▃▅▃▂▂▅▃▃▂▃▂▂▃▃▃▃▃▁▃▃▃▂▂▄▄▂

0,1
eval/loss,1.36528
eval/rougeL,0.477
eval/runtime,23.6109
eval/samples_per_second,6.268
eval/steps_per_second,3.134
total_flos,23459936509152.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,13.58626
train/learning_rate,0.0


[I 2025-05-17 09:27:16,317] Trial 5 finished with value: 0.47700376928318106 and parameters: {'learning_rate': 3.50339849115869e-05, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.07712703466859458, 'lora_r': 8, 'lora_alpha': 8, 'lora_dropout': 0.01906750508580709}. Best is trial 5 with value: 0.47700376928318106.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 09:32:05,189] Trial 6 pruned. Trial was pruned at step 166


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▆▇▇██
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train/grad_norm,▂▁█▄▂▄▁▅▇█▃▆▇▅▃▄
train/learning_rate,██▇▇▆▆▅▅▄▄▃▃▂▂▁▁
train/loss,▃▅▅▅▄▂▃▆▄▁▂▃▁█▄▇

0,1
eval/loss,3.03688
eval/rougeL,0.11835
eval/runtime,18.2387
eval/samples_per_second,8.115
eval/steps_per_second,2.029
train/epoch,1.0
train/global_step,166.0
train/grad_norm,2.44002
train/learning_rate,0.0
train/loss,3.3579


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 09:37:18,977] Trial 7 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
train/grad_norm,▃▃▃▄▃▃▃▅▆▄▄▄▅▂▃▅▇▆▃▄▇▂▄█▅▄▅▃▁▃▅▄▃▅▄▂▃▄▄▄
train/learning_rate,████▇▇▇▇▆▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁
train/loss,▇▃▂▅█▄▆▅▅▆▇▃▅▃▄▃▇▃▃▅▆▁▇▄▁▅▁▂▃▄▂▆▅▆▆▂▄▅▇▃

0,1
eval/loss,2.81325
eval/rougeL,0.15787
eval/runtime,23.0612
eval/samples_per_second,6.418
eval/steps_per_second,3.209
train/epoch,1.0
train/global_step,662.0
train/grad_norm,12.84392
train/learning_rate,0.0
train/loss,2.2155


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,0.8402,0.849291,0.625078




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁▁▁▂▂▂▁▂▂▃▃▃▃▅▄▆▆▄▅▃▇▆▅▅▄▄▄█▂▂▁▆▅▄▄▄▂█▆▆
train/learning_rate,█████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▁▁▁
train/loss,█▄▅▅█▄▅▅▄▅▄▄▃▃▃▂▄▂▂▁▂▁▄▃▂▁▃▁▃▂▂▂▁▂▂▂▃▃▁▅

0,1
eval/loss,0.84929
eval/rougeL,0.62508
eval/runtime,24.9224
eval/samples_per_second,5.938
eval/steps_per_second,2.969
total_flos,23655330139584.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,12.12008
train/learning_rate,0.0


[I 2025-05-17 09:43:12,173] Trial 8 finished with value: 0.625078177651707 and parameters: {'learning_rate': 7.234279845665421e-05, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.08925589984899779, 'lora_r': 16, 'lora_alpha': 8, 'lora_dropout': 0.1281323365878769}. Best is trial 8 with value: 0.625078177651707.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 09:48:37,969] Trial 9 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▂▂▃▂▃▃▂▂▃▂▃▃▄▅▄▁▆▄▆▁▄▃▂▃▅▄█▂▁▂▆▃▃▄▃▃█▆
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁
train/loss,█▅▆█▄▅▅▄▄▃▃▄▁▄▂▃▃▃▂▁▃▂▂▂▁▂▂▂▁▂▁▂▂▂▂▃▂▁▂▁

0,1
eval/loss,0.94476
eval/rougeL,0.60895
eval/runtime,25.5849
eval/samples_per_second,5.785
eval/steps_per_second,2.892
train/epoch,1.0
train/global_step,662.0
train/grad_norm,23.66553
train/learning_rate,0.0
train/loss,0.9581


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 09:53:18,188] Trial 10 pruned. Trial was pruned at step 83


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▂▃▄▅▆▇██
train/global_step,▁▂▃▄▅▆▇██
train/grad_norm,▃▆▁▅▆▄▅█
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▆▃▃▂▂▁▂

0,1
eval/loss,1.14739
eval/rougeL,0.56187
eval/runtime,17.781
eval/samples_per_second,8.323
eval/steps_per_second,2.081
train/epoch,1.0
train/global_step,83.0
train/grad_norm,6.20321
train/learning_rate,1e-05
train/loss,1.3201


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 09:58:37,465] Trial 11 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▂▁ ▂▂▃▄▂▃▄▅▅▄▆▄▇▅▇▅▅▆▂▂▂▇▆▃▄▅▇▂▁▃▇▆▅▂▆██
train/learning_rate,███▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁
train/loss,█▄▄▅█▅▅▄▆▃▃▃▄▂▂▂▁▄▃▄▁▃▂▂▁▃▃▁▂▂▁▂▁▃▂▂▁▃▂▂

0,1
eval/loss,1.51672
eval/rougeL,0.4655
eval/runtime,25.3268
eval/samples_per_second,5.844
eval/steps_per_second,2.922
train/epoch,1.0
train/global_step,662.0
train/grad_norm,17.81431
train/learning_rate,0.0
train/loss,1.2522


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,0.6677,0.687185,0.691592




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▁▁▂▁▂▂▂▁▂▃▃▃▃▄▃▅▃▆▆▃▄▂▄▁▅▂▄▃▂▂▆▄▄▃▂▁█▆▇▆
train/learning_rate,█████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train/loss,█▅▄▅▄▃▄▄▃▄▄▃▃▃▄▁▄▂▁▃▁▃▂▂▂▂▂▃▂▂▂▁▂▂▂▂▂▃▁▄

0,1
eval/loss,0.68719
eval/rougeL,0.69159
eval/runtime,25.34
eval/samples_per_second,5.841
eval/steps_per_second,2.92
total_flos,23655330139584.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,12.76628
train/learning_rate,0.0


[I 2025-05-17 10:04:38,459] Trial 12 finished with value: 0.6915921386509621 and parameters: {'learning_rate': 9.653752916643676e-05, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.07569703666274985, 'lora_r': 16, 'lora_alpha': 8, 'lora_dropout': 0.08804517663760333}. Best is trial 12 with value: 0.6915921386509621.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:09:58,455] Trial 13 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇██
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
train/grad_norm,▁▁▁▂▂▂▂▂▂▄▃▃▃▄▃▅▅▄▆▆▆▇▃▅▅▂▂▅▄█▂▁▁▆▅▂▄▃█▅
train/learning_rate,████▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train/loss,▅▅▅▆█▄▄▄▃▃▃▂▄▂▂▃▂▁▃▃▂▂▁▂▁▃▂▂▁▁▂▂▂▂▂▂▂▂▄▂

0,1
eval/loss,0.67477
eval/rougeL,0.68407
eval/runtime,24.2487
eval/samples_per_second,6.103
eval/steps_per_second,3.052
train/epoch,1.0
train/global_step,662.0
train/grad_norm,14.71365
train/learning_rate,0.0
train/loss,0.64


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:15:16,327] Trial 14 pruned. Trial was pruned at step 166


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▆▇▇██
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train/grad_norm,▁▁▁█▃▃▆▄▅▃▂▄▂▃▄▇
train/learning_rate,██▇▇▆▆▅▅▄▄▃▃▂▂▁▁
train/loss,▇█▇▇▆▄▄▆▄▂▃▄▁▄▃▅

0,1
eval/loss,1.84643
eval/rougeL,0.36919
eval/runtime,24.1479
eval/samples_per_second,6.129
eval/steps_per_second,3.064
train/epoch,1.0
train/global_step,166.0
train/grad_norm,3.54781
train/learning_rate,0.0
train/loss,2.0427


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:20:45,804] Trial 15 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▃▃▂▃▁▃▄▄ ▃▂▅▃▆▆▄▆█▅▅▄▆▅▂▃▅▅▆▃▆▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,▇▅▆█▆▇█▆▇▅▆▃▆▃▇▇▄▆▄▂▆▂▄▅▃▁▆▆▃▅▅█▃

0,1
eval/loss,2.33735
eval/rougeL,0.30188
eval/runtime,24.1756
eval/samples_per_second,6.122
eval/steps_per_second,3.061
train/epoch,1.0
train/global_step,331.0
train/grad_norm,4.02642
train/learning_rate,0.0
train/loss,1.7631


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,0.4586,0.49281,0.700363




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▃▂▃▃▃▃▃▃▄▃▄▄▆▄▃▆▅▇▇▄▅▅▃▂▅▄▄█▄▃▆▄▄▁▄▂▂▆▇▆
train/learning_rate,████▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
train/loss,█▅▄▅▇▄▄▄▃▄▄▃▃▂▃▃▃▂▁▂▃▂▁▃▂▂▁▂▁▃▂▂▂▁▂▂▂▂▁▁

0,1
eval/loss,0.49281
eval/rougeL,0.70036
eval/runtime,25.4701
eval/samples_per_second,5.811
eval/steps_per_second,2.905
total_flos,23655330139584.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,26.73811
train/learning_rate,0.0


[I 2025-05-17 10:26:44,748] Trial 16 finished with value: 0.700363424995778 and parameters: {'learning_rate': 6.593100555844184e-05, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.042558946436382895, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.1987551295450799}. Best is trial 16 with value: 0.700363424995778.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:32:05,841] Trial 17 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇█
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█
train/grad_norm,▂▃▂▂▃▃▃▃▂▃▃▃▄▅▄▃▇▄▆█▅▁▅▃▂▆▅▅▄█▂▁█▆▅▂▄▇██
train/learning_rate,███▇▇▇▇▇▇▇▆▆▆▆▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▁▁▁▁
train/loss,▅█▄▅▅▅▄▅▅▃▄▁▄▂▁▃▃▄▂▁▃▂▂▁▂▁▃▂▃▁▂▂▂▂▁▂▁▂▅▂

0,1
eval/loss,1.21358
eval/rougeL,0.51352
eval/runtime,23.7118
eval/samples_per_second,6.242
eval/steps_per_second,3.121
train/epoch,1.0
train/global_step,662.0
train/grad_norm,59.9896
train/learning_rate,0.0
train/loss,1.0797


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:36:39,084] Trial 18 pruned. Trial was pruned at step 166


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▆▇▇██
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train/grad_norm,▄▁▆▆▄▄▃▄██▄▂▅▆▄▆
train/learning_rate,██▇▇▆▆▅▅▄▄▃▃▂▂▁▁
train/loss,▆█▇▇▆▄▄▇▄▁▃▃▁█▄▇

0,1
eval/loss,2.59606
eval/rougeL,0.17859
eval/runtime,18.8332
eval/samples_per_second,7.858
eval/steps_per_second,1.965
train/epoch,1.0
train/global_step,166.0
train/grad_norm,7.34823
train/learning_rate,0.0
train/loss,2.9044


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:41:46,198] Trial 19 pruned. Trial was pruned at step 166


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▆▇▇██
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train/grad_norm,▃▂▃▃▁▁▃▃▃▃▃▄▁▄▅█
train/learning_rate,██▇▇▆▆▅▅▄▄▃▃▂▂▁▁
train/loss,██▆▆▅▄▃▄▃▂▃▃▁▂▂▃

0,1
eval/loss,1.02506
eval/rougeL,0.58351
eval/runtime,24.5861
eval/samples_per_second,6.02
eval/steps_per_second,3.01
train/epoch,1.0
train/global_step,166.0
train/grad_norm,11.82874
train/learning_rate,0.0
train/loss,1.2372


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:47:11,942] Trial 20 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▇▇▇▇▇▇▇██
train/grad_norm,▃▂▃▃▂▃▃▃▂▂▄▄▆▆▃▃▆▇▇▄▅▄▂▅▄▃▄▇▃▃▂▁▆▅▃▃█▇▆▇
train/learning_rate,██▇▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
train/loss,▅▅▆█▅▅▄▃▅▃▂▅▂▁▄▂▄▃▂▃▁▃▃▂▃▃▃▂▁▁▂▃▂▂▂▃▂▂▅▂

0,1
eval/loss,0.89735
eval/rougeL,0.64651
eval/runtime,24.5577
eval/samples_per_second,6.027
eval/steps_per_second,3.013
train/epoch,1.0
train/global_step,662.0
train/grad_norm,55.63017
train/learning_rate,0.0
train/loss,0.8637


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:52:30,519] Trial 21 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
train/grad_norm,▁▁▂▂▂▃▂▃▂▃▅▄▃▆▆▅▃▇▇▆▅▄▃▂▆▃▄▄█▂▂▁▅▄▃▄▂█▇▆
train/learning_rate,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁▁▁
train/loss,█▅▅▆█▄▅▄▅▄▃▄▃▃▂▁▃▃▃▁▂▂▁▃▁▂▃▂▂▁▂▂▂▂▂▂▁▂▅▂

0,1
eval/loss,0.84533
eval/rougeL,0.629
eval/runtime,23.7762
eval/samples_per_second,6.225
eval/steps_per_second,3.112
train/epoch,1.0
train/global_step,662.0
train/grad_norm,12.33494
train/learning_rate,0.0
train/loss,0.8334


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 10:57:48,266] Trial 22 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▁▂▃▂▂▂▃▂▂▃▃▅▄▆▅▁▄▃▇▆▂▅▄▂▅▂▄█▂▂▂▁▁▆▅▃█▇▇▆
train/learning_rate,████▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,█▅▅▅█▄▄▅▄▅▄▃▃▄▂▄▂▁▃▃▂▁▄▃▂▂▃▁▃▂▁▂▁▂▂▃▃▂▁▅

0,1
eval/loss,0.89273
eval/rougeL,0.61744
eval/runtime,24.4341
eval/samples_per_second,6.057
eval/steps_per_second,3.029
train/epoch,1.0
train/global_step,662.0
train/grad_norm,11.63495
train/learning_rate,0.0
train/loss,0.8665


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:03:11,152] Trial 23 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▂▂▃▃▃▂▂▂▃▃▃▃▄▄▅▃▆▁▅▄█▆▅▅▃▄▂▄█▂▃▁▇▅▃▄▂▃█▇
train/learning_rate,████▇▇▇▆▆▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
train/loss,█▅▅▅▆▄▅▅▄▅▃▃▃▄▃▅▂▂▁▄▃▄▃▂▃▂▃▂▃▃▂▁▁▃▂▂▂▂▃▂

0,1
eval/loss,1.07127
eval/rougeL,0.54288
eval/runtime,24.0894
eval/samples_per_second,6.144
eval/steps_per_second,3.072
train/epoch,1.0
train/global_step,662.0
train/grad_norm,22.92767
train/learning_rate,0.0
train/loss,1.0191


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:08:29,634] Trial 24 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▁▂▂▂▃▂▃▃▂▃▃▃▄▄▃▅▄▆▅▅▅▃▅▄▅▄▇▂▂▃▁▇▅▄▃▅▄█▇▆
train/learning_rate,█████▇▇▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁
train/loss,█▅▆█▄▅▅▅▅▃▃▃▄▄▃▂▁▃▃▄▄▃▂▂▂▃▃▃▃▂▁▁▃▂▂▃▄▃▂▂

0,1
eval/loss,1.15395
eval/rougeL,0.53136
eval/runtime,24.9074
eval/samples_per_second,5.942
eval/steps_per_second,2.971
train/epoch,1.0
train/global_step,662.0
train/grad_norm,10.20439
train/learning_rate,0.0
train/loss,1.0454


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,0.3883,0.426198,0.722275




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇██████
train/grad_norm,▃▃▁▃▃▃▃▄▃▂▃▄▃▅▃▄▇▄▆▇▃▆█▇▁▁▄▂▃█▄▃▃▆▄▁█▆▇▆
train/learning_rate,█████▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
train/loss,█▅▄▃▃▃▃▂▂▂▃▂▁▂▁▂▂▂▂▁▂▁▂▂▁▂▂▁▂▁▂▁▂▂▂▂▂▁▁▁

0,1
eval/loss,0.4262
eval/rougeL,0.72228
eval/runtime,24.5796
eval/samples_per_second,6.021
eval/steps_per_second,3.011
total_flos,23655330139584.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,26.72816
train/learning_rate,0.0


[I 2025-05-17 11:14:17,186] Trial 25 finished with value: 0.7222753035253036 and parameters: {'learning_rate': 9.581302406749229e-05, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.06651142424918541, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.13983398787788434}. Best is trial 25 with value: 0.7222753035253036.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
331,0.4602,0.50871,0.701283




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▁▂▃▁▃▁▄▂▆▃▇▄▇▂▅▆▆▄▆▄▂█▇▃▂▃▄▄▇▁█▇▇
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▆▅▄▄▄▃▃▂▃▁▃▁▂▃▂▂▂▁▂▂▂▂▂▁▂▁▁▂▂▂▁

0,1
eval/loss,0.50871
eval/rougeL,0.70128
eval/runtime,17.4197
eval/samples_per_second,8.496
eval/steps_per_second,2.124
total_flos,26495539862400.0
train/epoch,1.0
train/global_step,331.0
train/grad_norm,18.32772
train/learning_rate,0.0


[I 2025-05-17 11:19:17,923] Trial 26 finished with value: 0.701282906613789 and parameters: {'learning_rate': 9.328221965821797e-05, 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 1, 'weight_decay': 0.06536815457058985, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.049303095757850704}. Best is trial 25 with value: 0.7222753035253036.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:24:07,714] Trial 27 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▄▁▃▂▂▂▄▂▄▂▄▁▃▄▄▃▆▃▃▆▆▂▂▄▄▃█▃▆▆▅
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▇▆▅▅▅▄▅▃▄▂▄▁▃▃▂▄▂▁▃▂▂▃▂▁▃▂▂▃▂▄▂

0,1
eval/loss,0.99683
eval/rougeL,0.59716
eval/runtime,17.7962
eval/samples_per_second,8.316
eval/steps_per_second,2.079
train/epoch,1.0
train/global_step,331.0
train/grad_norm,14.65456
train/learning_rate,0.0
train/loss,0.8647


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:28:59,794] Trial 28 pruned. Trial was pruned at step 83


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▂▃▄▅▆▇██
train/global_step,▁▂▃▄▅▆▇██
train/grad_norm,▄█▁▆▃▃▄▆
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,█▇▄▃▁▂▁▂

0,1
eval/loss,1.56871
eval/rougeL,0.39408
eval/runtime,18.6733
eval/samples_per_second,7.926
eval/steps_per_second,1.981
train/epoch,1.0
train/global_step,83.0
train/grad_norm,10.46507
train/learning_rate,0.0
train/loss,1.6933


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:33:43,138] Trial 29 pruned. Trial was pruned at step 166


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▆▇▇██
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train/grad_norm,▃▁█▇▃▂▃▆█▇▂▂▃▇▃▆
train/learning_rate,██▇▇▆▆▅▅▄▄▃▃▂▂▁▁
train/loss,▆▇▇▇▆▃▄▇▄▁▃▃▁█▄▇

0,1
eval/loss,2.6317
eval/rougeL,0.17135
eval/runtime,17.5033
eval/samples_per_second,8.456
eval/steps_per_second,2.114
train/epoch,1.0
train/global_step,166.0
train/grad_norm,7.16964
train/learning_rate,0.0
train/loss,2.9416


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:38:29,966] Trial 30 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▄█▂▄▃▃▂▅▃▇▂▄▁▃▄▄▇▇▄▃▄▅▃▃▂▅▅▅▃█▅▄
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,▆▄▆▆▅▇▇▅▆▄▅▂▆▂▆▆▃▆▃▂▅▂▃▄▄▁▇▇▃▅▄█▃

0,1
eval/loss,2.6794
eval/rougeL,0.16403
eval/runtime,19.106
eval/samples_per_second,7.746
eval/steps_per_second,1.937
train/epoch,1.0
train/global_step,331.0
train/grad_norm,18.04598
train/learning_rate,0.0
train/loss,2.1813


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
662,0.3774,0.42534,0.729011




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████
train/grad_norm,▃▂▂▂▃▂▂▃▃▄▅▃▃▅▅▃▅▃▅▅▃▄▃▃▁▁▃▂▆▁▂▃▃▃▁▃█▅▇▅
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁
train/loss,█▅▄▄▄▃▃▄▃▃▂▂▂▃▁▂▁▂▂▂▂▂▂▂▁▂▂▂▁▂▁▂▂▂▂▂▁▁▃▁

0,1
eval/loss,0.42534
eval/rougeL,0.72901
eval/runtime,25.8509
eval/samples_per_second,5.725
eval/steps_per_second,2.863
total_flos,23655330139584.0
train/epoch,1.0
train/global_step,662.0
train/grad_norm,26.29254
train/learning_rate,0.0


[I 2025-05-17 11:44:26,370] Trial 31 finished with value: 0.7290111782758841 and parameters: {'learning_rate': 9.596461243144801e-05, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.07115933746212984, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.054778160858588026}. Best is trial 31 with value: 0.7290111782758841.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
331,0.5675,0.616216,0.714602




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,▁█
eval/samples_per_second,█▁
eval/steps_per_second,█▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▁▂▆▁▃▁▃▂▅▃▆▃▆▁▄▅▅▄▇▄▂█▇▃▂▃▅▄▇▂█▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▆▅▄▄▄▃▄▃▃▂▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▁▁▃▂▃▁

0,1
eval/loss,0.61622
eval/rougeL,0.7146
eval/runtime,17.6185
eval/samples_per_second,8.4
eval/steps_per_second,2.1
total_flos,26495539862400.0
train/epoch,1.0
train/global_step,331.0
train/grad_norm,17.60512
train/learning_rate,0.0


[I 2025-05-17 11:49:34,781] Trial 32 finished with value: 0.7146017320649674 and parameters: {'learning_rate': 7.094134626491238e-05, 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 1, 'weight_decay': 0.04267706493188495, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.003903592764484154}. Best is trial 31 with value: 0.7290111782758841.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:54:20,707] Trial 33 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▂▃▁▂▁▃▁▅▂▆▃▆▁▅▅▃▄▆▃▁██▂▁▃▄▄▇▁▇▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▆▅▄▄▄▃▄▃▃▁▃▁▂▃▂▃▂▁▂▂▂▂▂▁▂▁▁▂▁▃▁

0,1
eval/loss,0.59227
eval/rougeL,0.71213
eval/runtime,18.2869
eval/samples_per_second,8.093
eval/steps_per_second,2.023
train/epoch,1.0
train/global_step,331.0
train/grad_norm,17.31422
train/learning_rate,0.0
train/loss,0.5181


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 11:59:16,377] Trial 34 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▂▃▁▂▁▃▁▅▂▆▃▅▂▅▅▄▄▆▄▂██▃▂▄▅▄▇▂█▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▆▆▄▄▄▃▄▃▄▂▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▂▂▃▂▃▁

0,1
eval/loss,0.68326
eval/rougeL,0.69078
eval/runtime,18.3465
eval/samples_per_second,8.067
eval/steps_per_second,2.017
train/epoch,1.0
train/global_step,331.0
train/grad_norm,10.74683
train/learning_rate,0.0
train/loss,0.6017


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:04:13,741] Trial 35 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▃▁▃▁▂▂▄▂▅▂▅▁▄▅▄▄▇▃▃██▂▂▅▅▅▇▃▇▆▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▇▆▅▅▅▄▄▃▄▂▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▂▂▃▂▃▂

0,1
eval/loss,0.78923
eval/rougeL,0.66505
eval/runtime,17.6276
eval/samples_per_second,8.396
eval/steps_per_second,2.099
train/epoch,1.0
train/global_step,331.0
train/grad_norm,14.91206
train/learning_rate,0.0
train/loss,0.7224


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:09:12,226] Trial 36 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▃▅▂▄▂▂▃▅▄▆▁▅▁▄▄▄▅▃▄▂▅▆▂▁▂▆▄▇▂█▆▅
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▇██▆▇▇▅▆▄▅▃▅▂▅▅▂▅▃▂▄▃▃▄▃▁▅▄▂▅▃▆▂

0,1
eval/loss,1.69672
eval/rougeL,0.30848
eval/runtime,18.7756
eval/samples_per_second,7.883
eval/steps_per_second,1.971
train/epoch,1.0
train/global_step,331.0
train/grad_norm,11.59246
train/learning_rate,0.0
train/loss,1.3955


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:14:00,821] Trial 37 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▃▄▂▃▂▂▃▅▃▅▁▅▂▄▅▄▄▅▅▃▇▆▃▂▄▆▄█▄█▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▇█▇▆▆▆▄▅▃▅▂▄▂▄▄▂▄▂▂▃▃▃▃▂▁▃▂▂▄▂▅▂

0,1
eval/loss,1.31968
eval/rougeL,0.46217
eval/runtime,18.8375
eval/samples_per_second,7.857
eval/steps_per_second,1.964
train/epoch,1.0
train/global_step,331.0
train/grad_norm,13.89428
train/learning_rate,0.0
train/loss,1.1104


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:18:46,816] Trial 38 pruned. Trial was pruned at step 83


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▂▃▄▅▆▇██
train/global_step,▁▂▃▄▅▆▇██
train/grad_norm,▂▄▅▅▇▁█▆
train/learning_rate,█▇▆▅▄▃▂▁
train/loss,▅▇▃▅▁▁▆█

0,1
eval/loss,3.02649
eval/rougeL,0.11293
eval/runtime,17.8862
eval/samples_per_second,8.275
eval/steps_per_second,2.069
train/epoch,1.0
train/global_step,83.0
train/grad_norm,5.30007
train/learning_rate,0.0
train/loss,3.0855


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
331,0.5745,0.605809,0.725558




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▂▂▂▁▂▁▃▁▄▂▅▃▅▂▅▄▄▄▄▄▂██▃▂▃▄▄▇▂█▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▆▅▄▄▄▃▄▃▃▁▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▂▁▂▁▃▁

0,1
eval/loss,0.60581
eval/rougeL,0.72556
eval/runtime,18.5581
eval/samples_per_second,7.975
eval/steps_per_second,1.994
total_flos,26167259109600.0
train/epoch,1.0
train/global_step,331.0
train/grad_norm,35.1296
train/learning_rate,0.0


[I 2025-05-17 12:23:54,106] Trial 39 finished with value: 0.7255577639768817 and parameters: {'learning_rate': 7.908136490509011e-05, 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 1, 'weight_decay': 0.0827533116653657, 'lora_r': 4, 'lora_alpha': 32, 'lora_dropout': 0.0797229015275476}. Best is trial 31 with value: 0.7290111782758841.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:28:34,739] Trial 40 pruned. Trial was pruned at step 166


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▂▂▃▃▄▄▅▅▆▆▆▇▇██
train/global_step,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train/grad_norm,▃▁▅▆▂▂▂▅▆▃▂▆▄▂▅█
train/learning_rate,██▇▇▆▆▅▅▄▄▃▃▂▂▁▁
train/loss,▇█▇▇▅▄▃▅▃▂▃▃▁▄▃▄

0,1
eval/loss,1.74923
eval/rougeL,0.31878
eval/runtime,18.5529
eval/samples_per_second,7.977
eval/steps_per_second,1.994
train/epoch,1.0
train/global_step,166.0
train/grad_norm,18.05854
train/learning_rate,0.0
train/loss,2.0111


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:33:07,432] Trial 41 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▂▁▂▁▃▁▄▂▅▃▅▂▅▅▄▄▄▄▁██▃▂▃▄▄▇▂█▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▆▅▄▄▄▃▄▃▃▁▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▂▁▂▂▃▁

0,1
eval/loss,0.60676
eval/rougeL,0.71788
eval/runtime,18.5042
eval/samples_per_second,7.998
eval/steps_per_second,2.0
train/epoch,1.0
train/global_step,331.0
train/grad_norm,36.21384
train/learning_rate,0.0
train/loss,0.5648


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss,Rougel
331,0.5513,0.590708,0.720112




0,1
eval/loss,▁▁
eval/rougeL,▁▁
eval/runtime,█▁
eval/samples_per_second,▁█
eval/steps_per_second,▁█
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇██████
train/grad_norm,▂▂▂▁▂▁▃▁▄▂▅▄▅▂▅▅▄▄▅▄▁██▃▂▃▄▄▇▂█▇▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▇▅▄▄▄▃▄▃▃▁▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▁▁▂▁▃▁

0,1
eval/loss,0.59071
eval/rougeL,0.72011
eval/runtime,18.6577
eval/samples_per_second,7.932
eval/steps_per_second,1.983
total_flos,26167259109600.0
train/epoch,1.0
train/global_step,331.0
train/grad_norm,34.77969
train/learning_rate,0.0


[I 2025-05-17 12:38:15,375] Trial 42 finished with value: 0.7201117443764503 and parameters: {'learning_rate': 8.085332777225086e-05, 'per_device_train_batch_size': 4, 'gradient_accumulation_steps': 1, 'weight_decay': 0.07441542829839998, 'lora_r': 4, 'lora_alpha': 32, 'lora_dropout': 0.04559102042483379}. Best is trial 31 with value: 0.7290111782758841.


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:43:04,595] Trial 43 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▃▁▂▁▂▂▄▂▆▃▄▁▄▅▅▄▆▃▄▇█▂▂▄▅▄▇▃▇▆▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▇▆▅▅▅▃▄▃▄▂▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▂▂▃▂▃▂

0,1
eval/loss,0.73073
eval/rougeL,0.69384
eval/runtime,18.8323
eval/samples_per_second,7.859
eval/steps_per_second,1.965
train/epoch,1.0
train/global_step,331.0
train/grad_norm,36.69892
train/learning_rate,0.0
train/loss,0.6835


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:47:52,633] Trial 44 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▃▁▃▁▂▁▃▂▃▂▃▁▃▄▄▃ ▄▃▆▆▂▂▄▅▄█▄▇▆▅
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▇▆▅▅▅▄▄▃▄▂▃▁▃▃▂▄▂▂▃▂▂▃▂▁▃▂▂▃▂▄▂

0,1
eval/loss,0.9268
eval/rougeL,0.58382
eval/runtime,17.8752
eval/samples_per_second,8.28
eval/steps_per_second,2.07
train/epoch,1.0
train/global_step,331.0
train/grad_norm,29.69999
train/learning_rate,0.0
train/loss,0.8354


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:52:42,172] Trial 45 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▂▃▁▂▁▂▁▃▂▆▁▄▁▄▅▄▃▅▄▄▇█▂▂▄▅▄▇▃▇▆▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▇▆▅▅▅▄▄▃▄▂▃▁▃▃▂▃▂▁▃▂▂▃▂▁▃▂▂▃▂▃▂

0,1
eval/loss,0.85086
eval/rougeL,0.65818
eval/runtime,19.1568
eval/samples_per_second,7.726
eval/steps_per_second,1.931
train/epoch,1.0
train/global_step,331.0
train/grad_norm,18.13784
train/learning_rate,0.0
train/loss,0.7934


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 12:57:27,516] Trial 46 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▁▂▂▁▁▁▂▁▃▂▄▃▄▁▄▄▃▃▄▃▁▆▇▂▂▂▄▃▅▂█▅▅
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▆▅▄▄▄▃▄▃▃▁▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▁▂▂▂▃▁

0,1
eval/loss,0.59581
eval/rougeL,0.7137
eval/runtime,18.4344
eval/samples_per_second,8.028
eval/steps_per_second,2.007
train/epoch,1.0
train/global_step,331.0
train/grad_norm,33.63998
train/learning_rate,0.0
train/loss,0.5536


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 13:02:45,486] Trial 47 pruned. Trial was pruned at step 662


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇██
train/grad_norm,▂▂▁▂▂▂▂▁▂▄▃▆▄▃▆▂▂▆▄▇▇▁▄▃▂▂▂▄█▁▄▁▁▆▃▄█▆▇▆
train/learning_rate,███▇▇▇▇▇▆▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,█▅▄▆▃▄▃▄▄▄▂▃▄▃▂▃▃▂▁▃▂▂▂▁▂▃▂▂▂▂▂▁▂▂▂▃▂▁▂▂

0,1
eval/loss,0.64329
eval/rougeL,0.7004
eval/runtime,24.4284
eval/samples_per_second,6.059
eval/steps_per_second,3.029
train/epoch,1.0
train/global_step,662.0
train/grad_norm,45.82072
train/learning_rate,0.0
train/loss,0.6608


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 13:07:21,885] Trial 48 pruned. Trial was pruned at step 331


0,1
eval/loss,▁
eval/rougeL,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇████
train/grad_norm,▂▂▃▂▂▂▃▁▅▂▅▂▆▁▄▅▄▄▅▃▃▇█▃▂▃▅▅█▂█▆▆
train/learning_rate,███▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁
train/loss,█▆▇▆▅▅▅▃▄▃▄▂▃▁▃▃▂▃▂▁▂▂▂▂▂▁▂▂▂▃▂▃▁

0,1
eval/loss,0.68933
eval/rougeL,0.69016
eval/runtime,17.5464
eval/samples_per_second,8.435
eval/steps_per_second,2.109
train/epoch,1.0
train/global_step,331.0
train/grad_norm,29.21919
train/learning_rate,0.0
train/loss,0.6145


  lr       = trial.suggest_loguniform("learning_rate", 1e-6, 1e-4)
  weight_decay = trial.suggest_uniform("weight_decay", 0.0, 0.1)
  lora_dropout= trial.suggest_uniform("lora_dropout", 0.0, 0.3)
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
Using EarlyStoppingCallback without load_best_model_at_end=True. Once training is finished, the best model will not be loaded automatically.


Step,Training Loss,Validation Loss


[I 2025-05-17 13:12:26,127] Trial 49 pruned. Trial was pruned at step 331


Best trial:
FrozenTrial(number=31, state=TrialState.COMPLETE, values=[0.7290111782758841], datetime_start=datetime.datetime(2025, 5, 17, 11, 38, 29, 967734), datetime_complete=datetime.datetime(2025, 5, 17, 11, 44, 26, 369689), params={'learning_rate': 9.596461243144801e-05, 'per_device_train_batch_size': 2, 'gradient_accumulation_steps': 1, 'weight_decay': 0.07115933746212984, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.054778160858588026}, user_attrs={}, system_attrs={'completed_rung_0': 0.7290111782758841, 'completed_rung_1': 0.7290111782758841, 'completed_rung_2': 0.7290111782758841, 'completed_rung_3': 0.7290111782758841}, intermediate_values={662: 0.7290111782758841}, distributions={'learning_rate': FloatDistribution(high=0.0001, log=True, low=1e-06, step=None), 'per_device_train_batch_size': CategoricalDistribution(choices=(2, 4)), 'gradient_accumulation_steps': CategoricalDistribution(choices=(1, 2, 4)), 'weight_decay': FloatDistribution(high=0.1, log=False, low=0.0, step

In [3]:
study.best_params

{'learning_rate': 9.596461243144801e-05,
 'per_device_train_batch_size': 2,
 'gradient_accumulation_steps': 1,
 'weight_decay': 0.07115933746212984,
 'lora_r': 16,
 'lora_alpha': 32,
 'lora_dropout': 0.054778160858588026}

In [11]:
9.596461243144801e-05/4

2.3991153107862002e-05