## SFTTrainer Subclassing

What if you don't want to evaluate on the entire evaluation dataset, but instead on a random sample of size `eval_sample_size`? Just subclass!

In [None]:
import random
from trl import SFTTrainer


class SFTTrainerEvalSampling(SFTTrainer):
    """SFTTrainer variant that evaluates on a random subset of the eval dataset.

    Args:
        *args: Positional arguments forwarded unchanged to ``SFTTrainer``.
        eval_sample_size: Maximum number of evaluation examples to draw for
            each evaluation run. Defaults to 16.
        **kwargs: Keyword arguments forwarded unchanged to ``SFTTrainer``.
    """

    def __init__(self, *args, eval_sample_size=16, **kwargs):
        super().__init__(*args, **kwargs)
        self.eval_sample_size = eval_sample_size

    def get_eval_dataloader(self, eval_dataset=None):
        """Return a dataloader over a random sample of the evaluation dataset.

        A fresh sample (without replacement) is drawn on every call, so
        successive evaluations generally see different examples.

        Args:
            eval_dataset: Dataset to sample from. Falls back to
                ``self.eval_dataset`` when ``None``.

        Returns:
            Whatever ``SFTTrainer.get_eval_dataloader`` returns for the
            sampled subset.
        """
        if eval_dataset is None:
            eval_dataset = self.eval_dataset
        dataset_len = len(eval_dataset)
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at the dataset length.
        sample_size = min(self.eval_sample_size, dataset_len)
        idxs = random.sample(range(dataset_len), sample_size)
        eval_subset = eval_dataset.select(idxs)  # Select subset
        return super().get_eval_dataloader(eval_subset)

Simply subclass `SFTTrainer` as a new class `SFTTrainerEvalSampling` and override its `get_eval_dataloader` method so that, when it is called, it passes a random sample of the eval dataset to the eval dataloader rather than the entire eval dataset.

Then instantiate `SFTTrainerEvalSampling` just like you would the `SFTTrainer` class, remembering to pass the additional `eval_sample_size` argument.

In [None]:
# example of calling SFTTrainerEvalSampling

# Build the trainer exactly as with SFTTrainer, plus the extra
# eval_sample_size keyword consumed by SFTTrainerEvalSampling.
trainer = SFTTrainerEvalSampling(
    eval_sample_size = 16, # NEW PARAMETER ADDED !!!
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset['train'],
    eval_dataset = dataset['test'],
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = True, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        per_device_eval_batch_size = 2,
        eval_strategy = 'steps',
        eval_steps = 1,
        gradient_accumulation_steps = 64,
        eval_accumulation_steps = 16,
        num_train_epochs = 3, # INCREASE FOR CONTINUED TRAINING
        warmup_steps = 5,
        # NOTE: per the TrainingArguments docs, a positive max_steps
        # overrides num_train_epochs.
        max_steps = 500,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = '/content/drive/MyDrive/ML_Projects/Blog_Headings/Output_Dir',
        save_strategy = 'steps',
        save_steps = 25,
        save_total_limit = 20,
        load_best_model_at_end = True,
        metric_for_best_model = 'eval_loss',
        greater_is_better = False,
    ),
)