In [4]:
import numpy
import pandas
import torch
from transformers import AutoModel, AutoTokenizer, Trainer, TrainingArguments, AutoModelForCausalLM


In [None]:
# Pick the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
CHECKPOINT = "microsoft/DialoGPT-large"

# Load the tokenizer first: its EOS token id is reused as the model's pad token id.
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(
    CHECKPOINT,
    pad_token_id=tokenizer.eos_token_id,
)
model = model.to(DEVICE)

In [None]:
# Step budget for the run: passed as `max_steps` to TrainingArguments and
# embedded in the run / output-directory name.
max_steps = 3

In [None]:
# The run name encodes the step budget and doubles as the output directory.
output_dir = trained_model_name = f"dialo_train_{max_steps}_steps"

In [None]:
# Hyper-parameters and run-control settings handed to the Trainer.
training_args = TrainingArguments(
    # --- Output location ---
    output_dir=output_dir,
    overwrite_output_dir=False,

    # --- Optimisation ---
    optim="adafactor",
    learning_rate=1e-5,
    warmup_steps=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=False,

    # --- Run length ---
    # Per the TrainingArguments documentation, a positive `max_steps`
    # overrides `num_train_epochs`.
    num_train_epochs=1,
    max_steps=max_steps,

    # --- Batch sizes ---
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,

    # --- Evaluation, checkpointing and logging cadence ---
    evaluation_strategy="steps",
    eval_steps=120,
    save_steps=120,
    save_total_limit=1,
    logging_strategy="steps",
    logging_steps=1,
    disable_tqdm=False,

    # --- Best-checkpoint selection (lower eval_loss is better) ---
    # NOTE(review): no early-stopping callback is configured in this cell;
    # these settings only control which checkpoint is reloaded at the end.
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# Build the Trainer around the already-loaded model and the arguments above.
# The step budget comes from `training_args.max_steps`; `Trainer` does not
# accept a `total_steps` keyword, so none is passed here.
# NOTE(review): the empty strings below are placeholders, not datasets.
# TODO: replace them with real tokenized train/eval datasets before training.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset="",
    eval_dataset="",
)

In [None]:
# Training: run the Trainer's training loop and keep whatever
# `trainer.train()` returns for later inspection.
training_output = trainer.train()

In [None]:
# Save model locally
# The final artefacts go into a "final" sub-folder of the run's output directory.
save_dir = f"{output_dir}/final"

trainer.save_model(save_dir)
# The Trainer was built without a tokenizer, so `save_model` does not write
# tokenizer files; save them explicitly so `save_dir` can be reloaded on its own.
tokenizer.save_pretrained(save_dir)
print("Saved model to:", save_dir)