In [1]:
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
from transformers import set_seed


# Seed every RNG before the model is built. The checkpoint was saved from a
# pre-training task (see the "weights ... were not used" warning in this cell's
# output), so the sequence-classification head is created fresh by
# `from_pretrained`; without seeding here, each kernel restart would begin
# fine-tuning from different head weights.
# 42 is also the documented default of `TrainingArguments.seed`.
SEED = 42
set_seed(SEED)

# Hub checkpoint id; reused for the tokenizer, the model, and the output/run names.
model_name = "microsoft/deberta-v3-small"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Collator that pads the samples of each batch with the tokenizer's padding settings.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# num_labels=2 configures a two-class classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at microsoft/deberta-v3-small were not used when initializing DebertaV2ForSequenceClassification: ['mask_predictions.LayerNorm.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.bias', 'mask_predictions.dense.bias', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.dense.weight', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.LayerNorm.bias', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.classifier.bias']
- This IS expected if you are initializing DebertaV2ForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializ

In [2]:
from core.dataloaders.focus.focus_dataloader import FoCusDatasetPersonaV2
from core.utils import PytorchDatasetFactory
from core.dataloaders.focus.models.debertav3_dataloaders import DebertaV3FoCusPersonaDatasetSampleV2
from core.hyperparameters.debertav3_hyperparameters import DebertaV3HyperparametersV1

# Project-specific hyperparameter bundle; it is passed to PytorchDatasetFactory
# when the train/validation datasets are built later in this cell.
# NOTE(review): the max_*_tokens values look like per-segment token budgets used
# when each sample is assembled — confirm in DebertaV3HyperparametersV1.
# NOTE(review): the two batch sizes are repeated as literals in the
# TrainingArguments call of the training cell; keep them in sync.
hyperparameters = DebertaV3HyperparametersV1(
    train_batch_size=16,
    valid_batch_size=16,
    max_dialog_history_tokens=70,
    max_knowledge_candidates_tokens=220,
    max_persona_tokens=20,
    model_name=model_name,
    project_name="focus_persona_classification",
)

def build_persona_dataset(input_dataset_path: str, is_train: bool):
    """Build one Trainer-ready FoCus persona dataset.

    Loads the FoCus JSON file at `input_dataset_path` into a
    `FoCusDatasetPersonaV2` and wraps it in a `PytorchDatasetFactory` that uses
    the notebook-level `tokenizer` and `hyperparameters` together with the
    `DebertaV3FoCusPersonaDatasetSampleV2` sample class.

    Args:
        input_dataset_path: Path to the FoCus JSON split.
        is_train: Forwarded unchanged to `FoCusDatasetPersonaV2`.

    Returns:
        The object produced by `PytorchDatasetFactory(...)`.
    """
    focus_dataset = FoCusDatasetPersonaV2(
        input_dataset_path=input_dataset_path,
        is_train=is_train,
    )
    return PytorchDatasetFactory(
        dataset=focus_dataset,
        tokenizer=tokenizer,
        hyperparameters=hyperparameters,
        dataset_sample_class=DebertaV3FoCusPersonaDatasetSampleV2,
    )


# Each name is bound exactly once, to the final wrapped dataset, so re-reading
# the cell never leaves `train_dataset` / `valid_dataset` pointing at the raw
# FoCus object.
train_dataset = build_persona_dataset(
    "./datasets/FoCus/train_focus.json",
    is_train=True,
)

valid_dataset = build_persona_dataset(
    "./datasets/FoCus/valid_focus.json",
    is_train=False,
)

from datasets import load_metric
import numpy as np

# Accuracy metric object; compute_metrics (defined right after) calls its
# `.compute(predictions=..., references=...)` method.
# NOTE(review): `datasets.load_metric` is deprecated in newer `datasets`
# releases in favour of the separate `evaluate` package — confirm against the
# version pinned for this project.
accuracy_metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level `accuracy_metric`.

    `eval_pred` is unpacked into a (predictions, labels) pair. The predicted
    class for each row is the arg-max over the last axis of the predictions.

    Returns:
        Whatever `accuracy_metric.compute` returns for those predictions and
        reference labels.
    """
    raw_scores, references = eval_pred
    predicted_classes = np.argmax(raw_scores, axis=-1)
    return accuracy_metric.compute(
        predictions=predicted_classes,
        references=references,
    )


In [3]:
%env WANDB_PROJECT=focus_persona_classification

training_args = TrainingArguments(
    output_dir=f"./results/{model_name}",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_steps=10,
    overwrite_output_dir=True,
    run_name=f"huggingface_{model_name}",
    fp16=True,
    evaluation_strategy="epoch",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

trainer.train()

env: WANDB_PROJECT=focus_persona_classification


Using cuda_amp half precision backend
***** Running training *****
  Num examples = 114630
  Num Epochs = 2
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 14330
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdimweb[0m. Use [1m`wandb login --relogin`[0m to force relogin


The following columns in the training set don't have a corresponding argument in `DebertaV2ForSequenceClassification.forward` and have been ignored: unique_id. If unique_id are not expected by `DebertaV2ForSequenceClassification.forward`,  you can safely ignore this message.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3088,0.340775,0.860046
2,0.3262,0.402499,0.859088


Saving model checkpoint to ./results/microsoft/deberta-v3-small/checkpoint-500
Configuration saved in ./results/microsoft/deberta-v3-small/checkpoint-500/config.json
Model weights saved in ./results/microsoft/deberta-v3-small/checkpoint-500/pytorch_model.bin
tokenizer config file saved in ./results/microsoft/deberta-v3-small/checkpoint-500/tokenizer_config.json
Special tokens file saved in ./results/microsoft/deberta-v3-small/checkpoint-500/special_tokens_map.json
Saving model checkpoint to ./results/microsoft/deberta-v3-small/checkpoint-1000
Configuration saved in ./results/microsoft/deberta-v3-small/checkpoint-1000/config.json
Model weights saved in ./results/microsoft/deberta-v3-small/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in ./results/microsoft/deberta-v3-small/checkpoint-1000/tokenizer_config.json
Special tokens file saved in ./results/microsoft/deberta-v3-small/checkpoint-1000/special_tokens_map.json
Saving model checkpoint to ./results/microsoft/deberta-v3

TrainOutput(global_step=14330, training_loss=0.31399058339817076, metrics={'train_runtime': 1903.375, 'train_samples_per_second': 120.449, 'train_steps_per_second': 7.529, 'total_flos': 7440217809914304.0, 'train_loss': 0.31399058339817076, 'epoch': 2.0})