In [1]:
from core.dataloaders.focus.focus_dataloader import FoCusDatasetPersonaV2
from core.utils import PytorchDatasetFactory
from core.dataloaders.focus.models.debertav3_dataloaders import DebertaV3FoCusPersonaDatasetSampleV2
from core.hyperparameters.debertav3_hyperparameters import DebertaV3HyperparametersV1

from transformers import AutoTokenizer
from datasets import load_dataset
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

import torch


# model_name = "microsoft/deberta-v3-base"
model_name = "microsoft/deberta-v3-small"

tokenizer = AutoTokenizer.from_pretrained(model_name)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

hyperparameters = DebertaV3HyperparametersV1(
    train_batch_size=16,
    valid_batch_size=16,
    max_dialog_history_tokens=70,
    max_knowledge_candidates_tokens=220,
    max_persona_tokens=20,
    model_name=model_name,
    project_name="focus_persona_classification",
)

train_dataset = FoCusDatasetPersonaV2(
    input_dataset_path="./datasets/FoCus/train_focus.json",
    is_train=True,
)


valid_dataset = FoCusDatasetPersonaV2(
    input_dataset_path="./datasets/FoCus/valid_focus.json",
    is_train=False,
)

train_dataset = PytorchDatasetFactory(
    dataset=train_dataset,
    tokenizer=tokenizer,
    hyperparameters=hyperparameters,
    dataset_sample_class=DebertaV3FoCusPersonaDatasetSampleV2,
)

valid_dataset = PytorchDatasetFactory(
    dataset=valid_dataset,
    tokenizer=tokenizer,
    hyperparameters=hyperparameters,
    dataset_sample_class=DebertaV3FoCusPersonaDatasetSampleV2,
)

from datasets import load_metric
import numpy as np

accuracy_metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=-1)
    return accuracy_metric.compute(predictions=predictions, references=labels)

# train_positive = 0
# train_negative = 0
# for sample in train_dataset:
#     if sample["labels"] == 1:
#         train_positive += 1
#     else:
#         train_negative += 1

# print("Train positive: ", train_positive)
# print("Train negative: ", train_negative)
# print("Train ratio: ", train_positive / (train_positive + train_negative))


# positive_ratio = train_positive / (train_positive + train_negative)
# class_weights = [positive_ratio, 1 - positive_ratio]

class_weights = torch.tensor([0.38646951059931955, 0.6135304894006804])

from core.base_models.debertav3_models import DebertaV3PersonaClassificationV3
from transformers import DebertaV2Config 
# model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

model = DebertaV3PersonaClassificationV3.from_pretrained(
    hyperparameters.model_name,
    config=DebertaV2Config.from_pretrained(
        hyperparameters.model_name,
    ),
    class_weights=class_weights,
)

model.class_weights

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at microsoft/deberta-v3-small were not used when initializing DebertaV3PersonaClassificationV3: ['lm_predictions.lm_head.LayerNorm.bias', 'mask_predictions.dense.weight', 'lm_predictions.lm_head.dense.weight', 'lm_predictions.lm_head.LayerNorm.weight', 'mask_predictions.dense.bias', 'mask_predictions.LayerNorm.weight', 'lm_predictions.lm_head.bias', 'mask_predictions.LayerNorm.bias', 'mask_predictions.classifier.weight', 'lm_predictions.lm_head.dense.bias', 'mask_predictions.classifier.bias']
- This IS expected if you are initializing DebertaV3PersonaClassificationV3 from the checkpoint of a model trained on another task or with another architecture (e.g. initializing 

tensor([0.3865, 0.6135])

In [None]:
%env WANDB_PROJECT=focus_persona_classification

training_args = TrainingArguments(
    output_dir=f"./results/{model_name}",
    learning_rate=2e-5,
    per_device_train_batch_size=hyperparameters.train_batch_size,
    per_device_eval_batch_size=hyperparameters.valid_batch_size,
    num_train_epochs=4,
    weight_decay=0.02,
    logging_steps=10,
    overwrite_output_dir=True,
    run_name=f"huggingface_{model_name}",
    fp16=True,
    evaluation_strategy="steps",
    eval_steps=1000,
    do_train=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

trainer.train()
# trainer.evaluate()

### evaluate best persona model

In [5]:
from core.dataloaders.focus.focus_dataloader import FoCusDatasetPersonaV2
from core.utils import PytorchDatasetFactory
from core.dataloaders.focus.models.debertav3_dataloaders import DebertaV3FoCusPersonaDatasetSampleV2
from core.hyperparameters.debertav3_hyperparameters import DebertaV3HyperparametersV1

from transformers import AutoTokenizer
from datasets import load_dataset
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

import torch
from torch.utils.data import DataLoader


# model_name = "microsoft/deberta-v3-base"
# model_name = "microsoft/deberta-v3-small"
model_name = "./results/microsoft/deberta-v3-small/checkpoint-87000/"

tokenizer = AutoTokenizer.from_pretrained(model_name)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

hyperparameters = DebertaV3HyperparametersV1(
    train_batch_size=16,
    valid_batch_size=16,
    max_dialog_history_tokens=70,
    max_knowledge_candidates_tokens=220,
    max_persona_tokens=20,
    model_name=model_name,
    project_name="focus_persona_classification",
)

train_dataset = FoCusDatasetPersonaV2(
    input_dataset_path="./datasets/FoCus/train_focus.json",
    is_train=True,
)


valid_dataset = FoCusDatasetPersonaV2(
    input_dataset_path="./datasets/FoCus/valid_focus.json",
    is_train=False,
)

train_dataset = PytorchDatasetFactory(
    dataset=train_dataset,
    tokenizer=tokenizer,
    hyperparameters=hyperparameters,
    dataset_sample_class=DebertaV3FoCusPersonaDatasetSampleV2,
)

valid_dataset = PytorchDatasetFactory(
    dataset=valid_dataset,
    tokenizer=tokenizer,
    hyperparameters=hyperparameters,
    dataset_sample_class=DebertaV3FoCusPersonaDatasetSampleV2,
)

from datasets import load_metric
import numpy as np

accuracy_metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=-1)
    return accuracy_metric.compute(predictions=predictions, references=labels)

class_weights = torch.tensor([0.1, 0.9])

from core.base_models.debertav3_models import DebertaV3PersonaClassificationV3
from transformers import DebertaV2Config 
# model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

model = DebertaV3PersonaClassificationV3.from_pretrained(
    hyperparameters.model_name,
    config=DebertaV2Config.from_pretrained(
        hyperparameters.model_name,
    ),
    class_weights=class_weights,
)

model.class_weights

tensor([0.1000, 0.9000])

In [9]:
from core.dataloaders.focus.lighting.debertav3_lighting_dataloaders import DebertaV3FoCusPersonaLightningDataModuleV2 
dataset = DebertaV3FoCusPersonaLightningDataModuleV2(
    train_path_dataset="./datasets/FoCus/train_focus.json",
    valid_path_dataset="./datasets/FoCus/valid_focus.json",
    hyperparameters=hyperparameters,
    tokenizer=tokenizer,  # type: ignore
    debug_status=0,
)
dataset.setup()

In [11]:
%env TOKENIZERS_PARALLELISM=true

env: TOKENIZERS_PARALLELISM=true


In [23]:
model = model.cuda()
model.eval()
None

In [24]:
total_accuracy = 0

for train_sample in dataset.train_dataloader():
    train_sample.pop("unique_ids", None)
    for key in train_sample.keys():
        train_sample[key] = train_sample[key].cuda()

    output = model(**train_sample)
    labels = train_sample["labels"]
    logits = output.logits
    predicts = torch.argmax(logits, dim=-1)
    # print("Labels: ", labels)
    # print("Logits: ", predicts)
    accuracy = (predicts == labels).int().sum().item() / len(labels)
    total_accuracy += accuracy
    # break

print("Accuracy: ", total_accuracy / len(dataset.train_dataloader()))

Accuracy:  0.9329495231449174


In [26]:
total_accuracy = 0

for train_sample in dataset.val_dataloader():
    train_sample.pop("unique_ids", None)
    for key in train_sample.keys():
        train_sample[key] = train_sample[key].cuda()

    output = model(**train_sample)
    labels = train_sample["labels"]
    logits = output.logits
    predicts = torch.argmax(logits, dim=-1)
    # print("Labels: ", labels)
    # print("Logits: ", predicts)
    accuracy = (predicts == labels).int().sum().item() / len(labels)
    total_accuracy += accuracy
    # break

print("Accuracy: ", total_accuracy / len(dataset.val_dataloader()))

Accuracy:  0.9173993193420307
