In [5]:
from transformers import DebertaV2ForSequenceClassification, DebertaV2Tokenizer
from transformers.models.deberta_v2.modeling_deberta_v2 import StableDropout, DebertaV2Model, ContextPooler
from transformers.modeling_outputs import SequenceClassifierOutput
from typing import Optional, List, Dict, Any, Union, Tuple
import torch
from torch import nn

class DebertaV3ForClassification(DebertaV2ForSequenceClassification):
    """DeBERTa encoder with a pooled linear head trained with an MSE objective.

    The forward pass runs ``DebertaV2Model``, pools the encoder output with
    ``ContextPooler``, applies dropout and a linear layer, and (when labels
    are supplied) computes ``nn.MSELoss`` between the flattened logits and the
    flattened labels.
    """

    def __init__(self, config):
        """Build the encoder, pooler, dropout and linear head from ``config``.

        Args:
            config: Model configuration. ``num_labels`` (falls back to 2) sets
                the width of the linear head; ``cls_dropout`` (falls back to
                ``hidden_dropout_prob``) sets the head dropout probability.
        """
        super().__init__(config)

        num_labels = getattr(config, "num_labels", 2)
        self.num_labels = num_labels

        # NOTE(review): the parent constructor presumably already creates
        # `deberta`, `pooler`, `classifier` and `dropout`; if so these lines
        # rebuild them a second time. Confirm against the installed
        # transformers version before removing.
        self.deberta = DebertaV2Model(config)
        self.pooler = ContextPooler(config)
        output_dim = self.pooler.output_dim

        self.classifier = nn.Linear(output_dim, num_labels)
        drop_out = getattr(config, "cls_dropout", None)
        drop_out = self.config.hidden_dropout_prob if drop_out is None else drop_out
        self.dropout = StableDropout(drop_out)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, SequenceClassifierOutput]:
        """Encode the inputs, score them, and optionally compute the MSE loss.

        Args:
            input_ids, attention_mask, token_type_ids, position_ids,
            inputs_embeds, output_attentions, output_hidden_states:
                Forwarded unchanged to ``self.deberta``.
            labels: Optional targets. When given, logits and labels are both
                flattened and compared with ``nn.MSELoss``.
            return_dict: If ``None``, ``self.config.use_return_dict`` decides
                the return format.

        Returns:
            A ``SequenceClassifierOutput`` when ``return_dict`` resolves to
            true; otherwise a tuple ``(loss?, logits, *encoder_extras)`` where
            the loss is present only if ``labels`` was given. When ``labels``
            is given, the returned logits are the flattened logits used for
            the loss.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.deberta(
            input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        encoder_layer = outputs[0]
        pooled_output = self.pooler(encoder_layer)
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            # Flattened comparison: logits contribute batch * num_labels
            # values, so this lines up element-wise with one target per
            # example only when the head has a single output.
            logits = logits.view(-1)
            targets = labels.view(-1)
            if targets.is_floating_point():
                # Float targets: keep the original behaviour of matching the
                # logits to the label dtype.
                logits = logits.to(targets.dtype)
            else:
                # Integer/bool targets: casting the logits to an integer dtype
                # would detach them from the graph and is not a valid MSE
                # input, so promote the targets instead.
                targets = targets.to(logits.dtype)
            loss = nn.MSELoss()(logits, targets)

        if not return_dict:
            # With return_dict disabled the encoder result is a plain tuple,
            # so attribute access (`outputs.hidden_states`) is unavailable.
            output = (logits,) + outputs[1:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

# Loading the custom model is left disabled; this cell only exercises the tokenizer.
# model = DebertaV3ForClassification.from_pretrained("microsoft/deberta-v3-base")
tokenizer = DebertaV2Tokenizer.from_pretrained("microsoft/deberta-v3-base")
# Last expression of the cell: displays the token ids produced for a short string.
tokenizer.encode("Hello world")

loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/spm.model from cache at /home/dimweb/.cache/huggingface/transformers/ec748fd4f03d0e5a2d5d56dff01e6dd733f23c67105cd54a9910f9d711870253.0abaeacf7287ee8ba758fec15ddfb4bb6c697bb1a8db272725f8aa633501787a
loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/added_tokens.json from cache at None
loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/special_tokens_map.json from cache at None
loading file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/tokenizer_config.json from cache at /home/dimweb/.cache/huggingface/transformers/967a4d63eb35950cfd24a9e335906419009f32940fa2ba1b73e7ba032628c38d.df5a7f41459442f66bec27ac9352bba694cde109855024b3ae61be2f5734ee9a
loading configuration file https://huggingface.co/microsoft/deberta-v3-base/resolve/main/config.json from cache at /home/dimweb/.cache/huggingface/transformers/e6f9db57345f0f60c9f837fa97bcb27b1ed31e99feb33d732d

[1, 5365, 447, 2]

In [6]:
# Display the id the tokenizer uses for its beginning-of-sequence token.
tokenizer.bos_token_id

1

In [1]:
from core.hyperparameters.debertav3_hyperparameters import DebertaV3HyperparametersV1
from transformers import DebertaV2Tokenizer
from core.dataloaders.focus.lighting.debertav3_lighting_dataloaders import DebertaV3FoCusLightningDataModuleV1, DebertaV3FoCusLightningDataModuleV2 

# Only the two batch sizes are set explicitly here.
hyperparameters = DebertaV3HyperparametersV1(train_batch_size=16, valid_batch_size=16)

# The tokenizer checkpoint is taken from the hyperparameters object.
tokenizer = DebertaV2Tokenizer.from_pretrained(hyperparameters.model_name)

# Data module over the FoCus train/validation JSON files.
data_module = DebertaV3FoCusLightningDataModuleV2(
    train_path_dataset="./datasets/FoCus/train_focus.json",
    valid_path_dataset="./datasets/FoCus/valid_focus.json",
    hyperparameters=hyperparameters,
    tokenizer=tokenizer,  # type: ignore
    debug_status=0,
)

# Must run before any dataloader is requested from the module in later cells.
data_module.setup()

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
# Pull a single batch from the training dataloader and display it for inspection.
next(iter(data_module.train_dataloader()))

{'input_ids': tensor([[    1,   279, 10367,  ...,     0,     0,     0],
         [    1,   606,   307,  ...,     0,     0,     0],
         [    1, 10420, 16479,  ..., 15713,   302,     2],
         ...,
         [    1,   279, 23409,  ...,     0,     0,     0],
         [    1,   279, 75313,  ...,     0,     0,     0],
         [    1,   434,   340,  ...,     0,     0,     0]]),
 'labels': tensor([1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0]),
 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 1, 1, 1],
         ...,
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0],
         [1, 1, 1,  ..., 0, 0, 0]]),
 'unique_ids': ['WL1G6EX8T9JY_dialogue1',
  'NZQX869YZZK9_dialogue6',
  'NTGAXOBEIRLQ_dialogue6',
  'ZANR2M0FJKWF_dialogue1',
  'EKEMEF4T3OHH_dialogue5',
  'VTFNAHV7OIG4_dialogue5',
  'HYOQLKN3XGCD_dialogue5',
  'KAN28FJWSTJP_dialogue4',
  'P878Z7YI2MMT_dialogue1',
  'YJH7UTD0DR6A_dialogue4',
  'K74N0MZ