In [12]:
from transformers import  MPNetModel, MPNetForSequenceClassification, AutoTokenizer, AutoConfig
from transformers.models.mpnet.modeling_mpnet import MPNetClassificationHead, SequenceClassifierOutput
from typing import List, Optional, Union, Tuple
import torch
from torch import nn
from transformers.utils import ModelOutput


In [8]:


class MPNetForSequenceClassificationV1(MPNetForSequenceClassification):
    """MPNet sequence classifier with an optionally class-weighted cross-entropy loss.

    Args:
        config: Model configuration. ``config.num_labels`` is the number of
            classes the logits are reshaped to when computing the loss.
        cross_entropy_loss_weights: Optional per-class weights (a sequence of
            floats or a tensor) forwarded as ``weight`` to
            ``nn.CrossEntropyLoss``. ``None`` (default) gives an unweighted loss.
    """

    def __init__(self,
            config,
            cross_entropy_loss_weights=None
        ):
        super().__init__(config)
        self.num_labels = config.num_labels
        # NOTE(review): the parent constructor is expected to have created
        # `mpnet` and `classifier` already; the two assignments below replace
        # them. Confirm the re-creation (and MPNetModel's default arguments) is
        # intentional before removing it, since it determines the state_dict keys.
        self.mpnet = MPNetModel(config)
        self.classifier = MPNetClassificationHead(config)
        self.cross_entropy_loss_weights = cross_entropy_loss_weights

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        """Encode the inputs, classify them and optionally compute the loss.

        All arguments except ``labels`` and ``return_dict`` are passed straight
        to the underlying ``self.mpnet`` encoder.

        Args:
            labels: Optional class indices. When given, a cross-entropy loss is
                computed over ``logits.view(-1, num_labels)``, weighted by
                ``cross_entropy_loss_weights`` if those were provided.
            return_dict: Whether to return a ``SequenceClassifierOutput``
                instead of a plain tuple. Defaults to
                ``config.use_return_dict`` when ``None``.

        Returns:
            A ``SequenceClassifierOutput`` (``loss``, ``logits``,
            ``hidden_states``, ``attentions``) when ``return_dict`` is true,
            otherwise a tuple ``(loss?, logits, *extra_encoder_outputs)`` where
            ``loss`` is present only if ``labels`` was given.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.mpnet(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            class_weights = None
            if self.cross_entropy_loss_weights is not None:
                # Build the weight tensor on the same device as the logits so the
                # loss also works when the model runs on an accelerator.
                # `as_tensor` accepts lists and tensors alike and avoids the
                # copy-construct warning that `torch.tensor(tensor)` emits.
                class_weights = torch.as_tensor(
                    self.cross_entropy_loss_weights,
                    dtype=torch.float32,
                    device=logits.device,
                )
            loss_fct = nn.CrossEntropyLoss(weight=class_weights)
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            # With return_dict=False the encoder returns a plain tuple, which has
            # no `.hidden_states` / `.attentions`. Follow the tuple layout used by
            # the upstream MPNetForSequenceClassification.forward: drop the first
            # two encoder entries and prepend logits (and loss, when computed).
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )



Some weights of MPNetForSequenceClassificationV1 were not initialized from the model checkpoint at sentence-transformers/all-mpnet-base-v2 and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Downloading tokenizer_config.json: 100%|██████████| 363/363 [00:00<00:00, 197kB/s]
Downloading vocab.txt: 100%|██████████| 226k/226k [00:00<00:00, 390kB/s]  
Downloading tokenizer.json: 100%|██████████| 455k/455k [00:00<00:00, 606kB/s]  
Downloading special_tokens_map.json: 100%|██████████| 239/239 [00:00<00:00, 121kB/s]


In [11]:
# Smoke test: run a single sentence through the freshly loaded classifier.
# `tokenizer` and `model` must already exist in the kernel; the cell that
# creates them is not part of the code shown in this notebook.
# The tokenizer is called directly instead of via the deprecated
# `batch_encode_plus`; for a list of strings both take the same code path.
inputs = tokenizer(["Hello world"], return_tensors="pt")
model(**inputs)

SequenceClassifierOutput(loss=None, logits=tensor([[-0.0100,  0.0156]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

  from .autonotebook import tqdm as notebook_tqdm


Class weights:  [0.13454188704999148, 0.8654581129500085]


Some weights of MPNetForSequenceClassificationV1 were not initialized from the model checkpoint at sentence-transformers/all-mpnet-base-v2 and are newly initialized: ['classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 351660
  Num Epochs = 4
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 87916
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdimweb[0m. Use [1m`wandb login 

  weight=torch.tensor(


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

In [1]:
# Run experiment 2 from the project's experiment module. The function body
# lives in core/experiments/mpnet_experiments.py and is not visible here.
# NOTE(review): the recorded output suggests this fine-tunes
# MPNetForSequenceClassificationV2 with class weights - confirm in the module.
from core.experiments.mpnet_experiments import experiment_2
experiment_2()

  from .autonotebook import tqdm as notebook_tqdm


Class weights:  [0.13454188704999148, 0.8654581129500085]


Some weights of MPNetForSequenceClassificationV2 were not initialized from the model checkpoint at sentence-transformers/all-mpnet-base-v2 and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using cuda_amp half precision backend
***** Running training *****
  Num examples = 351660
  Num Epochs = 4
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 87916
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdimweb[0m. Use [1m`wandb login 

Step,Training Loss,Validation Loss


In [1]:
# Run experiment 3 from the project's experiment module. The function body
# lives in core/experiments/mpnet_experiments.py and is not visible here.
# NOTE(review): the recorded output only shows iteration/epoch progress bars;
# confirm in the module what is being trained.
from core.experiments.mpnet_experiments import experiment_3
experiment_3()

  from .autonotebook import tqdm as notebook_tqdm
Iteration:   2%|▏         | 1068/43958 [09:05<6:05:15,  1.96it/s]
Epoch:   0%|          | 0/4 [09:05<?, ?it/s]
