In [7]:
import torch
import torch.nn as nn
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification

class TwinnedModel(nn.Module):
    def __init__(self, model_name1='microsoft/deberta-v3-xsmall', model_name2='microsoft/deberta-v3-xsmall', num_classes=3):
        super(TwinnedModel, self).__init__()

        # Load pre-trained models
        self.model1 = DebertaV2ForSequenceClassification.from_pretrained(model_name1, num_labels=num_classes)
        self.model2 = DebertaV2ForSequenceClassification.from_pretrained(model_name2, num_labels=num_classes)

        # Freeze the models if you don't want to train them further
        for param in self.model1.parameters():
            param.requires_grad = False
        for param in self.model2.parameters():
            param.requires_grad = False

        # Define a fully connected layer to combine the outputs
        combined_hidden_size = self.model1.config.hidden_size + self.model2.config.hidden_size
        self.fc = nn.Linear(combined_hidden_size, num_classes)

    def forward(self, input_ids1, attention_mask1, input_ids2, attention_mask2):
        # Get outputs from both models
        outputs1 = self.model1(input_ids1, attention_mask=attention_mask1)
        outputs2 = self.model2(input_ids2, attention_mask=attention_mask2)

        # Get the logits from both models' outputs
        logits1 = outputs1.logits
        logits2 = outputs2.logits

        # Concatenate the logits
        combined_logits = torch.cat((logits1, logits2), dim=1)

        # Pass the combined logits through the fully connected layer
        logits = self.fc(combined_logits)

        return logits

# Example usage
tokenizer = DebertaV2Tokenizer.from_pretrained('microsoft/deberta-v3-xsmall')

# Define sample input sentences
premise = "A man is eating food."
hypothesis = "The man is having a meal."

# Tokenize the input sentences for both models
inputs1 = tokenizer(premise, hypothesis, return_tensors='pt', padding=True, truncation=True)
inputs2 = tokenizer(hypothesis, premise, return_tensors='pt', padding=True, truncation=True)

# Instantiate the model
model = TwinnedModel()

# Pass the inputs through the model
logits = model(inputs1['input_ids'], inputs1['attention_mask'], inputs2['input_ids'], inputs2['attention_mask'])

# Output logits for each class (entailment, contradiction, neutral)
print("Logits:", logits)

# Get the predicted class by applying torch.argmax
predicted_class = torch.argmax(logits, dim=1).item()

# Define the mapping of index to label
label_map = {0: "entailment", 1: "contradiction", 2: "neutral"}

# Get the corresponding label
predicted_label = label_map[predicted_class]

print(f"Predicted class: {predicted_class} : {predicted_label}")


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  return self.fget.__get__(instance, owner)()
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-xsmall and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-xsmall and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x6 and 768x3)