In [37]:
import torch
import torch.nn as nn
from transformers import BertForSequenceClassification, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

class CustomModel(nn.Module):
    def __init__(self, num_categories, hidden_size, device=device):
        super().__init__()
        self.model = BertForSequenceClassification.from_pretrained(
            # "microsoft/MiniLM-L12-H384-uncased",
            'bert-base-uncased',
            num_labels=2,
            label2id = {
                'NO_SP': 0,
                'SP': 1
            },
            id2label= {
                0: 'NO_SP',
                1: 'SP'
            }).to(device)
        self.num_categories = num_categories
        self.device = device
        self.embedding = nn.Embedding(num_categories, hidden_size)
        self.classifier = nn.Sequential(
            nn.Linear(num_categories + self.model.config.hidden_size, hidden_size),
            nn.GELU(),
            nn.Linear(hidden_size, 2)
        )

    def forward(self, input_ids, category, attention_mask=None, token_type_ids=None):

        # Extract last hidden state embeds from transformers
        out = self.model(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, output_hidden_states=True)
        cls_embeds = out.hidden_states[-1][:, 0, :]
        
        # Concatinate with categorical data
        concat = torch.cat((cls_embeds, category), dim=-1)

        # Classify with classifier
        output = self.classifier(concat)
        return output


# Example usage
model = CustomModel(num_categories=4, hidden_size=128)
tokenizer = AutoTokenizer.from_pretrained("microsoft/MiniLM-L12-H384-uncased")

batch_size = 3
text = ["H"]*batch_size
category = torch.tensor([[0,0,1,0], [1,0,0,0], [0,1,0,0]])
tokens = tokenizer(text, return_tensors="pt")
output = model(**tokens, category=category)
output

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[-0.1403,  0.1019],
        [-0.1520,  0.1025],
        [-0.1440,  0.0923]], grad_fn=<AddmmBackward0>)

In [1]:
import torch
import torch.nn as nn
from transformers import AutoModelForSequenceClassification, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
category = 2
cat_one_hot = torch.zeros(4).to("cpu")
cat_one_hot[category] = 1
cat_one_hot

tensor([0., 0., 1., 0.])

In [81]:
from transformers import BertModel, AutoTokenizer
model = BertModel.from_pretrained("Rostlab/prot_bert")#.to(device)
tokenizer = AutoTokenizer.from_pretrained("Rostlab/prot_bert")
tokens = tokenizer("A B C", max_length=5, padding='max_length', return_tensors="pt")
tokens

{'input_ids': tensor([[ 2,  6, 27, 23,  3]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}

In [80]:
seq_out = model(**tokens)#.logits
seq_out.last_hidden_state.shape

torch.Size([1, 5, 1024])

In [77]:
import torch.nn as nn
from torch import tensor
import torch
embedding = nn.Embedding(4, 1024)
embed = embedding(tensor([1, 0, 0, 0]))
embed.shape

torch.Size([4, 1024])

In [76]:
torch.cat((torch.ones(4,12),torch.ones((4,12))), dim=-1)

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         1., 1., 1., 1., 1., 1.]])

In [72]:
torch.cat((seq_out.last_hidden_state[:, 0], embed), dim=-1)

RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 1 but got size 4 for tensor number 1 in the list.