In [1]:
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoModel, BertModel
from torch import nn
import torch
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
class RadBertEmbedder(nn.Module):
    _device = f"{torch.device('cuda' if torch.cuda.is_available() else 'cpu')}:{torch.cuda.current_device()}"
    _tokenizer = AutoTokenizer.from_pretrained("StanfordAIMI/RadBERT")
    _model = BertModel.from_pretrained("StanfordAIMI/RadBERT").to('cuda')

    def __init__(self, emb_dim=32,*args, **kwargs):
        super().__init__()
        self.emb_dim = emb_dim
        self.mlp = nn.Sequential(
            nn.Linear(768, emb_dim), # 768 bert output的维度
            nn.LayerNorm(emb_dim),
            nn.ReLU(),
            nn.Linear(emb_dim, emb_dim),
            nn.LayerNorm(emb_dim),
        ).to('cuda')

    def forward(self, condition):
        start = time.time()
        inputs_list = [self._tokenizer(condition_str, return_tensors="pt").to(self._device) for condition_str in condition]
        with torch.no_grad():
            outputs_list = [self._model(**inputs) for inputs in inputs_list]
        c = torch.stack([outputs.pooler_output[0] for outputs in outputs_list])
        c = self.mlp(c)
        end = time.time()
        print("forward_time: ", end-start)
        return c

Some weights of the model checkpoint at StanfordAIMI/RadBERT were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
emb_obj = RadBertEmbedder(emb_dim=1024)
condition = ["A photo of a lung xray with a visible pleural effusion"]*64
emb_obj.forward(condition)

forward_time:  0.9255011081695557


tensor([[-0.5365,  2.0501,  0.8347,  ..., -0.4111,  1.2516, -0.4292],
        [-0.5365,  2.0501,  0.8347,  ..., -0.4111,  1.2516, -0.4292],
        [-0.5365,  2.0501,  0.8347,  ..., -0.4111,  1.2516, -0.4292],
        ...,
        [-0.5365,  2.0501,  0.8347,  ..., -0.4111,  1.2516, -0.4292],
        [-0.5365,  2.0501,  0.8347,  ..., -0.4111,  1.2516, -0.4292],
        [-0.5365,  2.0501,  0.8347,  ..., -0.4111,  1.2516, -0.4292]],
       device='cuda:0', grad_fn=<NativeLayerNormBackward0>)