In [1]:
from transformers import BartTokenizer, BartModel
import torch
from torch import nn

# Sample messages to embed. They have different token lengths, so the
# tokenizer pads the shorter ones (visible as 0s in the attention mask).
messages = [
    "We have release a new product, do you want to buy it?",
    "Winner! Great deal, call us to get this product for free",
    "Tomorrow is my birthday, do you come to the party?",
]

# Load the tokenizer "facebook/bart-base"
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")

# Tokenize `messages` into a padded batch of PyTorch tensors:
#  - padding=True        -> pad every row to the longest message in the batch
#  - max_length=512      -> upper bound on tokens per message
#  - truncation=True     -> cut messages that exceed max_length
#  - return_tensors="pt" -> return torch tensors instead of Python lists
out = tokenizer(
    messages,
    padding=True,
    max_length=512,
    truncation=True,
    return_tensors="pt"
)

# Print the tokenized output object `out` (input_ids and attention_mask)
print(out)

# Load the BART model "facebook/bart-base" and put it in eval mode.
# eval() does not depend on autograd state, so it sits outside no_grad.
bart_model = BartModel.from_pretrained("facebook/bart-base")
bart_model.eval()

with torch.no_grad():
    # Run the model with input_ids and attention_mask.
    # use_cache=False: nothing is generated here, so no key/value cache is
    # needed. NOTE(review): this presumably also silences the
    # `past_key_values` deprecation warning - confirm on the installed version.
    pred = bart_model(
        input_ids=out["input_ids"],
        attention_mask=out["attention_mask"],
        use_cache=False,
    )

    # Mean-pool over the sequence dimension, counting only real tokens.
    # A plain .mean(dim=1) would also average the hidden states at padded
    # positions, so a message's embedding would change with the length of
    # the other messages in the batch.
    # NOTE(review): for BartModel, last_hidden_state should be the decoder
    # output - confirm that pooling it (not encoder_last_hidden_state) is intended.
    hidden = pred.last_hidden_state
    token_mask = out["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    token_counts = token_mask.sum(dim=1).clamp(min=1)  # guard against divide-by-zero
    embeddings = (hidden * token_mask).sum(dim=1) / token_counts

# Print embeddings.shape and the first row embeddings[0, :]
print(embeddings.shape)
print(embeddings[0, :])

  from .autonotebook import tqdm as notebook_tqdm


{'input_ids': tensor([[    0,   170,    33,   800,    10,    92,  1152,     6,   109,    47,
           236,     7,   907,    24,   116,     2],
        [    0, 46722,   328,  2860,   432,     6,   486,   201,     7,   120,
            42,  1152,    13,   481,     2,     1],
        [    0, 38849,    16,   127,  4115,     6,   109,    47,   283,     7,
             5,   537,   116,     2,     1,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])}


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.58.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


torch.Size([3, 768])
tensor([-4.1475e-01, -9.5950e-01, -1.9372e+00, -1.8202e+00, -1.3529e+00,
        -3.2617e-01, -4.3141e-02, -2.8496e-01, -4.4574e-01, -2.8201e+00,
        -7.7633e-01,  9.4289e-01,  3.7461e-01,  3.3576e-01,  5.2033e-01,
         7.1250e-01, -9.4961e-01,  7.0305e-01, -3.9577e-01, -1.0670e+00,
         2.0777e-01, -1.6843e-01, -1.0866e+00,  8.7861e-01,  3.0824e-01,
        -8.2244e-01,  5.6214e-01,  2.9970e+00,  3.1303e-01, -3.6227e+00,
         6.2353e-01,  3.4955e-01,  1.5166e-01, -9.7267e-01, -1.5525e+00,
         1.6685e+00, -1.1181e-01,  1.4668e+00,  1.0774e-01, -2.3462e-01,
         3.8726e-01,  1.2186e+00,  4.9061e-01, -4.8987e-01,  3.9203e-01,
         4.2587e-01, -1.9299e+00,  5.9904e-01, -6.1466e-01,  7.6263e-01,
        -1.3223e+00,  6.8178e-01, -8.5020e-01,  1.1885e-01, -6.4349e-02,
        -1.0802e+00,  3.9520e-01, -1.3714e-02, -1.4325e-01,  3.8025e-01,
         2.1685e-01,  7.3130e-01,  3.3559e-01,  6.3142e-01, -7.0818e-01,
        -1.4829e-01,  7.6586e-