In [1]:
from transformers import BartTokenizer, BartModel
import torch
from torch import nn
from tqdm import tqdm

# Sample texts used to exercise the embedding pipeline below.
messages = [
    "We have release a new product, do you want to buy it?",
    "Winner! Great deal, call us to get this product for free",
    "Tomorrow is my birthday, do you come to the party?",
]

# The tokenizer and the model are both loaded from the same pretrained
# checkpoint, so the checkpoint name is spelled out once.
_BART_CHECKPOINT = "facebook/bart-base"
tokenizer = BartTokenizer.from_pretrained(_BART_CHECKPOINT)
bart_model = BartModel.from_pretrained(_BART_CHECKPOINT)

# Refactor the embedding logic above into a function convert_to_embeddings(messages)
# that:
# - loops over messages with tqdm(messages)
# - tokenizes each message as a single-item list: tokenizer([message], ...)
# - runs bart_model under torch.no_grad() with bart_model.eval()
# - mean-pools last_hidden_state and reshapes to (-1)
# - appends each vector to a list and returns torch.stack(list)
def convert_to_embeddings(messages):
    """Embed each message as the mean of BART's last hidden states.

    Each message is tokenized on its own as a one-item batch (truncated to at
    most 512 tokens), run through the module-level ``bart_model`` with
    gradient tracking disabled, and averaged over the token dimension.

    Side effect: ``bart_model`` is switched to eval mode and left that way.

    Args:
        messages: Iterable of strings to embed.

    Returns:
        A 2-D tensor with one row per message, in input order. When
        ``messages`` yields nothing, an empty ``(0, d_model)`` tensor is
        returned instead of letting ``torch.stack`` fail on an empty list.
    """
    # Switching to eval mode does not depend on the message, so do it once
    # up front instead of on every loop iteration.
    bart_model.eval()

    embeddings_list = []
    # A single no_grad context covers the whole loop; inference only.
    with torch.no_grad():
        for message in tqdm(messages):
            out = tokenizer(
                [message],
                padding=True,
                max_length=512,
                truncation=True,
                return_tensors="pt",
            )
            pred = bart_model(
                input_ids=out["input_ids"],
                attention_mask=out["attention_mask"],
            )
            # Average over dim=1 (tokens), then flatten the leading batch
            # dimension of size one so each message yields a 1-D vector.
            vec = pred.last_hidden_state.mean(dim=1).reshape(-1)
            embeddings_list.append(vec)

    if not embeddings_list:
        # torch.stack raises on an empty list; return a well-formed empty
        # result whose width matches the model's hidden size.
        return torch.empty((0, bart_model.config.d_model))
    return torch.stack(embeddings_list)

# Embed every sample message; X is the stacked result returned by
# convert_to_embeddings, with one embedding row per message.
X = convert_to_embeddings(messages)
# Print the dimensions of the embedding tensor as a quick sanity check.
print(X.shape)

  from .autonotebook import tqdm as notebook_tqdm
  0%|          | 0/3 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.58.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
100%|██████████| 3/3 [00:00<00:00, 20.51it/s]

torch.Size([3, 768])



