In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

In [2]:
# Load the fine-tuned seq2seq checkpoint from a local directory. The repr shown
# below this cell reports the loaded class as T5ForConditionalGeneration.
checkpoint_dir = "GROOV/model_checkpoint/model"
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_dir)

# The tokenizer is built from the stock "t5-base" vocabulary (not from the
# checkpoint directory), and four separator/end markers are registered on top.
# NOTE(review): the model's embeddings are not resized after adding tokens --
# confirm the new token ids fit the existing embedding table and match what the
# checkpoint was trained with.
extra_special_tokens = ["<sep>", "<SEP>", "<eos>", "[SEP]"]
tokenizer = AutoTokenizer.from_pretrained("t5-base", model_max_length=512)
tokenizer.add_special_tokens({"additional_special_tokens": extra_special_tokens})

# Bare last expression: display the module tree of the loaded model.
model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [3]:
# Example passage fed to the encoder. It is named `input_text` rather than
# `input` so this cell does not shadow Python's built-in input() for the rest of
# the kernel session.
input_text = "HuggingFace is a company based in New York City. Its headquarters are in DUMBO, therefore very close to the Manhattan Bridge."

# return_tensors="pt" yields PyTorch tensors; the generate cell later unpacks
# this result with **encoded_input.
encoded_input = tokenizer(input_text, return_tensors="pt", padding=True)

In [36]:
# Word(s) the decoder will be constrained to produce.
force_words = ["NYC"]
# Despite its name, this holds the tokenizer's full output (input_ids plus
# attention_mask), not a bare list of ids. The displayed result encodes "NYC" as
# [13465, 1]; the trailing 1 is presumably T5's end-of-sequence id (confirm),
# which the generate cell strips with [:-1].
force_words_ids = tokenizer(force_words)
# Bare last expression: display the encoding.
force_words_ids

{'input_ids': [[13465, 1]], 'attention_mask': [[1, 1]]}

In [44]:
# Token ids of the forced word from the tokenizer cell above. Per that cell's
# displayed output this is [13465, 1]: the word id followed by a trailing id
# (presumably EOS -- confirm). `torch` is already imported in the first cell.
forced_ids = force_words_ids['input_ids'][0]

# Inference only, so gradient tracking is disabled.
with torch.no_grad():
    tokens = model.generate(
        **encoded_input,
        # Pin the output to exactly len(forced_ids) positions. In the recorded
        # run this gives the sequence [0, 13465]: one leading start id plus the
        # forced word, so the trailing id counted in forced_ids stands in for
        # the start position.
        max_length=len(forced_ids),
        min_length=len(forced_ids),
        # force_words_ids selects constrained beam search, which requires
        # num_beams > 1; do not drop this to 1 (greedy) while forcing words.
        num_beams=2,
        early_stopping=True,
        # Return a structured output that also carries the per-step scores.
        output_scores=True,
        return_dict_in_generate=True,
        # Force only the word's own ids: the trailing id is sliced off.
        force_words_ids=[forced_ids[:-1]],
    )

In [45]:
# Display the structured generate() result; the output below shows its
# sequences, sequences_scores, per-step scores and beam_indices fields.
tokens

BeamSearchEncoderDecoderOutput(sequences=tensor([[    0, 13465]]), sequences_scores=tensor([-3.9502]), scores=(tensor([[-2.4528e+01,        -inf, -1.5674e+01,  ..., -4.2404e+01,
         -4.2352e+01, -4.2396e+01],
        [-1.0000e+09,        -inf, -1.0000e+09,  ..., -1.0000e+09,
         -1.0000e+09, -1.0000e+09]]),), beam_indices=tensor([[ 0, -1]]), encoder_attentions=None, encoder_hidden_states=None, decoder_attentions=None, cross_attentions=None, decoder_hidden_states=None)

In [46]:
# Turn the first returned id sequence back into text. Special tokens are not
# skipped, so the leading start id is rendered as '<pad>' in the output below.
generated_ids = tokens.sequences[0]
tokenizer.decode(generated_ids)

'<pad> NYC'

In [47]:
# For every generation step, report the highest-scoring vocabulary id in the
# first row of that step's score tensor, together with its score.
# NOTE(review): rows presumably correspond to beams (two rows, num_beams=2) --
# confirm before interpreting row 0 as "the best beam".
for step_scores in tokens.scores:
    first_row = step_scores[0]
    best_id = first_row.argmax()
    print(best_id, first_row[best_id])

tensor(11560) tensor(-0.7218)
