In [2]:
import torch
from utils.model import T5HeadWithValueModel
from transformers import T5Tokenizer

In [3]:
# Load the tokenizer and the value-head model from the same pretrained checkpoint
# so that their vocabularies agree.
MODEL_NAME = 't5-base'
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5HeadWithValueModel.from_pretrained(MODEL_NAME)

Some weights of T5HeadWithValueModel were not initialized from the model checkpoint at t5-base and are newly initialized: ['v_head.summary.weight', 'v_head.summary.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Encoder input: tokenize the source sentence into a PyTorch tensor.
encoder_text = "Studies have been shown that owning a dog is good for you"
input_ids = tokenizer(encoder_text, return_tensors="pt").input_ids  # Batch size 1

# Decoder input: the target-side prefix fed to the decoder in the forward pass.
# NOTE(review): these ids are passed exactly as tokenized; T5 decoders are
# normally fed a sequence that begins with the decoder start token -- confirm
# that skipping that shift is intended here.
decoder_text = "Studies show that"
decoder_input_ids = tokenizer(decoder_text, return_tensors="pt").input_ids  # Batch size 1

In [5]:
# Single forward pass through the model; later cells read `.logits` and
# `.values` from the returned object.
forward_kwargs = {
    "input_ids": input_ids,
    "decoder_input_ids": decoder_input_ids,
}
outputs = model(**forward_kwargs)

In [6]:
# Greedy pick: index of the highest-scoring vocabulary entry along the last axis.
pred_ids = outputs.logits.argmax(dim=-1)

In [7]:
# For every decoder position, print the input token, the predicted id tensor,
# and the predicted token text (special tokens are kept visible).
num_positions = decoder_input_ids.shape[1]
for pos in range(num_positions):
    input_token = tokenizer.decode(decoder_input_ids[:, pos])
    predicted = pred_ids[:, pos]
    predicted_token = tokenizer.decode(predicted, skip_special_tokens=False)
    print(input_token, '-->', predicted, predicted_token)

Studies --> tensor([1]) </s>
show --> tensor([1]) </s>
that --> tensor([1]) </s>
</s> --> tensor([1]) </s>


In [8]:
# Shape of the value output returned by the forward pass.
# Presumably one value per decoder position (four decoder tokens were fed in) -- confirm.
outputs.values.shape

torch.Size([1, 4])

In [9]:
# Inspect the raw value outputs. The load step reports the `v_head` weights as
# newly initialized, so these numbers come from an untrained head.
outputs.values

tensor([[ 0.0030, -0.0015, -0.0038,  0.0014]], grad_fn=<SqueezeBackward1>)

In [10]:
# Generate a translation for a single task-prefixed prompt.
# Note that `input_ids` and `outputs` from the earlier cells are overwritten here.
translation_prompt = "translate English to German: The house is wonderful."
input_ids = tokenizer(translation_prompt, return_tensors="pt").input_ids
outputs = model.generate(input_ids)

In [11]:
# Display the raw token ids returned by generate().
outputs

tensor([[    0,   644,  4598,   229, 19250,     5,     1]])

In [12]:
# Decode the first generated sequence back to text. skip_special_tokens is not
# set, so special tokens remain visible in the decoded string.
tokenizer.decode(outputs[0])

'<pad> Das Haus ist wunderbar.</s>'

In [13]:
# Build a small batch of translation prompts by prefixing each sentence with
# the task instruction.
query_txt_1 = "My most favourite movie is Transformers."
query_txt_2 = "I eat an apple."
queries_txt = ["translate English to French: "+q for q in [query_txt_1, query_txt_2]]

# Pad only up to the longest prompt in the batch. With padding="max_length" and
# no explicit max_length, the tokenizer pads every row to the model's maximum
# input length, so each later forward/generate call would process mostly padding.
queries = tokenizer(queries_txt, return_tensors="pt", padding=True).input_ids

In [14]:
# Ask the model for one response per query, then print each decoded response
# followed by a separator line.
responses = model.respond_to_batch(queries)
for response in responses:
    print(tokenizer.decode(response), "------", sep="\n")

KeyboardInterrupt: 

In [15]:
# Keyword arguments forwarded to generate().
# NOTE(review): these are sampling-related settings, but `do_sample=True` is not
# passed; if this generate() follows the usual Hugging Face defaults, decoding
# stays greedy and these values presumably have no effect -- confirm the intent.
decoding_config = dict(
    temperature=2.0,
    top_k=10,
    top_p=0.7,
    typical_p=0.2,
)
responses = model.generate(queries, **decoding_config)

# Print each decoded response (special tokens removed) followed by a separator.
for response in responses:
    print(tokenizer.decode(response, skip_special_tokens=True), "------", sep="\n")

Mon film le plus préféré est Transformers.
------
Je mange une pomme.
------


In [None]:
# Display the raw generated ids for the whole batch.
responses

tensor([[    0,  2963,   814,    90,   303, 22157,   259, 31220,     7,     5,
             1],
        [    0,  1022,   388,   397,   245, 26234,     5,     1,     0,     0,
             0]])

In [18]:
# Decode every sequence in the batch with a single call, dropping special tokens.
tokenizer.batch_decode(responses, skip_special_tokens=True)

['Mon film le plus préféré est Transformers.', 'Je mange une pomme.']