In [1]:
# Record the GPU model, driver version and GPU memory state this benchmark ran on.
!nvidia-smi

Thu Jan 26 21:59:36 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   39C    P8    16W /  70W |      2MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Report the locally installed CUDA toolkit (nvcc) release. It can differ from the
# CUDA version the driver reports in nvidia-smi and from the CUDA build PyTorch ships with.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:09_PDT_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.TC445_37.28845127_0


In [3]:
import torch 
# Display the installed PyTorch build so the benchmark environment is recorded.
torch.__version__

'1.13.1+cu117'

In [4]:
import transformers
# Display the installed transformers version so the benchmark environment is recorded.
transformers.__version__

'4.22.2'

# Unoptimized ONNX

In [5]:
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM


In [6]:

# Path (or model id) that both the tokenizer and the ONNX seq2seq model are loaded from.
MODEL_DIR = "hf_models/query_classify_v2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
# from_transformers=False: presumably loads already-exported ONNX files instead of
# converting a PyTorch checkpoint (confirm against the Optimum docs). Inference is
# requested on the CUDA execution provider.
model_ort = ORTModelForSeq2SeqLM.from_pretrained(
    MODEL_DIR,
    from_transformers=False,
    provider='CUDAExecutionProvider',
)


2023-01-26 21:59:50.265660534 [W:onnxruntime:, session_state.cc:1030 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
2023-01-26 21:59:50.265692996 [W:onnxruntime:, session_state.cc:1032 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.
2023-01-26 21:59:54.715583306 [W:onnxruntime:, session_state.cc:1030 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
2023-01-26 21:59:54.715616662 [W:onnxruntime:, session_state.cc:1032 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.
2023-01-26 22:00:09.994329990 [W:onnxrun

In [7]:
# Check which device the ORT model reports it is running on.
model_ort.device

device(type='cuda', index=0)

In [8]:
# Tokenize the first benchmark query as PyTorch tensors, then move them to the GPU.
encoded_query = tokenizer("Generate taxonomy for query: dildo", return_tensors="pt")
inputs = encoded_query.to('cuda')

In [9]:
%%timeit 
outputs = model_ort.generate(**inputs, num_beams=3, num_return_sequences=3)



182 ms ± 2.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
outputs = model_ort.generate(**inputs)

137 ms ± 1.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
# Tokenize the two additional benchmark queries the same way as `inputs`
# (PyTorch tensors moved to the GPU).
inputs2, inputs3 = (
    tokenizer(query, return_tensors="pt").to('cuda')
    for query in (
        "Generate taxonomy for query: women gucci",
        "Generate taxonomy for query: baby milk",
    )
)

# Constrained generation (no significant speed difference compared to unconstrained inference)

In [11]:
import sys

# Make the sibling `modelling` directory importable (presumably where `main_utils`
# lives). Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
MODELLING_DIR = '../modelling'
if MODELLING_DIR not in sys.path:
    sys.path.append(MODELLING_DIR)

In [12]:
from main_utils import Trie

In [14]:
# Taxonomy paths that constrained generation may produce; starts empty and is
# populated from the leaf-path file in the next cell.
allowed_gen_sequences = []

In [15]:
# Load the allowed taxonomy leaf paths: one path per line, surrounding whitespace
# removed, blank lines skipped.
# The list is rebuilt from scratch instead of appended to, so re-running this cell
# does not duplicate every entry.
with open('../modelling/datasets/taxonomy/wish_v1.2.1_newtax_leafpaths.txt', 'r') as f:
    stripped_lines = (line.strip() for line in f)
    allowed_gen_sequences = [path for path in stripped_lines if path]

In [16]:
# Build the prefix trie over every allowed output. Each sequence is framed as
# pad token + tokenizer encoding of the path + EOS token.
# NOTE(review): if `tokenizer.encode` already appends EOS (as T5 tokenizers do by
# default), the trailing EOS here is a second one — confirm this is intentional.
framed_sequences = [
    [tokenizer.pad_token_id, *tokenizer.encode(path), tokenizer.eos_token_id]
    for path in allowed_gen_sequences
]
trie = Trie(framed_sequences)

In [17]:
def constraint(batch_id, sent):
    """Look up, in the module-level ``trie``, what may follow the tokens generated so far.

    Passed to ``generate`` as ``prefix_allowed_tokens_fn`` in the cells below.

    Args:
        batch_id: Index supplied by the generation loop; unused here, so the
            same trie constrains every input in the batch.
        sent: Tokens generated so far for one hypothesis. Must provide
            ``.tolist()`` (presumably a 1-D tensor of token ids — confirm).

    Returns:
        Whatever ``trie.get`` returns for that prefix (presumably the list of
        token ids allowed as the next token).
    """
    prefix_ids = sent.tolist()
    return trie.get(prefix_ids)

In [18]:
# Alias the three tokenized queries under the `batch*` names used by the
# constrained-generation cells below.
batch, batch2, batch3 = inputs, inputs2, inputs3

## constrained beam search

In [19]:
%%timeit
infres = model_ort.generate(
    input_ids = batch["input_ids"], 
    attention_mask = batch["attention_mask"],
    num_beams = 3, 
    num_return_sequences = 3, 
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint, 
    output_scores=True, return_dict_in_generate=True
)
prediction = infres.sequences
probs = infres.sequences_scores.exp()
print(tokenizer.batch_decode(prediction))
print(probs)

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>', '<pad> beauty & health > sexual wellness > safer sex > condoms</s>']
tensor([9.3622e-01, 1.1616e-03, 8.2010e-04], device='cuda:0')
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>', '<pad> beauty & health > sexual wellness > safer sex > condoms</s>']
tensor([9.3622e-01, 1.1616e-03, 8.2010e-04], device='cuda:0')
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>', '<pad> beauty & health > sexual wellness > safer sex > condoms</s>']
tensor([9.3622e-01, 1.1616e-03, 8.2010e-04], device='cuda:0')
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>',

In [20]:
%%timeit
infres = model_ort.generate(
    input_ids = batch2["input_ids"], 
    attention_mask = batch2["attention_mask"],
    num_beams = 3, 
    num_return_sequences = 3, 
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint, 
    output_scores=True, return_dict_in_generate=True
)
prediction = infres.sequences
probs = infres.sequences_scores.exp()
print(tokenizer.batch_decode(prediction))
print(probs)

["<pad> luggage & bags > women's bags > shoulder bags</s><pad>", "<pad> luggage & bags > women's bags > top-handle bags</s>", "<pad> women's clothing > tops > tees</s><pad><pad><pad><pad><pad>"]
tensor([0.1285, 0.0830, 0.0501], device='cuda:0')
["<pad> luggage & bags > women's bags > shoulder bags</s><pad>", "<pad> luggage & bags > women's bags > top-handle bags</s>", "<pad> women's clothing > tops > tees</s><pad><pad><pad><pad><pad>"]
tensor([0.1285, 0.0830, 0.0501], device='cuda:0')
["<pad> luggage & bags > women's bags > shoulder bags</s><pad>", "<pad> luggage & bags > women's bags > top-handle bags</s>", "<pad> women's clothing > tops > tees</s><pad><pad><pad><pad><pad>"]
tensor([0.1285, 0.0830, 0.0501], device='cuda:0')
["<pad> luggage & bags > women's bags > shoulder bags</s><pad>", "<pad> luggage & bags > women's bags > top-handle bags</s>", "<pad> women's clothing > tops > tees</s><pad><pad><pad><pad><pad>"]
tensor([0.1285, 0.0830, 0.0501], device='cuda:0')
["<pad> luggage & ba

In [21]:
%%timeit
infres = model_ort.generate(
    input_ids = batch3["input_ids"], 
    attention_mask = batch3["attention_mask"],
    num_beams = 3, 
    num_return_sequences = 3, 
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint, 
    output_scores=True, return_dict_in_generate=True
)
prediction = infres.sequences
probs = infres.sequences_scores.exp()
print(tokenizer.batch_decode(prediction))
print(probs)

['<pad> mother & kids > feeding > solid feeding > cups</s><pad><pad>', '<pad> mother & kids > feeding > baby food storage</s><pad><pad><pad><pad><pad>', '<pad> mother & kids > feeding > bottle feeding > formula milk storage</s>']
tensor([0.1415, 0.1347, 0.0729], device='cuda:0')
['<pad> mother & kids > feeding > solid feeding > cups</s><pad><pad>', '<pad> mother & kids > feeding > baby food storage</s><pad><pad><pad><pad><pad>', '<pad> mother & kids > feeding > bottle feeding > formula milk storage</s>']
tensor([0.1415, 0.1347, 0.0729], device='cuda:0')
['<pad> mother & kids > feeding > solid feeding > cups</s><pad><pad>', '<pad> mother & kids > feeding > baby food storage</s><pad><pad><pad><pad><pad>', '<pad> mother & kids > feeding > bottle feeding > formula milk storage</s>']
tensor([0.1415, 0.1347, 0.0729], device='cuda:0')
['<pad> mother & kids > feeding > solid feeding > cups</s><pad><pad>', '<pad> mother & kids > feeding > baby food storage</s><pad><pad><pad><pad><pad>', '<pad> 

## constrained greedy decoding

In [22]:
%%timeit 
print(tokenizer.batch_decode(model_ort.generate(
    input_ids = batch["input_ids"], 
    attention_mask = batch["attention_mask"],
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint
)))

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex 

In [23]:
%%timeit 
print(tokenizer.batch_decode(model_ort.generate(
    input_ids = batch2["input_ids"], 
    attention_mask = batch2["attention_mask"],
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint
)))

["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > shoulder bags</s>"]
["<pad> luggage & bags > women's bags > 

In [24]:
%%timeit 
print(tokenizer.batch_decode(model_ort.generate(
    input_ids = batch3["input_ids"], 
    attention_mask = batch3["attention_mask"],
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint
)))

['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feeding > bottle feeding > baby bottle accessories</s>']
['<pad> mother & kids > feed