In [1]:
# Show the GPU, driver version and current GPU memory usage on this machine.
!nvidia-smi

Thu Jan 26 20:37:38 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   38C    P8    15W /  70W |      2MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# Show the version of the installed CUDA compiler (nvcc).
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:09_PDT_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.TC445_37.28845127_0


In [3]:
# Record the installed PyTorch build; the value is shown as the cell output.
import torch 
torch.__version__

'1.13.1+cu117'

In [4]:
# Record the installed transformers version; the value is shown as the cell output.
import transformers
transformers.__version__

'4.22.2'

# Unoptimized ONNX

In [5]:
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSeq2SeqLM


In [6]:

# The tokenizer and the ONNX model are both loaded from the same local directory.
MODEL_DIR = "hf_models/query_classify_v2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# Request the CUDA execution provider so ONNX Runtime runs the model on the GPU.
# NOTE(review): from_transformers=False presumably means the directory already
# holds an ONNX export rather than a PyTorch checkpoint — confirm against optimum.
model_ort = ORTModelForSeq2SeqLM.from_pretrained(
    MODEL_DIR,
    from_transformers=False,
    provider='CUDAExecutionProvider',
)


2023-01-26 20:38:28.094867661 [W:onnxruntime:, session_state.cc:1030 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
2023-01-26 20:38:28.094909259 [W:onnxruntime:, session_state.cc:1032 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.
2023-01-26 20:38:44.286767491 [W:onnxruntime:, session_state.cc:1030 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
2023-01-26 20:38:44.286799147 [W:onnxruntime:, session_state.cc:1032 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.
2023-01-26 20:38:59.583373264 [W:onnxrun

In [7]:
# Check which device the loaded ONNX Runtime model reports.
model_ort.device

device(type='cuda', index=0)

In [8]:
# Tokenize one example query as PyTorch tensors and move them to the device the
# model itself reports, rather than hard-coding 'cuda', so inputs and model
# cannot end up on different devices.
inputs = tokenizer("Generate taxonomy for query: dildo", return_tensors="pt").to(model_ort.device)

In [9]:
%%timeit 
# Baseline: unconstrained beam search with 3 beams, returning all 3 hypotheses,
# on the single tokenized query held in `inputs`.
outputs = model_ort.generate(**inputs, num_beams=3, num_return_sequences=3)



183 ms ± 1.78 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
# Baseline: unconstrained generate() with no generation arguments overridden.
outputs = model_ort.generate(**inputs)

137 ms ± 825 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


# Constrained generation (no significant speed difference compared to unconstrained inference)

In [12]:
import sys 
# Add the sibling ../modelling directory to the import path; the `main_utils`
# import further down presumably resolves from there (confirm).
sys.path.append('../modelling')

In [13]:
from main_utils import Trie

In [14]:
# Text sequences that constrained generation is allowed to produce; the
# contents are loaded from the taxonomy leaf-paths file below.
allowed_gen_sequences = []

In [16]:
# Load the allowed output sequences, one per line, with surrounding whitespace
# removed and blank lines skipped.
# The list is rebuilt from scratch (not appended to) so that re-running this
# cell cannot add every path a second time.
with open('../modelling/datasets/taxonomy/wish_v1.2.1_newtax_leafpaths.txt', 'r') as f:
    allowed_gen_sequences = [
        path
        for path in (line.strip() for line in f)  # strip() also drops the trailing newline
        if path
    ]

In [17]:
# Encode every allowed sequence as token ids wrapped in a leading pad id and a
# trailing EOS id, then index all of them in a trie for prefix lookups.
# NOTE(review): if tokenizer.encode() already appends EOS (T5 tokenizers do by
# default), each path ends with two EOS ids — confirm this is intended.
allowed_token_paths = [
    [tokenizer.pad_token_id, *tokenizer.encode(path), tokenizer.eos_token_id]
    for path in allowed_gen_sequences
]
trie = Trie(allowed_token_paths)

In [18]:
def constraint(batch_id, sent):
    """Look up the tokens generated so far in the trie.

    Passed to `generate` as `prefix_allowed_tokens_fn`.

    Args:
        batch_id: first argument of the callback; not used here, so the same
            trie is applied regardless of its value.
        sent: object exposing `.tolist()`; presumably the tensor of token ids
            generated so far for one hypothesis (confirm against generate()).

    Returns:
        Whatever `trie.get` returns for that prefix — presumably the token ids
        allowed at the next step (TODO confirm against main_utils.Trie).
    """
    generated_prefix = sent.tolist()
    return trie.get(generated_prefix)

In [19]:
# Alias the tokenized query under the name the constrained-generation cells use.
batch = inputs

## constrained beam search

In [32]:
%%timeit
infres = model_ort.generate(
    input_ids = batch["input_ids"], 
    attention_mask = batch["attention_mask"],
    num_beams = 3, 
    num_return_sequences = 3, 
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint, 
    output_scores=True, return_dict_in_generate=True
)
prediction = infres.sequences
probs = infres.sequences_scores.exp()
print(tokenizer.batch_decode(prediction))
print(probs)

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>', '<pad> beauty & health > sexual wellness > safer sex > condoms</s>']
tensor([9.3622e-01, 1.1616e-03, 8.2010e-04], device='cuda:0')
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>', '<pad> beauty & health > sexual wellness > safer sex > condoms</s>']
tensor([9.3622e-01, 1.1616e-03, 8.2010e-04], device='cuda:0')
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>', '<pad> beauty & health > sexual wellness > safer sex > condoms</s>']
tensor([9.3622e-01, 1.1616e-03, 8.2010e-04], device='cuda:0')
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>',

## constrained greedy decoding

In [33]:
%%timeit 
print(tokenizer.batch_decode(model_ort.generate(
    input_ids = batch["input_ids"], 
    attention_mask = batch["attention_mask"],
    do_sample = False, 
    length_penalty = 0, 
    max_new_tokens = 50 - 1, # HACK: T5 adds pad token in the beginning
    prefix_allowed_tokens_fn=constraint
)))

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex 