In [1]:
# Record which Python interpreter the `python` executable on PATH resolves to,
# so the benchmark environment is documented alongside the results.
!python --version

Python 3.10.9


In [2]:
# Capture the GPU model, driver version and current memory/utilisation as
# reported by the NVIDIA driver, for the benchmark record.
!nvidia-smi

Thu Jan 26 18:42:57 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   46C    P8    17W /  70W |      2MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Record the CUDA compiler (toolkit) release found on PATH; this can differ
# from the CUDA version the driver reports via nvidia-smi.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_Mar__8_18:18:20_PST_2022
Cuda compilation tools, release 11.6, V11.6.124
Build cuda_11.6.r11.6/compiler.31057947_0


In [4]:
import torch
# Display the installed PyTorch version string for the benchmark record.
torch.__version__

'2.0.0.dev20230125'

In [5]:
import transformers
# Display the installed Hugging Face Transformers version for the benchmark record.
transformers.__version__

  from .autonotebook import tqdm as notebook_tqdm


'4.22.2'

In [6]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch._dynamo as torchdynamo
import torch

In [7]:
# Generative models produce many distinct graphs, so the default TorchDynamo
# cache is too small to hold them all; raise the limit before compiling.
DYNAMO_CACHE_SIZE_LIMIT = 512
torchdynamo.config.cache_size_limit = DYNAMO_CACHE_SIZE_LIMIT

In [28]:
# Checkpoint location handed to `from_pretrained` for both the model and the
# tokenizer. It is a relative path, so it resolves against the notebook's
# working directory.
model_name = "../coeus_inference/hf_models/query_classify_v2"

In [29]:
# Load the tokenizer and the seq2seq model from the same checkpoint; the model
# is switched to eval mode and moved to the GPU in one chained expression.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).eval().cuda()

In [51]:
# Tokenize a single query, padded/truncated to exactly 50 tokens, and move the
# resulting tensors to the GPU.
inputs = tokenizer(
    "Generate taxonomy for query: dildo",
    return_tensors="pt",
    padding="max_length",
    truncation=True,
    max_length=50,
).to("cuda")

# PyTorch

## vanilla

In [53]:
%%timeit
print(tokenizer.batch_decode(model.generate(**inputs, num_beams=3, num_return_sequences=3, max_length=50)))

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual 

In [54]:
%%timeit
print(tokenizer.batch_decode(model.generate(**inputs, max_length=50)))

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
276 ms ± 8.73 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## inference mode

In [55]:
%%timeit
with torch.inference_mode():
    print(tokenizer.batch_decode(model.generate(**inputs, num_beams=3, num_return_sequences=3, max_length=50)))

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual wellness > sex toys</s><pad><pad><pad>', '<pad> beauty & health > sexual wellness > sm products > masturbation machine</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>', '<pad> beauty & health > sexual 

In [56]:
%%timeit
with torch.inference_mode():
    print(tokenizer.batch_decode(model.generate(**inputs, max_length=50)))

['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
247 ms ± 3.66 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# TorchDynamo

In [57]:
# Wrap `model.generate` with TorchDynamo using the "inductor" backend and keep
# the wrapped callable next to the original as `model.generate2`, so both the
# uncompiled and the compiled path stay callable for comparison.
inductor_optimize = torchdynamo.optimize("inductor")
model.generate2 = inductor_optimize(model.generate)

In [None]:
# Dynamo warm-up: call the wrapped generate once outside any timing cell so the
# timed cells that follow do not start from a cold cache. The decoded text is
# printed once as a sanity check.
with torch.inference_mode():
    warmup_ids = model.generate2(**inputs)
    print(tokenizer.batch_decode(warmup_ids))

In [59]:
%%timeit
with torch.inference_mode():
    print(tokenizer.batch_decode(model.generate2(**inputs)))



['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex toys > dildos</s>']
['<pad> beauty & health > sexual wellness > sex 

In [60]:
# Second query, tokenized with the same fixed 50-token padding/truncation as
# the first one and moved to the GPU.
inputs2 = tokenizer(
    "Generate taxonomy for query: women gucci",
    return_tensors="pt",
    padding="max_length",
    truncation=True,
    max_length=50,
).to("cuda")

In [61]:
%%timeit
with torch.inference_mode():
    print(tokenizer.batch_decode(model.generate2(**inputs2)))

["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's bags</s>"]
["<pad> luggage & bags > women's

In [64]:
# Third query, tokenized with the same fixed 50-token padding/truncation as
# the earlier ones and moved to the GPU.
inputs3 = tokenizer(
    "Generate taxonomy for query: baby milk",
    return_tensors="pt",
    padding="max_length",
    truncation=True,
    max_length=50,
).to("cuda")

In [65]:
%%timeit
with torch.inference_mode():
    print(tokenizer.batch_decode(model.generate2(**inputs3)))

['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > feeding > bottle feeding > baby food storage</s>']
['<pad> mother & kids > f