Attention mask in Bert #46
Comments
The current BERT example is intended only for benchmarking on fixed-length inputs without a mask.
We are currently working with the CUTLASS team on a grouped attention optimization, which will remove padding and masks for dynamic sequence lengths. It will appear in the next CUTLASS and AIT release.
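To illustrate the idea behind that optimization: instead of padding every sequence to a common length and masking the padded positions, variable-length sequences are packed into one flat token buffer plus a table of offsets. The sketch below is purely illustrative (plain PyTorch, with hypothetical names such as cu_seqlens); it is not the actual CUTLASS or AIT interface:

import torch

# Three sequences of different lengths, hidden size 768 (example values).
seq_lens = [5, 3, 7]
seqs = [torch.randn(n, 768) for n in seq_lens]

# Pack all tokens into one contiguous buffer: no padding, hence no mask.
packed = torch.cat(seqs, dim=0)  # shape: (sum(seq_lens), 768)

# Cumulative offsets tell a grouped kernel where each sequence starts/ends.
cu_seqlens = torch.zeros(len(seq_lens) + 1, dtype=torch.int32)
cu_seqlens[1:] = torch.cumsum(torch.tensor(seq_lens), dim=0)

# Sequence i is recovered as packed[cu_seqlens[i] : cu_seqlens[i + 1]].
assert packed[cu_seqlens[1] : cu_seqlens[2]].shape == (3, 768)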
On Sun, Oct 16, 2022 at 05:56 Michaël Benesty ***@***.***> wrote:
Hi,
I'm trying to use an attention mask in the BERT demo script, but when I add the tensor to the input dict, it crashes.
How can I provide this mask?
Reproduction script (run on the docker image):
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import time

import click
import torch

from benchmark_ait import compile_module
from modeling.torch_model import BertBaseUncased as BertPt
def run_model(activation: str, graph_mode: bool, use_fp16_acc: bool, verify: bool):
    f = open("measures.txt", mode="w")
    shape = (1, 128)
    inputs_pt = {
        "input_ids": torch.randint(2, 1000, size=shape, dtype=torch.int64, device="cuda"),
        "position_ids": torch.arange(shape[1], dtype=torch.int64).expand(shape).contiguous().cuda(),
        "attention_mask": torch.ones(shape, dtype=torch.int64, device="cuda"),
        "token_type_ids": torch.ones(size=shape, dtype=torch.int64, device="cuda"),
    }
    batch_size, seq_len = inputs_pt["input_ids"].size()
    pt_model = BertPt(pretrained=True)._model
    pt_model.eval()
    hidden_size = pt_model.config.hidden_size

    mod = compile_module(batch_size, seq_len, hidden_size, activation, use_fp16_acc, False, pt_model)
    outputs = [torch.empty(mod.get_output_maximum_shape(0)).half().cuda()]

    # warmup
    for _ in range(10):
        mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)
    torch.cuda.synchronize()

    timings = list()
    for _ in range(10):
        start = time.time()
        mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)
        torch.cuda.synchronize()
        timings.append(time.time() - start)

    f.write(f"{shape}: {torch.median(torch.tensor(timings)):.4f}\n")
    f.flush()
    print(f"Logits: {outputs[0]}")

    if verify:
        pt_outputs = pt_model.bert(**inputs_pt)
        torch.allclose(outputs[0], pt_outputs.last_hidden_state, 1e-1, 1e-1)
        print("Verification done!")
    f.close()
@click.command()
@click.option(
    "--activation",
    type=str,
    default="gelu",
    help="Activation function applied on BERT, currently only support gelu and fast_gelu",
)
@click.option(
    "--graph_mode",
    type=bool,
    default=True,
    help="Use CUDA graph or not. (hipGraph is not supported yet)",
)
@click.option(
    "--use_fp16_acc",
    type=bool,
    default=False,
    help="Use fp16 accumulation or not (TensorRT is using fp16_acc)",
)
@click.option(
    "--verify",
    type=bool,
    default=True,
    help="Verify AIT outputs against PT",
)
def run_demo(
    activation: str,
    graph_mode: bool,
    use_fp16_acc: bool,
    verify: bool,
):
    run_model(activation, graph_mode, use_fp16_acc, verify)


if __name__ == "__main__":
    torch.manual_seed(4896)
    run_demo()
Produces:
...
2022-10-16 12:51:44,784 INFO <aitemplate.backend.builder> Building ./tmp/BERT_gelu_1_128/model_interface.obj
2022-10-16 12:52:03,348 INFO <aitemplate.backend.builder> Building ./tmp/BERT_gelu_1_128/test.so
[12:52:03] ./tmp/BERT_gelu_1_128/model-generated.h:225: Init AITemplate Runtime.
Traceback (most recent call last):
File "./examples/03_bert/demo_new.py", line 101, in <module>
run_demo()
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "./examples/03_bert/demo_new.py", line 96, in run_demo
run_model(activation, graph_mode, use_fp16_acc, verify)
File "./examples/03_bert/demo_new.py", line 45, in run_model
mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 483, in run_with_tensors
outputs_ait = self.run(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 438, in run
return self._run_impl(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 367, in _run_impl
inputs = self._dict_to_ordered_list(inputs, is_inputs=True)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 327, in _dict_to_ordered_list
raise ValueError(
ValueError: Did not get correct number of inputs expected 3, got 4
Since the module apparently expects exactly three inputs, I replaced position_ids with attention_mask, and got:
[12:54:38] ./tmp/BERT_gelu_1_128/model-generated.h:225: Init AITemplate Runtime.
Traceback (most recent call last):
File "./examples/03_bert/demo_new.py", line 101, in <module>
run_demo()
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "./examples/03_bert/demo_new.py", line 96, in run_demo
run_model(activation, graph_mode, use_fp16_acc, verify)
File "./examples/03_bert/demo_new.py", line 45, in run_model
mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 483, in run_with_tensors
outputs_ait = self.run(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 438, in run
return self._run_impl(
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 367, in _run_impl
inputs = self._dict_to_ordered_list(inputs, is_inputs=True)
File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 334, in _dict_to_ordered_list
raise ValueError(
ValueError: Got unexpected input: attention_mask
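Both failures point at the same root cause: the compiled module's input list is fixed at compile time and contains only input_ids, token_type_ids, and position_ids, so an attention_mask entry is rejected. A minimal workaround sketch, reusing the names from the reproduction script above and assuming a full-length all-ones mask is acceptable (which is what the mask-free AIT graph effectively computes):

# Drop the key the compiled graph does not declare; the remaining
# three inputs match the module's expected input list.
inputs_ait = {k: v for k, v in inputs_pt.items() if k != "attention_mask"}
mod.run_with_tensors(inputs_ait, outputs, graph_mode=graph_mode)

# For verification, give PyTorch an explicit all-ones mask (no padded
# positions), matching what the mask-free AIT graph computes.
with torch.no_grad():
    pt_outputs = pt_model.bert(
        input_ids=inputs_pt["input_ids"],
        token_type_ids=inputs_pt["token_type_ids"],
        position_ids=inputs_pt["position_ids"],
        attention_mask=torch.ones(shape, dtype=torch.int64, device="cuda"),
    )
print(torch.allclose(outputs[0].float(), pt_outputs.last_hidden_state.float(), rtol=1e-1, atol=1e-1))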
--
Bing Xu
Thank you for your fast answer.
tissue3 pushed a commit to tissue3/AITemplate-1 that referenced this issue on Feb 7, 2023: "…or#46) Although it seems to be useless, just made it compatible with others like pytorch and numpy."