Attention mask in Bert #46

Closed · pommedeterresautee opened this issue Oct 16, 2022 · 2 comments

pommedeterresautee commented Oct 16, 2022

Hi,

I am trying to use an attention mask with the BERT demo script, but when I add the tensor to the input dict it crashes.
How can I provide this mask?

Reproduction script (run on the docker image):

#  Copyright (c) Meta Platforms, Inc. and affiliates.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
import time

import click
import torch
from benchmark_ait import compile_module
from modeling.torch_model import BertBaseUncased as BertPt


def run_model(activation: str, graph_mode: bool, use_fp16_acc: bool, verify: bool):
    f = open("measures.txt", mode="w")
    shape = (1, 128)
    inputs_pt = {
        "input_ids": torch.randint(2, 1000, size=shape, dtype=torch.int64, device="cuda"),
        "position_ids": torch.arange(shape[1], dtype=torch.int64).expand(shape).contiguous().cuda(),
        "attention_mask": torch.ones(shape, dtype=torch.int64, device="cuda"),
        "token_type_ids": torch.ones(size=shape, dtype=torch.int64, device="cuda"),
    }

    batch_size, seq_len = inputs_pt["input_ids"].size()

    pt_model = BertPt(pretrained=True)._model
    pt_model.eval()
    hidden_size = pt_model.config.hidden_size

    mod = compile_module(batch_size, seq_len, hidden_size, activation, use_fp16_acc, False, pt_model)

    outputs = [torch.empty(mod.get_output_maximum_shape(0)).half().cuda()]

    # warmup
    for _ in range(10):
        mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)

    torch.cuda.synchronize()
    timings = list()
    for _ in range(10):
        start = time.time()
        mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)
        torch.cuda.synchronize()
        timings.append(time.time() - start)

    f.write(f"{shape}: {torch.median(torch.tensor(timings)):.4f}\n")
    f.flush()
    print(f"Logits: {outputs[0]}")
    if verify:
        pt_outputs = pt_model.bert(**inputs_pt)
        assert torch.allclose(outputs[0], pt_outputs.last_hidden_state, 1e-1, 1e-1)
        print("Verification done!")
    f.close()


@click.command()
@click.option(
    "--activation",
    type=str,
    default="gelu",
    help="Activation function applied on BERT, currently only support gelu and fast_gelu",
)
@click.option(
    "--graph_mode",
    type=bool,
    default=True,
    help="Use CUDA graph or not. (hipGraph is not supported yet)",
)
@click.option(
    "--use_fp16_acc",
    type=bool,
    default=False,
    help="Use fp16 accumulation or not (TensorRT is using fp16_acc)",
)
@click.option(
    "--verify",
    type=bool,
    default=True,
    help="Verify AIT outputs against PT",
)
def run_demo(
    activation: str,
    graph_mode: bool,
    use_fp16_acc: bool,
    verify: bool,
):
    run_model(activation, graph_mode, use_fp16_acc, verify)


if __name__ == "__main__":
    torch.manual_seed(4896)
    run_demo()

Produces:

...
2022-10-16 12:51:44,784 INFO <aitemplate.backend.builder> Building ./tmp/BERT_gelu_1_128/model_interface.obj
2022-10-16 12:52:03,348 INFO <aitemplate.backend.builder> Building ./tmp/BERT_gelu_1_128/test.so
[12:52:03] ./tmp/BERT_gelu_1_128/model-generated.h:225: Init AITemplate Runtime.
Traceback (most recent call last):
  File "./examples/03_bert/demo_new.py", line 101, in <module>
    run_demo()
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "./examples/03_bert/demo_new.py", line 96, in run_demo
    run_model(activation, graph_mode, use_fp16_acc, verify)
  File "./examples/03_bert/demo_new.py", line 45, in run_model
    mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 483, in run_with_tensors
    outputs_ait = self.run(
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 438, in run
    return self._run_impl(
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 367, in _run_impl
    inputs = self._dict_to_ordered_list(inputs, is_inputs=True)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 327, in _dict_to_ordered_list
    raise ValueError(
ValueError: Did not get correct number of inputs expected 3, got 4
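
For reference, one way to check which input names the compiled module was actually built with. This is only a sketch: it assumes the AITemplate Model wrapper in this image exposes get_input_name_to_index_map(); adjust if the API differs.

# Assumed helper on the compiled module: maps input names to positional indices.
name_to_idx = mod.get_input_name_to_index_map()
print(f"Expected inputs ({len(name_to_idx)}): {sorted(name_to_idx, key=name_to_idx.get)}")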

If I instead comment out position_ids and keep attention_mask, I get:

    inputs_pt = {
        "input_ids": torch.randint(2, 1000, size=shape, dtype=torch.int64, device="cuda"),
        # "position_ids": torch.arange(shape[1], dtype=torch.int64).expand(shape).contiguous().cuda(),
        "attention_mask": torch.ones(shape, dtype=torch.int64, device="cuda"),
        "token_type_ids": torch.ones(size=shape, dtype=torch.int64, device="cuda"),
    }
[12:54:38] ./tmp/BERT_gelu_1_128/model-generated.h:225: Init AITemplate Runtime.
Traceback (most recent call last):
  File "./examples/03_bert/demo_new.py", line 101, in <module>
    run_demo()
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1130, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1055, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 1404, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.8/dist-packages/click/core.py", line 760, in invoke
    return __callback(*args, **kwargs)
  File "./examples/03_bert/demo_new.py", line 96, in run_demo
    run_model(activation, graph_mode, use_fp16_acc, verify)
  File "./examples/03_bert/demo_new.py", line 45, in run_model
    mod.run_with_tensors(inputs_pt, outputs, graph_mode=graph_mode)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 483, in run_with_tensors
    outputs_ait = self.run(
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 438, in run
    return self._run_impl(
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 367, in _run_impl
    inputs = self._dict_to_ordered_list(inputs, is_inputs=True)
  File "/usr/local/lib/python3.8/dist-packages/aitemplate/compiler/model.py", line 334, in _dict_to_ordered_list
    raise ValueError(
ValueError: Got unexpected input: attention_mask
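
For completeness, the call only gets past this input check when the dict contains exactly the names the module was compiled with. A sketch, assuming (as the two errors above suggest) those are input_ids, position_ids and token_type_ids, so attention_mask is simply not passed:

# Sketch: pass only the inputs the module appears to have been compiled for;
# attention_mask is left out, so padding tokens are not masked.
inputs_ait = {
    "input_ids": torch.randint(2, 1000, size=shape, dtype=torch.int64, device="cuda"),
    "position_ids": torch.arange(shape[1], dtype=torch.int64).expand(shape).contiguous().cuda(),
    "token_type_ids": torch.ones(size=shape, dtype=torch.int64, device="cuda"),
}
mod.run_with_tensors(inputs_ait, outputs, graph_mode=graph_mode)

This only sidesteps the original question of how to actually provide the mask.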

antinucleon (Contributor) commented Oct 16, 2022 via email

pommedeterresautee (Author):

Thank you for your fast answer.
Closing.
