# Compile BERT from TensorFlow

In [1]:
import sys
import time
import numpy as np
import tensorflow as tf
import modeling

import tvm
from tvm import relay
from tvm.relay.frontend.tensorflow_parser import TFParser
import tvm.contrib.graph_runtime as runtime

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Download official Tensorflow BERT base model

In [2]:
!./download_tf_bert.sh

Helper function for creating the classification model

In [3]:
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 num_labels, use_one_hot_embeddings, use_fp16):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
        compute_type=tf.float16 if use_fp16 else tf.float32)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias, name='cls_logits')
        probabilities = tf.nn.softmax(logits, axis=-1, name='cls_probabilities')
    return probabilities

Configuation of the BERT model

In [4]:
batch = 1
seq_length = 32
num_labels = 2
use_fp16 = False
ckpt_file = "uncased_L-12_H-768_A-12/bert_model.ckpt"
bert_config = modeling.BertConfig.from_json_file("uncased_L-12_H-768_A-12/bert_config.json")

W1115 19:23:00.853036 140291125999424 deprecation_wrapper.py:119] From /home/ubuntu/repo/notebooks/bert/modeling.py:94: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.



Create TF model and load from checkpoint.

In [5]:
shape = (batch, seq_length)
input_ids = tf.placeholder(tf.int32, shape, 'input_ids')
input_mask = tf.placeholder(tf.int32, shape, 'input_mask')
segment_ids = tf.placeholder(tf.int32, shape, 'segment_ids')

tf_model = create_model(bert_config, False, input_ids, input_mask, segment_ids,
                        num_labels, False, use_fp16)

W1115 19:23:00.890331 140291125999424 deprecation_wrapper.py:119] From /home/ubuntu/repo/notebooks/bert/modeling.py:176: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

W1115 19:23:00.893841 140291125999424 deprecation_wrapper.py:119] From /home/ubuntu/repo/notebooks/bert/modeling.py:427: The name tf.get_variable is deprecated. Please use tf.compat.v1.get_variable instead.

W1115 19:23:01.656079 140291125999424 lazy_loader.py:50] 
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

W1115 19:23:01.679353 140291125999424 deprecation.py:323] From /home/ubuntu/repo/notebooks/bert/modeling.py:683: dense (from tensorflow.python.layers.core) is

In [6]:
# Load model from checkpoint
tf_config = tf.ConfigProto()
tf_config.gpu_options.visible_device_list = "0"
tf_config.gpu_options.allow_growth = True
tf_sess = tf.Session(config=tf_config)

tvars = tf.trainable_variables()
(assignment_map, initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(tvars, ckpt_file)
tf.train.init_from_checkpoint(ckpt_file, assignment_map)
tf_sess.run(tf.global_variables_initializer())

Run and benchmark the BERT model

In [7]:
input_ids_data = np.random.uniform(size=shape).astype('int32')
input_mask_data = np.ones(shape).astype('int32')
segment_ids_data = np.zeros(shape).astype('int32')

input_dict = {input_ids: input_ids_data, input_mask: input_mask_data, segment_ids: segment_ids_data}
tf_out = tf_sess.run(tf_model, input_dict)
print(tf_out)

[[0.4380299 0.5619701]]


In [8]:
# dry run
for _ in range(10):
    tf_sess.run(tf_model, input_dict)

min_repeat_ms = 2000
number = 20
while True:
    beg = time.time()
    for _ in range(number):
        tf_sess.run(tf_model, input_dict)
    end = time.time()
    lat = (end - beg) * 1e3
    if lat >= min_repeat_ms:
        break
    number = int(max(min_repeat_ms / (lat / number) + 1, number * 1.618))
print('TF latency for seq length %s: %.2f ms' % (seq_length, lat / number))

TF latency for seq length 32: 6.92 ms


## Compile TF BERT using TVM

First convert to frozen model and export to file.

In [9]:
output_node_names = ['loss/cls_probabilities']
frozen_file = "tf_frozen_bert.pb"

tf.logging.info("**** Trainable Variables ****")
for var in tvars:
    init_string = ""
    if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
    else:
        init_string = ", *NOTTTTTTTTTTTTTTTTTTTTT"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

frozen_graph = tf.graph_util.convert_variables_to_constants(
    tf_sess, tf_sess.graph.as_graph_def(), output_node_names)

num_nodes = len(frozen_graph.node)
from tensorflow.python.compiler.tensorrt import trt_convert as trt
converter = trt.TrtGraphConverter(
    input_graph_def=frozen_graph,
    nodes_blacklist=output_node_names,
    max_workspace_size_bytes=(4096 << 20) - 1000,
    precision_mode = "FP16" if use_fp16 else "FP32",
    minimum_segment_size=4,
    is_dynamic_op=True,
    maximum_cached_engines=1000
)
frozen_graph = converter.convert()

print('Total node count before and after TF-TRT conversion:',
      num_nodes, '->', len(frozen_graph.node))
with tf.gfile.GFile(frozen_file, "wb") as f:
    f.write(frozen_graph.SerializeToString())
print("export frozen model to %s" % frozen_file)

W1115 19:23:13.024651 140291125999424 deprecation.py:323] From <ipython-input-9-1ec1d989451e>:14: convert_variables_to_constants (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
W1115 19:23:13.025627 140291125999424 deprecation.py:323] From /home/ubuntu/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/graph_util_impl.py:270: extract_sub_graph (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


Total node count before and after TF-TRT conversion: 1454 -> 1248
export frozen model to tf_frozen_bert.pb


Convert the frozen model to Relay

In [10]:
parser = TFParser(frozen_file)
graph_def = parser.parse()
mod, params = relay.frontend.from_tensorflow(graph_def, outputs=output_node_names)

W1115 19:23:42.291382 140291125999424 common.py:398] Attribute Tdim is ignored in relay.sym.expand_dims
W1115 19:23:42.292312 140291125999424 common.py:398] Attribute T is ignored in relay.sym.expand_dims
W1115 19:23:42.292924 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.expand_dims
W1115 19:23:42.293588 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.expand_dims
W1115 19:23:42.294254 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.expand_dims
W1115 19:23:42.294961 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.expand_dims
W1115 19:23:42.412743 140291125999424 common.py:398] Attribute T is ignored in relay.sym.reshape
W1115 19:23:42.413674 140291125999424 common.py:398] Attribute Tshape is ignored in relay.sym.reshape
W1115 19:23:42.414828 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.reshape
W1115 19:23:42.415477 1402911259994

W1115 19:23:43.748646 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.expand_dims
W1115 19:23:43.749109 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.expand_dims
W1115 19:23:43.749685 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.expand_dims
W1115 19:23:43.866663 140291125999424 common.py:398] Attribute T is ignored in relay.sym.expand_dims
W1115 19:23:43.867774 140291125999424 common.py:398] Attribute Tdim is ignored in relay.sym.expand_dims
W1115 19:23:43.868236 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.expand_dims
W1115 19:23:43.868732 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.expand_dims
W1115 19:23:43.869687 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.expand_dims
W1115 19:23:43.870357 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.expand_dims
W1115 1

W1115 19:23:49.309312 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:23:49.309845 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:23:49.310330 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:23:49.310800 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:23:49.311897 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:23:49.312530 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:23:49.431473 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:23:49.432311 140291125999424 common.py:398] Attribute transpose_a is ignored in relay.sym.dense
W1115 19:23:49.432826 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:23:49.433293 140291125999424 common.py:398] Attribute _outp

W1115 19:23:54.426876 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:23:54.427381 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:23:54.427855 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:23:54.428299 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:23:54.429419 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:23:54.430124 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:23:54.792647 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:23:54.793493 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:23:54.793970 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:23:54.794427 140291125999424 common.py:398] Attribute _node_name i

W1115 19:23:58.530863 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:23:58.903948 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:23:58.904785 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:23:58.905298 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:23:58.905811 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.mean
W1115 19:23:58.906301 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.mean
W1115 19:23:58.906740 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.mean
W1115 19:23:59.279795 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:23:59.280655 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:23:59.281195 140291125999424 common.py:398] Attribute _output_shapes is ignored

W1115 19:24:04.129072 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.reshape
W1115 19:24:04.130272 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.reshape
W1115 19:24:04.259068 140291125999424 common.py:398] Attribute T is ignored in relay.sym.reshape
W1115 19:24:04.259914 140291125999424 common.py:398] Attribute Tshape is ignored in relay.sym.reshape
W1115 19:24:04.260443 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.reshape
W1115 19:24:04.261356 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.reshape
W1115 19:24:04.261838 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.reshape
W1115 19:24:04.262356 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.reshape
W1115 19:24:04.390674 140291125999424 common.py:398] Attribute T is ignored in relay.sym.reshape
W1115 19:24:04.391557 140291125999424 common.py:39

W1115 19:24:09.733428 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.mean
W1115 19:24:09.997168 140291125999424 common.py:398] Attribute T is ignored in relay.sym.power
W1115 19:24:09.998065 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.power
W1115 19:24:09.998602 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.power
W1115 19:24:09.999107 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.power
W1115 19:24:09.999583 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.power
W1115 19:24:10.788792 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:24:10.789762 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:24:10.790291 140291125999424 common.py:398] Attribute transpose_a is ignored in relay.sym.dense
W1115 19:24:10.790775 140291125999424 common.py:398] Attribute _out

W1115 19:24:14.415823 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.power
W1115 19:24:15.225188 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:24:15.226090 140291125999424 common.py:398] Attribute transpose_a is ignored in relay.sym.dense
W1115 19:24:15.227002 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:24:15.227489 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:24:15.227956 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:24:15.228446 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:24:15.229471 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:24:16.039738 140291125999424 common.py:398] Attribute T is ignored in relay.sym.tanh
W1115 19:24:16.040652 140291125999424 common.py:398] Attribute _out

W1115 19:24:20.577855 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.softmax
W1115 19:24:20.578404 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.softmax
W1115 19:24:20.991404 140291125999424 common.py:398] Attribute T is ignored in relay.sym.reshape
W1115 19:24:20.992253 140291125999424 common.py:398] Attribute Tshape is ignored in relay.sym.reshape
W1115 19:24:20.992771 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.reshape
W1115 19:24:20.993240 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.reshape
W1115 19:24:20.993760 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.reshape
W1115 19:24:20.994232 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.reshape
W1115 19:24:21.138858 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:24:21.139703 140291125999424 common.py:398]

W1115 19:24:26.628049 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:24:26.628544 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:24:26.629014 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:24:26.772194 140291125999424 common.py:398] Attribute transpose_a is ignored in relay.sym.dense
W1115 19:24:26.773075 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:24:26.773618 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:24:26.774076 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:24:26.774524 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:24:26.774956 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:24:26.775397 140291125999424 common.py:398] Att

W1115 19:24:32.670010 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:24:32.670481 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:24:32.670970 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:24:33.104673 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:24:33.105519 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:24:33.106219 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:24:33.106730 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.mean
W1115 19:24:33.107185 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.mean
W1115 19:24:33.107647 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.mean
W1115 19:24:33.543117 140291125999424 common.py:398] Attribute T is

W1115 19:24:37.975193 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:24:37.975720 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.mean
W1115 19:24:37.976782 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.mean
W1115 19:24:37.977215 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.mean
W1115 19:24:38.422154 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:24:38.423009 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:24:38.423597 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:24:38.424044 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.mean
W1115 19:24:38.424557 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.mean
W1115 19:24:38.424991 140291125999424 common.py:398] Attribute _input

W1115 19:24:44.317318 140291125999424 common.py:398] Attribute Tshape is ignored in relay.sym.reshape
W1115 19:24:44.317928 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.reshape
W1115 19:24:44.318895 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.reshape
W1115 19:24:44.319335 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.reshape
W1115 19:24:44.319872 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.reshape
W1115 19:24:44.472469 140291125999424 common.py:398] Attribute T is ignored in relay.sym.reshape
W1115 19:24:44.473307 140291125999424 common.py:398] Attribute Tshape is ignored in relay.sym.reshape
W1115 19:24:44.473892 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.reshape
W1115 19:24:44.474352 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.reshape
W1115 19:24:44.474840 140291125999424 common.py:

W1115 19:24:51.134748 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.power
W1115 19:24:51.135260 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.power
W1115 19:24:51.135749 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.power
W1115 19:24:52.085299 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:24:52.086183 140291125999424 common.py:398] Attribute transpose_a is ignored in relay.sym.dense
W1115 19:24:52.086738 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:24:52.087275 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:24:52.087725 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:24:52.088212 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:24:52.088709 140291125999424 common.py:398] Att

W1115 19:24:57.319272 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:24:57.320222 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:24:57.320666 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:24:57.321197 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:24:57.321700 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:24:58.277302 140291125999424 common.py:398] Attribute T is ignored in relay.sym.tanh
W1115 19:24:58.278198 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.tanh
W1115 19:24:58.278763 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.tanh
W1115 19:24:58.279703 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.tanh
W1115 19:24:58.280139 140291125999424 common.py:398] Attribute _inp

W1115 19:25:04.131224 140291125999424 common.py:398] Attribute Tshape is ignored in relay.sym.reshape
W1115 19:25:04.131764 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.reshape
W1115 19:25:04.132264 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.reshape
W1115 19:25:04.132731 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.reshape
W1115 19:25:04.133174 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.reshape
W1115 19:25:04.298182 140291125999424 common.py:398] Attribute transpose_a is ignored in relay.sym.dense
W1115 19:25:04.299054 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:25:04.299566 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:25:04.300053 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:25:04.300547 140291125999424 common.py:39

W1115 19:25:10.951361 140291125999424 common.py:398] Attribute transpose_b is ignored in relay.sym.dense
W1115 19:25:10.952231 140291125999424 common.py:398] Attribute T is ignored in relay.sym.dense
W1115 19:25:10.952786 140291125999424 common.py:398] Attribute transpose_a is ignored in relay.sym.dense
W1115 19:25:10.953291 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.dense
W1115 19:25:10.953857 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.dense
W1115 19:25:10.954303 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.dense
W1115 19:25:10.954802 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.dense
W1115 19:25:11.613882 140291125999424 common.py:398] Attribute T is ignored in relay.sym.reshape
W1115 19:25:11.614794 140291125999424 common.py:398] Attribute Tshape is ignored in relay.sym.reshape
W1115 19:25:11.615263 140291125999424 common.py:398] Attribute _outpu

W1115 19:25:18.392892 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:25:18.393804 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:25:18.394377 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:25:18.394893 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.mean
W1115 19:25:18.395347 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.mean
W1115 19:25:18.395801 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.mean
W1115 19:25:18.908644 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:25:18.909524 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:25:18.910069 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:25:18.910575 140291125999424 common.py:398] Attribute _node_name is ignored in 

W1115 19:25:24.086462 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.mean
W1115 19:25:24.606302 140291125999424 common.py:398] Attribute Tidx is ignored in relay.sym.mean
W1115 19:25:24.607183 140291125999424 common.py:398] Attribute T is ignored in relay.sym.mean
W1115 19:25:24.607733 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.mean
W1115 19:25:24.608699 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.mean
W1115 19:25:24.609138 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.mean
W1115 19:25:24.609680 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.mean
W1115 19:25:24.954646 140291125999424 common.py:398] Attribute T is ignored in relay.sym.power
W1115 19:25:24.955509 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.power
W1115 19:25:24.956034 140291125999424 common.py:398] Attribute _node_name is 

W1115 19:25:31.823310 140291125999424 common.py:398] Attribute _output_shapes is ignored in relay.sym.softmax
W1115 19:25:31.823857 140291125999424 common.py:398] Attribute _node_name is ignored in relay.sym.softmax
W1115 19:25:31.824354 140291125999424 common.py:398] Attribute _target_layout is ignored in relay.sym.softmax
W1115 19:25:31.824795 140291125999424 common.py:398] Attribute _input_shapes is ignored in relay.sym.softmax


Compile the graph and benchmark

In [11]:
target = 'cuda -libs=cublas'
ctx = tvm.gpu(0)
with relay.build_config(opt_level=3):
    graph, lib, new_params = relay.build(mod["main"], target, params=params)

ex = runtime.create(graph, lib, ctx)
ex.set_input(**new_params)
ex.set_input(input_ids=input_ids_data, segment_ids=segment_ids_data, input_mask=input_mask_data)
ex.run()
out = ex.get_output(0)
print(out)

## check correctness
tvm.testing.assert_allclose(out.asnumpy(), tf_out, rtol=1e-5)

W1115 19:25:33.311585 140291125999424 dispatcher.py:541] Cannot find config for target=cuda -libs=cublas, workload=('dense', (1, 768, 'float32'), (2, 768, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.
W1115 19:25:33.319668 140291125999424 dispatcher.py:541] Cannot find config for target=cuda -libs=cublas, workload=('dense', (1, 768, 'float32'), (768, 768, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.
W1115 19:25:33.337387 140291125999424 dispatcher.py:541] Cannot find config for target=cuda -libs=cublas, workload=('dense', (32, 3072, 'float32'), (768, 3072, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.
W1115 19:25:33.348151 140291125999424 dispatcher.py:541] Cannot find config for target=cuda -libs=cublas, workload=('dense', (32, 768, 'float32'), (3072, 768, 'float32'), 0, 'float32'). A fallback configurat

[[0.43802983 0.5619702 ]]


In [12]:
## benchmark
ftimer = ex.module.time_evaluator("run", ctx, min_repeat_ms=2000)
prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
print("TVM latency for seq length %s: %.2f ms" % (seq_length, np.mean(prof_res)))

TVM latency for seq length 32: 3.12 ms
