apache · tqchen · Oct 4, 2018 · Jul 19, 2018 · Jul 24, 2018 · Jul 25, 2018
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -159,6 +159,9 @@ if(USE_GRAPH_RUNTIME)
   list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_SRCS})
 
   if(USE_GRAPH_RUNTIME_DEBUG)
+    message(STATUS "Build with Graph runtime debug support...")
+    file(GLOB RUNTIME_GRAPH_DEBUG_SRCS src/runtime/graph/debug/*.cc)
+    list(APPEND RUNTIME_SRCS ${RUNTIME_GRAPH_DEBUG_SRCS})
     set_source_files_properties(${RUNTIME_GRAPH_SRCS}
       PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_DEBUG")
   endif(USE_GRAPH_RUNTIME_DEBUG)

diff --git a/Jenkinsfile b/Jenkinsfile
@@ -97,6 +97,7 @@ stage('Build') {
            echo set\\(USE_SORT ON\\) >> config.cmake
            echo set\\(USE_GRAPH_RUNTIME ON\\) >> config.cmake
            echo set\\(USE_STACKVM_RUNTIME ON\\) >> config.cmake
+           echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
            echo set\\(USE_BLAS openblas\\) >> config.cmake
            echo set\\(CMAKE_CXX_COMPILER g++\\) >> config.cmake
            echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake
@@ -111,6 +112,7 @@ stage('Build') {
            echo set\\(USE_OPENCL ON\\) >> config.cmake
            echo set\\(USE_ROCM ON\\) >> config.cmake
            echo set\\(USE_VULKAN ON\\) >> config.cmake
+           echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
            echo set\\(CMAKE_CXX_COMPILER clang-6.0\\) >> config.cmake
            echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake
            """
@@ -127,6 +129,7 @@ stage('Build') {
            cd build
            cp ../cmake/config.cmake .
            echo set\\(USE_SORT ON\\) >> config.cmake
+           echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
            echo set\\(USE_LLVM llvm-config-4.0\\) >> config.cmake
            echo set\\(CMAKE_CXX_COMPILER g++\\) >> config.cmake
            echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake
@@ -150,6 +153,7 @@ stage('Build') {
            cp ../cmake/config.cmake .
            echo set\\(USE_SORT ON\\) >> config.cmake
            echo set\\(USE_RPC ON\\) >> config.cmake
+           echo set\\(USE_GRAPH_RUNTIME_DEBUG ON\\) >> config.cmake
            echo set\\(USE_LLVM llvm-config-5.0\\) >> config.cmake
            echo set\\(CMAKE_CXX_COMPILER g++\\) >> config.cmake
            echo set\\(CMAKE_CXX_FLAGS -Werror\\) >> config.cmake

diff --git a/docs/dev/debugger.rst b/docs/dev/debugger.rst
@@ -0,0 +1,155 @@
+=================
+**Debugger**
+=================
+
+TVM Debugger is an interface for debugging TVM's computation graph execution. It helps to provide access to graph structures and tensor values at the TVM runtime.
+
+*******************************************
+**Debug Exchange Format**
+*******************************************
+
+**1. Computational Graph**
+==========================
+The optimized graph built by NNVM is dumped as-is in JSON
+serialized format. It contains all of the
+information about the graph. The UX can either use this graph directly
+or transform it into a format the UX can understand.
+
+The Graph JSON format is explained below
+
+1. ``nodes``
+Nodes are either placeholders or computational nodes in NNVM graph. The nodes are stored
+as a list. A node contains the below information
+
+-     ``op`` - operation type, ``null`` means it is a placeholder/variable/input node and ``tvm_op`` means this node can be executed
+-     ``name`` - Name of the node
+-     ``inputs`` - Position of the inputs for this operation, Inputs is a list of tuples with (nodeid, index, version). (Optional)
+-     ``attrs`` - Attributes of the node which contains the following information
+
+    -     ``flatten_data`` - Whether this data needs to be flattened before execution
+    -     ``func_name`` - Fused function name, corresponds to the symbol in the lib generated by NNVM compilation process.
+    -     ``num_inputs`` - Number of inputs for this node
+    -     ``num_outputs`` - Number of outputs this node produces
+
+2. ``arg_nodes``
+arg_nodes is a list of indices of the nodes which are placeholders/variables/inputs or constants/params of the graph.
+
+3. ``heads``
+heads is a list of entries as the output of the graph.
+
+4. ``node_row_ptr``
+node\_row\_ptr stores the history of forward path, so you can skip constructing the entire graph in inference tasks.
+
+5. ``attrs``
+attrs can contain version numbers or similar helpful information.
+
+- ``storage_id`` - Memory slot id for each node in the storage layout.
+- ``dtype`` - Datatype of each node (enum value).
+- ``dltype`` - Datatype of each node in order.
+- ``shape`` - Shape of each node in order.
+- ``device_index`` - Device assignment for each entry in the graph.
+
+Example of dumped graph:
+
+::
+
+    {
+      "nodes": [                                    # List of nodes
+        {
+          "op": "null",                             # operation type = null, this is a placeholder/variable/input or constant/param node
+          "name": "x",                              # Name of the argument node
+          "inputs": []                              # inputs for this node, it is empty since this is an argument node
+        },
+        {
+          "op": "tvm_op",                           # operation type = tvm_op, this node can be executed
+          "name": "relu0",                          # Name of the node
+          "attrs": {                                # Attributes of the node
+            "flatten_data": "0",                    # Whether this data needs to be flattened
+            "func_name": "fuse_l2_normalize_relu",  # Fused function name, corresponds to the symbol in the lib generated by NNVM compilation process
+            "num_inputs": "1",                      # Number of inputs for this node
+            "num_outputs": "1"                      # Number of outputs this node produces
+          },
+          "inputs": [[0, 0, 0]]                     # Position of the inputs for this operation
+        }
+      ],
+      "arg_nodes": [0],                             # Which all nodes in this are argument nodes
+      "node_row_ptr": [0, 1, 2],                    # Row indices for faster depth first search
+      "heads": [[1, 0, 0]],                         # Position of the output nodes for this operation
+      "attrs": {                                    # Attributes for the graph
+        "storage_id": ["list_int", [1, 0]],         # memory slot id for each node in the storage layout
+        "dtype": ["list_int", [0, 0]],              # Datatype of each node (enum value)
+        "dltype": ["list_str", [                    # Datatype of each node in order
+            "float32",
+            "float32"]],
+        "shape": ["list_shape", [                   # Shape of each node in order
+            [1, 3, 20, 20],
+            [1, 3, 20, 20]]],
+        "device_index": ["list_int", [1, 1]]        # Device assignment for each node in order
+      }
+    }
+
+**2. Tensor dumping**
+=====================
+
+The tensor received after execution is in ``tvm.ndarray`` type. All the tensors will
+be saved as binary bytes in serialized format.  The result binary bytes can be loaded by the
+API "load_params".
+
+Example of loading the parameters
+
+::
+
+    with open(path_params, "rb") as fi:
+        loaded_params = bytearray(fi.read())
+
+    module.load_params(loaded_params)
+
+***************************************
+How to use Debugger?
+***************************************
+
+1. In ``config.cmake`` set the ``USE_GRAPH_RUNTIME_DEBUG`` flag to ``ON``
+
+   ::
+
+       # Whether enable additional graph debug functions
+       set(USE_GRAPH_RUNTIME_DEBUG ON)
+
+2. Run ``make`` to build TVM, so that it builds ``libtvm_runtime.so``
+
+3. In the frontend script, instead of
+   ``from tvm.contrib import graph_runtime``, import ``debug_runtime`` with
+   ``from tvm.contrib.debugger import debug_runtime as graph_runtime``
+
+::
+
+    from tvm.contrib.debugger import debug_runtime as graph_runtime
+    m = graph_runtime.create(graph, lib, ctx, dump_root="/tmp/tvmdbg")
+    # set inputs
+    m.set_input('data', tvm.nd.array(data.astype(dtype)))
+    m.set_input(**params)
+    # execute
+    m.run()
+    tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy()
+
+The outputs are dumped to a temporary folder under ``/tmp`` or to the
+folder specified while creating the runtime.
+
+***************************************
+Sample Output
+***************************************
+
+Below is the output of running ``tvm/nnvm/tutorials/from_onnx.py`` with the debugger.
+
+::
+
+    Node Name               Ops                                                                  Time(us)   Time(%)  Start Time       End Time         Shape                Inputs  Outputs
+    ---------               ---                                                                  --------   -------  ----------       --------         -----                ------  -------
+    1_NCHW1c                fuse___layout_transform___4                                          56.52      0.02     15:24:44.177475  15:24:44.177534  (1, 1, 224, 224)     1       1
+    _contrib_conv2d_nchwc0  fuse__contrib_conv2d_NCHWc                                           12436.11   3.4      15:24:44.177549  15:24:44.189993  (1, 1, 224, 224, 1)  2       1
+    relu0_NCHW8c            fuse___layout_transform___broadcast_add_relu___layout_transform__    4375.43    1.2      15:24:44.190027  15:24:44.194410  (8, 1, 5, 5, 1, 8)   2       1
+    _contrib_conv2d_nchwc1  fuse__contrib_conv2d_NCHWc_1                                         213108.6   58.28    15:24:44.194440  15:24:44.407558  (1, 8, 224, 224, 8)  2       1
+    relu1_NCHW8c            fuse___layout_transform___broadcast_add_relu___layout_transform__    2265.57    0.62     15:24:44.407600  15:24:44.409874  (64, 1, 1)           2       1
+    _contrib_conv2d_nchwc2  fuse__contrib_conv2d_NCHWc_2                                         104623.15  28.61    15:24:44.409905  15:24:44.514535  (1, 8, 224, 224, 8)  2       1
+    relu2_NCHW2c            fuse___layout_transform___broadcast_add_relu___layout_transform___1  2004.77    0.55     15:24:44.514567  15:24:44.516582  (8, 8, 3, 3, 8, 8)   2       1
+    _contrib_conv2d_nchwc3  fuse__contrib_conv2d_NCHWc_3                                         25218.4    6.9      15:24:44.516628  15:24:44.541856  (1, 8, 224, 224, 8)  2       1
+    reshape1                fuse___layout_transform___broadcast_add_reshape_transpose_reshape    1554.25    0.43     15:24:44.541893  15:24:44.543452  (64, 1, 1)           2       1
diff --git a/python/tvm/contrib/debugger/__init__.py b/python/tvm/contrib/debugger/__init__.py
diff --git a/python/tvm/contrib/debugger/debug_result.py b/python/tvm/contrib/debugger/debug_result.py
@@ -0,0 +1,189 @@
+"""Graph debug results dumping class."""
+import os
+import json
+import tvm
+
+GRAPH_DUMP_FILE_NAME = '_tvmdbg_graph_dump.json'
+
+class DebugResult(object):
+    """Graph debug data module.
+
+    Formats and dumps all debug data: the input graph is parsed and written to
+    ``dump_path`` at construction time, and the collected output tensors can be
+    dumped to file or summarised as a table afterwards. A frontend can read the
+    dumped graph and tensors for visualization.
+
+    Parameters
+    ----------
+    graph_json : str
+        The graph to be deployed in json format output by nnvm graph. Each operator (tvm_op)
+        in the graph will have a one to one mapping with the symbol in libmod which is used
+        to construct a "PackedFunc" .
+
+    dump_path : str
+        Output data path is read/provided from frontend
+    """
+
+    def __init__(self, graph_json, dump_path):
+        self._dump_path = dump_path
+        # Both lists start empty and are never filled inside this class;
+        # presumably the debug runtime appends to them while executing -- TODO confirm.
+        self._output_tensor_list = []
+        self._time_list = []
+        self._parse_graph(graph_json)
+        # dump the json information
+        self.dump_graph_json(graph_json)
+
+    def _parse_graph(self, graph_json):
+        """Parse and extract the NNVM graph and update the nodes, shapes and dltype.
+
+        Parameters
+        ----------
+        graph_json : str
+           The graph to be deployed in json format output by nnvm graph.
+        """
+        json_obj = json.loads(graph_json)
+        self._nodes_list = json_obj['nodes']
+        self._shapes_list = json_obj['attrs']['shape']
+        self._dtype_list = json_obj['attrs']['dltype']
+        self._update_graph_json()
+
+    def _update_graph_json(self):
+        """Update the nodes_list with name, shape and data type,
+        for temporarily storing the output.
+
+        For every node, in place:
+
+        - ``inputs`` is replaced by the list of the input nodes' names,
+        - ``op`` becomes ``"param"`` for nodes without ``attrs``, otherwise the
+          node's ``func_name``,
+        - ``attrs["T"]`` is set to ``"type: <dltype>"``,
+        - ``shape`` is set from the graph-level shape attribute.
+        """
+        for i, node in enumerate(self._nodes_list):
+            # 'inputs' is optional in the graph JSON, so tolerate its absence.
+            # Each input entry starts with the id of the producing node.
+            node['inputs'] = [self._nodes_list[input_node[0]]['name']
+                              for input_node in node.get('inputs', [])]
+            # Index 1 skips the type tag ("list_str" / "list_shape") that
+            # precedes the actual per-node list.
+            dtype = str("type: " + self._dtype_list[1][i])
+            if 'attrs' not in node:
+                node['attrs'] = {}
+                node['op'] = "param"
+            else:
+                node['op'] = node['attrs']['func_name']
+            node['attrs'].update({"T": dtype})
+            node['shape'] = self._shapes_list[1][i]
+
+    def _cleanup_tensors(self):
+        """Remove the tensor dump files from the dump folder.
+
+        Every regular file in ``dump_path`` whose name does not end with
+        ``.json`` is deleted, so the dumped graph is kept.
+        """
+        for filename in os.listdir(self._dump_path):
+            # os.listdir returns bare names; they must be joined with the dump
+            # folder, otherwise they are resolved against the current working
+            # directory instead.
+            file_path = os.path.join(self._dump_path, filename)
+            if os.path.isfile(file_path) and not filename.endswith(".json"):
+                os.remove(file_path)
+
+    def get_graph_nodes(self):
+        """Return the nodes list
+        """
+        return self._nodes_list
+
+    def get_graph_node_shapes(self):
+        """Return the nodes shapes list
+        """
+        return self._shapes_list
+
+    def get_graph_node_output_num(self, node):
+        """Return the number of outputs of a node
+
+        Nodes marked as ``param`` always count as one output; for the others
+        the ``num_outputs`` attribute is used.
+        """
+        return 1 if node['op'] == 'param' else int(node['attrs']['num_outputs'])
+
+    def get_graph_node_dtypes(self):
+        """Return the nodes dtype list
+        """
+        return self._dtype_list
+
+    def dump_output_tensor(self):
+        """Dump the outputs to the dump folder.
+
+        All collected tensors are serialized into a single
+        ``output_tensors.params`` file inside ``dump_path``. Each tensor is
+        stored under the key ``<node name>_<output index>__<order>``.
+        """
+        #cleanup existing tensors before dumping
+        self._cleanup_tensors()
+        eid = 0
+        order = 0
+        output_tensors = {}
+        for node, time in zip(self._nodes_list, self._time_list):
+            num_outputs = self.get_graph_node_output_num(node)
+            for j in range(num_outputs):
+                # NOTE(review): `order` is a running sum of the node times, not
+                # a counter -- confirm this is the intended key suffix.
+                order += time[0]
+                key = node['name'] + "_" + str(j) + "__" + str(order)
+                output_tensors[key] = self._output_tensor_list[eid]
+                eid += 1
+
+        with open(os.path.join(self._dump_path, "output_tensors.params"), "wb") as param_f:
+            param_f.write(save_tensors(output_tensors))
+
+    def dump_graph_json(self, graph):
+        """Dump json formatted graph.
+
+        The graph is written to ``GRAPH_DUMP_FILE_NAME`` inside ``dump_path``.
+
+        Parameters
+        ----------
+        graph : json format
+            json formatted NNVM graph contain list of each node's
+            name, shape and type.
+        """
+        # NOTE(review): __init__ passes the serialized JSON *string* here, which
+        # json.dump writes out as one quoted string literal rather than as an
+        # object -- confirm whether the parsed graph was meant instead.
+        graph_dump_file_name = GRAPH_DUMP_FILE_NAME
+        with open(os.path.join(self._dump_path, graph_dump_file_name), 'w') as outfile:
+            json.dump(graph, outfile, indent=4, sort_keys=False)
+
+    def display_debug_result(self):
+        """Displays the debugger result
+
+        Prints one table row per output of every non-param node with its
+        name, op, time, share of the total time, start/end time, output
+        shape and number of inputs/outputs.
+        """
+        header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Start Time", \
+                    "End Time", "Shape", "Inputs", "Outputs"]
+        lines = ["---------", "---", "--------", "-------", "----------", \
+                    "--------", "-----", "------", "-------"]
+        eid = 0
+        data = []
+        total_time = sum(time[0] for time in self._time_list)
+        for node, time in zip(self._nodes_list, self._time_list):
+            num_outputs = self.get_graph_node_output_num(node)
+            for _ in range(num_outputs):
+                op = node['op']
+                if op == 'param':
+                    # dump_output_tensor consumes one entry of the output
+                    # tensor list per param, so the entry index has to advance
+                    # here too; otherwise every following row would show the
+                    # shape of the wrong tensor.
+                    eid += 1
+                    continue
+                name = node['name']
+                shape = str(self._output_tensor_list[eid].shape)
+                time_us = round(time[0] * 1000000, 2)
+                time_percent = round(((time[0] / total_time) * 100), 2)
+                inputs = str(node['attrs']['num_inputs'])
+                outputs = str(node['attrs']['num_outputs'])
+                node_data = [name, op, time_us, time_percent, str(time[1]), str(time[2]), \
+                             shape, inputs, outputs]
+                data.append(node_data)
+                eid += 1
+        # Each column is left-aligned and as wide as its longest cell plus two
+        # spaces of padding.
+        fmt = ""
+        for i, title in enumerate(header):
+            max_len = max([len(title)] + [len(str(row[i])) for row in data])
+            fmt = fmt + "{:<" + str(max_len + 2) + "}"
+        print(fmt.format(*header))
+        print(fmt.format(*lines))
+        for row in data:
+            print(fmt.format(*row))
+
+def save_tensors(params):
+    """Serialize a dictionary of tensors into binary bytes.
+
+    The returned bytes can be loaded back by the GraphModule
+    through its "load_params" API.
+
+    Parameters
+    ----------
+    params : dict of str to NDArray
+        Mapping from tensor name to tensor value.
+
+    Returns
+    -------
+    param_bytes: bytearray
+        Serialized parameters.
+    """
+    packed_save = tvm.get_global_func("_save_param_dict")
+
+    # The packed function is called with a flat name, tensor, name, tensor, ...
+    # argument list.
+    flat_args = []
+    for name, tensor in params.items():
+        flat_args.extend((name, tvm.nd.array(tensor)))
+    return packed_save(*flat_args)