[Relay] [Pass] Add mixed precision (e.g. FP16) model conversion pass #8069

Merged
merged 59 commits on Jun 21, 2021
Commits
425471d
Initial skeleton for fp16 pass.
AndrewZhaoLuo May 14, 2021
2bd5311
Working python version of fp16 pass.
AndrewZhaoLuo May 17, 2021
9fda090
Rewrite python passes in C++
AndrewZhaoLuo May 19, 2021
4903a31
Extend support to things besides CallNodes. E.g. tuples and lets
AndrewZhaoLuo May 25, 2021
41ac568
Rewrite how and when casting is done by checking types directly.
AndrewZhaoLuo Jun 3, 2021
bde1c58
linting and formatting
AndrewZhaoLuo Jun 5, 2021
2101e6e
add AST header
AndrewZhaoLuo Jun 5, 2021
8e82c40
remove todo
AndrewZhaoLuo Jun 5, 2021
399121b
lint errors2
AndrewZhaoLuo Jun 5, 2021
c8f7428
remove i386 incompatible features
AndrewZhaoLuo Jun 5, 2021
42b0c04
Trigger CI again
AndrewZhaoLuo Jun 6, 2021
65b8d6c
set seed
AndrewZhaoLuo Jun 6, 2021
8860b1c
lint
AndrewZhaoLuo Jun 6, 2021
b3b8776
address animesh's initial comments
AndrewZhaoLuo Jun 7, 2021
479124b
mutate attributes only if they were originally floats
AndrewZhaoLuo Jun 8, 2021
22ae9e7
initial comments from matthew
AndrewZhaoLuo Jun 8, 2021
d956848
add comment on hashing strat
AndrewZhaoLuo Jun 9, 2021
cb39e0f
add missing ;
AndrewZhaoLuo Jun 9, 2021
a00fd8b
edge case when mutating attrs
AndrewZhaoLuo Jun 9, 2021
e25c40c
Cody's easy to address comments
AndrewZhaoLuo Jun 9, 2021
70436f5
add test to show green-red casting works
AndrewZhaoLuo Jun 9, 2021
2c78317
remove np.random seed from each test
AndrewZhaoLuo Jun 9, 2021
44b9782
remove as many references to fp16 types in favor of generic mixed types
AndrewZhaoLuo Jun 9, 2021
4911d4f
rename RED, GREEN, GRAY to MIXED_PRECISION_ALLOW, etc.
AndrewZhaoLuo Jun 9, 2021
47c2cf8
skeleton for supporting arbitrary mixed types
AndrewZhaoLuo Jun 9, 2021
239dbfb
cool tests
AndrewZhaoLuo Jun 10, 2021
33e286f
Using MixedModeMutator
AndrewZhaoLuo Jun 10, 2021
418f873
rename things ToMixedPrecision
AndrewZhaoLuo Jun 10, 2021
7d62fe1
rename passes to amp.cc
AndrewZhaoLuo Jun 10, 2021
b4ebd06
rename tests to match transform
AndrewZhaoLuo Jun 10, 2021
8968cda
clean up typos
AndrewZhaoLuo Jun 10, 2021
180b556
rename even better to_mixed_precision
AndrewZhaoLuo Jun 10, 2021
528ef7b
don't insert into cache when dtypes equal
AndrewZhaoLuo Jun 14, 2021
5ca1462
new python interface for registering ops
AndrewZhaoLuo Jun 14, 2021
9e77cff
cleaner registering ops
AndrewZhaoLuo Jun 15, 2021
e691e4f
add fp64 structural test
AndrewZhaoLuo Jun 15, 2021
37200fd
clean up and comments
AndrewZhaoLuo Jun 15, 2021
4c93545
make copy of attributes
AndrewZhaoLuo Jun 15, 2021
6aa727d
asf header
AndrewZhaoLuo Jun 15, 2021
173801b
pylint
AndrewZhaoLuo Jun 15, 2021
f4da2df
remove TODO which is solved
AndrewZhaoLuo Jun 15, 2021
7698920
Apply nits from code review (comaniac)
AndrewZhaoLuo Jun 15, 2021
177f9c4
change cast_node_cache --> cast_node_cache_
AndrewZhaoLuo Jun 15, 2021
8ddabda
add check for returned vals
AndrewZhaoLuo Jun 15, 2021
78b5b31
better error msg
AndrewZhaoLuo Jun 15, 2021
54d7c3d
docstring for pass in python
AndrewZhaoLuo Jun 15, 2021
3331224
fix default behavior to be proper
AndrewZhaoLuo Jun 15, 2021
c781bf2
better error reporting via single flag
AndrewZhaoLuo Jun 15, 2021
b513fee
priority to 0
AndrewZhaoLuo Jun 15, 2021
4fea978
address more nits
AndrewZhaoLuo Jun 16, 2021
25d8a1d
fix story telling slightly
AndrewZhaoLuo Jun 16, 2021
a063994
restart
AndrewZhaoLuo Jun 16, 2021
22841f1
correct docstring
AndrewZhaoLuo Jun 17, 2021
7a933a5
change class fields to have _ at end
AndrewZhaoLuo Jun 17, 2021
a1dbb68
add class docstring
AndrewZhaoLuo Jun 17, 2021
97fbd89
add comment on accumulation dtype hack
AndrewZhaoLuo Jun 17, 2021
64408ee
ADT warnings
AndrewZhaoLuo Jun 17, 2021
98e9cea
add todo
AndrewZhaoLuo Jun 17, 2021
2634182
fix linter
AndrewZhaoLuo Jun 18, 2021
15 changes: 15 additions & 0 deletions include/tvm/runtime/data_type.h
@@ -389,4 +389,19 @@ inline DLDataType String2DLDataType(std::string s) {
using DataType = runtime::DataType;

} // namespace tvm

namespace std {
template <>
struct hash<tvm::DataType> {
  inline int cantor_pairing_function(int a, int b) const { return (a + b) * (a + b + 1) / 2 + b; }
  std::size_t operator()(tvm::DataType const& dtype) const {
    int a = dtype.code();
    int b = dtype.bits();
    int c = dtype.lanes();
    int d = cantor_pairing_function(a, b);
    return cantor_pairing_function(c, d);
  }
};
} // namespace std

#endif // TVM_RUNTIME_DATA_TYPE_H_
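The std::hash specialization added above combines a DataType's code, bits, and lanes with two applications of the Cantor pairing function, so DataType can key standard hash containers in the new pass. A rough Python sketch of the same scheme (illustrative only, not part of the patch):

```python
# Two nested Cantor pairings collapse (code, bits, lanes) into one integer.
def cantor_pairing(a: int, b: int) -> int:
    return (a + b) * (a + b + 1) // 2 + b

def hash_dtype(code: int, bits: int, lanes: int) -> int:
    return cantor_pairing(lanes, cantor_pairing(code, bits))

# e.g. float16 (code=2, bits=16, lanes=1) vs. float32 (code=2, bits=32, lanes=1)
assert hash_dtype(2, 16, 1) != hash_dtype(2, 32, 1)
```

Cantor pairing is a bijection on pairs of non-negative integers, so distinct (code, bits, lanes) triples always hash to distinct values.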
1 change: 1 addition & 0 deletions python/tvm/relay/op/__init__.py
@@ -29,6 +29,7 @@
debug,
register_external_compiler,
register_fake_quantization_to_integer,
register_mixed_precision_conversion,
)
from . import strategy

33 changes: 30 additions & 3 deletions python/tvm/relay/op/op.py
@@ -18,10 +18,11 @@
"""The base node types for the Relay language."""
import tvm._ffi
import tvm.ir
from tvm.driver import lower, build
from tvm.target import get_native_generic_func, GenericFunc
from tvm.runtime import Object
import tvm.ir._ffi_api
from tvm.driver import build, lower
from tvm.runtime import Object
from tvm.target import GenericFunc, get_native_generic_func

from . import _make


@@ -457,6 +458,32 @@ def register_fake_quantization_to_integer(op_name, func=None, level=10):
return tvm.ir.register_op_attr(op_name, "FTVMFakeQuantizationToInteger", func, level)


def register_mixed_precision_conversion(op_name, func=None, level=10):
    """Register a mixed precision conversion function for an op.

    Given an op, the registered function should return information on how its calls should be
    converted. Specifically, the function takes a call node and the target mixed precision
    datatype (e.g. FP16) and returns the conversion category
    (see python/tvm/relay/transform/mixed_precision.py) as well as the accumulation
    and output datatype of the operation in the mixed precision dtype space.

    Parameters
    ----------
    op_name : str
        The name of the operator

    func: function (call_node: relay.Call, target_dtype: string)
          -> [conversion category, accumulation dtype, output dtype]: [int, string, string]
        A function which, given a call_node and target_dtype (e.g. FP16), returns the
        conversion category and associated accumulation/output dtypes of the operation
        when transformed into the mixed precision dtype space.

    level : int
        The priority level
    """
    return tvm.ir.register_op_attr(op_name, "FTVMMixedPrecisionConversionType", func, level)


@tvm._ffi.register_func("relay.op.compiler._lower")
def _lower(name, schedule, inputs, outputs):
return lower(schedule, list(inputs) + list(outputs), name=name)
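For illustration, the new hook can be used to override the defaults for a single op. The sketch below is hypothetical (the op name, level, and return values are not part of this diff) and assumes a registration at a higher level takes precedence over the default one:

```python
# Hypothetical example: keep nn.softmax in full precision regardless of the defaults.
from tvm import relay
from tvm.relay.op import register_mixed_precision_conversion

MIXED_PRECISION_NEVER = 2  # conversion category, see relay/transform/mixed_precision.py

@register_mixed_precision_conversion("nn.softmax", level=11)  # level 11 > default 10
def softmax_stays_fp32(call_node: relay.Call, mixed_precision_type: str):
    # [conversion category, accumulation dtype, output dtype]
    return [MIXED_PRECISION_NEVER, "float32", "float32"]
```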
195 changes: 195 additions & 0 deletions python/tvm/relay/transform/mixed_precision.py
@@ -0,0 +1,195 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=line-too-long,unused-argument
"""Default behavior for ops in mixed_precision pass. Import this file to use."""
from typing import List

from tvm import relay
from tvm.relay.op import register_mixed_precision_conversion

# MIXED_PRECISION_ALWAYS ops should always be done in lower precision due to the speed and memory
# savings. MIXED_PRECISION_FOLLOW ops can be done in lower precision but don't have speedups to
# justify a cast. MIXED_PRECISION_NEVER colored ops should not be done in lower precision due to
# numerical reasons.
MIXED_PRECISION_ALWAYS = 0
MIXED_PRECISION_FOLLOW = 1
MIXED_PRECISION_NEVER = 2

# Default lists inspired by TF's classifications:
# github.com/tensorflow/tensorflow/blob/v2.5.0/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h
# They have a bias toward Nvidia Tensor Cores, so modify the lists to suit your hardware.
DEFAULT_ALWAYS_LIST = [
"nn.conv1d",
"nn.conv2d",
"nn.conv3d",
"nn.conv1d_transpose",
"nn.conv2d_transpose",
"nn.conv3d_transpose",
"nn.dense",
# "nn.batch_matmul", # Handled by a special case
]
DEFAULT_FOLLOW_LIST = [
# These ops add new data or change shape
"nn.pad",
"nn.batch_flatten",
"concatenate",
"zeros",
"split",
"squeeze",
"transpose",
"expand_dims",
"reshape",
"dyn.reshape",
"broadcast_to_like",
"dyn.broadcast_to",
"strided_slice",
"dyn.strided_slice",
"take",
"argwhere",
"where",
"tile",
"dyn.tile",
"scatter",
"full",
"dyn.full",
# Comparison
"less",
"greater",
"less_equal",
"greater_equal",
# By definition copy and cast will depend on inputs for output.
"copy",
"cast",
"cast_like",
# Simple arithmetic
"add",
"subtract",
"multiply",
"divide",
"nn.bias_add",
"nn.batch_norm",
"sum",
"mean",
"sqrt",
"shape_of",
# Simple activations
"max",
"min",
"maximum",
"minimum",
"nn.relu",
"nn.leaky_relu",
"nn.prelu",
"nn.dropout",
# Complicated activations which saturate in a narrow range
"sigmoid",
"tanh",
# Pooling operations
"nn.max_pool1d",
"nn.max_pool2d",
"nn.max_pool3d",
"nn.avg_pool1d",
"nn.avg_pool2d",
"nn.avg_pool3d",
# "nn.global_max_pool1d", # does not exist yet
"nn.global_max_pool2d",
# "nn.global_max_pool3d", # does not exist yet
# "nn.global_avg_pool1d", # does not exist yet
"nn.global_avg_pool2d",
# "nn.global_avg_pool3d", # does not exist yet
"nn.adaptive_max_pool1d",
"nn.adaptive_max_pool2d",
"nn.adaptive_max_pool3d",
"nn.adaptive_avg_pool1d",
"nn.adaptive_avg_pool2d",
"nn.adaptive_avg_pool3d",
]
DEFAULT_NEVER_LIST = [
# In general if |f(x)| >> |x| for expected inputs then put the op here.
"exp",
"power",
"nn.cross_entropy",
"nn.cross_entropy_with_logits",
"nn.softmax",
"nn.l2_normalize",
# The error function doesn't currently lower to an fp16 version in LLVM.
# Move to the follow list when it does.
"erf",
]
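A quick numeric illustration (not from the PR) of why ops with |f(x)| >> |x| land on the NEVER list above: even modest inputs to exp overflow float16, whose maximum finite value is about 65504.

```python
import numpy as np

print(np.exp(12.0))              # ~162754.8 in float64
print(np.float16(np.exp(12.0)))  # inf: the result exceeds float16's max (~65504)
```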


# Returns a decorator that registers the given function under FTVMMixedPrecisionConversionType
# for every op in list_ops.
def register_func_to_op_list(list_ops: List):
    def decorator(func):
        for op_name in list_ops:
            register_mixed_precision_conversion(op_name, func=func)

    return decorator


def get_generic_out_dtypes(call_node: relay.Call, mixed_precision_type: str) -> List[str]:
    """A function which returns output dtypes in a way which works for most ops.

    Parameters
    ----------
    call_node: relay.Call
        The call node containing the op.
    mixed_precision_type: str
        The target type to run the operation in.

    Returns
    -------
    output_dtypes : [str, str]
        A list of two strings. The first represents the datatype used for accumulation
        in the operation. The second represents the actual output datatype.
    """
    # Assume an op supports a separate accumulation dtype iff it has an out_dtype attr.
    # This is because there is no better way right now to tell which ops support accumulating
    # at different data types.
    # Some discussion about making this better is here:
    # https://discuss.tvm.apache.org/t/rfc-relay-fp32-fp16-model-support/9994/4?u=andrewzhaoluo
    if hasattr(call_node.attrs, "out_dtype"):
        return ["float32", mixed_precision_type]

    # [accumulation_dtype, output_dtype] for the operations
    return [mixed_precision_type, mixed_precision_type]


# Functions for FTVMMixedPrecisionConversionType: each takes a CallNode and a target dtype
# and returns a conversion category, an accumulation dtype, and an output dtype.
@register_func_to_op_list(list_ops=DEFAULT_ALWAYS_LIST)
def generic_always_op(call_node: relay.Call, mixed_precision_type: str) -> List:
    return [MIXED_PRECISION_ALWAYS] + get_generic_out_dtypes(call_node, mixed_precision_type)


@register_func_to_op_list(list_ops=DEFAULT_FOLLOW_LIST)
def generic_follow_op(call_node: relay.Call, mixed_precision_type: str) -> List:
    return [MIXED_PRECISION_FOLLOW] + get_generic_out_dtypes(call_node, mixed_precision_type)


@register_func_to_op_list(list_ops=DEFAULT_NEVER_LIST)
def generic_never_op(call_node: relay.Call, mixed_precision_type: str) -> List:
    return [MIXED_PRECISION_NEVER] + get_generic_out_dtypes(call_node, mixed_precision_type)


@register_mixed_precision_conversion("nn.batch_matmul")
def nn_batch_matmul(call_node: relay.Call, mixed_precision_type: str) -> List:
    # TODO(AndrewZhaoLuo): remove when batch_matmul handles accumulation dtypes well.
    # Batched matmul has inconsistent support for mixed precision operations.
    # Many schedules ignore the out_dtype attribute, which leads to errors when
    # input types do not match the out_dtype. Therefore, accumulate to output_dtype.
    return [MIXED_PRECISION_ALWAYS, "float16", "float16"]
35 changes: 30 additions & 5 deletions python/tvm/relay/transform/transform.py
@@ -18,16 +18,15 @@
"""
Relay pass transformation infrastructure.
"""
import types
import inspect
import functools
import inspect
import types
import warnings

import tvm.ir
from tvm import te
from tvm import relay, te
from tvm.runtime import ndarray as _nd

from tvm import relay
from . import _ffi_api


@@ -1168,7 +1167,7 @@ def AnnotateSpans():
Returns
-------
ret : tvm.transform.Pass
The regsistered AnnotateSpans pass.
The registered AnnotateSpans pass.
"""
return _ffi_api.AnnotateSpans()

@@ -1199,3 +1198,29 @@ def FakeQuantizationToInteger():
The registered SimplifyExpr pass.
"""
return _ffi_api.FakeQuantizationToInteger()


def ToMixedPrecision(mixed_precision_type="float16", missing_op_mode=1):
    """
    Automatic mixed precision rewriter. Rewrite an FP32 relay graph into a version
    where as many operations as possible are in the target mixed_precision_type.

    Parameters
    ----------
    mixed_precision_type: str
        The target datatype to transform operations in the graph to use.

    missing_op_mode: int
        Determines how to handle ops not registered with FTVMMixedPrecisionConversionType:
        0: Does not allow any missing ops. Will throw errors when encountering any.
        1: Allow missing ops but emit warnings.
        2: Allow missing ops and silently ignore them.

    Returns
    -------
    ret : tvm.transform.Pass
        The registered pass.
    """
    if missing_op_mode < 0 or missing_op_mode > 2:
        raise ValueError("Missing op mode is either 0, 1, or 2")
    return _ffi_api.ToMixedPrecision(mixed_precision_type, missing_op_mode)
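For reference, an end-to-end sketch of invoking the pass on a small module (the module, shapes, and printed outcome are assumptions for illustration, not taken from the PR):

```python
import tvm
from tvm import relay

# Toy fp32 graph: conv2d followed by relu.
x = relay.var("x", shape=(1, 3, 224, 224), dtype="float32")
w = relay.var("w", shape=(16, 3, 3, 3), dtype="float32")
y = relay.nn.relu(relay.nn.conv2d(x, w))
mod = tvm.IRModule.from_expr(relay.Function([x, w], y))
mod = relay.transform.InferType()(mod)

# Importing this module registers the default ALWAYS/FOLLOW/NEVER op lists.
from tvm.relay.transform import mixed_precision  # pylint: disable=unused-import

# missing_op_mode=1: unregistered ops are left untouched but emit a warning.
amp_mod = relay.transform.ToMixedPrecision("float16", missing_op_mode=1)(mod)
print(amp_mod)  # conv2d should now take fp16 inputs and accumulate in fp32 via out_dtype
```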
10 changes: 7 additions & 3 deletions python/tvm/topi/nn/conv2d.py
@@ -18,13 +18,15 @@
# pylint: disable=unused-argument, redefined-builtin
"""Conv2D operators"""
from __future__ import absolute_import as _abs

from collections import namedtuple

import tvm
from tvm import te, auto_scheduler
from tvm import auto_scheduler, te

from ..utils import get_const_int, get_const_tuple, simplify, tag
from .pad import pad
from .utils import get_pad_tuple
from ..utils import simplify, get_const_tuple, get_const_int, tag
from .winograd_util import winograd_transform_matrices

# workload description of conv2d
@@ -548,7 +550,9 @@ def conv2d_NCHWc(data, kernel, stride, padding, dilation, layout, out_layout, ou
ow * WSTR + kw * dilation_w,
idxmod(ic, ic_bn),
].astype(out_dtype)
* kernel[oc_chunk, idxdiv(ic, ic_bn), kh, kw, idxmod(ic, ic_bn), oc_block],
* kernel[oc_chunk, idxdiv(ic, ic_bn), kh, kw, idxmod(ic, ic_bn), oc_block].astype(
out_dtype
),
axis=[ic, kh, kw],
),
name="conv2d_NCHWc",