[DNNL][CBLAS][BYOC] Unifies all MKLDNN/DNNL to DNNL #11638

Merged · 4 commits · Jun 10, 2022
3 changes: 1 addition & 2 deletions CMakeLists.txt
@@ -84,8 +84,7 @@ tvm_option(PICOJSON_PATH "Path to PicoJSON" "3rdparty/picojson")
tvm_option(USE_BYODT_POSIT "Build with BYODT software emulated posit custom datatype" OFF)
tvm_option(USE_BLAS "The blas library to be linked" none)
tvm_option(USE_MKL "MKL root path when use MKL blas" OFF)
tvm_option(USE_MKLDNN "Build with MKLDNN" OFF)
tvm_option(USE_DNNL_CODEGEN "Enable MKLDNN (DNNL) codegen" OFF)
tvm_option(USE_DNNL "Enable DNNL codegen" OFF)
tvm_option(USE_CUDNN "Build with cuDNN" OFF)
tvm_option(USE_CUBLAS "Build with cuBLAS" OFF)
tvm_option(USE_CUTLASS "Build with CUTLASS" OFF)
17 changes: 12 additions & 5 deletions cmake/config.cmake
@@ -160,8 +160,18 @@ set(USE_BLAS none)
# set(USE_MKL <path to venv or site-packages directory>) if using `pip install mkl`
set(USE_MKL OFF)

# Whether use MKLDNN library, choices: ON, OFF, path to mkldnn library
set(USE_MKLDNN OFF)
# Whether use DNNL library, a.k.a. Intel oneDNN: https://oneapi-src.github.io/oneDNN
#
# Currently matmul/dense/conv2d are supported via -libs=dnnl,
# and more op patterns are supported by the DNNL codegen (JSON runtime).
#
# choices:
# - ON: enable DNNL for BYOC and -libs=dnnl; the DNNL codegen defaults to the JSON runtime
# - JSON: same as ON
# - C_SRC: use the C source runtime in the DNNL codegen
# - path/to/oneDNN: oneDNN root path
# - OFF: disable DNNL
set(USE_DNNL OFF)

# Whether use OpenMP thread pool, choices: gnu, intel
# Note: "gnu" uses gomp library, "intel" uses iomp5 library
@@ -212,9 +222,6 @@ set(USE_ROCBLAS OFF)
# Whether use contrib sort
set(USE_SORT ON)

# Whether use MKL-DNN (DNNL) codegen
set(USE_DNNL_CODEGEN OFF)

# Whether to use Arm Compute Library (ACL) codegen
# We provide 2 separate flags since we cannot build the ACL runtime on x86.
# This is useful for cases where you want to cross-compile a relay graph
3 changes: 1 addition & 2 deletions cmake/modules/LibInfo.cmake
@@ -65,7 +65,7 @@ function(add_lib_info src_file)
TVM_INFO_USE_CUDNN="${USE_CUDNN}"
TVM_INFO_USE_CUSTOM_LOGGING="${USE_CUSTOM_LOGGING}"
TVM_INFO_USE_CUTLASS="${USE_CUTLASS}"
TVM_INFO_USE_DNNL_CODEGEN="${USE_DNNL_CODEGEN}"
TVM_INFO_USE_DNNL="${USE_DNNL}"
TVM_INFO_USE_ETHOSN="${USE_ETHOSN}"
TVM_INFO_USE_FALLBACK_STL_MAP="${USE_FALLBACK_STL_MAP}"
TVM_INFO_USE_GRAPH_EXECUTOR_CUDA_GRAPH="${USE_GRAPH_EXECUTOR_CUDA_GRAPH}"
@@ -85,7 +85,6 @@ function(add_lib_info src_file)
TVM_INFO_USE_MICRO="${USE_MICRO}"
TVM_INFO_USE_MIOPEN="${USE_MIOPEN}"
TVM_INFO_USE_MKL="${USE_MKL}"
TVM_INFO_USE_MKLDNN="${USE_MKLDNN}"
TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
TVM_INFO_USE_NNPACK="${USE_NNPACK}"
TVM_INFO_USE_OPENCL="${USE_OPENCL}"
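For anyone trying the renamed flag locally, here is a minimal sketch (not part of this diff) of checking the new TVM_INFO_USE_DNNL entry from Python after rebuilding; it assumes a TVM build where tvm.support.libinfo() is available.

```python
import tvm

# Build-time options, including the USE_DNNL value set in config.cmake,
# are surfaced through the LibInfo entries registered above.
print(tvm.support.libinfo().get("USE_DNNL", "not recorded"))  # e.g. "ON", "JSON", "C_SRC", or "OFF"
```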
29 changes: 0 additions & 29 deletions cmake/modules/contrib/BLAS.cmake
@@ -63,32 +63,3 @@ if(USE_MKL OR USE_MKL_PATH)
add_definitions(-DUSE_MKL_BLAS=1)
message(STATUS "Use MKL library " ${BLAS_LIBRARY_MKL})
endif()

if(IS_DIRECTORY ${USE_MKLDNN})
find_library(MKLDNN_LIBRARY NAMES dnnl HINTS ${USE_MKLDNN}/lib/)
if (MKLDNN_LIBRARY STREQUAL "MKLDNN_LIBRARY-NOTFOUND")
message(WARNING "Cannot find MKLDNN library at ${USE_MKLDNN}.")
else()
include_directories(SYSTEM ${USE_MKLDNN}/include)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${MKLDNN_LIBRARY})
list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/mkldnn.cc)
list(APPEND RUNTIME_SRCS src/runtime/contrib/dnnl/dnnl.cc)
add_definitions(-DUSE_DNNL=1)
message(STATUS "Use MKLDNN library " ${MKLDNN_LIBRARY})
endif()
elseif(USE_MKLDNN STREQUAL "ON")
find_library(MKLDNN_LIBRARY dnnl)
if (MKLDNN_LIBRARY STREQUAL "MKLDNN_LIBRARY-NOTFOUND")
message(WARNING "Cannot find MKLDNN library. Try to specify the path to MKLDNN library.")
else()
list(APPEND TVM_RUNTIME_LINKER_LIBS ${MKLDNN_LIBRARY})
add_definitions(-DUSE_DNNL=1)
message(STATUS "Use MKLDNN library " ${MKLDNN_LIBRARY})
list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/mkldnn.cc)
list(APPEND RUNTIME_SRCS src/runtime/contrib/dnnl/dnnl.cc)
endif()
elseif(USE_MKLDNN STREQUAL "OFF")
# pass
else()
message(FATAL_ERROR "Invalid option: USE_MKLDNN=" ${USE_MKLDNN})
endif()
32 changes: 28 additions & 4 deletions cmake/modules/contrib/DNNL.cmake
@@ -15,25 +15,49 @@
# specific language governing permissions and limitations
# under the License.

if((USE_DNNL_CODEGEN STREQUAL "ON") OR (USE_DNNL_CODEGEN STREQUAL "JSON"))
if(IS_DIRECTORY ${USE_DNNL})
find_library(EXTERN_LIBRARY_DNNL NAMES dnnl HINTS ${USE_DNNL}/lib/)
if (EXTERN_LIBRARY_DNNL STREQUAL "EXTERN_LIBRARY_DNNL-NOTFOUND")
message(WARNING "Cannot find DNNL library at ${USE_DNNL}.")
else()
add_definitions(-DUSE_JSON_RUNTIME=1)
tvm_file_glob(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc)
list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC})

list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc
src/runtime/contrib/dnnl/dnnl_utils.cc
src/runtime/contrib/dnnl/dnnl.cc
src/runtime/contrib/cblas/dnnl_blas.cc)
list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL})
endif()
elseif((USE_DNNL STREQUAL "ON") OR (USE_DNNL STREQUAL "JSON"))
add_definitions(-DUSE_JSON_RUNTIME=1)
tvm_file_glob(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc)
list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC})

find_library(EXTERN_LIBRARY_DNNL dnnl)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc
src/runtime/contrib/dnnl/dnnl_utils.cc)
src/runtime/contrib/dnnl/dnnl_utils.cc
src/runtime/contrib/dnnl/dnnl.cc
src/runtime/contrib/cblas/dnnl_blas.cc)
list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL})
elseif(USE_DNNL_CODEGEN STREQUAL "C_SRC")
elseif(USE_DNNL STREQUAL "C_SRC")
tvm_file_glob(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc)
list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC})

find_library(EXTERN_LIBRARY_DNNL dnnl)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc)
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc
src/runtime/contrib/cblas/dnnl_blas.cc)
list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL})
elseif(USE_DNNL STREQUAL "OFF")
# pass
else()
message(FATAL_ERROR "Invalid option: USE_DNNL=" ${USE_DNNL})
endif()

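As a reference for how the USE_DNNL=ON/JSON path is exercised, below is a hedged sketch of the standard BYOC flow that hands dnnl-annotated regions to this codegen. The toy graph and shapes are illustrative only, and it assumes TVM was built with USE_DNNL enabled.

```python
import tvm
from tvm import relay

# Toy conv2d + relu graph; both ops are typical DNNL-supported patterns.
x = relay.var("x", shape=(1, 32, 14, 14), dtype="float32")
w = relay.var("w", shape=(32, 32, 3, 3), dtype="float32")
y = relay.nn.relu(relay.nn.conv2d(x, w, padding=(1, 1)))
mod = tvm.IRModule.from_expr(relay.Function([x, w], y))

# Standard BYOC pipeline: annotate, merge, partition, then build.
mod = relay.transform.AnnotateTarget(["dnnl"])(mod)
mod = relay.transform.MergeCompilerRegions()(mod)
mod = relay.transform.PartitionGraph()(mod)

lib = relay.build(mod, target="llvm")  # partitioned subgraphs go through the DNNL codegen
```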
2 changes: 1 addition & 1 deletion docs/dev/how_to/relay_bring_your_own_codegen.rst
@@ -21,7 +21,7 @@
Bring Your Own Codegen To TVM
=============================

As the number of hardware devices targeted by deep learning workloads keeps increasing, the required knowledge for users to achieve high performance on various devices keeps increasing as well. To free data scientists from worrying about the performance when developing a new model, hardware backend providers either provide libraries such as MKLDNN or cuDNN with many commonly used deep learning operators, or provide frameworks such as TensorRT to let users describe their models in a certain way to achieve high performance. However, users have to learn a new programming interface when they attempt to work on a new library or device. As a result, the demand for a unified programming interface becomes more and more important to 1) let all users and hardware backend providers stand on the same page, and 2) provide a feasible solution to allow specialized hardware or library to only support widely used operators with extremely high performance, but fallback unsupported operators to general devices like CPU/GPU.
As the number of hardware devices targeted by deep learning workloads keeps increasing, the required knowledge for users to achieve high performance on various devices keeps increasing as well. To free data scientists from worrying about the performance when developing a new model, hardware backend providers either provide libraries such as DNNL (Intel oneDNN) or cuDNN with many commonly used deep learning operators, or provide frameworks such as TensorRT to let users describe their models in a certain way to achieve high performance. However, users have to learn a new programming interface when they attempt to work on a new library or device. As a result, the demand for a unified programming interface becomes more and more important to 1) let all users and hardware backend providers stand on the same page, and 2) provide a feasible solution to allow specialized hardware or library to only support widely used operators with extremely high performance, but fallback unsupported operators to general devices like CPU/GPU.

In this developer guide, we demonstrate how you, as a hardware backend provider, can easily implement your own codegen and register it as a Relay backend compiler to support your hardware device/library. This guide covers two types of codegen based on different graph representations you need:

4 changes: 2 additions & 2 deletions python/tvm/contrib/mkldnn.py → python/tvm/contrib/dnnl.py
@@ -46,7 +46,7 @@ def matmul(lhs, rhs, transa=False, transb=False, **kwargs):
(n, m),
[lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.mkldnn.matmul", ins[0], ins[1], outs[0], transa, transb
"tvm.contrib.dnnl.matmul", ins[0], ins[1], outs[0], transa, transb
),
name="C",
**kwargs,
@@ -138,7 +138,7 @@ def dnnl_conv2d(
out_shape,
[src, weights],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.mkldnn.conv2d",
"tvm.contrib.dnnl.conv2d",
ins[0],
ins[1],
outs[0],
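For context on the rename, a hedged usage sketch of the relocated wrapper follows; it assumes libtvm was built with DNNL so the tvm.contrib.dnnl.matmul packed function referenced in this file is registered at runtime.

```python
import tvm
from tvm import te
from tvm.contrib import dnnl

A = te.placeholder((1024, 512), name="A", dtype="float32")
B = te.placeholder((512, 256), name="B", dtype="float32")
C = dnnl.matmul(A, B)          # extern op dispatching to tvm.contrib.dnnl.matmul
s = te.create_schedule(C.op)
f = tvm.build(s, [A, B, C], target="llvm")
```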
38 changes: 19 additions & 19 deletions python/tvm/relay/op/strategy/x86.py
@@ -120,11 +120,11 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target):
wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_int8),
name="conv2d_nchw_int8.x86",
)
elif "mkldnn" in target.libs:
elif "dnnl" in target.libs:
strategy.add_implementation(
wrap_compute_conv2d(topi.x86.conv2d_nchw_mkldnn),
wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_mkldnn),
name="conv2d_nchw_mkldnn.x86",
wrap_compute_conv2d(topi.x86.conv2d_nchw_dnnl),
wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_dnnl),
name="conv2d_nchw_dnnl.x86",
)
else:
strategy.add_implementation(
@@ -139,11 +139,11 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target):
assert kernel_layout == "HWIO"
if not is_auto_scheduler_enabled():
logger.warning("conv2d NHWC layout is not optimized for x86 with autotvm.")
if "mkldnn" in target.libs:
if "dnnl" in target.libs:
strategy.add_implementation(
wrap_compute_conv2d(topi.x86.conv2d_nhwc_mkldnn),
wrap_topi_schedule(topi.x86.schedule_conv2d_nhwc_mkldnn),
name="conv2d_nhwc_mkldnn.x86",
wrap_compute_conv2d(topi.x86.conv2d_nhwc_dnnl),
wrap_topi_schedule(topi.x86.schedule_conv2d_nhwc_dnnl),
name="conv2d_nhwc_dnnl.x86",
)
else:
strategy.add_implementation(
@@ -443,18 +443,18 @@ def matmul_strategy_cpu(attrs, inputs, out_type, target):
"Currently mkl only support the data type to be float32, float64 or input with "
"uint8 and int8 while output wiht int32. Skip."
)
if "mkldnn" in target.libs:
if "dnnl" in target.libs:
length_before = len(strategy.specializations) if strategy.specializations else 0
with SpecializedCondition(same_type and dtype == "float32"):
strategy.add_implementation(
wrap_compute_matmul(topi.x86.matmul_mkldnn),
wrap_topi_schedule(topi.x86.schedule_matmul_mkldnn),
name="matmul_mkldnn.x86",
wrap_compute_matmul(topi.x86.matmul_dnnl),
wrap_topi_schedule(topi.x86.schedule_matmul_dnnl),
name="matmul_dnnl.x86",
plevel=15,
)
length_after = len(strategy.specializations) if strategy.specializations else 0
if length_before == length_after:
logger.warning("Currently mkldnn only support the data type to be float32. Skip.")
logger.warning("Currently dnnl only support the data type to be float32. Skip.")

if is_auto_scheduler_enabled():
strategy.add_implementation(
@@ -464,11 +464,11 @@ def matmul_strategy_cpu(attrs, inputs, out_type, target):
plevel=11,
)
else:
# If no cblas/mkl/mkldnn strategy choosed
# If no cblas/mkl/dnnl strategy is chosen
if not strategy.specializations:
logger.warning(
"Matmul is not optimized for x86. "
"Recommend to use cblas/mkl/mkldnn for better performance."
"Recommend to use cblas/mkl/dnnl for better performance."
)
strategy.add_implementation(
wrap_compute_matmul(topi.nn.matmul),
@@ -523,12 +523,12 @@ def dense_strategy_cpu(attrs, inputs, out_type, target):
name="dense_mkl.x86",
plevel=14,
)
if "mkldnn" in target.libs:
if "dnnl" in target.libs:
with SpecializedCondition(same_type and dtype == "float32"):
strategy.add_implementation(
wrap_compute_dense(topi.x86.dense_mkldnn),
wrap_topi_schedule(topi.x86.schedule_dense_mkldnn),
name="dense_mkldnn.x86",
wrap_compute_dense(topi.x86.dense_dnnl),
wrap_topi_schedule(topi.x86.schedule_dense_dnnl),
name="dense_dnnl.x86",
plevel=15,
)
return strategy
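With the strategy entries renamed above, the library hook is now selected via -libs=dnnl in the target string (previously -libs=mkldnn). A hedged end-to-end sketch, with illustrative shapes and assuming a DNNL-enabled build:

```python
import tvm
from tvm import relay

data = relay.var("data", shape=(16, 512), dtype="float32")
weight = relay.var("weight", shape=(256, 512), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([data, weight], relay.nn.dense(data, weight)))

with tvm.transform.PassContext(opt_level=3):
    # dense_dnnl.x86 / matmul_dnnl.x86 are picked for eligible float32 workloads.
    lib = relay.build(mod, target="llvm -libs=dnnl")
```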
6 changes: 6 additions & 0 deletions python/tvm/target/target.py
@@ -108,6 +108,12 @@ def __init__(self, target, host=None):
When using a dictionary or json string to configure target, the possible values are
same as target.
"""
if isinstance(target, str) and "-libs=mkldnn" in target:
target = target.replace("mkldnn", "dnnl")
warnings.warn(
"legacy supoort of mkldnn will be eprecated in the next release."
" Please replace -libs=mkldnn to -libs=dnnl to enable Intel OneDNN.",
)
if isinstance(target, (dict, str)):
target = convert(target)
if isinstance(host, (dict, str)):
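The compatibility shim above can be exercised as in the sketch below: per this diff, a legacy -libs=mkldnn target string is rewritten to -libs=dnnl and a deprecation warning is emitted (the .libs inspection is an assumption about the Target API, shown for illustration only).

```python
import tvm

tgt = tvm.target.Target("llvm -libs=mkldnn")  # warns and is rewritten to -libs=dnnl
print(tgt.libs)  # expected: ['dnnl']
```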
30 changes: 15 additions & 15 deletions python/tvm/topi/x86/conv2d.py
@@ -23,7 +23,7 @@
import tvm
from tvm import te
from tvm import autotvm
from tvm.contrib import mkldnn
from tvm.contrib import dnnl
from .. import nn
from ..generic import schedule_extern
from ..nn.conv2d import conv2d_infer_layout, _get_workload as _get_conv2d_workload
@@ -269,31 +269,31 @@ def _callback(op):
return s


@autotvm.register_topi_compute("conv2d_nchw_mkldnn.x86")
def conv2d_nchw_mkldnn(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NCHW format using mkldnn."""
@autotvm.register_topi_compute("conv2d_nchw_dnnl.x86")
def conv2d_nchw_dnnl(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NCHW format using dnnl."""
groups = 1
_out = mkldnn.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, False, out_dtype)
_out = dnnl.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, False, out_dtype)
return _out


@autotvm.register_topi_schedule("conv2d_nchw_mkldnn.x86")
def schedule_conv2d_nchw_mkldnn(_, outs):
"""Create schedule for conv2d_nchw_mkldnn"""
@autotvm.register_topi_schedule("conv2d_nchw_dnnl.x86")
def schedule_conv2d_nchw_dnnl(_, outs):
"""Create schedule for conv2d_nchw_dnnl"""
return schedule_extern(outs)


@autotvm.register_topi_compute("conv2d_nhwc_mkldnn.x86")
def conv2d_nhwc_mkldnn(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NHWC format using mkldnn."""
@autotvm.register_topi_compute("conv2d_nhwc_dnnl.x86")
def conv2d_nhwc_dnnl(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NHWC format using dnnl."""
groups = 1
_out = mkldnn.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, True, out_dtype)
_out = dnnl.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, True, out_dtype)
return _out


@autotvm.register_topi_schedule("conv2d_nhwc_mkldnn.x86")
def schedule_conv2d_nhwc_mkldnn(_, outs):
"""Create schedule for conv2d_nhwc_mkldnn"""
@autotvm.register_topi_schedule("conv2d_nhwc_dnnl.x86")
def schedule_conv2d_nhwc_dnnl(_, outs):
"""Create schedule for conv2d_nhwc_dnnl"""
return schedule_extern(outs)


30 changes: 15 additions & 15 deletions python/tvm/topi/x86/dense.py
@@ -24,7 +24,7 @@
from tvm.autotvm.task.space import SplitEntity
from tvm.contrib import cblas
from tvm.contrib import mkl
from tvm.contrib import mkldnn
from tvm.contrib import dnnl

from .utils import get_simd_32bit_lanes
from .. import generic, tag
@@ -424,15 +424,15 @@ def schedule_dense_mkl(_, outs):
return generic.schedule_extern(outs)


@autotvm.register_topi_compute("dense_mkldnn.x86")
def dense_mkldnn(cfg, data, weight, bias=None, out_dtype=None):
"""Compute dense using mkldnn. This is an alias of matmul_nt operator."""
return matmul_blas_common(cfg, data, weight, bias, out_dtype, False, True, mkldnn)
@autotvm.register_topi_compute("dense_dnnl.x86")
def dense_dnnl(cfg, data, weight, bias=None, out_dtype=None):
"""Compute dense using dnnl. This is an alias of matmul_nt operator."""
return matmul_blas_common(cfg, data, weight, bias, out_dtype, False, True, dnnl)


@autotvm.register_topi_schedule("dense_mkldnn.x86")
def schedule_dense_mkldnn(_, outs):
"""Create schedule for dense_mkldnn. This is an alias of matmul_nt operator."""
@autotvm.register_topi_schedule("dense_dnnl.x86")
def schedule_dense_dnnl(_, outs):
"""Create schedule for dense_dnnl. This is an alias of matmul_nt operator."""
return generic.schedule_extern(outs)


@@ -468,17 +468,17 @@ def schedule_matmul_mkl(_, outs):
return generic.schedule_extern(outs)


@autotvm.register_topi_compute("matmul_mkldnn.x86")
def matmul_mkldnn(
@autotvm.register_topi_compute("matmul_dnnl.x86")
def matmul_dnnl(
cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
):
"""Compute matmul using mkldnn."""
"""Compute matmul using dnnl."""
return matmul_blas_common(
cfg, tensor_a, tensor_b, bias, out_dtype, transpose_a, transpose_b, mkldnn
cfg, tensor_a, tensor_b, bias, out_dtype, transpose_a, transpose_b, dnnl
)


@autotvm.register_topi_schedule("matmul_mkldnn.x86")
def schedule_matmul_mkldnn(_, outs):
"""Create schedule for matmul_mkldnn."""
@autotvm.register_topi_schedule("matmul_dnnl.x86")
def schedule_matmul_dnnl(_, outs):
"""Create schedule for matmul_dnnl."""
return generic.schedule_extern(outs)