[DNNL][CBLAS][BYOC] Unifies all MKLDNN/DNNL to DNNL #11638

Merged · 4 commits · Jun 10, 2022
3 changes: 1 addition & 2 deletions CMakeLists.txt
@@ -84,8 +84,7 @@ tvm_option(PICOJSON_PATH "Path to PicoJSON" "3rdparty/picojson")
tvm_option(USE_BYODT_POSIT "Build with BYODT software emulated posit custom datatype" OFF)
tvm_option(USE_BLAS "The blas library to be linked" none)
tvm_option(USE_MKL "MKL root path when use MKL blas" OFF)
tvm_option(USE_MKLDNN "Build with MKLDNN" OFF)
tvm_option(USE_DNNL_CODEGEN "Enable MKLDNN (DNNL) codegen" OFF)
tvm_option(USE_DNNL "Enable DNNL codegen" OFF)
tvm_option(USE_CUDNN "Build with cuDNN" OFF)
tvm_option(USE_CUBLAS "Build with cuBLAS" OFF)
tvm_option(USE_CUTLASS "Build with CUTLASS" OFF)
17 changes: 12 additions & 5 deletions cmake/config.cmake
@@ -160,8 +160,18 @@ set(USE_BLAS none)
# set(USE_MKL <path to venv or site-packages directory>) if using `pip install mkl`
set(USE_MKL OFF)

# Whether use MKLDNN library, choices: ON, OFF, path to mkldnn library
set(USE_MKLDNN OFF)
# Whether use DNNL library, a.k.a. Intel oneDNN: https://oneapi-src.github.io/oneDNN
#
# Currently matmul/dense/conv2d are supported via -libs=dnnl,
# and more op patterns are supported by the DNNL codegen (JSON runtime).
#
# choices:
# - ON: enable DNNL for BYOC and -libs=dnnl; the DNNL codegen defaults to the JSON runtime
# - JSON: same as ON
# - C_SRC: use the C source runtime in the DNNL codegen
# - path/to/oneDNN: oneDNN root path
# - OFF: disable DNNL
set(USE_DNNL OFF)

# Whether use OpenMP thread pool, choices: gnu, intel
# Note: "gnu" uses gomp library, "intel" uses iomp5 library
@@ -212,9 +222,6 @@ set(USE_ROCBLAS OFF)
# Whether use contrib sort
set(USE_SORT ON)

# Whether use MKL-DNN (DNNL) codegen
set(USE_DNNL_CODEGEN OFF)

# Whether to use Arm Compute Library (ACL) codegen
# We provide 2 separate flags since we cannot build the ACL runtime on x86.
# This is useful for cases where you want to cross-compile a relay graph
3 changes: 1 addition & 2 deletions cmake/modules/LibInfo.cmake
@@ -65,7 +65,7 @@ function(add_lib_info src_file)
TVM_INFO_USE_CUDNN="${USE_CUDNN}"
TVM_INFO_USE_CUSTOM_LOGGING="${USE_CUSTOM_LOGGING}"
TVM_INFO_USE_CUTLASS="${USE_CUTLASS}"
TVM_INFO_USE_DNNL_CODEGEN="${USE_DNNL_CODEGEN}"
TVM_INFO_USE_DNNL="${USE_DNNL}"
TVM_INFO_USE_ETHOSN="${USE_ETHOSN}"
TVM_INFO_USE_FALLBACK_STL_MAP="${USE_FALLBACK_STL_MAP}"
TVM_INFO_USE_GRAPH_EXECUTOR_CUDA_GRAPH="${USE_GRAPH_EXECUTOR_CUDA_GRAPH}"
@@ -85,7 +85,6 @@ function(add_lib_info src_file)
TVM_INFO_USE_MICRO="${USE_MICRO}"
TVM_INFO_USE_MIOPEN="${USE_MIOPEN}"
TVM_INFO_USE_MKL="${USE_MKL}"
TVM_INFO_USE_MKLDNN="${USE_MKLDNN}"
TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
TVM_INFO_USE_NNPACK="${USE_NNPACK}"
TVM_INFO_USE_OPENCL="${USE_OPENCL}"
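For anyone trying the renamed flag locally, here is a minimal sketch (not part of this diff) of checking the new TVM_INFO_USE_DNNL entry from Python after rebuilding; it assumes a TVM build where tvm.support.libinfo() is available.

```python
import tvm

# Build-time options, including the USE_DNNL value set in config.cmake,
# are surfaced through the LibInfo entries registered above.
print(tvm.support.libinfo().get("USE_DNNL", "not recorded"))  # e.g. "ON", "JSON", "C_SRC", or "OFF"
```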
29 changes: 0 additions & 29 deletions cmake/modules/contrib/BLAS.cmake
@@ -63,32 +63,3 @@ if(USE_MKL OR USE_MKL_PATH)
add_definitions(-DUSE_MKL_BLAS=1)
message(STATUS "Use MKL library " ${BLAS_LIBRARY_MKL})
endif()

if(IS_DIRECTORY ${USE_MKLDNN})
find_library(MKLDNN_LIBRARY NAMES dnnl HINTS ${USE_MKLDNN}/lib/)
if (MKLDNN_LIBRARY STREQUAL "MKLDNN_LIBRARY-NOTFOUND")
message(WARNING "Cannot find MKLDNN library at ${USE_MKLDNN}.")
else()
include_directories(SYSTEM ${USE_MKLDNN}/include)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${MKLDNN_LIBRARY})
list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/mkldnn.cc)
list(APPEND RUNTIME_SRCS src/runtime/contrib/dnnl/dnnl.cc)
add_definitions(-DUSE_DNNL=1)
message(STATUS "Use MKLDNN library " ${MKLDNN_LIBRARY})
endif()
elseif(USE_MKLDNN STREQUAL "ON")
find_library(MKLDNN_LIBRARY dnnl)
if (MKLDNN_LIBRARY STREQUAL "MKLDNN_LIBRARY-NOTFOUND")
message(WARNING "Cannot find MKLDNN library. Try to specify the path to MKLDNN library.")
else()
list(APPEND TVM_RUNTIME_LINKER_LIBS ${MKLDNN_LIBRARY})
add_definitions(-DUSE_DNNL=1)
message(STATUS "Use MKLDNN library " ${MKLDNN_LIBRARY})
list(APPEND RUNTIME_SRCS src/runtime/contrib/cblas/mkldnn.cc)
list(APPEND RUNTIME_SRCS src/runtime/contrib/dnnl/dnnl.cc)
endif()
elseif(USE_MKLDNN STREQUAL "OFF")
# pass
else()
message(FATAL_ERROR "Invalid option: USE_MKLDNN=" ${USE_MKLDNN})
endif()
32 changes: 28 additions & 4 deletions cmake/modules/contrib/DNNL.cmake
@@ -15,25 +15,49 @@
# specific language governing permissions and limitations
# under the License.

if((USE_DNNL_CODEGEN STREQUAL "ON") OR (USE_DNNL_CODEGEN STREQUAL "JSON"))
if(IS_DIRECTORY ${USE_DNNL})
find_library(EXTERN_LIBRARY_DNNL NAMES dnnl HINTS ${USE_DNNL}/lib/)
if (EXTERN_LIBRARY_DNNL STREQUAL "EXTERN_LIBRARY_DNNL-NOTFOUND")
message(WARNING "Cannot find DNNL library at ${USE_DNNL}.")
else()
add_definitions(-DUSE_JSON_RUNTIME=1)
tvm_file_glob(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc)
list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC})

list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc
src/runtime/contrib/dnnl/dnnl_utils.cc
src/runtime/contrib/dnnl/dnnl.cc
src/runtime/contrib/cblas/dnnl_blas.cc)
list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL})
endif()
elseif((USE_DNNL STREQUAL "ON") OR (USE_DNNL STREQUAL "JSON"))
add_definitions(-DUSE_JSON_RUNTIME=1)
tvm_file_glob(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc)
list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC})

find_library(EXTERN_LIBRARY_DNNL dnnl)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl_json_runtime.cc
src/runtime/contrib/dnnl/dnnl_utils.cc)
src/runtime/contrib/dnnl/dnnl_utils.cc
src/runtime/contrib/dnnl/dnnl.cc
src/runtime/contrib/cblas/dnnl_blas.cc)
list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
message(STATUS "Build with DNNL JSON runtime: " ${EXTERN_LIBRARY_DNNL})
elseif(USE_DNNL_CODEGEN STREQUAL "C_SRC")
elseif(USE_DNNL STREQUAL "C_SRC")
tvm_file_glob(GLOB DNNL_RELAY_CONTRIB_SRC src/relay/backend/contrib/dnnl/*.cc)
list(APPEND COMPILER_SRCS ${DNNL_RELAY_CONTRIB_SRC})

find_library(EXTERN_LIBRARY_DNNL dnnl)
list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_LIBRARY_DNNL})
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc)
tvm_file_glob(GLOB DNNL_CONTRIB_SRC src/runtime/contrib/dnnl/dnnl.cc
src/runtime/contrib/cblas/dnnl_blas.cc)
list(APPEND RUNTIME_SRCS ${DNNL_CONTRIB_SRC})
message(STATUS "Build with DNNL C source module: " ${EXTERN_LIBRARY_DNNL})
elseif(USE_DNNL STREQUAL "OFF")
# pass
else()
message(FATAL_ERROR "Invalid option: USE_DNNL=" ${USE_DNNL})
endif()

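As a reference for how the USE_DNNL=ON/JSON path is exercised, below is a hedged sketch of the standard BYOC flow that hands dnnl-annotated regions to this codegen. The toy graph and shapes are illustrative only, and it assumes TVM was built with USE_DNNL enabled.

```python
import tvm
from tvm import relay

# Toy conv2d + relu graph; both ops are typical DNNL-supported patterns.
x = relay.var("x", shape=(1, 32, 14, 14), dtype="float32")
w = relay.var("w", shape=(32, 32, 3, 3), dtype="float32")
y = relay.nn.relu(relay.nn.conv2d(x, w, padding=(1, 1)))
mod = tvm.IRModule.from_expr(relay.Function([x, w], y))

# Standard BYOC pipeline: annotate, merge, partition, then build.
mod = relay.transform.AnnotateTarget(["dnnl"])(mod)
mod = relay.transform.MergeCompilerRegions()(mod)
mod = relay.transform.PartitionGraph()(mod)

lib = relay.build(mod, target="llvm")  # partitioned subgraphs go through the DNNL codegen
```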
2 changes: 1 addition & 1 deletion docs/dev/how_to/relay_bring_your_own_codegen.rst
@@ -21,7 +21,7 @@
Bring Your Own Codegen To TVM
=============================

As the number of hardware devices targeted by deep learning workloads keeps increasing, the required knowledge for users to achieve high performance on various devices keeps increasing as well. To free data scientists from worrying about the performance when developing a new model, hardware backend providers either provide libraries such as MKLDNN or cuDNN with many commonly used deep learning operators, or provide frameworks such as TensorRT to let users describe their models in a certain way to achieve high performance. However, users have to learn a new programming interface when they attempt to work on a new library or device. As a result, the demand for a unified programming interface becomes more and more important to 1) let all users and hardware backend providers stand on the same page, and 2) provide a feasible solution to allow specialized hardware or library to only support widely used operators with extremely high performance, but fallback unsupported operators to general devices like CPU/GPU.
As the number of hardware devices targeted by deep learning workloads keeps increasing, the required knowledge for users to achieve high performance on various devices keeps increasing as well. To free data scientists from worrying about the performance when developing a new model, hardware backend providers either provide libraries such as DNNL (Intel oneDNN) or cuDNN with many commonly used deep learning operators, or provide frameworks such as TensorRT to let users describe their models in a certain way to achieve high performance. However, users have to learn a new programming interface when they attempt to work on a new library or device. As a result, the demand for a unified programming interface becomes more and more important to 1) let all users and hardware backend providers stand on the same page, and 2) provide a feasible solution to allow specialized hardware or library to only support widely used operators with extremely high performance, but fallback unsupported operators to general devices like CPU/GPU.

In this developer guide, we demonstrate how you, as a hardware backend provider, can easily implement your own codegen and register it as a Relay backend compiler to support your hardware device/library. This guide covers two types of codegen based on different graph representations you need:

4 changes: 2 additions & 2 deletions python/tvm/contrib/mkldnn.py → python/tvm/contrib/dnnl.py
@@ -46,7 +46,7 @@ def matmul(lhs, rhs, transa=False, transb=False, **kwargs):
(n, m),
[lhs, rhs],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.mkldnn.matmul", ins[0], ins[1], outs[0], transa, transb
"tvm.contrib.dnnl.matmul", ins[0], ins[1], outs[0], transa, transb
),
name="C",
**kwargs,
@@ -138,7 +138,7 @@ def dnnl_conv2d(
out_shape,
[src, weights],
lambda ins, outs: tvm.tir.call_packed(
"tvm.contrib.mkldnn.conv2d",
"tvm.contrib.dnnl.conv2d",
ins[0],
ins[1],
outs[0],
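For context on the rename, a hedged usage sketch of the relocated wrapper follows; it assumes libtvm was built with DNNL so the tvm.contrib.dnnl.matmul packed function referenced in this file is registered at runtime.

```python
import tvm
from tvm import te
from tvm.contrib import dnnl

A = te.placeholder((1024, 512), name="A", dtype="float32")
B = te.placeholder((512, 256), name="B", dtype="float32")
C = dnnl.matmul(A, B)          # extern op dispatching to tvm.contrib.dnnl.matmul
s = te.create_schedule(C.op)
f = tvm.build(s, [A, B, C], target="llvm")
```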
38 changes: 19 additions & 19 deletions python/tvm/relay/op/strategy/x86.py
@@ -120,11 +120,11 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target):
wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_int8),
name="conv2d_nchw_int8.x86",
)
elif "mkldnn" in target.libs:
elif "dnnl" in target.libs:
strategy.add_implementation(
wrap_compute_conv2d(topi.x86.conv2d_nchw_mkldnn),
wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_mkldnn),
name="conv2d_nchw_mkldnn.x86",
wrap_compute_conv2d(topi.x86.conv2d_nchw_dnnl),
wrap_topi_schedule(topi.x86.schedule_conv2d_nchw_dnnl),
name="conv2d_nchw_dnnl.x86",
)
else:
strategy.add_implementation(
@@ -139,11 +139,11 @@ def conv2d_strategy_cpu(attrs, inputs, out_type, target):
assert kernel_layout == "HWIO"
if not is_auto_scheduler_enabled():
logger.warning("conv2d NHWC layout is not optimized for x86 with autotvm.")
if "mkldnn" in target.libs:
if "dnnl" in target.libs:
strategy.add_implementation(
wrap_compute_conv2d(topi.x86.conv2d_nhwc_mkldnn),
wrap_topi_schedule(topi.x86.schedule_conv2d_nhwc_mkldnn),
name="conv2d_nhwc_mkldnn.x86",
wrap_compute_conv2d(topi.x86.conv2d_nhwc_dnnl),
wrap_topi_schedule(topi.x86.schedule_conv2d_nhwc_dnnl),
name="conv2d_nhwc_dnnl.x86",
)
else:
strategy.add_implementation(
@@ -443,18 +443,18 @@ def matmul_strategy_cpu(attrs, inputs, out_type, target):
"Currently mkl only support the data type to be float32, float64 or input with "
"uint8 and int8 while output wiht int32. Skip."
)
if "mkldnn" in target.libs:
if "dnnl" in target.libs:
length_before = len(strategy.specializations) if strategy.specializations else 0
with SpecializedCondition(same_type and dtype == "float32"):
strategy.add_implementation(
wrap_compute_matmul(topi.x86.matmul_mkldnn),
wrap_topi_schedule(topi.x86.schedule_matmul_mkldnn),
name="matmul_mkldnn.x86",
wrap_compute_matmul(topi.x86.matmul_dnnl),
wrap_topi_schedule(topi.x86.schedule_matmul_dnnl),
name="matmul_dnnl.x86",
plevel=15,
)
length_after = len(strategy.specializations) if strategy.specializations else 0
if length_before == length_after:
logger.warning("Currently mkldnn only support the data type to be float32. Skip.")
logger.warning("Currently dnnl only support the data type to be float32. Skip.")

if is_auto_scheduler_enabled():
strategy.add_implementation(
@@ -464,11 +464,11 @@ def matmul_strategy_cpu(attrs, inputs, out_type, target):
plevel=11,
)
else:
# If no cblas/mkl/mkldnn strategy choosed
# If no cblas/mkl/dnnl strategy is chosen
if not strategy.specializations:
logger.warning(
"Matmul is not optimized for x86. "
"Recommend to use cblas/mkl/mkldnn for better performance."
"Recommend to use cblas/mkl/dnnl for better performance."
)
strategy.add_implementation(
wrap_compute_matmul(topi.nn.matmul),
@@ -523,12 +523,12 @@ def dense_strategy_cpu(attrs, inputs, out_type, target):
name="dense_mkl.x86",
plevel=14,
)
if "mkldnn" in target.libs:
if "dnnl" in target.libs:
with SpecializedCondition(same_type and dtype == "float32"):
strategy.add_implementation(
wrap_compute_dense(topi.x86.dense_mkldnn),
wrap_topi_schedule(topi.x86.schedule_dense_mkldnn),
name="dense_mkldnn.x86",
wrap_compute_dense(topi.x86.dense_dnnl),
wrap_topi_schedule(topi.x86.schedule_dense_dnnl),
name="dense_dnnl.x86",
plevel=15,
)
return strategy
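With the strategy entries renamed above, the library hook is now selected via -libs=dnnl in the target string (previously -libs=mkldnn). A hedged end-to-end sketch, with illustrative shapes and assuming a DNNL-enabled build:

```python
import tvm
from tvm import relay

data = relay.var("data", shape=(16, 512), dtype="float32")
weight = relay.var("weight", shape=(256, 512), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([data, weight], relay.nn.dense(data, weight)))

with tvm.transform.PassContext(opt_level=3):
    # dense_dnnl.x86 / matmul_dnnl.x86 are picked for eligible float32 workloads.
    lib = relay.build(mod, target="llvm -libs=dnnl")
```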
6 changes: 6 additions & 0 deletions python/tvm/target/target.py
@@ -108,6 +108,12 @@ def __init__(self, target, host=None):
When using a dictionary or json string to configure target, the possible values are
same as target.
"""
if isinstance(target, str) and "-libs=mkldnn" in target:
target = target.replace("mkldnn", "dnnl")
warnings.warn(
"legacy supoort of mkldnn will be eprecated in the next release."
" Please replace -libs=mkldnn to -libs=dnnl to enable Intel OneDNN.",
)
if isinstance(target, (dict, str)):
target = convert(target)
if isinstance(host, (dict, str)):
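The compatibility shim above can be exercised as in the sketch below: per this diff, a legacy -libs=mkldnn target string is rewritten to -libs=dnnl and a deprecation warning is emitted (the .libs inspection is an assumption about the Target API, shown for illustration only).

```python
import tvm

tgt = tvm.target.Target("llvm -libs=mkldnn")  # warns and is rewritten to -libs=dnnl
print(tgt.libs)  # expected: ['dnnl']
```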
30 changes: 15 additions & 15 deletions python/tvm/topi/x86/conv2d.py
@@ -23,7 +23,7 @@
import tvm
from tvm import te
from tvm import autotvm
from tvm.contrib import mkldnn
from tvm.contrib import dnnl
from .. import nn
from ..generic import schedule_extern
from ..nn.conv2d import conv2d_infer_layout, _get_workload as _get_conv2d_workload
@@ -269,31 +269,31 @@ def _callback(op):
return s


@autotvm.register_topi_compute("conv2d_nchw_mkldnn.x86")
def conv2d_nchw_mkldnn(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NCHW format using mkldnn."""
@autotvm.register_topi_compute("conv2d_nchw_dnnl.x86")
def conv2d_nchw_dnnl(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NCHW format using dnnl."""
groups = 1
_out = mkldnn.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, False, out_dtype)
_out = dnnl.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, False, out_dtype)
return _out


@autotvm.register_topi_schedule("conv2d_nchw_mkldnn.x86")
def schedule_conv2d_nchw_mkldnn(_, outs):
"""Create schedule for conv2d_nchw_mkldnn"""
@autotvm.register_topi_schedule("conv2d_nchw_dnnl.x86")
def schedule_conv2d_nchw_dnnl(_, outs):
"""Create schedule for conv2d_nchw_dnnl"""
return schedule_extern(outs)


@autotvm.register_topi_compute("conv2d_nhwc_mkldnn.x86")
def conv2d_nhwc_mkldnn(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NHWC format using mkldnn."""
@autotvm.register_topi_compute("conv2d_nhwc_dnnl.x86")
def conv2d_nhwc_dnnl(cfg, data, kernel, strides, padding, dilation, out_dtype):
"""Compute conv2d in NHWC format using dnnl."""
groups = 1
_out = mkldnn.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, True, out_dtype)
_out = dnnl.dnnl_conv2d(data, kernel, strides, padding, dilation, groups, True, out_dtype)
return _out


@autotvm.register_topi_schedule("conv2d_nhwc_mkldnn.x86")
def schedule_conv2d_nhwc_mkldnn(_, outs):
"""Create schedule for conv2d_nhwc_mkldnn"""
@autotvm.register_topi_schedule("conv2d_nhwc_dnnl.x86")
def schedule_conv2d_nhwc_dnnl(_, outs):
"""Create schedule for conv2d_nhwc_dnnl"""
return schedule_extern(outs)


30 changes: 15 additions & 15 deletions python/tvm/topi/x86/dense.py
@@ -24,7 +24,7 @@
from tvm.autotvm.task.space import SplitEntity
from tvm.contrib import cblas
from tvm.contrib import mkl
from tvm.contrib import mkldnn
from tvm.contrib import dnnl

from .utils import get_simd_32bit_lanes
from .. import generic, tag
@@ -424,15 +424,15 @@ def schedule_dense_mkl(_, outs):
return generic.schedule_extern(outs)


@autotvm.register_topi_compute("dense_mkldnn.x86")
def dense_mkldnn(cfg, data, weight, bias=None, out_dtype=None):
"""Compute dense using mkldnn. This is an alias of matmul_nt operator."""
return matmul_blas_common(cfg, data, weight, bias, out_dtype, False, True, mkldnn)
@autotvm.register_topi_compute("dense_dnnl.x86")
def dense_dnnl(cfg, data, weight, bias=None, out_dtype=None):
"""Compute dense using dnnl. This is an alias of matmul_nt operator."""
return matmul_blas_common(cfg, data, weight, bias, out_dtype, False, True, dnnl)


@autotvm.register_topi_schedule("dense_mkldnn.x86")
def schedule_dense_mkldnn(_, outs):
"""Create schedule for dense_mkldnn. This is an alias of matmul_nt operator."""
@autotvm.register_topi_schedule("dense_dnnl.x86")
def schedule_dense_dnnl(_, outs):
"""Create schedule for dense_dnnl. This is an alias of matmul_nt operator."""
return generic.schedule_extern(outs)


@@ -468,17 +468,17 @@ def schedule_matmul_mkl(_, outs):
return generic.schedule_extern(outs)


@autotvm.register_topi_compute("matmul_mkldnn.x86")
def matmul_mkldnn(
@autotvm.register_topi_compute("matmul_dnnl.x86")
def matmul_dnnl(
cfg, tensor_a, tensor_b, bias=None, out_dtype=None, transpose_a=False, transpose_b=False
):
"""Compute matmul using mkldnn."""
"""Compute matmul using dnnl."""
return matmul_blas_common(
cfg, tensor_a, tensor_b, bias, out_dtype, transpose_a, transpose_b, mkldnn
cfg, tensor_a, tensor_b, bias, out_dtype, transpose_a, transpose_b, dnnl
)


@autotvm.register_topi_schedule("matmul_mkldnn.x86")
def schedule_matmul_mkldnn(_, outs):
"""Create schedule for matmul_mkldnn."""
@autotvm.register_topi_schedule("matmul_dnnl.x86")
def schedule_matmul_dnnl(_, outs):
"""Create schedule for matmul_dnnl."""
return generic.schedule_extern(outs)