Update MKL-DNN dependency (apache#12953)
* update mkldnn and fix conv/deconv

* fix

* fix indent

* fix cmake

* fix cmake

* fix cpp test for mkldnn

* fix typo

* fix conflicts after merge

* debug: remove 5d test

* debug: remove 4d test

* add comments

* debug: remove 2d test

* update mklml in ci

* fix mklml

* Revert "fix mklml"

This reverts commit 328a22a.

* Revert "update mklml in ci"

This reverts commit 9ff3687.

* Revert "debug: remove 2d test"

This reverts commit 32551b3.

* Revert "debug: remove 4d test"

This reverts commit 5412d64.

* Revert "debug: remove 5d test"

This reverts commit 1fe9f88.

* debug illegal core dump

* debug illegal core dump

* Revert "debug illegal core dump"

This reverts commit 39321d5.

* Revert "debug illegal core dump"

This reverts commit 153b068.

* change cmake

* pin mkldnn version to 0.17rc

* change format number

* remove include directories in cmake

* fix cpp test

* address cpplint complaint

* remove comment code

* update mkldnn head
TaoLv authored and Jose Luis Contreras committed Nov 13, 2018
1 parent ea8983b commit 17c2590
Showing 9 changed files with 181 additions and 28 deletions.
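
The change repeated across the convolution and deconvolution files below is a kernel-selection loop: after creating a primitive descriptor, keep calling next_impl() until the src/weights/dst memory sizes that the chosen implementation wants match the plain tensor sizes MXNet's memory planner reserved. A minimal standalone sketch of that pattern against the MKL-DNN 0.x C++ API follows; the shapes and the PlainSize helper are illustrative only and not part of the patch.

// Standalone sketch of the selection loop (MKL-DNN 0.x C++ API); shapes and
// the PlainSize helper are illustrative only.
#include <mkldnn.hpp>
#include <iostream>

// Logical (unpadded) byte size of a memory descriptor, mirroring GetMemDescSize.
static size_t PlainSize(const mkldnn::memory::desc &md) {
  size_t ret = 1;
  for (int i = 0; i < md.data.ndims; ++i) ret *= md.data.dims[i];
  return ret * sizeof(float);  // f32 assumed for this sketch
}

int main() {
  mkldnn::engine engine(mkldnn::engine::kind::cpu, 0);
  auto f32 = mkldnn::memory::data_type::f32;
  auto any = mkldnn::memory::format::any;
  mkldnn::memory::desc src({1, 3, 224, 224}, f32, any);
  mkldnn::memory::desc wgt({64, 3, 7, 7}, f32, any);
  mkldnn::memory::desc dst({1, 64, 112, 112}, f32, any);
  mkldnn::convolution_forward::desc desc(
      mkldnn::prop_kind::forward_training, mkldnn::algorithm::convolution_direct,
      src, wgt, dst, {2, 2}, {3, 3}, {3, 3}, mkldnn::padding_kind::zero);
  mkldnn::convolution_forward::primitive_desc pd(desc, engine);
  // Walk the implementation list until no queried buffer is padded beyond the
  // plain tensor size; padded (blocked) formats would need more bytes.
  while (pd.dst_primitive_desc().get_size() != PlainSize(dst) ||
         pd.src_primitive_desc().get_size() != PlainSize(src) ||
         pd.weights_primitive_desc().get_size() != PlainSize(wgt)) {
    if (!pd.next_impl()) { std::cerr << "No implementation\n"; return 1; }
  }
  std::cout << "selected dst bytes: " << pd.dst_primitive_desc().get_size() << "\n";
  return 0;
}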
2 changes: 1 addition & 1 deletion 3rdparty/mkldnn
6 changes: 4 additions & 2 deletions CMakeLists.txt
@@ -229,8 +229,10 @@ if(USE_MKLDNN)
if(NOT MSVC)
set(ARCH_OPT_FLAGS "-mtune=generic")
endif()
set(WITH_TEST OFF)
set(WITH_EXAMPLE OFF)
set(WITH_TEST OFF CACHE INTERNAL "" FORCE)
set(WITH_EXAMPLE OFF CACHE INTERNAL "" FORCE)
set(ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE)

add_subdirectory(3rdparty/mkldnn)

include_directories(3rdparty/mkldnn/include)
12 changes: 12 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -208,6 +208,18 @@ static inline int get_mxnet_type(mkldnn_data_type_t dtype) {
}
}

static inline size_t GetMemDescSize(const mkldnn::memory::desc &md) {
if (md.data.ndims == 0) return 0;

size_t ret = 1;
for (int i = 0; i < md.data.ndims; i++) {
ret *= md.data.dims[i];
}

ret *= mshadow::mshadow_sizeof(get_mxnet_type(md.data.data_type));
return ret;
}

inline static mkldnn::memory::desc GetMemDesc(const NDArray &arr, int ndim) {
mkldnn::memory::dims dims(ndim);
for (size_t i = 0; i < dims.size(); i++) dims[i] = arr.shape()[i];
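
The new GetMemDescSize helper above returns the logical byte size of a tensor, i.e. the product of its dimensions times the element size, as opposed to mkldnn::memory::primitive_desc::get_size(), which reports the possibly padded size an implementation actually needs. A worked example with made-up numbers:

// Illustrative numbers only: a 1x3x224x224 f32 tensor.
// GetMemDescSize multiplies the logical dims:
//   1 * 3 * 224 * 224 * 4 bytes = 602,112 bytes.
// An implementation using a blocked format such as nChw16c pads the channel
// dimension 3 -> 16, so its primitive descriptor would report
//   1 * 16 * 224 * 224 * 4 bytes = 3,211,264 bytes,
// which fails the size checks added in this commit and forces next_impl()
// to move on to a plainer kernel.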
3 changes: 3 additions & 0 deletions src/operator/nn/mkldnn/mkldnn_base.cc
@@ -311,9 +311,12 @@ mkldnn_memory_format_t GetDefaultFormat(const mkldnn::memory::desc &desc) {
case mkldnn_oihw:
case mkldnn_ihwo:
case mkldnn_hwio:
case mkldnn_oIhw8i:
case mkldnn_oIhw16i:
case mkldnn_OIhw8i8o:
case mkldnn_OIhw16i16o:
case mkldnn_OIhw4i16o4i:
case mkldnn_OIhw4i16o4i_s8s8:
case mkldnn_OIhw8i16o2i:
case mkldnn_OIhw8o16i2o:
case mkldnn_OIhw8o8i:
97 changes: 87 additions & 10 deletions src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -85,16 +85,33 @@ mkldnn::convolution_forward::primitive_desc GetConvFwdImpl(
attr.set_int_output_round_mode(round_nearest);
}

// MKL-DNN introduced padded formats in 0.15, which require more memory
// for computation than the actual tensor size. Currently, MKL-DNN operators
// still reuse the memory from MXNet's memory planning, and that memory may
// be smaller than what an MKL-DNN kernel requires. So here we need to select
// a (possibly suboptimal) kernel whose memory requirement matches the tensor sizes.
if (param.conv_param.dilate.ndim() == 0 && bias == nullptr) {
mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct,
data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero);
return mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
auto conv_pd = mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
while (conv_pd.dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
} else if (param.conv_param.dilate.ndim() == 0) {
auto bias_md = GetMemDesc(*bias);
mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct,
data_md, weight_md, bias_md, out_md, strides, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
auto conv_pd = mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
while (conv_pd.dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
} else {
mkldnn::memory::dims dilates{0, 0};
dilates[0] = param.conv_param.dilate[0] - 1;
@@ -103,14 +120,26 @@ mkldnn::convolution_forward::primitive_desc GetConvFwdImpl(
mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct,
data_md, weight_md, out_md, strides, dilates, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
auto conv_pd = mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
while (conv_pd.dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
} else {
auto bias_md = GetMemDesc(*bias);
mkldnn::convolution_forward::desc desc(prop, mkldnn::algorithm::convolution_direct,
data_md, weight_md, bias_md, out_md, strides,
dilates, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
auto conv_pd = mkldnn::convolution_forward::primitive_desc(desc, attr, engine);
while (conv_pd.dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
}
}
}
@@ -131,18 +160,36 @@ static mkldnn::convolution_backward_data::primitive_desc GetConvBwdData(
mkldnn::memory::dims padding{0, 0};
padding[0] = param.pad[0];
padding[1] = param.pad[1];

// MKL-DNN introduced padded formats in 0.15, which require more memory
// for computation than the actual tensor size. Currently, MKL-DNN operators
// still reuse the memory from MXNet's memory planning, and that memory may
// be smaller than what an MKL-DNN kernel requires. So here we need to select
// a (possibly suboptimal) kernel whose memory requirement matches the tensor sizes.
if (param.dilate.ndim() == 0) {
mkldnn::convolution_backward_data::desc desc(mkldnn::algorithm::convolution_direct,
data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_data::primitive_desc(desc, engine, fwd_pd);
auto conv_pd = mkldnn::convolution_backward_data::primitive_desc(desc, engine, fwd_pd);
while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.diff_src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
} else {
mkldnn::memory::dims dilates{0, 0};
dilates[0] = param.dilate[0] - 1;
dilates[1] = param.dilate[1] - 1;
mkldnn::convolution_backward_data::desc desc(mkldnn::algorithm::convolution_direct,
data_md, weight_md, out_md, strides, dilates, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_data::primitive_desc(desc, engine, fwd_pd);
auto conv_pd = mkldnn::convolution_backward_data::primitive_desc(desc, engine, fwd_pd);
while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.diff_src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
}
}

@@ -163,16 +210,34 @@ static mkldnn::convolution_backward_weights::primitive_desc GetConvBwdWeights(
mkldnn::memory::dims padding{0, 0};
padding[0] = param.pad[0];
padding[1] = param.pad[1];

// MKL-DNN introduced padded formats in 0.15, which require more memory
// for computation than the actual tensor size. Currently, MKL-DNN operators
// still reuse the memory from MXNet's memory planning, and that memory may
// be smaller than what an MKL-DNN kernel requires. So here we need to select
// a (possibly suboptimal) kernel whose memory requirement matches the tensor sizes.
if (param.dilate.ndim() == 0 && bias == nullptr) {
mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
data_md, weight_md, out_md, strides, padding, padding, mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
} else if (param.dilate.ndim() == 0) {
auto bias_md = GetMemDesc(*bias);
mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
data_md, weight_md, bias_md, out_md, strides, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
} else {
mkldnn::memory::dims dilates{0, 0};
dilates[0] = param.dilate[0] - 1;
@@ -181,14 +246,26 @@ static mkldnn::convolution_backward_weights::primitive_desc GetConvBwdWeights(
mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
data_md, weight_md, out_md, strides, dilates, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
} else {
auto bias_md = GetMemDesc(*bias);
mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
data_md, weight_md, bias_md, out_md,
strides, dilates, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
auto conv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
while (conv_pd.diff_dst_primitive_desc().get_size() != GetArraySize(output) ||
conv_pd.src_primitive_desc().get_size() != GetArraySize(data) ||
conv_pd.diff_weights_primitive_desc().get_size() != GetArraySize(weights)) {
CHECK(conv_pd.next_impl()) << "No implementation";
}
return conv_pd;
}
}
}
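
The checks in mkldnn_convolution.cc above compare primitive-descriptor sizes against GetArraySize of the NDArray arguments. That helper is not part of this diff; as an assumption for readers, it is expected to boil down to the logical element count times the element size, roughly as sketched below (not the actual implementation).

// Assumed shape of the helper referenced above; the real definition lives
// elsewhere in src/operator/nn/mkldnn/ and may handle MKL-DNN-layout arrays
// differently.
static inline size_t GetArraySize(const mxnet::NDArray &arr) {
  return arr.shape().Size() * mshadow::mshadow_sizeof(arr.dtype());
}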
56 changes: 51 additions & 5 deletions src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -52,17 +52,34 @@ static mkldnn::convolution_forward::primitive_desc GetDeconvBwd_(
bool has_bias, const mkldnn::memory::desc &out_md,
const mkldnn::engine &engine, const mkldnn::memory::dims &strides,
const mkldnn::memory::dims &padding, const mkldnn::memory::dims &dilates) {
// MKL-DNN introduced padded formats in 0.15, which require more memory
// for computation than the actual tensor size. Currently, MKL-DNN operators
// still reuse the memory from MXNet's memory planning, and that memory may
// be smaller than what an MKL-DNN kernel requires. So here we need to select
// a (possibly suboptimal) kernel whose memory requirement matches the tensor sizes.
if (!has_bias) {
mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training,
mkldnn::algorithm::convolution_direct, out_md, weights_md, data_md, strides,
dilates, padding, padding, mkldnn::padding_kind::zero);
return mkldnn::convolution_forward::primitive_desc(desc, engine);
auto deconv_pd = mkldnn::convolution_forward::primitive_desc(desc, engine);
while (deconv_pd.dst_primitive_desc().get_size() != GetMemDescSize(data_md) ||
deconv_pd.src_primitive_desc().get_size() != GetMemDescSize(out_md) ||
deconv_pd.weights_primitive_desc().get_size() != GetMemDescSize(weights_md)) {
CHECK(deconv_pd.next_impl()) << "No implementation";
}
return deconv_pd;
} else {
auto bias_md = GetBiasDesc(data_md);
mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training,
mkldnn::algorithm::convolution_direct, out_md, weights_md, bias_md,
data_md, strides, dilates, padding, padding, mkldnn::padding_kind::zero);
return mkldnn::convolution_forward::primitive_desc(desc, engine);
auto deconv_pd = mkldnn::convolution_forward::primitive_desc(desc, engine);
while (deconv_pd.dst_primitive_desc().get_size() != GetMemDescSize(data_md) ||
deconv_pd.src_primitive_desc().get_size() != GetMemDescSize(out_md) ||
deconv_pd.weights_primitive_desc().get_size() != GetMemDescSize(weights_md)) {
CHECK(deconv_pd.next_impl()) << "No implementation";
}
return deconv_pd;
}
}

@@ -90,7 +107,18 @@ static mkldnn::convolution_backward_data::primitive_desc GetDeconvFwdImpl(
mkldnn::convolution_backward_data::desc desc(mkldnn::algorithm::convolution_direct,
out_md, weight_md, data_md, strides, dilate, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_data::primitive_desc(desc, engine, bwd_pd);
auto deconv_pd = mkldnn::convolution_backward_data::primitive_desc(desc, engine, bwd_pd);
// MKL-DNN introduced padded formats in 0.15, which require more memory
// for computation than the actual tensor size. Currently, MKL-DNN operators
// still reuse the memory from MXNet's memory planning, and that memory may
// be smaller than what an MKL-DNN kernel requires. So here we need to select
// a (possibly suboptimal) kernel whose memory requirement matches the tensor sizes.
while (deconv_pd.diff_dst_primitive_desc().get_size() != GetMemDescSize(data_md) ||
deconv_pd.diff_src_primitive_desc().get_size() != GetMemDescSize(out_md) ||
deconv_pd.weights_primitive_desc().get_size() != GetMemDescSize(weight_md)) {
CHECK(deconv_pd.next_impl()) << "No implementation";
}
return deconv_pd;
}

static mkldnn::convolution_forward::primitive_desc GetDeconvBwdDataImpl(
@@ -137,16 +165,34 @@ GetDeconvBwdWeightsImpl(
mkldnn::memory::dims dilate{0, 0};
dilate[0] = param.dilate[0] - 1;
dilate[1] = param.dilate[1] - 1;

// MKL-DNN introduced padded formats in 0.15, which require more memory
// for computation than the actual tensor size. Currently, MKL-DNN operators
// still reuse the memory from MXNet's memory planning, and that memory may
// be smaller than what an MKL-DNN kernel requires. So here we need to select
// a (possibly suboptimal) kernel whose memory requirement matches the tensor sizes.
if (!has_bias) {
mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
out_md, weight_md, data_md, strides, dilate, padding, padding, mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
auto deconv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
while (deconv_pd.diff_dst_primitive_desc().get_size() != GetMemDescSize(data_md) ||
deconv_pd.src_primitive_desc().get_size() != GetMemDescSize(out_md) ||
deconv_pd.diff_weights_primitive_desc().get_size() != GetMemDescSize(weight_md)) {
CHECK(deconv_pd.next_impl()) << "No implementation";
}
return deconv_pd;
} else {
auto bias_md = GetBiasDesc(data_md);
mkldnn::convolution_backward_weights::desc desc(mkldnn::algorithm::convolution_direct,
out_md, weight_md, bias_md, data_md, strides, dilate, padding, padding,
mkldnn::padding_kind::zero);
return mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
auto deconv_pd = mkldnn::convolution_backward_weights::primitive_desc(desc, engine, fwd_pd);
while (deconv_pd.diff_dst_primitive_desc().get_size() != GetMemDescSize(data_md) ||
deconv_pd.src_primitive_desc().get_size() != GetMemDescSize(out_md) ||
deconv_pd.diff_weights_primitive_desc().get_size() != GetMemDescSize(weight_md)) {
CHECK(deconv_pd.next_impl()) << "No implementation";
}
return deconv_pd;
}
}

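
Note that the deconvolution helpers above compare against GetMemDescSize of the memory descriptors rather than GetArraySize of NDArrays, simply because these functions only receive descriptors. Since deconvolution forward is implemented through convolution backward-data, the descriptor roles swap, and the checks pair them accordingly; an illustrative reading, not a statement from the patch itself:

// Illustrative reading of the size checks in GetDeconvBwd_ / GetDeconvFwdImpl:
// the convolution primitive's "src"/"diff_src" corresponds to the
// deconvolution output (out_md), and its "dst"/"diff_dst" to the
// deconvolution input (data_md), so the pairs checked are
//   dst / diff_dst size   vs  GetMemDescSize(data_md)
//   src / diff_src size   vs  GetMemDescSize(out_md)
//   weights size          vs  GetMemDescSize of the weights descriptor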
18 changes: 14 additions & 4 deletions tests/cpp/include/test_mkldnn.h
@@ -116,10 +116,15 @@ inline static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dim
data_md, weight_md, out_md, strides,
padding, padding, mkldnn::padding_kind::zero);
mkldnn::convolution_forward::primitive_desc pd(desc, CpuEngine::Get()->get_engine());
std::vector<mkldnn::memory::format> ret(2);
while (pd.dst_primitive_desc().get_size() != GetMemDescSize(out_md) ||
pd.src_primitive_desc().get_size() != GetMemDescSize(data_md) ||
pd.weights_primitive_desc().get_size() != GetMemDescSize(weight_md)) {
CHECK(pd.next_impl()) << "No implementation";
}

std::vector<mkldnn::memory::format> ret(1);
ret[0] = static_cast<mkldnn::memory::format>(pd.dst_primitive_desc().desc().data.format);
ret[1] = static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format);
printf("format: %d, %d\n", ret[0], ret[1]);
printf("format: %d \n", ret[0]);
return ret;
} else if (num_dims == 5) {
mkldnn::memory::dims data_dims{1, 32, 112, 112};
@@ -139,6 +144,12 @@ inline static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dim
data_md, weight_md, out_md, strides,
padding, padding, mkldnn::padding_kind::zero);
mkldnn::convolution_forward::primitive_desc pd(desc, CpuEngine::Get()->get_engine());
while (pd.dst_primitive_desc().get_size() != GetMemDescSize(out_md) ||
pd.src_primitive_desc().get_size() != GetMemDescSize(data_md) ||
pd.weights_primitive_desc().get_size() != GetMemDescSize(weight_md)) {
CHECK(pd.next_impl()) << "No implementation";
}

std::vector<mkldnn::memory::format> ret(1);
ret[0] = static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format);
printf("format: %d\n", ret[0]);
@@ -188,7 +199,6 @@ inline static TestArrayShapes GetTestArrayShapes() {

std::vector<mkldnn::memory::format> formats = GetMKLDNNFormat(4, dtype);
pds.push_back(GetMemPD(s1, dtype, formats[0]));
pds.push_back(GetMemPD(s2, dtype, formats[1]));
}
{
// 5D
