MKL feature enhance (#4128)

1. Support BM V2 API, mean & var 2. Pool Asymmetric padding filled feature Signed-off-by: lingyan <lingyan.guo@intel.com>
apache · Dec 7, 2016 · 092ff44 · 092ff44
1 parent fc874df
commit 092ff44
Show file tree

Hide file tree

Showing 28 changed files with 385 additions and 476 deletions.
diff --git a/MKL_README.md b/MKL_README.md
@@ -1,31 +1,36 @@
 # MKL2017 PLUGIN
 
 MKL2017 is an INTEL released library to accelerate Deep Neural Network (DNN) applications on Intel architecture.
+
+MKL2017_ML is a subset of MKL2017 that contains only the DNN acceleration features.
+
 This README shows the user how to setup and install MKL2017 library with mxnet.
 
 ## Build/Install MXNet with MKL:
 
   1. Enable USE_MKL2017=1 in make/config.mk
 
-    1.1 USE_BLAS should be atlas by default
+    1.1 By default, USE_MKL2017_EXPERIMENTAL=0. If USE_MKL2017_EXPERIMENTAL=1 is set, MKL buffers will be created and transferred between layers to achieve much higher performance.
 
-    1.2 if you need USE_BLAS to be mkl, please navigate here to do a full MKL installation: https://registrationcenter.intel.com/en/forms/?productid=2558&licensetype=2
+    1.2 By default, USE_BLAS=atlas, MKLML_ROOT=/usr/local
 
-    1.3 By default, MKL_2017_EXPRIEMENTAL=0. If setting MKL_2017_EXPRIEMENTAL=1, MKL buffer will be created and transferred between layers to achiever much higher performance.
+      1.2.1 When make is executed, the Makefile runs "prepare_mkl.sh" to download the MKL2017_ML library into <MKLML_ROOT>
 
-  2. Run 'make -jX'
+      1.2.2 Manual steps if the automatic MKL2017_ML download fails (e.g. because of proxy settings)
 
-    2.1 Makefile will execute "prepare_mkl.sh" to download the mkl under root folder.e.g. <MXNET ROOTDIR> /mklml_lnx_<MKL VRSION>
+        1.2.2.1 wget https://github.com/dmlc/web-data/raw/master/mxnet/mklml-release/mklml_lnx_<MKL VERSION>.tgz
 
-    2.2 if the download failed because of proxy setting, please download it manually before make
+        1.2.2.2 tar zxvf mklml_lnx_<MKL VERSION>.tgz
+
+        1.2.2.3 cp -rf mklml_lnx_<MKL VERSION>/* <MKLML_ROOT>/
 
-    2.2.1 wget https://github.com/dmlc/web-data/raw/master/mxnet/mklml-release/mklml_lnx_<MKL VERSION>.tgz
-
-    2.2.2 tar zxvf mklml_lnx_<MKL VERSION>.tgz
+      1.2.3 If USE_BLAS=mkl is set, please navigate here to do a full MKL installation: https://registrationcenter.intel.com/en/forms/?productid=2558&licensetype=2
 
+  2. Run 'make -jX'
+
   3. Navigate into the python directory
 
-  4. Set LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<MXNET ROOTDIR>/mklml_lnx_<MKL_VERSION>/lib
+  4. Set LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<MKLML_ROOT>/lib
 
   5. Run 'sudo python setup.py install'
 

diff --git a/Makefile b/Makefile
@@ -67,14 +67,15 @@ endif
 ifeq ($(USE_MKL2017), 1)
 	CFLAGS += -DMXNET_USE_MKL2017=1
 	CFLAGS += -DUSE_MKL=1
+	CFLAGS += -I$(ROOTDIR)/src/operator/mkl/
 ifeq ($(USE_MKL2017_EXPERIMENTAL), 1)
 	CFLAGS += -DMKL_EXPERIMENTAL=1
 else
 	CFLAGS += -DMKL_EXPERIMENTAL=0
 endif
 ifneq ($(USE_BLAS), mkl)
 	ICC_ON=0
-	RETURN_STRING=$(shell ./prepare_mkl.sh $(ICC_ON))
+	RETURN_STRING=$(shell ./prepare_mkl.sh $(ICC_ON) $(MKLML_ROOT))
 	MKLROOT=$(firstword $(RETURN_STRING))
 	MKL_LDFLAGS=-l$(word 2, $(RETURN_STRING))
 	MKL_EXTERNAL=$(lastword $(RETURN_STRING))

diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
@@ -19,7 +19,7 @@
 #include "./storage.h"
 #include "./engine.h"
 #if MKL_EXPERIMENTAL == 1
-#include "./mkl_memory.h"
+#include <mkl_memory.h>
 #endif
 // check c++11
 #if DMLC_USE_CXX11 == 0

diff --git a/include/mxnet/tensor_blob.h b/include/mxnet/tensor_blob.h
@@ -17,7 +17,7 @@
 #include <algorithm>
 #include "./base.h"
 #if MXNET_USE_MKL2017 == 1
-#include "./mkl_memory.h"
+#include <mkl_memory.h>
 #endif
 namespace mxnet {
 

diff --git a/make/config.mk b/make/config.mk
@@ -59,10 +59,20 @@ USE_OPENCV = 1
 # use openmp for parallelization
 USE_OPENMP = 1
 
+
+# MKL ML Library for Intel CPU/Xeon Phi
+# Please refer to MKL_README.md for details
+
+# MKL ML library folder; installing into /usr/local requires root privileges.
+# Change it to a directory under the user's home for a standard (non-root) user.
+# Used only when USE_BLAS != mkl
+MKLML_ROOT=/usr/local
+
 # whether use MKL2017 library
 USE_MKL2017 = 0
 
 # whether use MKL2017 experimental feature for high performance
+# Prerequisite: USE_MKL2017=1
 USE_MKL2017_EXPERIMENTAL = 0
 
 # whether use NNPACK library

diff --git a/prepare_mkl.sh b/prepare_mkl.sh
@@ -1,21 +1,21 @@
 #!/bin/bash
 # set -ex
-#
+# 
 # All modification made by Intel Corporation: © 2016 Intel Corporation
-#
+# 
 # All contributions by the University of California:
 # Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
 # All rights reserved.
-#
+# 
 # All other contributions:
 # Copyright (c) 2014, 2015, the respective contributors
 # All rights reserved.
 # For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md
-#
-#
+# 
+# 
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
-#
+# 
 #     * Redistributions of source code must retain the above copyright notice,
 #       this list of conditions and the following disclaimer.
 #     * Redistributions in binary form must reproduce the above copyright
@@ -24,7 +24,7 @@
 #     * Neither the name of Intel Corporation nor the names of its contributors
 #       may be used to endorse or promote products derived from this software
 #       without specific prior written permission.
-#
+# 
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -35,15 +35,15 @@
 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-FindLibrary()
+# 
+FindLibrary() 
 {
   case "$1" in
     intel|1)
-      LOCALMKL=`find $DST -name libmklml_intel.so`   # name of MKL SDL lib
+      LOCALMKL=`find $HOME_MKL -name libmklml_intel.so`   # name of MKL SDL lib
       ;;
     *)
-      LOCALMKL=`find $DST -name libmklml_gnu.so`   # name of MKL SDL lib
+      LOCALMKL=`find $HOME_MKL -name libmklml_gnu.so`   # name of MKL SDL lib
       ;;
   esac
 
@@ -62,26 +62,36 @@ echo $VERSION_LINE  # Return Version Line
 }
 
 # MKL
-DST=`dirname $0`
-OMP=0
-VERSION_MATCH=20120601
-ARCHIVE_BASENAME=mklml_lnx_2017.0.1.20161005.tgz
+HOME_MKL=$2
+if [ ! -d "$HOME_MKL" ]; then
+   mkdir $HOME_MKL
+fi
+MXNET_ROOT=`dirname $0`
+OMP=0 
+VERSION_MATCH=20161123
+ARCHIVE_BASENAME=mklml_lnx_2017.0.2.20161122.tgz
 MKL_CONTENT_DIR=`echo $ARCHIVE_BASENAME | rev | cut -d "." -f 2- | rev`
 MKLURL="https://github.com/dmlc/web-data/raw/master/mxnet/mklml-release/$ARCHIVE_BASENAME"
 # there are different MKL libs to be used for GCC and for ICC
 reg='^[0-9]+$'
 VERSION_LINE=`GetVersionName $MKLROOT`
+#echo $VERSION_LINE
 # Check if MKLROOT is set; if so, the one that is set will be used..
 if [ -z $MKLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then
-	# ..if MKLROOT is not set then check if we have MKL downloaded in proper version
-    VERSION_LINE=`GetVersionName $DST/$MKL_CONTENT_DIR`
+  # ..if MKLROOT is not set then check if we have MKL downloaded in proper version
+    VERSION_LINE=`GetVersionName $HOME_MKL`
+    #echo $VERSION_LINE
     if [ $VERSION_LINE -lt $VERSION_MATCH ] ; then
       #...If it is not, then download and unpack it
-      wget --no-check-certificate -P $DST $MKLURL -O $DST/$ARCHIVE_BASENAME
-      tar -xzf $DST/$ARCHIVE_BASENAME -C $DST
+      wget --no-check-certificate -P $MXNET_ROOT $MKLURL -O $MXNET_ROOT/$ARCHIVE_BASENAME
+      tar -xzf $MXNET_ROOT/$ARCHIVE_BASENAME -C $MXNET_ROOT
+      #echo $HOME_MKL
+      yes | cp -rf $MXNET_ROOT/$MKL_CONTENT_DIR/* $HOME_MKL
+      rm -rf $MXNET_ROOT/$MKL_CONTENT_DIR
     fi
   FindLibrary $1
-  MKLROOT=$PWD/`echo $LOCALMKL | sed -e 's/lib.*$//'`
+  #echo $LOCALMKL
+  MKLROOT=`echo $LOCALMKL | sed -e 's/lib.*$//'`
 fi
 
 # Check what MKL lib we have in MKLROOT
@@ -90,7 +100,7 @@ if [ -z `find $MKLROOT -name libmkl_rt.so -print -quit` ]; then
   OMP=1
 else
   LIBRARIES="mkl_rt"
-fi
+fi 
 
 
 # return value to calling script (Makefile,cmake)

diff --git a/src/operator/activation.cc b/src/operator/activation.cc
@@ -7,7 +7,7 @@
 #include "./activation-inl.h"
 #include "./mshadow_op.h"
 #if MXNET_USE_MKL2017 == 1
-#include <mxnet/mkl_memory.h>
+#include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_relu-inl.h"
 #endif  // MXNET_USE_MKL2017
@@ -28,7 +28,7 @@ Operator *CreateOp<cpu>(ActivationParam param, int dtype) {
           break;
       }
   }
-
+  LOG(INFO) << MKLReluOp<cpu, float>::getName() << " Skip MKL optimization";
 #endif
   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
     switch (param.act_type) {

diff --git a/src/operator/batch_norm.cc b/src/operator/batch_norm.cc
@@ -7,7 +7,7 @@
 
 #include "./batch_norm-inl.h"
 #if MXNET_USE_MKL2017 == 1
-#include <mxnet/mkl_memory.h>
+#include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_batch_norm-inl.h"
 #endif  // MXNET_USE_MKL2017
@@ -17,10 +17,12 @@ namespace op {
 template<>
 Operator *CreateOp<cpu>(BatchNormParam param, int dtype) {
 #if MXNET_USE_MKL2017 == 1
-  return new MKLBatchNormOp<cpu, float>(param);
-#else
-  return new BatchNormOp<cpu>(param);
+  if (!param.use_global_stats)
+    return new MKLBatchNormOp<cpu, float>(param);
+  else
+    LOG(INFO) << MKLBatchNormOp<cpu, float>::getName() << " Skip MKL optimization";
 #endif
+  return new BatchNormOp<cpu>(param);
 }
 
 // DO_BIND_DISPATCH comes from operator_common.h

diff --git a/src/operator/concat.cc b/src/operator/concat.cc
@@ -7,7 +7,7 @@
 
 #include "./concat-inl.h"
 #if MXNET_USE_MKL2017 == 1
-#include <mxnet/mkl_memory.h>
+#include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_concat-inl.h"
 #endif  // MXNET_USE_MKL2017
@@ -28,6 +28,7 @@ Operator* CreateOp<cpu>(ConcatParam param, int dtype) {
       break;
     }
   }
+  LOG(INFO) << MKLConcatOp<cpu, float>::getName() << " Skip MKL optimization";
 #endif
   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
     op = new ConcatOp<cpu, DType>(param);

diff --git a/src/operator/convolution.cc b/src/operator/convolution.cc
@@ -7,7 +7,7 @@
 
 #include "./convolution-inl.h"
 #if MXNET_USE_MKL2017 == 1
-#include <mxnet/mkl_memory.h>
+#include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_convolution-inl.h"
 #endif  // MXNET_USE_MKL2017
@@ -35,6 +35,7 @@ Operator* CreateOp<cpu>(ConvolutionParam param, int dtype,
       break;
     }
   }
+  LOG(INFO) << MKLConvolutionOp<cpu, float>::getName() << " Skip MKL optimization";
 #endif
 #if MXNET_USE_NNPACK == 1
   if ((param.dilate[0] == 1 && param.dilate[1] == 1)

diff --git a/src/operator/elementwise_sum.cc b/src/operator/elementwise_sum.cc
@@ -5,7 +5,7 @@
 */
 #include "./elementwise_sum-inl.h"
 #if MXNET_USE_MKL2017 == 1
-#include <mxnet/mkl_memory.h>
+#include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_elementwise-inl.h"
 #endif  // MXNET_USE_MKL2017
@@ -18,19 +18,18 @@ Operator* CreateOp<cpu>(ElementWiseSumParam param, int dtype) {
 #if MXNET_USE_MKL2017 == 1
   switch (dtype) {
   case mshadow::kFloat32:
-    op = new MKLElementWiseOp<cpu, float>(param, EltwiseParameter_EltwiseOp_SUM);
-    break;
+    return new MKLElementWiseOp<cpu, float>(param, EltwiseParameter_EltwiseOp_SUM);
   case mshadow::kFloat64:
-    op = new MKLElementWiseOp<cpu, double>(param, EltwiseParameter_EltwiseOp_SUM);
-    break;
+    return new MKLElementWiseOp<cpu, double>(param, EltwiseParameter_EltwiseOp_SUM);
   default:
+      LOG(INFO) << MKLElementWiseOp<cpu, float>::getName() << " Skip MKL optimization";
       break;
   }
-#else
+#endif
   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
     op = new ElementWiseSumOp<cpu, DType>(param);
   });
-#endif
+
   return op;
 }
 

diff --git a/src/operator/fully_connected.cc b/src/operator/fully_connected.cc
@@ -5,7 +5,7 @@
 */
 #include "./fully_connected-inl.h"
 #if MXNET_USE_MKL2017 == 1
-#include <mxnet/mkl_memory.h>
+#include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_fully_connected-inl.h"
 #endif  // MXNET_USE_MKL2017
@@ -22,9 +22,10 @@ Operator* CreateOp<cpu>(FullyConnectedParam param, int dtype) {
   case mshadow::kFloat64:
     return new MKLFullyConnectedOp<cpu, double>(param);
   default:
+    LOG(INFO) << MKLFullyConnectedOp<cpu, float>::getName() << " Skip MKL optimization";
     break;
   }
-#else
+#endif
   switch (dtype) {
   case mshadow::kFloat32:
     op = new FullyConnectedOp<cpu, float>(param);
@@ -39,7 +40,7 @@ Operator* CreateOp<cpu>(FullyConnectedParam param, int dtype) {
   default:
     LOG(FATAL) << "Unsupported type " << dtype;
   }
-#endif
+
   return op;
 }
 

diff --git a/src/operator/lrn.cc b/src/operator/lrn.cc
@@ -10,7 +10,7 @@
 #include "./cudnn_lrn-inl.h"
 #endif
 #if MXNET_USE_MKL2017 == 1
-#include <mxnet/mkl_memory.h>
+#include <mkl_memory.h>
 #include "./mkl/mkl_memory-inl.h"
 #include "./mkl/mkl_lrn-inl.h"
 #endif
@@ -21,9 +21,8 @@ template<>
 Operator* CreateOp<cpu>(LRNParam param, int dtype) {
 #if MXNET_USE_MKL2017 == 1
   return new MKLLRNOp<cpu, float>(param);
-#else
-  return new LocalResponseNormOp<cpu>(param);
 #endif
+  return new LocalResponseNormOp<cpu>(param);
 }
 
 // DO_BIND_DISPATCH comes from operator_common.h