Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libnd4j: Link with MKL-DNN and OpenBLAS when available from Maven #6204

Merged
merged 7 commits into from
Sep 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions libnd4j/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@ set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS OFF)

option(BUILD_TESTS "Build tests" OFF)
if(BUILD_TESTS)
# tests are always compiled with all ops included
set(LIBND4J_ALL_OPS true)
set(LIBND4J_BUILD_MINIFIER true)
add_subdirectory(tests_cpu)
endif()

# -fsanitize=address
# -fsanitize=leak
Expand All @@ -35,11 +29,34 @@ if(NATIVE)
ENDIF()
endif()

# When building the CPU backend, wire in the optional BLAS backends whose
# locations are passed in by buildnativeoperations.sh via
# -DMKLDNN_PATH / -DOPENBLAS_PATH. MKL-DNN takes precedence over OpenBLAS.
if(NOT CUDA_BLAS)
    if(NOT "${MKLDNN_PATH}" STREQUAL "")
        add_definitions(-DHAVE_MKLDNN)
        include_directories(${MKLDNN_PATH}/include/)
        link_directories(${MKLDNN_PATH} ${MKLDNN_PATH}/lib/)
        # The Linux distribution names the MKL small library mklml_intel;
        # other platforms ship it as mklml.
        # Quoted STREQUAL avoids the unquoted-variable re-dereference footgun
        # of `IF(${VAR} MATCHES ...)` (see CMake policy CMP0054).
        if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
            set(MKLDNN_LIBRARIES mkldnn mklml_intel)
        else()
            set(MKLDNN_LIBRARIES mkldnn mklml)
        endif()
    elseif(NOT "${OPENBLAS_PATH}" STREQUAL "")
        add_definitions(-DHAVE_OPENBLAS)
        include_directories(${OPENBLAS_PATH}/include/)
        link_directories(${OPENBLAS_PATH} ${OPENBLAS_PATH}/lib/)
        set(OPENBLAS_LIBRARIES openblas)
    endif()
endif()

# Configure the real build only when not running inside CLion's project
# indexer (CLION_IDE is set in the IDE's environment).
if (NOT DEFINED ENV{CLION_IDE})
message("NOT CLION")
include_directories(blas/ include/ include/helpers include/loops include/graph include/ops include/types include/array include/cnpy)
add_subdirectory(blas)
# Tests must be added AFTER the MKLDNN/OPENBLAS detection above so that
# tests_cpu picks up the HAVE_MKLDNN/HAVE_OPENBLAS definitions and paths.
if(BUILD_TESTS)
# tests are always compiled with all ops included
set(LIBND4J_ALL_OPS true)
set(LIBND4J_BUILD_MINIFIER true)
add_subdirectory(tests_cpu)
endif()
endif ()

if ($ENV{CLION_IDE})
Expand Down
12 changes: 4 additions & 8 deletions libnd4j/blas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,13 @@ elseif(CPU_BLAS)
add_library(${LIBND4J_NAME}static STATIC $<TARGET_OBJECTS:nd4jobj>)
add_library(${LIBND4J_NAME} SHARED $<TARGET_OBJECTS:nd4jobj>)
endif()

target_link_libraries(${LIBND4J_NAME} ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES})

# The minifier tool is only built when all ops are compiled in; it links the
# static library and therefore needs the same BLAS backend libraries
# (MKLDNN_LIBRARIES/OPENBLAS_LIBRARIES are empty lists when not detected).
if ("${LIBND4J_ALL_OPS}" AND "${LIBND4J_BUILD_MINIFIER}")
message(STATUS "Building minifier...")
add_executable(minifier ../minifier/minifier.cpp ../minifier/graphopt.cpp)
target_link_libraries(minifier ${LIBND4J_NAME}static ${MKLDNN_LIBRARIES} ${OPENBLAS_LIBRARIES})
endif()

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 4.9)
Expand All @@ -250,13 +253,6 @@ elseif(CPU_BLAS)
endif()
endif()

if(APPLE)
# We cannot use those to propagate C++ exceptions across shared libraries.
# SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc -static-libstdc++")
elseif(MSYS)
SET( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static")
endif()

#install(TARGETS mySharedLib DESTINATION /some/full/path)
install(TARGETS ${LIBND4J_NAME} DESTINATION .)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/cpu)
Expand Down
39 changes: 38 additions & 1 deletion libnd4j/buildnativeoperations.sh
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,41 @@ if [ "$CHIP" == "cuda" ] && [ -n "$CHIP_VERSION" ]; then
esac
fi

# Locate MKL-DNN and OpenBLAS as extracted by JavaCPP into its cache when
# building natively. Cross builds (OS != HOST) cannot use the host's cache,
# so the paths start out empty there.
if [ "$OS" == "$HOST" ]; then
    MKLDNN_PATH="$HOME/.javacpp/cache/mkl-dnn-0.16-1.4.3-SNAPSHOT-$HOST-x86_64.jar/org/bytedeco/javacpp/$HOST-x86_64/"
    OPENBLAS_PATH="$HOME/.javacpp/cache/openblas-0.3.0-1.4.2-$HOST-x86_64.jar/org/bytedeco/javacpp/$HOST-x86_64/"
else
    MKLDNN_PATH=""
    OPENBLAS_PATH=""
fi

# Entries in BUILD_PATH (separated by BUILD_PATH_SEPARATOR) override the
# JavaCPP cache when they contain the corresponding headers.
if [[ -n "${BUILD_PATH:-}" ]]; then
    PREVIFS="$IFS"
    IFS="$BUILD_PATH_SEPARATOR"
    for P in $BUILD_PATH; do
        if [[ -f "$P/include/mkldnn.h" ]]; then
            MKLDNN_PATH="$P"
        fi
        if [[ -f "$P/include/openblas_config.h" ]]; then
            OPENBLAS_PATH="$P"
        fi
    done
    IFS="$PREVIFS"
fi

# Warn only when a candidate path was set but does not actually exist on disk.
# (Previously `[ ! -d "" ]` also fired for intentionally empty paths, printing
# a misleading warning on every cross build.)
if [ -n "$MKLDNN_PATH" ] && [ ! -d "$MKLDNN_PATH" ]; then
    echo "Could not find MKL-DNN, please make sure to run the build with Maven"
    MKLDNN_PATH=""
fi

if [ -n "$OPENBLAS_PATH" ] && [ ! -d "$OPENBLAS_PATH" ]; then
    echo "Could not find OpenBLAS, please make sure to run the build with Maven"
    OPENBLAS_PATH=""
fi

# Normalize Windows backslashes to forward slashes before handing to CMake.
MKLDNN_PATH="${MKLDNN_PATH//\\//}"
OPENBLAS_PATH="${OPENBLAS_PATH//\\//}"

mkbuilddir() {
if [ "$CLEAN" == "true" ]; then
echo "Removing blasbuild"
Expand All @@ -473,9 +508,11 @@ echo OPERATIONS = "${OPERATIONS_ARG}"
echo MINIFIER = "${MINIFIER_ARG}"
echo TESTS = "${TESTS_ARG}"
echo NAME = "${NAME_ARG}"
echo MKLDNN_PATH = "$MKLDNN_PATH"
echo OPENBLAS_PATH = "$OPENBLAS_PATH"
mkbuilddir
pwd
eval $CMAKE_COMMAND "$BLAS_ARG" "$ARCH_ARG" "$NAME_ARG" "$SHARED_LIBS_ARG" "$MINIFIER_ARG" "$OPERATIONS_ARG" "$BUILD_TYPE" "$PACKAGING_ARG" "$EXPERIMENTAL_ARG" "$TESTS_ARG" "$CUDA_COMPUTE" -DDEV=FALSE -DCMAKE_NEED_RESPONSE=YES -DMKL_MULTI_THREADED=TRUE ../..
eval $CMAKE_COMMAND "$BLAS_ARG" "$ARCH_ARG" "$NAME_ARG" "$SHARED_LIBS_ARG" "$MINIFIER_ARG" "$OPERATIONS_ARG" "$BUILD_TYPE" "$PACKAGING_ARG" "$EXPERIMENTAL_ARG" "$TESTS_ARG" "$CUDA_COMPUTE" -DMKLDNN_PATH="$MKLDNN_PATH" -DOPENBLAS_PATH="$OPENBLAS_PATH" -DDEV=FALSE -DCMAKE_NEED_RESPONSE=YES -DMKL_MULTI_THREADED=TRUE ../..
if [ "$PARALLEL" == "true" ]; then
eval $MAKE_COMMAND -j $MAKEJ && cd ../../..
else
Expand Down
18 changes: 18 additions & 0 deletions libnd4j/include/cblas.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,24 @@

#ifndef NATIVEOPERATIONS_CBLAS_H
#define NATIVEOPERATIONS_CBLAS_H

// Select a CBLAS implementation. CBLAS_H doubles as a sentinel: once any real
// CBLAS header is in, the fallback declarations below (guarded by
// #ifndef CBLAS_H) are skipped.

#ifdef __MKL_CBLAS_H__
// CBLAS from MKL is already included
#define CBLAS_H
#endif

#ifdef HAVE_MKLDNN
// include CBLAS from MKL-DNN
#include <mkl_cblas.h>
#define CBLAS_H
#endif

#ifdef HAVE_OPENBLAS
// include CBLAS from OpenBLAS
// NOTE(review): CMake defines HAVE_MKLDNN and HAVE_OPENBLAS mutually
// exclusively; if both were ever defined, both headers would be included
// and could conflict — verify if the build logic changes.
#include <cblas.h>
#define CBLAS_H
#endif

#ifndef CBLAS_H
#include <dll.h>

Expand Down
11 changes: 11 additions & 0 deletions libnd4j/include/graph/Context.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
#include <graph/ContextPrototype.h>
#include <memory/Workspace.h>

#ifdef HAVE_MKLDNN
#include <MKLDNNStream.h>
#endif

// CUDA-specific includes
#ifdef __CUDACC__
Expand All @@ -53,6 +56,10 @@ namespace nd4j {

// branch for divergent_op
int _branch = 0;

#ifdef HAVE_MKLDNN
MKLDNNStream<T>* _mkldnnStream = nullptr;
#endif
public:
// TODO: maybe override new here as well?

Expand Down Expand Up @@ -106,6 +113,10 @@ namespace nd4j {
int getBranch();
void setBranch(int branch);

#ifdef HAVE_MKLDNN
// Per-context MKL-DNN stream; nullptr until an op installs one via
// setMKLDNNStream(). The Context destructor deletes the installed stream,
// so the context takes ownership of the pointer passed in.
MKLDNNStream<T> *getMKLDNNStream() { return _mkldnnStream; }
void setMKLDNNStream(MKLDNNStream<T> *mkldnnStream) { _mkldnnStream = mkldnnStream; }
#endif
/**
*
* @return
Expand Down
5 changes: 5 additions & 0 deletions libnd4j/include/graph/ContextPrototype.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ namespace nd4j {
// opNum for legacy XYZ ops
int _opNum = -1;

bool _useMKLDNN = true;

public:
explicit ContextPrototype(int nodeId = 1, bool inPlace = false);
~ContextPrototype() = default;
Expand Down Expand Up @@ -71,6 +73,9 @@ namespace nd4j {
int opNum();
void setOpNum(int opNum);

// Whether ops run with this context are allowed to use MKL-DNN
// (_useMKLDNN defaults to true).
bool isUseMKLDNN() { return _useMKLDNN; }
void setUseMKLDNN(bool useMKLDNN) { _useMKLDNN = useMKLDNN; }

/**
* This method returns number of inputs available in this block
* @return
Expand Down
6 changes: 6 additions & 0 deletions libnd4j/include/graph/impl/Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ namespace nd4j {
this->_opNum = prototype->opNum();
this->_isInplace = prototype->isInplace();
this->_nodeId = prototype->nodeId();
this->_useMKLDNN = prototype->isUseMKLDNN();
}


Expand Down Expand Up @@ -79,6 +80,11 @@ namespace nd4j {
this->_iArgs.clear();
this->_tArgs.clear();
this->_inputs.clear();
#ifdef HAVE_MKLDNN
if (_mkldnnStream != nullptr) {
delete _mkldnnStream;
}
#endif
}

template <typename T>
Expand Down
7 changes: 7 additions & 0 deletions libnd4j/include/helpers/BlasHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ namespace nd4j {
double *beta_Array, double **C_Array, int *ldc_Array,
int group_count, int *group_size);

// External LAPACKE headers may define LAPACK_ROW_MAJOR/LAPACK_COL_MAJOR as
// macros; undefine them so the identifiers can be reused as enumerators of
// the local LAPACK_LAYOUT type below (same values: 101/102).
#ifdef LAPACK_ROW_MAJOR
#undef LAPACK_ROW_MAJOR
#endif

#ifdef LAPACK_COL_MAJOR
#undef LAPACK_COL_MAJOR
#endif
enum LAPACK_LAYOUT { LAPACK_ROW_MAJOR=101, LAPACK_COL_MAJOR=102 };

typedef int (*LapackeSgesvd)(LAPACK_LAYOUT matrix_layout, char jobu, char jobvt,
Expand Down
79 changes: 79 additions & 0 deletions libnd4j/include/helpers/MKLDNNStream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*******************************************************************************
* Copyright (c) 2018 Skymind, Inc.
*
* This program and the accompanying materials are made available under the
* terms of the Apache License, Version 2.0 which is available at
* https://www.apache.org/licenses/LICENSE-2.0.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* SPDX-License-Identifier: Apache-2.0
******************************************************************************/

//
// Created by saudet on 8/30/2018.
//

#ifndef LIBND4J_MKLDNNSTREAM_H
#define LIBND4J_MKLDNNSTREAM_H

#ifdef HAVE_MKLDNN
#include <mkldnn.hpp>

namespace nd4j {
// Caches the MKL-DNN primitive and memory objects built for one op instance
// so repeated executions with identical arguments can reuse them instead of
// rebuilding the primitive each call.
template <typename T> class MKLDNNStream {
protected:
// Op name, used only for debug logging in submitAndWait().
std::string _opName;

// Snapshot of the arguments the cached primitive was built for.
// NOTE(review): inputs/outputs are compared by pointer identity, not by
// shape or content — this assumes callers keep stable NDArray pointers
// between invocations; confirm against call sites.
std::vector<NDArray<T>*> _inputs;
std::vector<NDArray<T>*> _outputs;
std::vector<T> _floatArguments;
std::vector<int> _intArguments;

mkldnn::engine _engine = mkldnn::engine(mkldnn::engine::cpu, 0);
std::vector<mkldnn::memory> _memory;
mkldnn::primitive _operation;

public:
// Only single precision is wired up for MKL-DNN here.
static bool isSupported() { return typeid(T) == typeid(float); }

MKLDNNStream(const std::string &opName) : _opName(opName) { }

// Returns true when the arguments differ from the cached snapshot; in that
// case the cached primitive/memory are dropped and the caller must rebuild
// them (via setMemory()/setOperation()). Returns false on a cache hit.
bool checkAndReset(const std::vector<NDArray<T>*> &inputs, const std::vector<NDArray<T>*> &outputs,
const std::vector<T> &floatArguments, const std::vector<int> &intArguments) {
if (inputs != _inputs || outputs != _outputs || floatArguments != _floatArguments || intArguments != _intArguments) {
_inputs = inputs;
_outputs = outputs;
_floatArguments = floatArguments;
_intArguments = intArguments;
_operation.reset(nullptr);
_memory.clear();
return true;
}
return false;
}

const mkldnn::engine &getEngine() { return _engine; }
void setEngine(const mkldnn::engine &engine) { _engine = engine; }

const std::vector<mkldnn::memory> &getMemory() { return _memory; }
void setMemory(const std::vector<mkldnn::memory> &memory) { _memory = memory; }

const mkldnn::primitive &getOperation() { return _operation; }
void setOperation(const mkldnn::primitive &operation) { _operation = operation; }

// Submits the cached primitive on a fresh stream and blocks until done.
bool submitAndWait(mkldnn::stream::kind kind = mkldnn::stream::kind::eager) {
nd4j_debug("Executing %s with MKL-DNN\n", _opName.c_str());
// need to create a new one because already executed streams become unusable
mkldnn::stream stream(kind);
return stream.submit({_operation}).wait();
}
};
}
#endif

#endif //LIBND4J_MKLDNNSTREAM_H
14 changes: 12 additions & 2 deletions libnd4j/include/ops/declarable/generic/convo/conv2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,19 @@ CUSTOM_OP_IMPL(conv2d, 2, 1, false, 0, 9) {
if (bias)
REQUIRE_TRUE(bias->rankOf() <= 2 && oC == bias->lengthOf(), 0, "CUSTOM CONV2D OP: wrong shape of array with biases, expected rank, length: <=2, %i, but got %i, %i instead !", oC, bias->rankOf(), bias->lengthOf());

#ifdef HAVE_MKLDNN
if (block.isUseMKLDNN() && MKLDNNStream<T>::isSupported()) {
if (block.getMKLDNNStream() == nullptr) {
block.setMKLDNNStream(new MKLDNNStream<T>("conv2d"));
}
ConvolutionUtils<T>::mkldnn_conv2d(*block.getMKLDNNStream(), {input, weights, bias}, output, {kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW});
} else {
#endif
ConvolutionUtils<T>::conv2d({input, weights, bias}, output, {kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW});
#ifdef HAVE_MKLDNN
}
#endif

ConvolutionUtils<T>::conv2d({input, weights, bias}, output, {kH,kW,sH,sW,pH,pW,dH,dW,isSameMode,isNCHW});

return Status::OK();
}

Expand Down
11 changes: 9 additions & 2 deletions libnd4j/include/ops/declarable/generic/helpers/convolutions.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
#define LIBND4J_CONVOLUTIONS_H

#include <NDArray.h>
#include <graph/Context.h>

#ifdef HAVE_MKLDNN
#include <helpers/MKLDNNStream.h>
#endif

namespace nd4j {
namespace ops {
Expand Down Expand Up @@ -70,7 +75,9 @@ namespace nd4j {
static void getSizesAndIndexesConv3d(const bool isNCDHW, const NDArray<T>& input, const NDArray<T>& output, int& bS, int& iC, int& iD, int& iH, int& iW, int& oC, int& oD, int& oH, int& oW, int& indIOioC, int& indIOioD, int& indWiC, int& indWoC, int& indWkD);

static void conv2d(const std::vector<NDArray<T>*>& inArrs, NDArray<T>* output, const std::vector<int>& intArgs);

#ifdef HAVE_MKLDNN
static void mkldnn_conv2d(MKLDNNStream<T> &stream, const std::vector<NDArray<T>*>& inArrs, NDArray<T>* output, const std::vector<int>& intArgs);
#endif
static void conv2dBP(const std::vector<NDArray<T>*>& inArrs, const std::vector<NDArray<T>*>& outArrs, const std::vector<int>& intArgs);

static void depthwiseConv2d(const std::vector<NDArray<T>*>& inArrs, NDArray<T>* output, const std::vector<int>& intArgs);
Expand Down Expand Up @@ -105,4 +112,4 @@ namespace nd4j {

}
}
#endif //LIBND4J_CONVOLUTIONS_H
#endif //LIBND4J_CONVOLUTIONS_H
Loading