diff --git a/CMakeLists.txt b/CMakeLists.txt index 42f6bffb9207..0b561825feec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,11 +1,25 @@ cmake_minimum_required(VERSION 3.0.2) +message(STATUS "CMAKE_VERSION=${CMAKE_VERSION}") + +# workaround to store CMAKE_CROSSCOMPILING because is getting reset by the project command +if(CMAKE_CROSSCOMPILING) + set(__CMAKE_CROSSCOMPILING ${CMAKE_CROSSCOMPILING}) + set(__CMAKE_CROSSCOMPILING_OVERRIDE ON) +endif() + project(mxnet C CXX) +if(__CMAKE_CROSSCOMPILING_OVERRIDE) + set(CMAKE_CROSSCOMPILING ${__CMAKE_CROSSCOMPILING}) +endif() + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake) endif() +message(STATUS "CMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}") + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Utils.cmake) #Some things have order. This must be put in front alone @@ -17,10 +31,6 @@ mxnet_option(USE_OPENMP "Build with Openmp support" ON) mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON IF NOT ARM) mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON -mxnet_option(USE_LAPACK "Build with lapack support" ON) -mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON) -mxnet_option(USE_MKLML_MKL "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE)) -mxnet_option(USE_MKLDNN "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND (NOT APPLE)) mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON IF NOT MSVC) mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support (if found)" ON) mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON) @@ -41,7 +51,123 @@ mxnet_option(USE_TENSORRT "Enable infeference optimization with TensorRT mxnet_option(USE_ASAN "Enable Clang/GCC ASAN sanitizers." 
OFF) mxnet_option(ENABLE_TESTCOVERAGE "Enable compilation with test coverage metric output" OFF) -message(STATUS "CMAKE_SYSTEM_NAME ${CMAKE_SYSTEM_NAME}") +if(NOT mxnet_LINKER_LIBS) + set(mxnet_LINKER_LIBS "") +endif(NOT mxnet_LINKER_LIBS) + +if(MSVC) + set(SYSTEM_ARCHITECTURE x86_64) +else() + execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE SYSTEM_ARCHITECTURE) +endif() + +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}") + +SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH") + +if("$ENV{VERBOSE}" STREQUAL "1") + message(STATUS " Verbose Makefile ACTIVATED") + set(CMAKE_VERBOSE_MAKEFILE ON) +endif() + +# ---[ BLAS + +# Choose BLAS (Basic Linear Algebra Subprograms) computation libraries + +# MXNet supports multiple mathematical backends for computations on the CPU: +# +# * Atlas +# * OpenBLAS +# * MKL (MKL, MKLML) +# * MKLDNN +# * Apple Accelerate +# +# The default order of choice for the libraries if found follows the path from the most +# (recommended) to less performant backends. The order is as follows: +# +# For desktop platforms (x86_64): +# +# 1. MKLDNN (submodule) | USE_MKLDNN +# 2. MKL | USE_MKL_IF_AVAILABLE +# 3. MKLML (downloaded) | USE_MKLML +# 4. Apple Accelerate | USE_APPLE_ACCELERATE_IF_AVAILABLE | Mac only +# 5. OpenBLAS | BLAS | Options: Atlas, Open, MKL, Apple +# +# Note: If USE_MKL_IF_AVAILABLE is set to False then MKLML and MKLDNN will be disabled as well for configuration +# backwards compatibility. +# +# For embedded platforms (all other and if cross compiled): +# +# 1. OpenBLAS | BLAS | Options: Atlas, Open +# +# You can set the BLAS library explicitly by setting the BLAS variable to: +# +# * Atlas +# * Open +# * MKL +# * Apple +# +# See cmake/ChooseBLAS.cmake file for the options. 
+# +# Intel's MKL (Math Kernel Library) is one of the most powerful math libraries +# https://software.intel.com/en-us/mkl +# +# It has the following flavours: +# +# * MKL is a complete full math library, containing basic and LAPACK functions. It is free under +# community support licensing (https://software.intel.com/en-us/articles/free-mkl), +# but needs to be downloaded and installed manually. +# +# * MKLML is a subset of MKL. It contains a smaller number of functions to reduce the +# size of the download and reduce the number of dynamic libraries the user needs. This +# is the most effective option since it can be downloaded and installed automatically +# by the cmake script (see cmake/DownloadMKLML.cmake). +# +# * MKLDNN is a separate open-source library; it can be used separately from MKL or MKLML. It is +# shipped as a subrepo with MXNet source code (see 3rdparty/mkldnn). +# See: https://github.com/intel/mkl-dnn +# +# Since the full MKL library is almost always faster than any other BLAS library it's turned on by default, +# however it needs to be downloaded and installed manually before doing cmake configuration. +# Register and download here https://software.seek.intel.com/performance-libraries +# +# Note: MKL is supported only for desktop builds and the framework itself supports the following +# hardware: +# +# * Intel® Xeon Phi™ processor +# * Intel® Xeon® processor +# * Intel® Core™ processor family +# * Intel Atom® processor +# +# If you have a different processor you can still try to use MKL, but performance results are +# unpredictable. +mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON) + +# If the full MKL library could not be found the thinner subset MKLML will be downloaded +# unless switched off explicitly. +# Note: The same limitation on hardware as for MKL applies to MKLML as well. 
+mxnet_option(USE_MKLML "Use MKLML subset of MKL instead of full MKL, will be downloaded" ON) + +# If either MKL or MKLML is present MKLDNN can be utilised from the 3rdparty/mkldnn subrepo. +# See more information here: https://github.com/intel/mkl-dnn +# Note: The same limitation on hardware as for MKL and MKLML applies to MKLDNN as well. +mxnet_option(USE_MKLDNN "Use MKLDNN (separate addition to MKL, MKL/MKLML not required)" ON) + +# Apple's mathematical framework, probably the best choice on a Mac if MKL/MKLML/MKLDNN +# are not available. +# https://developer.apple.com/documentation/accelerate +mxnet_option(USE_APPLE_ACCELERATE_IF_AVAILABLE "Use Apple Accelerate framework if found, \ + works if MKL not found or disabled" ON IF ${APPLE}) + +# Another important option of the math libraries is the presence of an additional set of +# mathematical functions gathered and named as the LAPACK (Linear Algebra Package). Some +# libraries don't include it, thus the cmake script will check the presence of an +# indicating function "cheev_" within the available chosen libraries and switch the +# functionality off if not found. 
+mxnet_option(USE_LAPACK "Build with LAPACK support" ON) + +include(cmake/ChooseBLAS.cmake) + if(USE_CUDA AND NOT USE_OLDCMAKECUDA) message(STATUS "CMake version '${CMAKE_VERSION}' using generator '${CMAKE_GENERATOR}'") if( @@ -64,23 +190,6 @@ else() project(mxnet C CXX) endif() - -if(MSVC) - set(SYSTEM_ARCHITECTURE x86_64) -else() - execute_process(COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE SYSTEM_ARCHITECTURE) -endif() - -set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules;${CMAKE_MODULE_PATH}") - -SET(EXTRA_OPERATORS "" CACHE PATH "EXTRA OPERATORS PATH") - -if("$ENV{VERBOSE}" STREQUAL "1") - message(STATUS " Verbose Makefile ACTIVATED") - set(CMAKE_VERBOSE_MAKEFILE ON) -endif() - - if(MSVC) add_definitions(-DWIN32_LEAN_AND_MEAN) add_definitions(-DDMLC_USE_CXX11) @@ -154,10 +263,6 @@ else(MSVC) endif() endif(MSVC) -if(NOT mxnet_LINKER_LIBS) - set(mxnet_LINKER_LIBS "") -endif(NOT mxnet_LINKER_LIBS) - if(USE_GPROF) message(STATUS "Using GPROF") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-omit-frame-pointer -g -pg") @@ -215,7 +320,7 @@ if(ENABLE_TESTCOVERAGE) if(NOT GCOV_PATH) message(FATAL_ERROR "gcov not found! Aborting...") endif() # NOT GCOV_PATH - + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} --coverage") @@ -223,25 +328,6 @@ if(ENABLE_TESTCOVERAGE) link_libraries(gcov) endif() -if(USE_MKLDNN) - include(cmake/DownloadMKLML.cmake) - # CPU architecture (e.g., C5) can't run on another architecture (e.g., g3). 
- if(NOT MSVC) - set(ARCH_OPT_FLAGS "-mtune=generic") - endif() - set(WITH_TEST OFF CACHE INTERNAL "" FORCE) - set(WITH_EXAMPLE OFF CACHE INTERNAL "" FORCE) - set(ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE) - - add_subdirectory(3rdparty/mkldnn) - - include_directories(3rdparty/mkldnn/include) - add_definitions(-DUSE_MKL=1) - add_definitions(-DCUB_MKL=1) - add_definitions(-DMXNET_USE_MKLDNN=1) - list(APPEND mxnet_LINKER_LIBS mkldnn) -endif() - # Allow Cuda compiles outside of src tree to find things in 'src' and 'include' include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) @@ -277,13 +363,22 @@ else() endif() if(USE_CUDA AND FIRST_CUDA) - include(cmake/ChooseBlas.cmake) include(3rdparty/mshadow/cmake/Utils.cmake) include(cmake/FirstClassLangCuda.cmake) include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) else() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mshadow/cmake) + # Workaround to prevent mshadow from processing BLAS libraries. The main problem is with MKL - if MKLML is used it + # would still try to find MKL instead and will fail. 
+ # BLAS libraries for MXNet are setup in cmake/ChooseBLAS.cmake + set(__BLAS ${BLAS}) + set(__USE_MKL_IF_AVAILABLE ${USE_MKL_IF_AVAILABLE}) + set(USE_MKL_IF_AVAILABLE False) + set(BLAS "Override") include(3rdparty/mshadow/cmake/mshadow.cmake) + set(BLAS ${__BLAS}) + set(USE_MKL_IF_AVAILABLE ${__USE_MKL_IF_AVAILABLE}) + include(3rdparty/mshadow/cmake/Utils.cmake) include(3rdparty/mshadow/cmake/Cuda.cmake) else() @@ -346,7 +441,7 @@ if(USE_GPERFTOOLS) include_directories(${GPERFTOOLS_INCLUDE_DIR}) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ALT_MALLOC_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ALT_MALLOC_FLAGS}") - set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${GPERFTOOLS_LIBRARIES}) + list(APPEND mxnet_LINKER_LIBS ${GPERFTOOLS_LIBRARIES}) set(USE_JEMALLOC 0) endif() endif() @@ -363,7 +458,7 @@ if(USE_JEMALLOC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ALT_MALLOC_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ALT_MALLOC_FLAGS}") include_directories(${JEMALLOC_INCLUDE_DIRS}) - set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${JEMALLOC_LIBRARIES}) + list(APPEND mxnet_LINKER_LIBS ${JEMALLOC_LIBRARIES}) endif() endif() @@ -386,55 +481,48 @@ endif() # ---[ OpenMP if(USE_OPENMP) - find_package(OpenMP REQUIRED) - # This should build on Windows, but there's some problem and I don't have a Windows box, so - # could a Windows user please fix? 
- if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt - AND SYSTEM_ARCHITECTURE STREQUAL "x86_64" - AND NOT MSVC - AND NOT CMAKE_CROSSCOMPILING) - - # Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp - set(OPENMP_STANDALONE_BUILD TRUE) - set(LIBOMP_ENABLE_SHARED TRUE) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp) - list(REMOVE_ITEM mxnet_LINKER_LIBS iomp5) - list(APPEND mxnet_LINKER_LIBS omp) - if(UNIX) - list(APPEND mxnet_LINKER_LIBS pthread) + # This should also identify whether compiler supports it (AppleClang for example doesn't) + find_package(OpenMP) + if(OpenMP_FOUND) + # This should build on Windows, but there's some problem and I don't have a Windows box, so + # could a Windows user please fix? + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp/CMakeLists.txt + AND SYSTEM_ARCHITECTURE STREQUAL "x86_64" + AND NOT MSVC + AND NOT CMAKE_CROSSCOMPILING) + + # Intel/llvm OpenMP: https://github.com/llvm-mirror/openmp + set(OPENMP_STANDALONE_BUILD TRUE) + set(LIBOMP_ENABLE_SHARED TRUE) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/openmp) + list(REMOVE_ITEM mxnet_LINKER_LIBS iomp5) + list(APPEND mxnet_LINKER_LIBS omp) + if(UNIX) + list(APPEND mxnet_LINKER_LIBS pthread) + endif() endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + else() - if(OPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") - endif() + message(WARNING "OpenMP support could not be found, OpenMP will be disabled") + set(USE_OPENMP 
False) endif() + elseif(UNIX AND NOT ANDROID) list(APPEND mxnet_LINKER_LIBS pthread) endif() - -# ---[ LAPack -if(USE_LAPACK) - message("USE_LAPACK is ON") - add_definitions(-DMXNET_USE_LAPACK=1) - if (NOT MSVC) - list(APPEND mxnet_LINKER_LIBS lapack) - endif() -endif() - - # ---[ jemalloc if(USE_JEMALLOC) find_package(JeMalloc) if(JEMALLOC_FOUND) add_definitions(-DUSE_JEMALLOC) include_directories(${JEMALLOC_INCLUDE_DIRS}) - set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${JEMALLOC_LIBRARIES}) + list(APPEND mxnet_LINKER_LIBS ${JEMALLOC_LIBRARIES}) endif() endif() @@ -465,8 +553,18 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/dmlc-core/cmake) endif() if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/mshadow/cmake) + # Workaroud to prevent mshadow from processing BLAS libraries. The main problem is with MKL - if MKLML is used it + # would still try to find MKL instead and will fail. + # BLAS libraries for MXNet are setup in cmake/ChooseBLAS.cmake + set(__BLAS ${BLAS}) + set(__USE_MKL_IF_AVAILABLE ${USE_MKL_IF_AVAILABLE}) + set(USE_MKL_IF_AVAILABLE False) + set(BLAS "Override") add_subdirectory("3rdparty/mshadow") + set(BLAS ${__BLAS}) + set(USE_MKL_IF_AVAILABLE ${__USE_MKL_IF_AVAILABLE}) endif() + FILE(GLOB_RECURSE SOURCE "src/*.cc" "src/*.h" "include/*.h") FILE(GLOB_RECURSE CUDA "src/*.cu" "src/*.cuh") diff --git a/ci/docker/Dockerfile.build.android_armv7 b/ci/docker/Dockerfile.build.android_armv7 index 799e29c99127..8f1248894d76 100644 --- a/ci/docker/Dockerfile.build.android_armv7 +++ b/ci/docker/Dockerfile.build.android_armv7 @@ -18,63 +18,24 @@ # # Dockerfile to build MXNet for Android ARMv7 -FROM mxnetci/dockcross-linux-base:08212018 -MAINTAINER Pedro Larroy "pllarroy@amazon.com" +FROM mxnetci/dockcross-android-arm:08212018 -# The cross-compiling emulator -RUN apt-get update && apt-get install -y \ - unzip - -ENV CROSS_TRIPLE=arm-linux-androideabi -ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} -ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ - 
AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ - CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ - CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ - CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ - LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld - -ENV ANDROID_NDK_REVISION 17b -ENV ANDROID_NDK_API 27 -ENV ANDROID_NDK_ARCH arm WORKDIR /work/deps -COPY install/android_ndk.sh /work/deps -RUN /work/deps/android_ndk.sh - -ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm - -# Build-time metadata as defined at http://label-schema.org -ARG BUILD_DATE -ARG IMAGE -ARG VCS_REF -ARG VCS_URL -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.name=$IMAGE \ - org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url=$VCS_URL \ - org.label-schema.schema-version="1.0" - -ENV CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang -ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++ - -WORKDIR /work/deps - -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh -WORKDIR /work -COPY install/ubuntu_arm.sh /work/ -RUN /work/ubuntu_arm.sh +ENV ARCH armv7l +ENV TARGET ARMV7 +ENV ARM_SOFTFP_ABI 1 +ENV OSNAME Android +ENV ONLY_CBLAS 1 +ENV HOSTCC gcc COPY install/arm_openblas.sh /work/ -COPY install/android_armv7_openblas.sh /work/deps -RUN /work/deps/android_armv7_openblas.sh +RUN /work/arm_openblas.sh ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} -WORKDIR /work +COPY install/deb_ubuntu_ccache.sh /work/ +RUN /work/deb_ubuntu_ccache.sh COPY runtime_functions.sh /work/ WORKDIR /work/mxnet - diff --git a/ci/docker/Dockerfile.build.android_armv8 b/ci/docker/Dockerfile.build.android_armv8 index 2c2c71c003f0..1f2db342f29e 100644 --- a/ci/docker/Dockerfile.build.android_armv8 +++ b/ci/docker/Dockerfile.build.android_armv8 @@ -18,62 +18,24 @@ # # Dockerfile to build MXNet for Android ARM64/ARMv8 -FROM mxnetci/dockcross-linux-base:08212018 -MAINTAINER Pedro Larroy "pllarroy@amazon.com" - -RUN apt-get update && apt-get install -y \ - unzip +FROM 
mxnetci/dockcross-android-arm64:08212018 WORKDIR /work/deps -# Build x86 dependencies. -COPY install/deb_ubuntu_ccache.sh /work/ -RUN /work/deb_ubuntu_ccache.sh - -# Setup Android cross-compilation environment. -ENV CROSS_TRIPLE=aarch64-linux-android -ENV CROSS_ROOT=/usr/${CROSS_TRIPLE} -ENV AS=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-as \ - AR=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ar \ - CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-gcc \ - CPP=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-cpp \ - CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-g++ \ - LD=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-ld - - -ENV DEFAULT_DOCKCROSS_IMAGE dockcross/android-arm - -# Build-time metadata as defined at http://label-schema.org -ARG BUILD_DATE -ARG IMAGE -ARG VCS_REF -ARG VCS_URL -LABEL org.label-schema.build-date=$BUILD_DATE \ - org.label-schema.name=$IMAGE \ - org.label-schema.vcs-ref=$VCS_REF \ - org.label-schema.vcs-url=$VCS_URL \ - org.label-schema.schema-version="1.0" - ENV ARCH aarch64 -ENV ANDROID_NDK_REVISION 17b -ENV ANDROID_NDK_API 27 -ENV ANDROID_NDK_ARCH arm64 -WORKDIR /work/deps -COPY install/android_ndk.sh /work/deps -RUN /work/deps/android_ndk.sh - +ENV TARGET ARMV8 +ENV ARM_SOFTFP_ABI 1 +ENV OSNAME Android +ENV ONLY_CBLAS 1 +ENV HOSTCC gcc -WORKDIR /work/deps -COPY install/android_ndk.sh /work/ -RUN /work/android_ndk.sh +COPY install/arm_openblas.sh /work/ +RUN /work/arm_openblas.sh -ENV CC=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang -ENV CXX=${CROSS_ROOT}/bin/${CROSS_TRIPLE}-clang++ +ENV OpenBLAS_HOME=${CROSS_ROOT} -# Build ARM dependencies. 
-COPY install/android_arm64_openblas.sh /work/ -RUN /work/android_arm64_openblas.sh -ENV CPLUS_INCLUDE_PATH /work/deps/OpenBLAS -WORKDIR /work/build +COPY install/deb_ubuntu_ccache.sh /work/ +RUN /work/deb_ubuntu_ccache.sh COPY runtime_functions.sh /work/ +WORKDIR /work/mxnet diff --git a/ci/docker/Dockerfile.build.armv6 b/ci/docker/Dockerfile.build.armv6 index 78071fa33992..d19bcc54e9e3 100644 --- a/ci/docker/Dockerfile.build.armv6 +++ b/ci/docker/Dockerfile.build.armv6 @@ -33,7 +33,6 @@ COPY install/arm_openblas.sh /work/ RUN /work/arm_openblas.sh ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} COPY install/deb_ubuntu_ccache.sh /work/ RUN /work/deb_ubuntu_ccache.sh diff --git a/ci/docker/Dockerfile.build.armv7 b/ci/docker/Dockerfile.build.armv7 index 9a23a5dbefee..3207ff4450e7 100644 --- a/ci/docker/Dockerfile.build.armv7 +++ b/ci/docker/Dockerfile.build.armv7 @@ -33,7 +33,6 @@ COPY install/arm_openblas.sh /work/ RUN /work/arm_openblas.sh ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} COPY install/deb_ubuntu_ccache.sh /work/ RUN /work/deb_ubuntu_ccache.sh diff --git a/ci/docker/Dockerfile.build.armv8 b/ci/docker/Dockerfile.build.armv8 index 46cc229d5904..b7c58575945a 100644 --- a/ci/docker/Dockerfile.build.armv8 +++ b/ci/docker/Dockerfile.build.armv8 @@ -37,7 +37,6 @@ COPY install/arm_openblas.sh /work/ RUN /work/arm_openblas.sh ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} COPY install/deb_ubuntu_ccache.sh /work/ RUN /work/deb_ubuntu_ccache.sh diff --git a/ci/docker/Dockerfile.build.jetson b/ci/docker/Dockerfile.build.jetson index 15518cd6f22e..2010ff6d94b6 100644 --- a/ci/docker/Dockerfile.build.jetson +++ b/ci/docker/Dockerfile.build.jetson @@ -42,7 +42,6 @@ COPY install/arm_openblas.sh /work/ RUN /work/arm_openblas.sh ENV OpenBLAS_HOME=${CROSS_ROOT} -ENV OpenBLAS_DIR=${CROSS_ROOT} COPY install/deb_ubuntu_ccache.sh /work/ RUN /work/deb_ubuntu_ccache.sh diff --git a/ci/docker/Dockerfile.build.ubuntu_mkl 
b/ci/docker/Dockerfile.build.ubuntu_mkl new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/ci/docker/install/android_arm64_openblas.sh b/ci/docker/install/android_arm64_openblas.sh deleted file mode 100755 index 1c3014f6cca9..000000000000 --- a/ci/docker/install/android_arm64_openblas.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . -git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make -j$(nproc) TARGET=ARMV8 ARM_SOFTFP_ABI=1 HOSTCC=gcc NOFORTRAN=1 libs -# Can't be run (utility not compiled for the target platform) -#make install -cp *.h /usr/include -cp libopenblas.a /usr/local/lib -popd diff --git a/ci/docker/install/android_armv7_openblas.sh b/ci/docker/install/android_armv7_openblas.sh deleted file mode 100755 index 55c098909654..000000000000 --- a/ci/docker/install/android_armv7_openblas.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . -git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make TARGET=ARMV7 HOSTCC=gcc NOFORTRAN=1 ARM_SOFTFP_ABI=1 -j$(nproc) libs -#make PREFIX=${CROSS_ROOT} TARGET=ARMV7 HOSTCC=gcc NOFORTRAN=1 ARM_SOFTFP_ABI=1 install -cp *.h ${CROSS_ROOT}/include -cp libopenblas*.a ${CROSS_ROOT}/lib -popd diff --git a/ci/docker/install/arm64_openblas.sh b/ci/docker/install/arm64_openblas.sh deleted file mode 100755 index 88f2e98cd65b..000000000000 --- a/ci/docker/install/arm64_openblas.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# build and install are separated so changes to build don't invalidate -# the whole docker cache for the image - -set -ex -pushd . -wget -nv https://api.github.com/repos/xianyi/OpenBLAS/git/refs/heads/master -O openblas_version.json -echo "Using openblas:" -cat openblas_version.json -git clone https://github.com/xianyi/OpenBLAS.git -cd OpenBLAS -make -j$(nproc) TARGET=ARMV8 -make install -ln -s /opt/OpenBLAS/lib/libopenblas.so /usr/lib/libopenblas.so -ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/libopenblas.a -ln -s /opt/OpenBLAS/lib/libopenblas.a /usr/lib/liblapack.a -popd diff --git a/ci/docker/install/arm_openblas.sh b/ci/docker/install/arm_openblas.sh index fa2e5cae9cba..f3152ef1fb70 100755 --- a/ci/docker/install/arm_openblas.sh +++ b/ci/docker/install/arm_openblas.sh @@ -19,7 +19,7 @@ set -ex -git clone --recursive -b v0.2.20 https://github.com/xianyi/OpenBLAS.git +git clone --recursive -b v0.3.2 https://github.com/xianyi/OpenBLAS.git cd OpenBLAS make -j$(nproc) diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh index 39631f9dc7e6..c666433eaf39 100755 --- a/ci/docker/runtime_functions.sh +++ b/ci/docker/runtime_functions.sh @@ -121,15 +121,13 @@ build_armv6() { pushd . cd /work/build - # Lapack functionality will be included and statically linked to openblas. - # But USE_LAPACK needs to be set to OFF, otherwise the main CMakeLists.txt - # file tries to add -llapack. Lapack functionality though, requires -lgfortran - # to be linked additionally. + # Lapack functionality requires -lgfortran to be linked additionally. 
# We do not need OpenMP, since most armv6 systems have only 1 core cmake \ -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ + -DCMAKE_CROSSCOMPILING=ON \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ -DUSE_CUDA=OFF \ @@ -138,7 +136,7 @@ build_armv6() { -DUSE_SIGNAL_HANDLER=ON \ -DCMAKE_BUILD_TYPE=Release \ -DUSE_MKL_IF_AVAILABLE=OFF \ - -DUSE_LAPACK=OFF \ + -DUSE_LAPACK=ON \ -DBUILD_CPP_EXAMPLES=OFF \ -Dmxnet_LINKER_LIBS=-lgfortran \ -G Ninja /work/mxnet @@ -153,10 +151,7 @@ build_armv7() { pushd . cd /work/build - # Lapack functionality will be included and statically linked to openblas. - # But USE_LAPACK needs to be set to OFF, otherwise the main CMakeLists.txt - # file tries to add -llapack. Lapack functionality though, requires -lgfortran - # to be linked additionally. + # Lapack functionality requires -lgfortran to be linked additionally. cmake \ -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} \ @@ -169,7 +164,7 @@ build_armv7() { -DUSE_SIGNAL_HANDLER=ON \ -DCMAKE_BUILD_TYPE=Release \ -DUSE_MKL_IF_AVAILABLE=OFF \ - -DUSE_LAPACK=OFF \ + -DUSE_LAPACK=ON \ -DBUILD_CPP_EXAMPLES=OFF \ -Dmxnet_LINKER_LIBS=-lgfortran \ -G Ninja /work/mxnet @@ -183,6 +178,7 @@ build_armv8() { cmake \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CROSSCOMPILING=ON \ -DUSE_CUDA=OFF\ -DSUPPORT_F16C=OFF\ -DUSE_OPENCV=OFF\ @@ -205,18 +201,19 @@ build_android_armv7() { set -ex cd /work/build cmake \ - -DANDROID=ON\ + -DANDROID=ON \ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DUSE_CUDA=OFF\ - -DUSE_SSE=OFF\ - -DSUPPORT_F16C=OFF\ - -DUSE_LAPACK=OFF\ - -DUSE_OPENCV=OFF\ - -DUSE_OPENMP=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DCMAKE_BUILD_TYPE=RelWithDebInfo\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DCMAKE_CROSSCOMPILING=ON \ + -DUSE_CUDA=OFF \ + -DUSE_SSE=OFF \ + -DSUPPORT_F16C=OFF \ + -DUSE_LAPACK=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + 
-DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja -v } @@ -224,16 +221,19 @@ build_android_armv7() { build_android_armv8() { set -ex cd /work/build - cmake\ + cmake \ -DANDROID=ON \ - -DUSE_CUDA=OFF\ - -DUSE_SSE=OFF\ - -DUSE_LAPACK=OFF\ - -DUSE_OPENCV=OFF\ - -DUSE_OPENMP=OFF\ - -DUSE_SIGNAL_HANDLER=ON\ - -DCMAKE_BUILD_TYPE=RelWithDebInfo\ - -DUSE_MKL_IF_AVAILABLE=OFF\ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CROSSCOMPILING=ON \ + -DUSE_CUDA=OFF \ + -DUSE_SSE=OFF \ + -DUSE_LAPACK=OFF \ + -DUSE_OPENCV=OFF \ + -DUSE_OPENMP=OFF \ + -DUSE_SIGNAL_HANDLER=ON \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DUSE_MKL_IF_AVAILABLE=OFF \ -G Ninja /work/mxnet ninja -v } @@ -266,7 +266,7 @@ build_amzn_linux_cpu() { -DUSE_SIGNAL_HANDLER=ON\ -DCMAKE_BUILD_TYPE=RelWithDebInfo\ -DUSE_MKL_IF_AVAILABLE=OFF\ - -DUSE_LAPACK=OFF\ + -DUSE_LAPACK=ON\ -DUSE_DIST_KVSTORE=ON\ -G Ninja /work/mxnet ninja -v @@ -614,18 +614,17 @@ build_ubuntu_gpu_cmake_mkldnn() { set -ex cd /work/build cmake \ - -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ - -DCMAKE_C_COMPILER_LAUNCHER=ccache \ - -DENABLE_TESTCOVERAGE=ON \ - -DUSE_CUDA=1 \ - -DUSE_CUDNN=1 \ - -DUSE_MKLML_MKL=1 \ - -DUSE_MKLDNN=1 \ - -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DENABLE_TESTCOVERAGE=ON \ + -DUSE_CUDA=1 \ + -DUSE_CUDNN=1 \ + -DUSE_MKL_IF_AVAILABLE=ON \ + -DUSE_MKLDNN=ON \ + -DCMAKE_BUILD_TYPE=Release \ -DCUDA_ARCH_NAME=Manual \ -DCUDA_ARCH_BIN=$CI_CMAKE_CUDA_ARCH_BIN \ - -G Ninja \ - /work/mxnet + -G Ninja /work/mxnet ninja -v # libmkldnn.so.0 is a link file. We need an actual binary file named libmkldnn.so.0. 
@@ -642,8 +641,8 @@ build_ubuntu_gpu_cmake() { -DENABLE_TESTCOVERAGE=ON \ -DUSE_CUDA=1 \ -DUSE_CUDNN=1 \ - -DUSE_MKLML_MKL=0 \ - -DUSE_MKLDNN=0 \ + -DUSE_MKL_IF_AVAILABLE=OFF \ + -DUSE_MKLDNN=OFF \ -DUSE_DIST_KVSTORE=1 \ -DCMAKE_BUILD_TYPE=Release \ -DCUDA_ARCH_NAME=Manual \ diff --git a/cmake/ChooseBLAS.cmake b/cmake/ChooseBLAS.cmake new file mode 100644 index 000000000000..a7a36c46fec2 --- /dev/null +++ b/cmake/ChooseBLAS.cmake @@ -0,0 +1,277 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(BLAS "Open" CACHE STRING "Selected BLAS library") +set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL;Apple") + +function(switch_lapack ENABLE) + if(ENABLE) + message(STATUS "Enabling LAPACK functionality") + add_definitions(-DMXNET_USE_LAPACK=1) + else() + if(USE_LAPACK) + message(WARNING "LAPACK functionality not available") + else() + message(STATUS "LAPACK functionality not available") + endif() + endif() +endfunction() + +function(try_mkldnn) + + if(NOT USE_MKLDNN) + return() + endif() + + message(STATUS "Adding MKLDNN to the build due to USE_MKLDNN=${USE_MKLDNN} and USE_MKL_IF_AVAILABLE=${USE_MKL_IF_AVAILABLE}") + + # CPU architecture (e.g., C5) can't run on another architecture (e.g., g3). 
+ if(NOT MSVC) + set(ARCH_OPT_FLAGS ${ARCH_OPT_FLAGS} "-mtune=generic" PARENT_SCOPE) + endif() + + if(MSVC) + file(COPY ${CMAKE_SOURCE_DIR}/3rdparty/mkldnn/config_template.vcxproj.user DESTINATION ${CMAKE_SOURCE_DIR}) + endif() + + set(WITH_TEST OFF CACHE INTERNAL "" FORCE) + set(WITH_EXAMPLE OFF CACHE INTERNAL "" FORCE) + set(ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE) + + if(NOT MKL_FOUND) + set(MKLDNN_USE_MKL "NONE") + endif() + + add_subdirectory(3rdparty/mkldnn) + + include_directories(3rdparty/mkldnn/include) + set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} mkldnn PARENT_SCOPE) + + add_definitions(-DMXNET_USE_MKLDNN=1) + +endfunction() + +function(try_mkl) + + message(STATUS "Trying to enable MKL framework due to USE_MKL_IF_AVAILABLE=${USE_MKL_IF_AVAILABLE}") + + if(CMAKE_CROSSCOMPILING) + message(WARNING "MKL with cross compilation is not supported, MKL will not be available") + return() + endif() + + if(NOT SYSTEM_ARCHITECTURE STREQUAL "x86_64") + message(WARNING "MKL is supported only for desktop platforms (SYSTEM_ARCHITECTURE=${SYSTEM_ARCHITECTURE}), MKL will not be available") + return() + endif() + + find_package(MKL) + + if(MKL_FOUND) + message(STATUS "MKL framework found") + + set(MKL_FOUND ${MKL_FOUND} PARENT_SCOPE) + set(MKL_INCLUDE_DIR ${MKL_INCLUDE_DIR} PARENT_SCOPE) + set(MKL_LIBRARIES ${MKL_LIBRARIES} PARENT_SCOPE) + set(MKLDNN_USE_MKL "FULL" PARENT_SCOPE) + set(MKLROOT ${MKLROOT} PARENT_SCOPE) + + set(__BLAS mkl PARENT_SCOPE) + else() + message(STATUS "MKL framework not found") + endif() + +endfunction() + +function(try_mklml) + if(NOT USE_MKLML) + return() + endif() + + if(MKL_FOUND) + return() + endif() + + message(STATUS "Trying to enable MKLML framework due to USE_MKLML=${USE_MKLML} and USE_MKL_IF_AVAILABLE=${USE_MKL_IF_AVAILABLE}") + + if(CMAKE_CROSSCOMPILING) + message(WARNING "MKLML with cross compilation is not supported, MKL will not be available") + return() + endif() + + if(NOT SYSTEM_ARCHITECTURE STREQUAL "x86_64") + message(WARNING "MKL 
is supported only for desktop platforms (SYSTEM_ARCHITECTURE=${SYSTEM_ARCHITECTURE}), MKL will not be available") + return() + endif() + + find_package(MKLML) + + if(NOT MKL_FOUND) + include(${CMAKE_CURRENT_LIST_DIR}/DownloadMKLML.cmake) + find_package(MKLML REQUIRED) + endif() + + set(MKL_FOUND ${MKL_FOUND} PARENT_SCOPE) + set(MKL_INCLUDE_DIR ${MKL_INCLUDE_DIR} PARENT_SCOPE) + set(MKL_LIBRARIES ${MKL_LIBRARIES} PARENT_SCOPE) + set(MKL_USE_INTEL_OMP ${MKL_USE_INTEL_OMP} PARENT_SCOPE) + set(MKLDNN_USE_MKL "ML" PARENT_SCOPE) + set(MKLROOT ${MKLROOT} PARENT_SCOPE) + + set(__BLAS mkl PARENT_SCOPE) + + message(STATUS "MKLML framework found") + +endfunction() + +function(try_accelerate) + if(NOT APPLE) + return() + endif() + + if(__BLAS MATCHES "mkl") + return() + endif() + + if(USE_APPLE_ACCELERATE_IF_AVAILABLE) + message(STATUS "Trying to enable Apple Accelerate framework due to USE_APPLE_ACCELERATE_IF_AVAILABLE") + find_package(Accelerate) + if(Accelerate_FOUND) + message(STATUS "Apple Accelerate framework found") + set(__BLAS accelerate PARENT_SCOPE) # __BLAS (not BLAS) is what the backend dispatch matches on + else() + message(STATUS "Apple Accelerate framework not found") + endif() + endif() +endfunction() + +set(__BLAS) +string(TOLOWER "${BLAS}" __BLAS) +mark_as_advanced(__BLAS) + +if(USE_MKL_IF_AVAILABLE) + set(MKL_FOUND) + + try_mkl() + try_mklml() + try_mkldnn() +else() + try_accelerate() +endif() + +if(__BLAS MATCHES "atlas") + message(STATUS "Using Atlas for BLAS") + + set(Atlas_NEED_LAPACK ${USE_LAPACK}) + find_package(Atlas REQUIRED) + + include_directories(SYSTEM ${Atlas_INCLUDE_DIRS}) + set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${Atlas_LIBRARIES}) + + add_definitions(-DMSHADOW_USE_CBLAS=1) + add_definitions(-DMSHADOW_USE_MKL=0) + + if(USE_LAPACK AND Atlas_LAPACK_FOUND) + switch_lapack(True) + else() + switch_lapack(False) + endif() + + return() + +endif() + +if(__BLAS MATCHES "open") + message(STATUS "Using OpenBLAS for BLAS") + + set(OpenBLAS_NEED_LAPACK ${USE_LAPACK}) + + find_package(OpenBLAS REQUIRED) + +
include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIRS}) + set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${OpenBLAS_LIBRARIES}) + + add_definitions(-DMSHADOW_USE_CBLAS=1) + add_definitions(-DMSHADOW_USE_MKL=0) + + if(USE_LAPACK AND OpenBLAS_LAPACK_FOUND) + switch_lapack(True) + else() + switch_lapack(False) + endif() + + return() +endif() + +if(__BLAS MATCHES "mkl") + message(STATUS "Using MKL for BLAS") + + if(NOT MKL_FOUND) + message(FATAL_ERROR "Blas set to MKL, but it could not be found") + endif() + + include_directories(SYSTEM ${MKL_INCLUDE_DIR}) + set(mxnet_LINKER_LIBS ${MKL_LIBRARIES} ${mxnet_LINKER_LIBS}) + + add_definitions(-DMSHADOW_USE_CBLAS=0) + add_definitions(-DMSHADOW_USE_MKL=1) + + if(USE_LAPACK) + include(CheckFunctionExists) + set(CMAKE_REQUIRED_LIBRARIES ${MKL_LIBRARIES}) + check_function_exists("cheev_" LAPACK_FOUND) + + if(LAPACK_FOUND) + switch_lapack(True) + else() + switch_lapack(False) + endif() + endif() + + return() + +endif() + +if(__BLAS MATCHES "(apple|accelerate)") + if(NOT APPLE) + message(FATAL_ERROR "Apple Accelerate framework's BLAS feature is available only on macOS") + return() + endif() + + message(STATUS "Using Apple Accelerate for BLAS") + + # Accelerate framework documentation + # https://developer.apple.com/documentation/accelerate?changes=_2 + set(Accelerate_NEED_LAPACK ${USE_LAPACK}) + find_package(Accelerate REQUIRED) + + include_directories(SYSTEM ${Accelerate_INCLUDE_DIR}) + set(mxnet_LINKER_LIBS ${mxnet_LINKER_LIBS} ${Accelerate_LIBRARIES}) + + add_definitions(-DMSHADOW_USE_CBLAS=1) + add_definitions(-DMSHADOW_USE_MKL=0) + + if(USE_LAPACK AND Accelerate_LAPACK_FOUND) + switch_lapack(True) + else() + switch_lapack(False) + endif() + + return() +endif() + +message(FATAL_ERROR "BLAS ${__BLAS} not recognized") diff --git a/cmake/ChooseBlas.cmake b/cmake/ChooseBlas.cmake deleted file mode 100644 index 13d7083f3d12..000000000000 --- a/cmake/ChooseBlas.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software 
Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -set(BLAS "Open" CACHE STRING "Selected BLAS library") -set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL") - -if(USE_MKL_IF_AVAILABLE) - if(NOT MKL_FOUND) - find_package(MKL) - endif() - if(MKL_FOUND) - if(USE_MKLDNN) - set(BLAS "open") - else() - set(BLAS "MKL") - endif() - endif() -endif() - -if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas") - find_package(Atlas REQUIRED) - include_directories(SYSTEM ${Atlas_INCLUDE_DIR}) - list(APPEND mshadow_LINKER_LIBS ${Atlas_LIBRARIES}) - add_definitions(-DMSHADOW_USE_CBLAS=1) - add_definitions(-DMSHADOW_USE_MKL=0) -elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") - find_package(OpenBLAS REQUIRED) - include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR}) - list(APPEND mshadow_LINKER_LIBS ${OpenBLAS_LIB}) - add_definitions(-DMSHADOW_USE_CBLAS=1) - add_definitions(-DMSHADOW_USE_MKL=0) -elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl") - find_package(MKL REQUIRED) - include_directories(SYSTEM ${MKL_INCLUDE_DIR}) - list(APPEND mshadow_LINKER_LIBS ${MKL_LIBRARIES}) - add_definitions(-DMSHADOW_USE_CBLAS=0) - add_definitions(-DMSHADOW_USE_MKL=1) -elseif(BLAS STREQUAL "apple") - find_package(Accelerate REQUIRED) - include_directories(SYSTEM ${Accelerate_INCLUDE_DIR}) - 
list(APPEND mshadow_LINKER_LIBS ${Accelerate_LIBRARIES}) - add_definitions(-DMSHADOW_USE_MKL=0) - add_definitions(-DMSHADOW_USE_CBLAS=1) -endif() diff --git a/cmake/DownloadMKLML.cmake b/cmake/DownloadMKLML.cmake index c2c1cd6916f6..a25f69e0c559 100644 --- a/cmake/DownloadMKLML.cmake +++ b/cmake/DownloadMKLML.cmake @@ -40,8 +40,6 @@ if(MSVC) message(STATUS "Setting MKLROOT path to ${MKLROOT}") - include_directories(${MKLROOT}/include) - elseif(APPLE) set(MKL_NAME "mklml_mac_${MKLML_RELEASE_FILE_SUFFIX}") diff --git a/cmake/Modules/FindAccelerate.cmake b/cmake/Modules/FindAccelerate.cmake index 8bdc665a3aaf..4dcb7e411c87 100644 --- a/cmake/Modules/FindAccelerate.cmake +++ b/cmake/Modules/FindAccelerate.cmake @@ -19,20 +19,53 @@ # # The following are set after configuration is done: # Accelerate_FOUND -# Accelerate_INCLUDE_DIRS +# Accelerate_LAPACK_FOUND +# Accelerate_INCLUDE_DIR # Accelerate_LIBRARIES -file(TO_CMAKE_PATH "$ENV{Accelerate_HOME}" Accelerate_HOME) +if(Accelerate_FOUND) + return() +endif() + set(Accelerate_INCLUDE_SEARCH_PATHS - /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Versions/Current - ${Accelerate_HOME} -) + ${Accelerate_INCLUDE_SEARCH_PATHS} + + /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Versions/Current -find_path(Accelerate_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Accelerate_INCLUDE_SEARCH_PATHS} PATH_SUFFIXES Headers) + $ENV{Accelerate_HOME} + ${Accelerate_HOME} + ) + +find_path(Accelerate_CBLAS_INCLUDE_DIR + NAMES cblas.h + PATHS ${Accelerate_INCLUDE_SEARCH_PATHS} + PATH_SUFFIXES Headers) set(LOOKED_FOR Accelerate_CBLAS_INCLUDE_DIR -) + ) + +set(Accelerate_LAPACK_FOUND) + +if(Accelerate_NEED_LAPACK) + message(STATUS "Looking for LAPACK support...") + + # Apple's vecLib should contain LAPACK functionalities included in the Accelerate + # framework, but we will double check + # https://developer.apple.com/documentation/accelerate/veclib?changes=_2 
+ include(CheckFunctionExists) + set(CMAKE_REQUIRED_LIBRARIES "-framework Accelerate") + check_function_exists("cgees_" LAPACK_FOUND) + + if(LAPACK_FOUND) + set(Accelerate_LAPACK_FOUND True) + message(STATUS "LAPACK found") + else() + message(WARNING "Apple's Accelerate LAPACK support could not be identified, \ + LAPACK functionality will not be available") + endif() + +endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Accelerate DEFAULT_MSG ${LOOKED_FOR}) @@ -40,8 +73,9 @@ find_package_handle_standard_args(Accelerate DEFAULT_MSG ${LOOKED_FOR}) if(Accelerate_FOUND) set(Accelerate_INCLUDE_DIR ${Accelerate_CBLAS_INCLUDE_DIR}) set(Accelerate_LIBRARIES "-framework Accelerate") + mark_as_advanced(${LOOKED_FOR}) - message(STATUS "Found Accelerate (include: ${Accelerate_CBLAS_INCLUDE_DIR}, library: ${Accelerate_BLAS_LIBRARY})") -endif(Accelerate_FOUND) + message(STATUS "Found Apple Accelerate (include: ${Accelerate_CBLAS_INCLUDE_DIR}, library: ${Accelerate_LIBRARIES})") +endif() diff --git a/cmake/Modules/FindAtlas.cmake b/cmake/Modules/FindAtlas.cmake index 27aaa0e856ab..2cb48bbf7713 100644 --- a/cmake/Modules/FindAtlas.cmake +++ b/cmake/Modules/FindAtlas.cmake @@ -18,54 +18,127 @@ # Find the Atlas (and Lapack) libraries # # The following variables are optionally searched for defaults -# Atlas_ROOT_DIR: Base directory where all Atlas components are found -# Atlas_NEED_LAPACK: Whether need lapack libraries +# +# Atlas_ROOT_DIR: Base directory where all Atlas components are found +# Atlas_NEED_LAPACK: Whether need LAPACK libraries # # The following are set after configuration is done: -# Atlas_FOUND -# Atlas_INCLUDE_DIRS -# Atlas_LIBRARIES -# Atlas_LIBRARYRARY_DIRS +# +# Atlas_FOUND +# Atlas_LAPACK_FOUND +# Atlas_INCLUDE_DIRS +# Atlas_LIBRARIES + +if(Atlas_FOUND) + return() +endif() + +if(CMAKE_CROSSCOMPILING) + set(Atlas_INCLUDE_SEARCH_PATHS + ${Atlas_INCLUDE_SEARCH_PATHS} + + "$ENV{CROSS_ROOT}" + "${CROSS_ROOT}" + ) +endif() 
set(Atlas_INCLUDE_SEARCH_PATHS - /usr/include/atlas - /usr/include/atlas-base - $ENV{Atlas_ROOT_DIR} - $ENV{Atlas_ROOT_DIR}/include - $ENV{Atlas_ROOT_DIR}/include/atlas -) + ${Atlas_INCLUDE_SEARCH_PATHS} + + "$ENV{Atlas_ROOT_DIR}" + "${Atlas_ROOT_DIR}" + + /usr/include/atlas + /usr/include/atlas-base + ) + +if(${CMAKE_CROSSCOMPILING}) + set(Atlas_LIB_SEARCH_PATHS + ${Atlas_LIB_SEARCH_PATHS} + + "$ENV{CROSS_ROOT}" + "${CROSS_ROOT}" + ) +endif() set(Atlas_LIB_SEARCH_PATHS - /usr/lib/atlas - /usr/lib/atlas-base - $ENV{Atlas_ROOT_DIR} - $ENV{Atlas_ROOT_DIR}/lib -) + ${Atlas_LIB_SEARCH_PATHS} + + "$ENV{Atlas_ROOT_DIR}" + "${Atlas_ROOT_DIR}" + + /usr/lib/atlas + /usr/lib/atlas-base + ) -find_path(Atlas_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) -find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) +find_path(Atlas_CBLAS_INCLUDE_DIR + NAMES cblas.h + PATHS ${Atlas_INCLUDE_SEARCH_PATHS} + PATH_SUFFIXES include) + +find_library(Atlas_CBLAS_LIBRARY + NAMES ptcblas_r ptcblas cblas_r cblas + PATHS ${Atlas_LIB_SEARCH_PATHS} + PATH_SUFFIXES lib) +find_library(Atlas_BLAS_LIBRARY + NAMES atlas_r atlas + PATHS ${Atlas_LIB_SEARCH_PATHS} + PATH_SUFFIXES lib) set(LOOKED_FOR - Atlas_CBLAS_INCLUDE_DIR + Atlas_CBLAS_INCLUDE_DIR + + Atlas_CBLAS_LIBRARY + Atlas_BLAS_LIBRARY + ) - Atlas_CBLAS_LIBRARY - Atlas_BLAS_LIBRARY -) +set(Atlas_LAPACK_FOUND) +set(Atlas_CLAPACK_INCLUDE_DIR) +set(Atlas_LAPACK_LIBRARY) if(Atlas_NEED_LAPACK) - find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) - find_library(Atlas_LAPACK_LIBRARY NAMES alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) - set(LOOKED_FOR ${LOOKED_FOR} Atlas_CLAPACK_INCLUDE_DIR Atlas_LAPACK_LIBRARY) -endif(Atlas_NEED_LAPACK) + message(STATUS "Looking for LAPACK support...") + + # we need another variables (starting with __) because 
cmake will not overwrite it if already set + find_path(__Atlas_CLAPACK_INCLUDE_DIR + NAMES clapack.h + PATHS ${Atlas_INCLUDE_SEARCH_PATHS} + PATH_SUFFIXES include) + + find_library(__Atlas_LAPACK_LIBRARY + NAMES lapack_r lapack lapack_atlas + PATHS ${Atlas_LIB_SEARCH_PATHS} + PATH_SUFFIXES lib) + + set(CMAKE_REQUIRED_LIBRARIES ${__Atlas_LAPACK_LIBRARY}) # probe the library located just above; Atlas_LAPACK_LIBRARY is still empty here + include(CheckFunctionExists) + check_function_exists("cgees_" LAPACK_FOUND) + + if(LAPACK_FOUND) + set(Atlas_LAPACK_FOUND True) + set(Atlas_CLAPACK_INCLUDE_DIR ${__Atlas_CLAPACK_INCLUDE_DIR}) + set(Atlas_LAPACK_LIBRARY ${__Atlas_LAPACK_LIBRARY}) + + set(LOOKED_FOR + ${LOOKED_FOR} + Atlas_CLAPACK_INCLUDE_DIR + Atlas_LAPACK_LIBRARY) + + message(STATUS "LAPACK found") + else() + message(WARNING "LAPACK with Atlas could not be found, LAPACK functionality will not be available") + endif() + +endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Atlas DEFAULT_MSG ${LOOKED_FOR}) -if(ATLAS_FOUND) - set(Atlas_INCLUDE_DIR ${Atlas_CBLAS_INCLUDE_DIR} ${Atlas_CLAPACK_INCLUDE_DIR}) - set(Atlas_LIBRARIES ${Atlas_LAPACK_LIBRARY} ${Atlas_CBLAS_LIBRARY} ${Atlas_BLAS_LIBRARY}) +if(Atlas_FOUND) + set(Atlas_INCLUDE_DIRS "${Atlas_CBLAS_INCLUDE_DIR}" "${Atlas_CLAPACK_INCLUDE_DIR}") + set(Atlas_LIBRARIES "${Atlas_LAPACK_LIBRARY}" "${Atlas_CBLAS_LIBRARY}" "${Atlas_BLAS_LIBRARY}") + mark_as_advanced(${LOOKED_FOR}) - message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR}, library: ${Atlas_BLAS_LIBRARY})") -endif(ATLAS_FOUND) + message(STATUS "Found Atlas (include: ${Atlas_INCLUDE_DIRS}, libraries: ${Atlas_LIBRARIES})") +endif() diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake index 70405566d8ae..4c2692c28f67 100644 --- a/cmake/Modules/FindMKL.cmake +++ b/cmake/Modules/FindMKL.cmake @@ -19,8 +19,6 @@ # # Options: # -# USE_MKLDNN : Search for MKL:ML library variant -# # MKL_USE_SINGLE_DYNAMIC_LIBRARY : use single dynamic library interface # MKL_USE_STATIC_LIBS : use static libraries #
MKL_MULTI_THREADED : use multi-threading @@ -30,170 +28,153 @@ # This module defines the following variables: # # MKL_FOUND : True mkl is found -# MKL_INCLUDE_DIR : unclude directory -# MKL_LIBRARIES : the libraries to link against. -# -# cjolivier01: Changed to also look for MKLDNN library (subset of mkl) instead of standard MKL package +# MKL_INCLUDE_DIR : include directory +# MKL_LIBRARIES : the libraries to link against # if(MKL_FOUND) return() endif() +# ---[ Options +mxnet_option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON) +mxnet_option(MKL_USE_STATIC_LIBS "Use static libraries" OFF IF NOT ${MKL_USE_SINGLE_DYNAMIC_LIBRARY}) +mxnet_option(MKL_MULTI_THREADED "Use multi-threading" ON IF NOT ${MKL_USE_SINGLE_DYNAMIC_LIBRARY}) +mxnet_option(MKL_USE_ILP64 "Use ilp64 data model" OFF) +mxnet_option(MKL_USE_CLUSTER "Use cluster functions" OFF IF ${CMAKE_SIZEOF_VOID_P} EQUAL 4) + # ---[ Root folders set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs") -if(USE_MKLDNN) - - find_path(MKL_ROOT include/mkl_blas.h - PATHS $ENV{MKL_ROOT} - ${INTEL_ROOT}/mklml - ${DIRECT_DEPENDENCY_ROOTS} - DOC "Folder contains MKL" - ) - - # ---[ Find include dir - find_path(MKL_INCLUDE_DIR mkl_blas.h PATHS ${MKL_ROOT} PATH_SUFFIXES include) - set(__looked_for MKL_INCLUDE_DIR) +if(NOT MKLROOT) + get_filename_component(MKLROOT "${MKL_INCLUDE_DIR}" DIRECTORY) +endif() - # ---[ Find libraries - if(CMAKE_SIZEOF_VOID_P EQUAL 4) - set(__path_suffixes lib lib/ia32) - else() - set(__path_suffixes lib lib/intel64) - endif() +set(MKL_INCLUDE_SEARCH_PATHS + ${MKL_INCLUDE_SEARCH_PATHS} - set(__mkl_libs "") + "$ENV{MKLROOT}" + "${MKLROOT}" - if(WIN32) - list(APPEND __mkl_libs mklml_intel) - else() - list(APPEND __mkl_libs mklml_gnu) - endif() - list(APPEND __mkl_libs mkldnn) + "${INTEL_ROOT}/mkl" - foreach (__lib ${__mkl_libs}) - set(__mkl_lib "${__lib}") - string(TOUPPER ${__mkl_lib} __mkl_lib_upper) + /usr + /usr/local + ) - if(MKL_USE_STATIC_LIBS) - 
set(__mkl_lib "lib${__mkl_lib}.a") - endif() +# ---[ Find include dir +find_path(MKL_INCLUDE_DIR mkl.h + PATHS ${MKL_INCLUDE_SEARCH_PATHS} + PATH_SUFFIXES include) - find_library(${__mkl_lib_upper}_LIBRARY - NAMES ${__mkl_lib} - PATHS ${MKL_ROOT} "${MKL_INCLUDE_DIR}/.." - PATH_SUFFIXES ${__path_suffixes} - DOC "The path to Intel(R) MKL ${__mkl_lib} library") - mark_as_advanced(${__mkl_lib_upper}_LIBRARY) +set(MKL_LIB_SEARCH_PATHS + ${MKL_LIB_SEARCH_PATHS} - list(APPEND __looked_for ${__mkl_lib_upper}_LIBRARY) - list(APPEND MKL_LIBRARIES ${${__mkl_lib_upper}_LIBRARY}) - endforeach() + "$ENV{MKLROOT}" + "${MKLROOT}" -else(USE_MKLDNN) + "${INTEL_RTL_ROOT}" + "${INTEL_ROOT}" + "${INTEL_ROOT}/mkl" + "${INTEL_ROOT}/compiler" + "${MKLROOT}/.." + "${MKLROOT}/../compiler" - # ---[ Options - mxnet_option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON) - mxnet_option(MKL_USE_STATIC_LIBS "Use static libraries" OFF IF NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY) - mxnet_option(MKL_MULTI_THREADED "Use multi-threading" ON IF NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY) - mxnet_option(MKL_USE_ILP64 "Use ilp64 data model" OFF) - mxnet_option(MKL_USE_CLUSTER "Use cluster functions" OFF IF CMAKE_SIZEOF_VOID_P EQUAL 4) + /usr + /usr/local + ) - find_path(MKL_ROOT include/mkl.h PATHS $ENV{MKL_ROOT} ${INTEL_ROOT}/mkl - DOC "Folder contains MKL") +set(__looked_for MKL_INCLUDE_DIR) - # ---[ Find include dir - find_path(MKL_INCLUDE_DIR mkl.h PATHS ${MKL_ROOT} PATH_SUFFIXES include) - set(__looked_for MKL_INCLUDE_DIR) +# ---[ Find libraries +if(CMAKE_SIZEOF_VOID_P EQUAL 4) + set(__path_suffixes lib lib/ia32) +else() + set(__path_suffixes lib lib64 lib/intel64) +endif() - # ---[ Find libraries +set(__mkl_libs "") +if(MKL_USE_SINGLE_DYNAMIC_LIBRARY) + list(APPEND __mkl_libs rt) +else() if(CMAKE_SIZEOF_VOID_P EQUAL 4) - set(__path_suffixes lib lib/ia32) - else() - set(__path_suffixes lib lib/intel64) - endif() - - set(__mkl_libs "") - if(MKL_USE_SINGLE_DYNAMIC_LIBRARY) - list(APPEND 
__mkl_libs rt) - else() - if(CMAKE_SIZEOF_VOID_P EQUAL 4) - if(WIN32) - list(APPEND __mkl_libs intel_c) - else() - list(APPEND __mkl_libs intel) - if(CMAKE_COMPILER_IS_GNUFORTRAN) - list(APPEND __mkl_libs gf) - endif() - endif() + if(WIN32) + list(APPEND __mkl_libs intel_c) else() - set(__mkl_lib64_suffix "lp64") - if(MKL_USE_ILP64) - set(__mkl_lib64_suffix "ilp64") - add_definitions(-DMKL_ILP64) - endif() - list(APPEND __mkl_libs "intel_${__mkl_lib64_suffix}") + list(APPEND __mkl_libs intel) if(CMAKE_COMPILER_IS_GNUFORTRAN) - list(APPEND __mkl_libs "gf_${__mkl_lib64_suffix}") + list(APPEND __mkl_libs gf) endif() endif() - - if(MKL_MULTI_THREADED) - list(APPEND __mkl_libs intel_thread) - else() - list(APPEND __mkl_libs sequential) + else() + set(__mkl_lib64_suffix "lp64") + if(MKL_USE_ILP64) + set(__mkl_lib64_suffix "ilp64") + add_definitions(-DMKL_ILP64) endif() - - list(APPEND __mkl_libs core) - if(CMAKE_SIZEOF_VOID_P EQUAL 8 AND MKL_USE_CLUSTER) - list(APPEND __mkl_libs cdft_core) + list(APPEND __mkl_libs "intel_${__mkl_lib64_suffix}") + if(CMAKE_COMPILER_IS_GNUFORTRAN) + list(APPEND __mkl_libs "gf_${__mkl_lib64_suffix}") endif() endif() + if(MKL_MULTI_THREADED) + list(APPEND __mkl_libs intel_thread) + else() + list(APPEND __mkl_libs sequential) + endif() - foreach (__lib ${__mkl_libs}) - set(__mkl_lib "mkl_${__lib}") - string(TOUPPER ${__mkl_lib} __mkl_lib_upper) + list(APPEND __mkl_libs core) + if(CMAKE_SIZEOF_VOID_P EQUAL 8 AND MKL_USE_CLUSTER) + list(APPEND __mkl_libs cdft_core) + endif() +endif() - if(MKL_USE_STATIC_LIBS) - set(__mkl_lib "lib${__mkl_lib}.a") - endif() - find_library(${__mkl_lib_upper}_LIBRARY - NAMES ${__mkl_lib} - PATHS ${MKL_ROOT} "${MKL_INCLUDE_DIR}/.." 
- PATH_SUFFIXES ${__path_suffixes} - DOC "The path to Intel(R) MKL ${__mkl_lib} library") - mark_as_advanced(${__mkl_lib_upper}_LIBRARY) +foreach(__lib ${__mkl_libs}) + set(__mkl_lib "mkl_${__lib}") + string(TOUPPER ${__mkl_lib} __mkl_lib_upper) - list(APPEND __looked_for ${__mkl_lib_upper}_LIBRARY) - list(APPEND MKL_LIBRARIES ${${__mkl_lib_upper}_LIBRARY}) - endforeach() + if(MKL_USE_STATIC_LIBS) + set(__mkl_lib "lib${__mkl_lib}.a") + endif() + message(STATUS "Finding ${__mkl_lib_upper}_LIBRARY in ${MKL_LIB_SEARCH_PATHS}") - if(NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY) - if (MKL_USE_STATIC_LIBS) - set(__iomp5_libs iomp5 libiomp5mt.lib) - else() - set(__iomp5_libs iomp5 libiomp5md.lib) - endif() + find_library(${__mkl_lib_upper}_LIBRARY + NAMES ${__mkl_lib} mkl_${__mkl_lib} + PATHS ${MKL_LIB_SEARCH_PATHS} + PATH_SUFFIXES ${__path_suffixes} + DOC "The path to Intel(R) MKL ${__mkl_lib} library") + mark_as_advanced(${__mkl_lib_upper}_LIBRARY) - if(WIN32) - find_path(INTEL_INCLUDE_DIR omp.h PATHS ${INTEL_ROOT} PATH_SUFFIXES include) - list(APPEND __looked_for INTEL_INCLUDE_DIR) - endif() + list(APPEND __looked_for ${__mkl_lib_upper}_LIBRARY) + list(APPEND MKL_LIBRARIES ${${__mkl_lib_upper}_LIBRARY}) +endforeach() - find_library(MKL_RTL_LIBRARY ${__iomp5_libs} - PATHS ${INTEL_RTL_ROOT} ${INTEL_ROOT}/compiler ${MKL_ROOT}/..
${MKL_ROOT}/../compiler - PATH_SUFFIXES ${__path_suffixes} - DOC "Path to Path to OpenMP runtime library") - list(APPEND __looked_for MKL_RTL_LIBRARY) - list(APPEND MKL_LIBRARIES ${MKL_RTL_LIBRARY}) +if(NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY) + if(MKL_USE_STATIC_LIBS) + set(__iomp5_libs iomp5 libiomp5mt.lib) + else() + set(__iomp5_libs iomp5 libiomp5md.lib) endif() -endif(USE_MKLDNN) + if(WIN32) + find_path(INTEL_INCLUDE_DIR omp.h + PATHS "${INTEL_ROOT}" + PATH_SUFFIXES include) + list(APPEND __looked_for INTEL_INCLUDE_DIR) + endif() + + find_library(MKL_RTL_LIBRARY ${__iomp5_libs} + PATHS ${MKL_LIB_SEARCH_PATHS} + PATH_SUFFIXES ${__path_suffixes} + DOC "Path to OpenMP runtime library") + + list(APPEND __looked_for MKL_RTL_LIBRARY) + list(APPEND MKL_LIBRARIES ${MKL_RTL_LIBRARY}) +endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(MKL DEFAULT_MSG ${__looked_for}) diff --git a/cmake/Modules/FindMKLML.cmake b/cmake/Modules/FindMKLML.cmake new file mode 100644 index 000000000000..23c89f33cafc --- /dev/null +++ b/cmake/Modules/FindMKLML.cmake @@ -0,0 +1,137 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Find the MKLML (subset of MKL) libraries +# +# The following variables are optionally searched for defaults +# +# MKLROOT: Base directory where all MKL/MKLML components are found +# +# The following are set after configuration is done: +# +# MKL_FOUND +# MKL_INCLUDE_DIR +# MKL_LIBRARIES +# MKL_USE_INTEL_OMP + +if(MKL_FOUND) + return() +endif() + +set(MKLML_INCLUDE_SEARCH_PATHS + ${MKLML_INCLUDE_SEARCH_PATHS} + + "$ENV{MKLROOT}" + "${MKLROOT}" + + "${PROJECT_SOURCE_DIR}/3rdparty/MKLML" + + /usr + /usr/local + ) + +# ---[ Find libraries +if(CMAKE_SIZEOF_VOID_P EQUAL 4) + set(__PATH_SUFFIXES lib lib/ia32) +else() + set(__PATH_SUFFIXES lib lib64 lib/intel64) +endif() + +mark_as_advanced(__PATH_SUFFIXES) + +set(MKLML_LIB_SEARCH_PATHS + ${MKLML_LIB_SEARCH_PATHS} + + "$ENV{MKLROOT}" + "${MKLROOT}" + + "${PROJECT_SOURCE_DIR}/3rdparty/MKLML" + + /usr + /usr/local + ) + +find_path(MKLML_INCLUDE_DIR + NAMES mkl_blas.h + PATHS ${MKLML_INCLUDE_SEARCH_PATHS} + PATH_SUFFIXES include) + +set(LOOKED_FOR + MKLML_INCLUDE_DIR + ) + +set(MKLML_LIBRARIES) + +# Find Intel OpenMP +set(MKL_USE_INTEL_OMP) + +find_library(IOMP5_LIBRARY + NAMES iomp5 libiomp5.lib libiomp5md.lib + PATHS ${MKLML_LIB_SEARCH_PATHS} + PATH_SUFFIXES ${__PATH_SUFFIXES} + ) +mark_as_advanced(IOMP5_LIBRARY) + +if(IOMP5_LIBRARY) + list(APPEND LOOKED_FOR IOMP5_LIBRARY) + list(APPEND MKLML_LIBRARIES ${IOMP5_LIBRARY}) + + set(MKL_USE_INTEL_OMP True) +endif() + +# add static windows libs first +set(__MKL_LIB_NAMES libmklml.lib libmklmlmd.lib) + +if(MKL_USE_INTEL_OMP) + list(APPEND __MKL_LIB_NAMES mklml_intel) +endif() + +list(APPEND __MKL_LIB_NAMES mklml_gnu mklml) + +mark_as_advanced(__MKL_LIB_NAMES) + +find_library(MKLML_LIBRARY + NAMES ${__MKL_LIB_NAMES} + PATHS ${MKLML_LIB_SEARCH_PATHS} + PATH_SUFFIXES ${__PATH_SUFFIXES} + ) + +mark_as_advanced(MKLML_LIBRARY) + +if(MKLML_LIBRARY MATCHES "mklml_intel") + set(MKL_USE_INTEL_OMP True) + message("MKLML uses Intel OpenMP libraries") +else() + set(MKL_USE_INTEL_OMP 
False) + message("MKLML uses platform provided OpenMP libraries") +endif() + +list(APPEND LOOKED_FOR MKLML_LIBRARY) +list(APPEND MKLML_LIBRARIES ${MKLML_LIBRARY}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(MKLML DEFAULT_MSG ${LOOKED_FOR}) + +if(MKLML_FOUND) + set(MKL_FOUND ${MKLML_FOUND}) + set(MKL_LIBRARIES ${MKLML_LIBRARIES}) + set(MKL_INCLUDE_DIR "${MKLML_INCLUDE_DIR}") + + mark_as_advanced(${LOOKED_FOR}) + + message(STATUS "Found MKLML (include: ${MKL_INCLUDE_DIR}, libraries: ${MKL_LIBRARIES})") +endif() diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake index a3a79caae461..f2f2587e653b 100644 --- a/cmake/Modules/FindOpenBLAS.cmake +++ b/cmake/Modules/FindOpenBLAS.cmake @@ -15,77 +15,137 @@ # specific language governing permissions and limitations # under the License. -file(TO_CMAKE_PATH "$ENV{OpenBLAS_HOME}" OpenBLAS_HOME) -file(TO_CMAKE_PATH "$ENV{OpenBLAS}" OpenBLAS_DIR) - -SET(Open_BLAS_INCLUDE_SEARCH_PATHS - /usr/include - /usr/include/openblas - /usr/include/openblas-base - /usr/local/include - /usr/local/include/openblas - /usr/local/include/openblas-base - /opt/OpenBLAS/include - /usr/local/opt/openblas/include - ${PROJECT_SOURCE_DIR}/3rdparty/OpenBLAS/include - ${PROJECT_SOURCE_DIR}/thirdparty/OpenBLAS/include - ${OpenBLAS_HOME} - ${OpenBLAS_HOME}/include -) - -SET(Open_BLAS_LIB_SEARCH_PATHS - /lib/ - /lib/openblas-base - /lib64/ - /usr/lib - /usr/lib/openblas-base - /usr/lib64 - /usr/local/lib - /usr/local/lib64 - /opt/OpenBLAS/lib - /usr/local/opt/openblas/lib - ${PROJECT_SOURCE_DIR}/3rdparty/OpenBLAS/lib - ${PROJECT_SOURCE_DIR}/thirdparty/OpenBLAS/lib - ${OpenBLAS_DIR} - ${OpenBLAS_DIR}/lib - ${OpenBLAS_HOME} - ${OpenBLAS_HOME}/lib - ) - -FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS}) -FIND_LIBRARY(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) -IF(NOT OpenBLAS_LIB) - FIND_FILE(OpenBLAS_LIB NAMES libopenblas.dll.a PATHS 
${Open_BLAS_LIB_SEARCH_PATHS}) -ENDIF() - -SET(OpenBLAS_FOUND ON) - -# Check include files -IF(NOT OpenBLAS_INCLUDE_DIR) - SET(OpenBLAS_FOUND OFF) - MESSAGE(STATUS "Could not find OpenBLAS include. Turning OpenBLAS_FOUND off") -ENDIF() - -# Check libraries -IF(NOT OpenBLAS_LIB) - SET(OpenBLAS_FOUND OFF) - MESSAGE(STATUS "Could not find OpenBLAS lib. Turning OpenBLAS_FOUND off") -ENDIF() - -IF (OpenBLAS_FOUND) - IF (NOT OpenBLAS_FIND_QUIETLY) - MESSAGE(STATUS "Found OpenBLAS libraries: ${OpenBLAS_LIB}") - MESSAGE(STATUS "Found OpenBLAS include: ${OpenBLAS_INCLUDE_DIR}") - ENDIF (NOT OpenBLAS_FIND_QUIETLY) -ELSE (OpenBLAS_FOUND) - IF (OpenBLAS_FIND_REQUIRED) - MESSAGE(FATAL_ERROR "Could not find OpenBLAS") - ENDIF (OpenBLAS_FIND_REQUIRED) -ENDIF (OpenBLAS_FOUND) - -MARK_AS_ADVANCED( +# Find the OpenBLAS libraries +# +# The following variables are optionally searched for defaults +# +# OpenBLAS_HOME: Base directory where all OpenBLAS components are found +# OpenBLAS_NEED_LAPACK: Whether need lapack libraries +# +# The following are set after configuration is done: +# +# OpenBLAS_FOUND +# OpenBLAS_LAPACK_FOUND +# OpenBLAS_INCLUDE_DIRS +# OpenBLAS_LIBRARIES + +if(OpenBLAS_FOUND) + return() +endif() + +if(CMAKE_CROSSCOMPILING) + set(OpenBLAS_INCLUDE_SEARCH_PATHS + ${OpenBLAS_INCLUDE_SEARCH_PATHS} + + "$ENV{CROSS_ROOT}" + "${CROSS_ROOT}" + ) +endif() + +set(OpenBLAS_INCLUDE_SEARCH_PATHS + ${OpenBLAS_INCLUDE_SEARCH_PATHS} + + "$ENV{OpenBLAS_HOME}" + "${OpenBLAS_HOME}" + + /usr + /usr/include/openblas + /usr/include/openblas-base + /usr/local + /usr/local/include/openblas + /usr/local/include/openblas-base + /opt/OpenBLAS + /usr/local/opt/openblas + + "${PROJECT_SOURCE_DIR}/3rdparty/OpenBLAS" + "${PROJECT_SOURCE_DIR}/thirdparty/OpenBLAS" + ) + +if(CMAKE_CROSSCOMPILING) + set(OpenBLAS_LIB_SEARCH_PATHS + ${OpenBLAS_LIB_SEARCH_PATHS} + + "$ENV{CROSS_ROOT}" + "${CROSS_ROOT}" + ) +endif() + +set(OpenBLAS_LIB_SEARCH_PATHS + ${OpenBLAS_LIB_SEARCH_PATHS} + + "$ENV{OpenBLAS_HOME}"
+ "${OpenBLAS_HOME}" + + / + /lib/openblas-base + /usr + /usr/lib/openblas-base + /usr/local/ + /opt/OpenBLAS + /usr/local/opt/openblas + + "${PROJECT_SOURCE_DIR}/3rdparty/OpenBLAS" + "${PROJECT_SOURCE_DIR}/thirdparty/OpenBLAS" + ) + +find_path(OpenBLAS_INCLUDE_DIR + NAMES cblas.h + PATHS ${OpenBLAS_INCLUDE_SEARCH_PATHS} + PATH_SUFFIXES include) + +set(OpenBLAS_LIB_NAMES openblas libopenblas.dll.a libopenblas.dll) + +if(CMAKE_CROSSCOMPILING) + message(STATUS "Will try to link to OpenBLAS statically") + set(OpenBLAS_LIB_NAMES libopenblas.a ${OpenBLAS_LIB_NAMES}) +endif() + +# For some reason setting this is really important and on windows the library is not found even given exact file name +if(MSVC) + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) +endif() + +find_library(OpenBLAS_LIBRARY + NAMES ${OpenBLAS_LIB_NAMES} + PATHS ${OpenBLAS_LIB_SEARCH_PATHS} + PATH_SUFFIXES lib lib64) + +set(LOOKED_FOR OpenBLAS_INCLUDE_DIR - OpenBLAS_LIB - OpenBLAS -) + OpenBLAS_LIBRARY + ) + +set(OpenBLAS_LAPACK_FOUND) +set(OpenBLAS_LAPACK_LIBRARY) +set(OpenBLAS_LAPACK_INCLUDE_DIR) + +if(OpenBLAS_NEED_LAPACK) + message(STATUS "Looking for LAPACK support...") + + # OpenBLAS does not have a separate LAPACK library: https://github.com/xianyi/OpenBLAS/issues/296 + # LAPACK if present in OpenBLAS build is included into libopenblas + + set(CMAKE_REQUIRED_LIBRARIES ${OpenBLAS_LIBRARY}) + include(CheckFunctionExists) + check_function_exists("cheev_" LAPACK_FOUND) + + if(LAPACK_FOUND) + set(OpenBLAS_LAPACK_FOUND True) + message(STATUS "LAPACK found") + else() + message(WARNING "OpenBlas has not been compiled with LAPACK support, LAPACK functionality will not be available") + endif() + +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(OpenBLAS DEFAULT_MSG ${LOOKED_FOR}) + +if(OpenBLAS_FOUND) + set(OpenBLAS_INCLUDE_DIRS "${OpenBLAS_INCLUDE_DIR}") + set(OpenBLAS_LIBRARIES "${OpenBLAS_LIBRARY}") + + mark_as_advanced(${LOOKED_FOR}) + 
message(STATUS "Found OpenBLAS (include: ${OpenBLAS_INCLUDE_DIRS}, libraries: ${OpenBLAS_LIBRARIES})") +endif() diff --git a/src/operator/rnn_impl.h b/src/operator/rnn_impl.h index e1b4a2b79c0a..9b2c000b3a27 100644 --- a/src/operator/rnn_impl.h +++ b/src/operator/rnn_impl.h @@ -34,6 +34,7 @@ #include #include #include +#include #include "./math.h" #include "./math_functions-inl.h" #include "./operator_common.h" @@ -149,7 +150,6 @@ void LstmForwardTraining(DType* ws, const int r_size = D * T * N * H * 6; const int y_offset = T * N * H * 5; const int cell_size = N * H; - unsigned int seed_ = 17 + rand() % 4096; // NOLINT(runtime/threadsafe_fn) int idx = 0; // state & cell state's idx; const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); for (int i = 0; i < L; ++i) { @@ -174,17 +174,21 @@ void LstmForwardTraining(DType* ws, w_ptr += w_size; b_ptr += b_size; if (dropout > 0.0f) { - #pragma omp parallel for num_threads(omp_threads) - for (int j = 0; j < T * N * H * D; j++) { - int rand_data = rand_r(&seed_); - if (static_cast(rand_data % 1000) < static_cast(1000 * dropout)) { - dropout_random[i * T * N * H * D + j] = 0; - y.dptr_[j] = 0; - } else { - dropout_random[i * T * N * H * D + j] = 1.0f - dropout; - y.dptr_[j] = y.dptr_[j] / (1.0f - dropout); + #pragma omp parallel for num_threads(omp_threads) + for (int j = 0; j < T * N * H * D; j++) { + static thread_local std::random_device device; + static thread_local std::default_random_engine generator(device()); + static thread_local std::uniform_int_distribution distribution; + static thread_local auto dice = std::bind(distribution, generator); + int rand_data = dice(); + if (static_cast(rand_data % 1000) < static_cast(1000 * dropout)) { + dropout_random[i * T * N * H * D + j] = 0; + y.dptr_[j] = 0; + } else { + dropout_random[i * T * N * H * D + j] = 1.0f - dropout; + y.dptr_[j] = y.dptr_[j] / (1.0f - dropout); + } } - } } x_ptr = y.dptr_; rs2 += r_size; @@ -994,7 +998,6 @@ void 
GruForwardTraining(DType* ws, DType* bx_l = bx; DType* bh_l = bh; DType* y_tmp = x_ptr; - unsigned int seed_ = 17 + rand() % 4096; // NOLINT(runtime/threadsafe_fn) for (int l = 0; l < L; l++) { if (l != 0) { y_tmp = y_l; @@ -1004,7 +1007,11 @@ void GruForwardTraining(DType* ws, const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); #pragma omp parallel for num_threads(omp_threads) for (int i = 0; i < T * N * I; i++) { - int rand_data = rand_r(&seed_); + static thread_local std::random_device device; + static thread_local std::default_random_engine generator(device()); + static thread_local std::uniform_int_distribution distribution; + static thread_local auto dice = std::bind(distribution, generator); + int rand_data = dice(); if (static_cast(rand_data % 1000) < static_cast(1000 * dropout)) { dropout_random[(l - 1) * T * N * I + i] = 0; y_tmp[i] = 0; @@ -1881,7 +1888,6 @@ void VanillaRNNForwardTraining(DType* ws, DType* bh_l = bh; DType* y_tmp = x_ptr; const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); - unsigned int seed_ = 17 + rand() % 4096; // NOLINT(runtime/threadsafe_fn) for (int l = 0; l < L; l++) { if (l != 0) { y_tmp = y_l; @@ -1890,7 +1896,11 @@ void VanillaRNNForwardTraining(DType* ws, if (dropout > 0.0f && l > 0) { #pragma omp parallel for num_threads(omp_threads) for (int i = 0; i < T * N * I; i++) { - int rand_data = rand_r(&seed_); + static thread_local std::random_device device; + static thread_local std::default_random_engine generator(device()); + static thread_local std::uniform_int_distribution distribution; + static thread_local auto dice = std::bind(distribution, generator); + int rand_data = dice(); if (static_cast(rand_data % 1000) < static_cast(1000 * dropout)) { dropout_random[(l - 1) * T * N * I + i] = 0; y_tmp[i] = 0; diff --git a/tests/cpp/engine/threaded_engine_test.cc b/tests/cpp/engine/threaded_engine_test.cc index 6d669c19bcaa..963fb5a865e2 100644 --- 
a/tests/cpp/engine/threaded_engine_test.cc +++ b/tests/cpp/engine/threaded_engine_test.cc @@ -33,6 +33,7 @@ #include #include #include +#include #include "../src/engine/engine_impl.h" #include "../include/test_util.h" @@ -58,17 +59,21 @@ void GenerateWorkload(int num_workloads, int num_var, int min_read, int max_read, int min_time, int max_time, std::vector* workloads) { + static thread_local std::default_random_engine generator(seed_); + static thread_local std::uniform_int_distribution distribution; + static thread_local auto dice = std::bind(distribution, generator); + workloads->clear(); workloads->resize(num_workloads); for (int i = 0; i < num_workloads; ++i) { auto& wl = workloads->at(i); - wl.write = rand_r(&seed_) % num_var; - int r = rand_r(&seed_); + wl.write = dice() % num_var; + int r = dice(); int num_read = min_read + (r % (max_read - min_read)); for (int j = 0; j < num_read; ++j) { - wl.reads.push_back(rand_r(&seed_) % num_var); + wl.reads.push_back(dice() % num_var); } - wl.time = min_time + rand_r(&seed_) % (max_time - min_time); + wl.time = min_time + dice() % (max_time - min_time); } } diff --git a/tests/cpp/include/test_ndarray_utils.h b/tests/cpp/include/test_ndarray_utils.h index f5ab96794ada..c0bcc61d2430 100644 --- a/tests/cpp/include/test_ndarray_utils.h +++ b/tests/cpp/include/test_ndarray_utils.h @@ -29,6 +29,8 @@ #include #include #include +#include + #include "test_util.h" #include "test_op.h" @@ -47,16 +49,13 @@ inline void CheckDataRegion(const TBlob &src, const TBlob &dst) { EXPECT_EQ(equals, 0); } -inline unsigned gen_rand_seed() { - time_t timer; - ::time(&timer); - return static_cast(timer); -} - inline float RandFloat() { - static unsigned seed = gen_rand_seed(); - double v = rand_r(&seed) * 1.0 / RAND_MAX; - return static_cast(v); + static thread_local std::random_device device; + static thread_local std::default_random_engine generator(device()); + static thread_local std::uniform_real_distribution distribution; + static 
thread_local auto dice = std::bind(distribution, generator); + + return dice(); } // Get an NDArray with provided indices, prepared for a RowSparse NDArray.