diff --git a/CMakeLists.txt b/CMakeLists.txt
index e3db6c6..8fc6cc9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -102,9 +102,16 @@ endif()
 
 if(IPCL_ENABLE_OMP)
   add_compile_definitions(IPCL_USE_OMP)
-  if(IPCL_THREAD_COUNT)
-    add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT})
-  endif()
+  ipcl_get_core_thread_count(num_cores num_threads num_nodes)
+  if(IPCL_THREAD_COUNT)
+    # Clamp an over-large request to the detected hardware thread count.
+    # ProcessorCount() yields 0 when detection fails; skip the clamp then so
+    # the user-supplied value is not overwritten with 0.
+    if(num_threads GREATER 0 AND IPCL_THREAD_COUNT GREATER num_threads)
+      set(IPCL_THREAD_COUNT ${num_threads})
+    endif()
+    add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT})
+  endif()
 endif()
 
 if(IPCL_DETECT_CPU_RUNTIME)
@@ -112,22 +119,25 @@ if(IPCL_DETECT_CPU_RUNTIME)
   add_compile_definitions(IPCL_RUNTIME_DETECT_CPU_FEATURES)
   set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH};$ORIGIN/cpufeatures")
 else()
+  # Socket count detected at configure time by ipcl_get_core_thread_count();
+  # num_nodes is only populated when OpenMP support is enabled.
+  if(IPCL_ENABLE_OMP)
+    add_compile_definitions(IPCL_NUM_NODES=${num_nodes})
+  endif()
+
   # check whether cpu support avx512ifma instructions
-  ipcl_detect_lscpu_flag("avx512ifma" FALSE)
+  ipcl_detect_lscpu_flag("avx512ifma")
   if(IPCL_FOUND_avx512ifma)
     add_compile_definitions(IPCL_CRYPTO_MB_MOD_EXP)
-    message(STATUS "Support AVX512IFMA instruction: True")
   endif()
 
   # check whether cpu support rdseed/rdrand instructions
-  ipcl_detect_lscpu_flag("rdseed" FALSE)
+  ipcl_detect_lscpu_flag("rdseed")
   if(IPCL_FOUND_rdseed)
-    message(STATUS "Support RDSEED instruction: True")
     add_compile_definitions(IPCL_RNG_INSTR_RDSEED)
   else()
-    ipcl_detect_lscpu_flag("rdrand" FALSE)
+    ipcl_detect_lscpu_flag("rdrand")
     if(IPCL_FOUND_rdrand)
-      message(STATUS "Support RDRAND instruction: True")
       add_compile_definitions(IPCL_RNG_INSTR_RDRAND)
     else()
       message(WARNING
diff --git a/README.md b/README.md
index 932bf11..03cfcea 100644
--- a/README.md
+++ b/README.md
@@ -60,21 +60,17 @@ The following libraries and tools are also required,
 ```
 nasm >= 2.15
 OpenSSL >= 1.1.0
-numa >= 2.0.12
 ```
 
-On Ubuntu, ```OpenSSL``` and ```numa``` can be installed with:
+```OpenSSL``` can be installed with:
 ```bash
-sudo apt update
-sudo apt install nasm # for Ubuntu 20.04 or higher
-sudo apt install libssl-dev libnuma-dev
+# Ubuntu
+sudo apt install libssl-dev
+# Fedora (RHEL 8, CentOS)
+sudo dnf install openssl-devel
 ```
-For Ubuntu 18.04, RHEL and CentOS, please refer to the [Netwide Assembler webpage](https://nasm.us/) for installation details.
 
-For RHEL and CentOS, the required libraries can be installed via:
-```
-sudo yum install numactl-devel openssl-devel
-```
+In order to install ```nasm```, please refer to the [Netwide Assembler webpage](https://nasm.us/) for download and installation details.
 
 ### Instructions
 The library can be built using the following commands:
diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake
index 625c150..65c1a74 100644
--- a/cmake/ipcl/ipcl-util.cmake
+++ b/cmake/ipcl/ipcl-util.cmake
@@ -31,19 +31,15 @@ function(ipcl_create_archive target dependency)
 endfunction()
 
 
-function(ipcl_detect_lscpu_flag flag verbose)
+function(ipcl_detect_lscpu_flag flag)
   # Detect IFMA by parsing lscpu
   set(LSCPU_FLAG ${flag})
   execute_process(COMMAND lscpu COMMAND grep ${LSCPU_FLAG} OUTPUT_VARIABLE LSCPU_FLAG)
   if("${LSCPU_FLAG}" STREQUAL "")
-    if(verbose)
-      message(STATUS "Support ${flag}: False")
-    endif()
+    message(STATUS "Support ${flag}: False")
     set(IPCL_FOUND_${flag} FALSE PARENT_SCOPE)
   else()
-    if(verbose)
-      message(STATUS "Support ${flag}: True")
-    endif()
+    message(STATUS "Support ${flag}: True")
     set(IPCL_FOUND_${flag} TRUE PARENT_SCOPE)
   endif()
 endfunction()
@@ -118,3 +114,51 @@ function(ipcl_define_icp_variables OutVariable)
       ${ICP_API_DIR}/include/lac
       PARENT_SCOPE)
 endfunction()
+
+# Detect the host CPU topology at configure time.
+#
+# Each argument names a variable that is set in the caller's scope:
+#   cores   - physical core count: the thread count halved when SMT is
+#             reported active, otherwise equal to the thread count
+#   threads - processor count from ProcessorCount() (0 if undetermined)
+#   nodes   - socket count from the "Socket(s):" line of lscpu; 1 when
+#             lscpu is unavailable or reports no numeric value
+function(ipcl_get_core_thread_count cores threads nodes)
+  include(ProcessorCount)
+
+  # Get number of threads
+  ProcessorCount(n_threads)
+  set(${threads} ${n_threads} PARENT_SCOPE)
+  message(STATUS "# of threads: ${n_threads}")
+
+  # check hyperthreading; the sysfs entry may be missing, in which case
+  # cat's error is silenced and SMT is treated as inactive
+  execute_process(
+    COMMAND cat /sys/devices/system/cpu/smt/active
+    OUTPUT_VARIABLE IS_HYPERTHREADING
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET)
+  if("${IS_HYPERTHREADING}" STREQUAL "1")
+    math(EXPR n_cores "${n_threads} / 2")
+  else()
+    set(n_cores ${n_threads})
+  endif()
+
+  set(${cores} ${n_cores} PARENT_SCOPE)
+  message(STATUS "# of physical cores: ${n_cores}")
+
+  # check number of nodes; match the digits explicitly so that missing
+  # lscpu output or a non-numeric value cannot reach the compile definition
+  execute_process(
+    COMMAND lscpu
+    OUTPUT_VARIABLE lscpu_output
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    ERROR_QUIET)
+  if("${lscpu_output}" MATCHES "Socket\\(s\\):[ \t]*([0-9]+)")
+    set(n_nodes ${CMAKE_MATCH_1})
+  else()
+    set(n_nodes 1)
+  endif()
+  message(STATUS "# of nodes: ${n_nodes}")
+  set(${nodes} ${n_nodes} PARENT_SCOPE)
+endfunction()
diff --git a/ipcl/CMakeLists.txt b/ipcl/CMakeLists.txt
index 57ddf25..b3a9e07 100644
--- a/ipcl/CMakeLists.txt
+++ b/ipcl/CMakeLists.txt
@@ -12,6 +12,7 @@ set(IPCL_SRCS pri_key.cpp
     utils/context.cpp
     utils/util.cpp
     utils/common.cpp
+    utils/parse_cpuinfo.cpp
 )
 
 if(IPCL_SHARED)
@@ -114,7 +115,7 @@ endif()
 
 find_package(OpenSSL REQUIRED)
 find_package(Threads REQUIRED)
-target_link_libraries(ipcl PUBLIC OpenSSL::SSL OpenSSL::Crypto Threads::Threads -lnuma)
+target_link_libraries(ipcl PUBLIC OpenSSL::SSL OpenSSL::Crypto Threads::Threads)
 
 if(IPCL_ENABLE_OMP)
   find_package(OpenMP REQUIRED)
diff --git a/ipcl/include/ipcl/utils/parse_cpuinfo.hpp b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp
new file mode 100644
index 0000000..d0313c0
--- /dev/null
+++ b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp
@@ -0,0 +1,109 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_
+#define IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_
+
+#include <algorithm>
+#include <cctype>
+#include <exception>
+#include <fstream>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+namespace ipcl {
+// trim from start (in place)
+static inline void ltrim(std::string& s) {
+  s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) {
+            return !std::isspace(ch);
+          }));
+}
+
+// trim from end (in place)
+static inline void rtrim(std::string& s) {
+  s.erase(std::find_if(s.rbegin(), s.rend(),
+                       [](unsigned char ch) { return !std::isspace(ch); })
+              .base(),
+          s.end());
+}
+
+// trim from both ends (in place)
+static inline void trim(std::string& s) {
+  ltrim(s);
+  rtrim(s);
+}
+
+// CPU topology summary filled in by parseCPUInfo().
+struct linuxCPUInfo {
+  int n_processors = 0;  // "processor" entries, i.e. logical CPUs
+  int n_cores = 0;       // distinct (physical id, core id) pairs
+  int n_nodes = 0;       // distinct "physical id" values
+};
+
+// Scans /proc/cpuinfo and fills `info`.
+//
+// n_processors is incremented once per "processor" entry; n_cores and n_nodes
+// are set from the distinct core and socket ids seen, each with a floor of 1.
+// Cores are counted as distinct (socket, core) pairs rather than as
+// max core id + 1, since core ids may be non-contiguous.
+//
+// If the file cannot be opened nothing is read and no exception is raised.
+// Throws std::runtime_error on a stream error or a non-numeric id value.
+static inline void parseCPUInfo(linuxCPUInfo& info) {
+  std::ifstream cpuinfo;
+  cpuinfo.exceptions(std::ifstream::badbit);
+
+  try {
+    cpuinfo.open("/proc/cpuinfo", std::ios::in);
+
+    std::set<int> sockets;
+    std::set<std::pair<int, int>> cores;
+    int physical_id = 0;
+    int core_id = -1;  // -1: no "core id" seen in the current entry
+
+    // Records the (socket, core) pair of the entry that just ended.
+    auto flush_entry = [&]() {
+      if (core_id >= 0) cores.emplace(physical_id, core_id);
+      physical_id = 0;
+      core_id = -1;
+    };
+
+    std::string line;
+    while (std::getline(cpuinfo, line)) {
+      std::stringstream ss(line);
+      std::string key, val;
+      if (std::getline(ss, key, ':') && std::getline(ss, val)) {
+        trim(key);
+        trim(val);
+        if (key == "processor") {
+          flush_entry();
+          info.n_processors++;
+        } else if (key == "core id") {
+          core_id = std::stoi(val);
+        } else if (key == "physical id") {
+          physical_id = std::stoi(val);
+          sockets.insert(physical_id);
+        }
+      }
+    }
+    flush_entry();
+
+    info.n_nodes = std::max(1, static_cast<int>(sockets.size()));
+    info.n_cores = std::max(1, static_cast<int>(cores.size()));
+  } catch (const std::exception&) {
+    // Covers std::ifstream::failure as well as the std::invalid_argument /
+    // std::out_of_range that std::stoi raises on malformed values.
+    std::ostringstream log;
+    log << "\nFile: " << __FILE__ << "\nLine: " << __LINE__ << "\nError: "
+        << "cannot parse /proc/cpuinfo";
+    throw std::runtime_error(log.str());
+  }
+}
+linuxCPUInfo GetLinuxCPUInfo(void);
+
+}  // namespace ipcl
+
+#endif  // IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_
diff --git a/ipcl/include/ipcl/utils/util.hpp b/ipcl/include/ipcl/utils/util.hpp
index 7a4b510..a9f5ca6 100644
--- a/ipcl/include/ipcl/utils/util.hpp
+++ b/ipcl/include/ipcl/utils/util.hpp
@@ -9,2 +9,4 @@
+
+#include "ipcl/utils/parse_cpuinfo.hpp"
 #endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
 
@@ -41,6 +43,22 @@ inline void vec_size_check(const std::vector<T>& v, const char* file,
 
 #define VEC_SIZE_CHECK(v) vec_size_check(v, __FILE__, __LINE__)
 
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+static const bool disable_avx512ifma =
+    (std::getenv("IPCL_DISABLE_AVX512IFMA") != nullptr);
+static const bool prefer_rdrand =
+    (std::getenv("IPCL_PREFER_RDRAND") != nullptr);
+static const bool prefer_ipp_prng =
+    (std::getenv("IPCL_PREFER_IPP_PRNG") != nullptr);
+static const cpu_features::X86Features features =
+    cpu_features::GetX86Info().features;
+static const bool has_avx512ifma = features.avx512ifma && !disable_avx512ifma;
+static const bool has_rdseed =
+    features.rdseed && !prefer_rdrand && !prefer_ipp_prng;
+static const bool has_rdrand = features.rdrnd && prefer_rdrand;
+
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
+
 #ifdef IPCL_USE_OMP
 class OMPUtilities {
  public:
@@ -57,9 +75,20 @@ class OMPUtilities {
   }
 
  private:
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+  static const linuxCPUInfo cpuinfo;
+  static const linuxCPUInfo getLinuxCPUInfo() { return GetLinuxCPUInfo(); }
+#endif
   static const int nodes;
   static const int cpus;
 
+  static int getNodes() {
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+    return cpuinfo.n_nodes;
+#else
+    return IPCL_NUM_NODES;
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
+  }
   static int getMaxThreads() {
 #ifdef IPCL_NUM_THREADS
     return IPCL_NUM_THREADS;
@@ -71,22 +100,6 @@ class OMPUtilities {
 
 #endif  // IPCL_USE_OMP
 
-#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
-static const bool disable_avx512ifma =
-    (std::getenv("IPCL_DISABLE_AVX512IFMA") != nullptr);
-static const bool prefer_rdrand =
-    (std::getenv("IPCL_PREFER_RDRAND") != nullptr);
-static const bool prefer_ipp_prng =
-    (std::getenv("IPCL_PREFER_IPP_PRNG") != nullptr);
-static const cpu_features::X86Features features =
-    cpu_features::GetX86Info().features;
-static const bool has_avx512ifma = features.avx512ifma && !disable_avx512ifma;
-static const bool has_rdseed =
-    features.rdseed && !prefer_rdrand && !prefer_ipp_prng;
-static const bool has_rdrand = features.rdrnd && prefer_rdrand;
-
-#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
-
 }  // namespace ipcl
 
 #endif  // IPCL_INCLUDE_IPCL_UTILS_UTIL_HPP_
diff --git a/ipcl/utils/parse_cpuinfo.cpp b/ipcl/utils/parse_cpuinfo.cpp
new file mode 100644
index 0000000..2c1b135
--- /dev/null
+++ b/ipcl/utils/parse_cpuinfo.cpp
@@ -0,0 +1,13 @@
+// Copyright (C) 2022 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ipcl/utils/parse_cpuinfo.hpp"
+
+#include <fstream>
+#include <sstream>
+
+ipcl::linuxCPUInfo ipcl::GetLinuxCPUInfo(void) {
+  ipcl::linuxCPUInfo info;
+  ipcl::parseCPUInfo(info);
+  return info;
+}
diff --git a/ipcl/utils/util.cpp b/ipcl/utils/util.cpp
index 06a1adc..d0ffdfe 100644
--- a/ipcl/utils/util.cpp
+++ b/ipcl/utils/util.cpp
@@ -3,17 +3,17 @@
 
 #include "ipcl/utils/util.hpp"
 
-#ifdef IPCL_USE_OMP
-#include <numa.h>
-#endif  // IPCL_USE_OMP
+#include <thread>  // NOLINT [build/c++11]
 
 namespace ipcl {
 
 #ifdef IPCL_USE_OMP
-const int OMPUtilities::nodes = numa_num_configured_nodes();
-const int OMPUtilities::cpus = numa_num_configured_cpus();
+#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES
+const linuxCPUInfo OMPUtilities::cpuinfo = OMPUtilities::getLinuxCPUInfo();
+#endif  // IPCL_RUNTIME_DETECT_CPU_FEATURES
+const int OMPUtilities::cpus = std::thread::hardware_concurrency();
+const int OMPUtilities::nodes = OMPUtilities::getNodes();
 const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads();
-
 #endif  // IPCL_USE_OMP
 
 }  // namespace ipcl