From a7d1c257abef241af8eb76512905c1f9a8c7367b Mon Sep 17 00:00:00 2001 From: Sejun Kim Date: Wed, 16 Nov 2022 17:11:15 -0800 Subject: [PATCH 01/11] Refactor thread count implementation --- CMakeLists.txt | 7 ++++--- cmake/ipcl/ipcl-util.cmake | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e3db6c6..4cb4165 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,9 +102,10 @@ endif() if(IPCL_ENABLE_OMP) add_compile_definitions(IPCL_USE_OMP) - if(IPCL_THREAD_COUNT) - add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT}) - endif() + ipcl_get_core_thread_count(IPCL_CORE_COUNT IPCL_THREAD_COUNT ON) + if(IPCL_THREAD_COUNT) + add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT}) + endif() endif() if(IPCL_DETECT_CPU_RUNTIME) diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake index 625c150..e795f74 100644 --- a/cmake/ipcl/ipcl-util.cmake +++ b/cmake/ipcl/ipcl-util.cmake @@ -118,3 +118,26 @@ function(ipcl_define_icp_variables OutVariable) ${ICP_API_DIR}/include/lac PARENT_SCOPE) endfunction() + +function(ipcl_get_core_thread_count cores threads verbose) + include(ProcessorCount) + + # Get number threads + ProcessorCount(N) + set(${threads} ${N} PARENT_SCOPE) + + # parse smt active + execute_process(COMMAND cat /sys/devices/system/cpu/smt/active OUTPUT_VARIABLE IS_HYPERTHREADING) + if("${IS_HYPERTHREADING}" STREQUAL "1") + math(EXPR n_cores "${N} / 2" ) + if(verbose) + message(STATUS "# of physical cores: ${n_cores}") + endif() + set(${cores} ${n_cores} PARENT_SCOPE) + else() + if(verbose) + message(STATUS "# of physical cores: ${N}") + endif() + set(${cores} ${N} PARENT_SCOPE) + endif() +endfunction() From 0e9595f38e9f643bb47569f2c4d2f90a3a7303b1 Mon Sep 17 00:00:00 2001 From: Sejun Kim Date: Wed, 16 Nov 2022 17:14:08 -0800 Subject: [PATCH 02/11] Add hyperthreading message --- cmake/ipcl/ipcl-util.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake index e795f74..370f989 100644 --- a/cmake/ipcl/ipcl-util.cmake +++ b/cmake/ipcl/ipcl-util.cmake @@ -131,11 +131,13 @@ function(ipcl_get_core_thread_count cores threads verbose) if("${IS_HYPERTHREADING}" STREQUAL "1") math(EXPR n_cores "${N} / 2" ) if(verbose) + message(STATUS "Hyperthreading is ON") message(STATUS "# of physical cores: ${n_cores}") endif() set(${cores} ${n_cores} PARENT_SCOPE) else() if(verbose) + message(STATUS "Hyperthreading is OFF") message(STATUS "# of physical cores: ${N}") endif() set(${cores} ${N} PARENT_SCOPE) From 9ff6fcd15a2eb9d9ff51c83e994e09378eb3bca9 Mon Sep 17 00:00:00 2001 From: sejunkim Date: Wed, 16 Nov 2022 17:14:25 -0800 Subject: [PATCH 03/11] Added output_strip_trailing_whitespace --- cmake/ipcl/ipcl-util.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake index 370f989..6ac01f3 100644 --- a/cmake/ipcl/ipcl-util.cmake +++ b/cmake/ipcl/ipcl-util.cmake @@ -127,17 +127,17 @@ function(ipcl_get_core_thread_count cores threads verbose) set(${threads} ${N} PARENT_SCOPE) # parse smt active - execute_process(COMMAND cat /sys/devices/system/cpu/smt/active OUTPUT_VARIABLE IS_HYPERTHREADING) + execute_process(COMMAND cat /sys/devices/system/cpu/smt/active OUTPUT_VARIABLE IS_HYPERTHREADING OUTPUT_STRIP_TRAILING_WHITESPACE) if("${IS_HYPERTHREADING}" STREQUAL "1") math(EXPR n_cores "${N} / 2" ) if(verbose) - message(STATUS "Hyperthreading is ON") + message(STATUS "Hyperthreading - ON") message(STATUS "# of physical cores: ${n_cores}") endif() set(${cores} ${n_cores} PARENT_SCOPE) else() if(verbose) - message(STATUS "Hyperthreading is OFF") + message(STATUS "Hyperthreading - OFF") message(STATUS "# of physical cores: ${N}") endif() set(${cores} ${N} PARENT_SCOPE) From a4916fe3cad0530883b561552b595ddcec58aad2 Mon Sep 17 00:00:00 2001 From: sejunkim Date: Wed, 16 Nov 2022 21:10:48 -0800 Subject: [PATCH 04/11] Remove numa and add lscpu parser to get number of threads/cores --- CMakeLists.txt | 19 +++++++++------ README.md | 11 ++++----- cmake/ipcl/ipcl-util.cmake | 41 ++++++++++++++++---------------- ipcl/CMakeLists.txt | 2 +- ipcl/include/ipcl/utils/util.hpp | 11 +-------- ipcl/utils/util.cpp | 7 ------ 6 files changed, 39 insertions(+), 52 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cb4165..7c356a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,9 +102,17 @@ endif() if(IPCL_ENABLE_OMP) add_compile_definitions(IPCL_USE_OMP) - ipcl_get_core_thread_count(IPCL_CORE_COUNT IPCL_THREAD_COUNT ON) + ipcl_get_core_thread_count(num_cores num_threads num_sockets) if(IPCL_THREAD_COUNT) + if(IPCL_THREAD_COUNT GREATER num_threads) + set(IPCL_THREAD_COUNT ${num_threads}) + endif() add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT}) + else() + # if thread_count not explicitly set, use all available physical cores + # (no hyperthreading) + set(IPCL_THREAD_COUNT ${num_cores}) + add_compile_definitions(IPCL_NUM_THREADS=${num_cores}) endif() endif() @@ -114,21 +122,18 @@ if(IPCL_DETECT_CPU_RUNTIME) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH};$ORIGIN/cpufeatures") else() # check whether cpu support avx512ifma instructions - ipcl_detect_lscpu_flag("avx512ifma" FALSE) + ipcl_detect_lscpu_flag("avx512ifma") if(IPCL_FOUND_avx512ifma) add_compile_definitions(IPCL_CRYPTO_MB_MOD_EXP) - message(STATUS "Support AVX512IFMA instruction: True") endif() # check whether cpu support rdseed/rdrand instructions - ipcl_detect_lscpu_flag("rdseed" FALSE) + ipcl_detect_lscpu_flag("rdseed") if(IPCL_FOUND_rdseed) - message(STATUS "Support RDSEED instruction: True") add_compile_definitions(IPCL_RNG_INSTR_RDSEED) else() - ipcl_detect_lscpu_flag("rdrand" FALSE) + ipcl_detect_lscpu_flag("rdrand") if(IPCL_FOUND_rdrand) - message(STATUS "Support RDRAND instruction: True") add_compile_definitions(IPCL_RNG_INSTR_RDRAND) else() message(WARNING diff --git a/README.md b/README.md index 932bf11..a6f7723 100644 --- a/README.md +++ b/README.md @@ -60,20 +60,19 @@ The following libraries and tools are also required, ``` nasm >= 2.15 OpenSSL >= 1.1.0 -numa >= 2.0.12 ``` -On Ubuntu, ```OpenSSL``` and ```numa``` can be installed with: +On Ubuntu 22.04, ```OpenSSL``` and ```nasm``` can be installed with: ```bash sudo apt update -sudo apt install nasm # for Ubuntu 20.04 or higher -sudo apt install libssl-dev libnuma-dev +sudo apt install nasm +sudo apt install libssl-dev ``` -For Ubuntu 18.04, RHEL and CentOS, please refer to the [Netwide Assembler webpage](https://nasm.us/) for installation details. +For Ubuntu 20.04 or lower, RHEL and CentOS, please refer to the [Netwide Assembler webpage](https://nasm.us/) for installation details. For RHEL and CentOS, the required libraries can be installed via: ``` -sudo yum install numactl-devel openssl-devel +sudo yum install openssl-devel ``` ### Instructions diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake index 6ac01f3..152d90b 100644 --- a/cmake/ipcl/ipcl-util.cmake +++ b/cmake/ipcl/ipcl-util.cmake @@ -31,19 +31,15 @@ function(ipcl_create_archive target dependency) endfunction() -function(ipcl_detect_lscpu_flag flag verbose) +function(ipcl_detect_lscpu_flag flag) # Detect IFMA by parsing lscpu set(LSCPU_FLAG ${flag}) execute_process(COMMAND lscpu COMMAND grep ${LSCPU_FLAG} OUTPUT_VARIABLE LSCPU_FLAG) if("${LSCPU_FLAG}" STREQUAL "") - if(verbose) - message(STATUS "Support ${flag}: False") - endif() + message(STATUS "Support ${flag}: False") set(IPCL_FOUND_${flag} FALSE PARENT_SCOPE) else() - if(verbose) - message(STATUS "Support ${flag}: True") - endif() + message(STATUS "Support ${flag}: True") set(IPCL_FOUND_${flag} TRUE PARENT_SCOPE) endif() endfunction() @@ -119,27 +115,30 @@ function(ipcl_define_icp_variables OutVariable) PARENT_SCOPE) endfunction() -function(ipcl_get_core_thread_count cores threads verbose) +function(ipcl_get_core_thread_count cores threads sockets) include(ProcessorCount) # Get number threads - ProcessorCount(N) - set(${threads} ${N} PARENT_SCOPE) + ProcessorCount(n_threads) + set(${threads} ${n_threads} PARENT_SCOPE) + message(STATUS "# of threads: ${n_threads}") - # parse smt active + # check hyperthreading execute_process(COMMAND cat /sys/devices/system/cpu/smt/active OUTPUT_VARIABLE IS_HYPERTHREADING OUTPUT_STRIP_TRAILING_WHITESPACE) if("${IS_HYPERTHREADING}" STREQUAL "1") - math(EXPR n_cores "${N} / 2" ) - if(verbose) - message(STATUS "Hyperthreading - ON") - message(STATUS "# of physical cores: ${n_cores}") - endif() + math(EXPR n_cores "${n_threads} / 2" ) set(${cores} ${n_cores} PARENT_SCOPE) else() - if(verbose) - message(STATUS "Hyperthreading - OFF") - message(STATUS "# of physical cores: ${N}") - endif() - set(${cores} ${N} PARENT_SCOPE) + set(n_cores ${n_threads}) endif() + + set(${cores} ${n_cores} PARENT_SCOPE) + message(STATUS "# of physical cores: ${n_cores}") + + # check number of nodes + execute_process(COMMAND lscpu COMMAND grep Socket OUTPUT_VARIABLE output_sockets OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" output_sockets_list "${output_sockets}") + list(GET output_sockets_list -1 n_sockets) + message(STATUS "* of sockets: ${n_sockets}") + set(${sockets} ${n_sockets} PARENT_SCOPE) endfunction() diff --git a/ipcl/CMakeLists.txt b/ipcl/CMakeLists.txt index 57ddf25..d6ec688 100644 --- a/ipcl/CMakeLists.txt +++ b/ipcl/CMakeLists.txt @@ -114,7 +114,7 @@ endif() find_package(OpenSSL REQUIRED) find_package(Threads REQUIRED) -target_link_libraries(ipcl PUBLIC OpenSSL::SSL OpenSSL::Crypto Threads::Threads -lnuma) +target_link_libraries(ipcl PUBLIC OpenSSL::SSL OpenSSL::Crypto Threads::Threads) if(IPCL_ENABLE_OMP) find_package(OpenMP REQUIRED) diff --git a/ipcl/include/ipcl/utils/util.hpp b/ipcl/include/ipcl/utils/util.hpp index 7a4b510..ed4b668 100644 --- a/ipcl/include/ipcl/utils/util.hpp +++ b/ipcl/include/ipcl/utils/util.hpp @@ -57,16 +57,7 @@ class OMPUtilities { } private: - static const int nodes; - static const int cpus; - - static int getMaxThreads() { -#ifdef IPCL_NUM_THREADS - return IPCL_NUM_THREADS; -#else - return cpus / nodes; -#endif // IPCL_NUM_THREADS - } + static int getMaxThreads() { return IPCL_NUM_THREADS; } }; #endif // IPCL_USE_OMP diff --git a/ipcl/utils/util.cpp b/ipcl/utils/util.cpp index 06a1adc..1dc0030 100644 --- a/ipcl/utils/util.cpp +++ b/ipcl/utils/util.cpp @@ -3,17 +3,10 @@ #include "ipcl/utils/util.hpp" -#ifdef IPCL_USE_OMP -#include -#endif // IPCL_USE_OMP - namespace ipcl { #ifdef IPCL_USE_OMP -const int OMPUtilities::nodes = numa_num_configured_nodes(); -const int OMPUtilities::cpus = numa_num_configured_cpus(); const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads(); - #endif // IPCL_USE_OMP } // namespace ipcl From f767e1f399a6df88e6b1a7617537f5a882aa8d88 Mon Sep 17 00:00:00 2001 From: sejunkim Date: Wed, 16 Nov 2022 21:16:28 -0800 Subject: [PATCH 05/11] Updated README section on dependency installation --- README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a6f7723..03cfcea 100644 --- a/README.md +++ b/README.md @@ -62,18 +62,15 @@ nasm >= 2.15 OpenSSL >= 1.1.0 ``` -On Ubuntu 22.04, ```OpenSSL``` and ```nasm``` can be installed with: +```OpenSSL``` can be installed with: ```bash -sudo apt update -sudo apt install nasm +# Ubuntu sudo apt install libssl-dev +# Fedora (RHEL 8, Centos) +sudo dnf install openssl-devel ``` -For Ubuntu 20.04 or lower, RHEL and CentOS, please refer to the [Netwide Assembler webpage](https://nasm.us/) for installation details. -For RHEL and CentOS, the required libraries can be installed via: -``` -sudo yum install openssl-devel -``` +In order to install ```nasm```, please refer to the [Netwide Assembler webpage](https://nasm.us/) for download and installation details. ### Instructions The library can be built using the following commands: From 5d72cfb1c26da0a8460f959e98b8e78f3b13b0e7 Mon Sep 17 00:00:00 2001 From: Sejun Kim Date: Thu, 17 Nov 2022 16:42:58 -0800 Subject: [PATCH 06/11] * Get thread count using std::thread::hardware_concurrency instead of hardcoded fixed value * Set ```IPCL_NUM_NODES``` macro after parsing ```lscpu``` --- CMakeLists.txt | 7 ++----- ipcl/include/ipcl/utils/util.hpp | 11 ++++++++++- ipcl/utils/util.cpp | 4 ++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c356a0..6132981 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,16 +103,13 @@ endif() if(IPCL_ENABLE_OMP) add_compile_definitions(IPCL_USE_OMP) ipcl_get_core_thread_count(num_cores num_threads num_sockets) + add_compile_definitions(IPCL_NUM_NODES=${num_sockets}) if(IPCL_THREAD_COUNT) + # if thread_count is invalid, set to maximum threads if(IPCL_THREAD_COUNT GREATER num_threads) set(IPCL_THREAD_COUNT ${num_threads}) endif() add_compile_definitions(IPCL_NUM_THREADS=${IPCL_THREAD_COUNT}) - else() - # if thread_count not explicitly set, use all available physical cores - # (no hyperthreading) - set(IPCL_THREAD_COUNT ${num_cores}) - add_compile_definitions(IPCL_NUM_THREADS=${num_cores}) endif() endif() diff --git a/ipcl/include/ipcl/utils/util.hpp b/ipcl/include/ipcl/utils/util.hpp index ed4b668..7a4b510 100644 --- a/ipcl/include/ipcl/utils/util.hpp +++ b/ipcl/include/ipcl/utils/util.hpp @@ -57,7 +57,16 @@ class OMPUtilities { } private: - static int getMaxThreads() { return IPCL_NUM_THREADS; } + static const int nodes; + static const int cpus; + + static int getMaxThreads() { +#ifdef IPCL_NUM_THREADS + return IPCL_NUM_THREADS; +#else + return cpus / nodes; +#endif // IPCL_NUM_THREADS + } }; #endif // IPCL_USE_OMP diff --git a/ipcl/utils/util.cpp b/ipcl/utils/util.cpp index 1dc0030..e83a2a8 100644 --- a/ipcl/utils/util.cpp +++ b/ipcl/utils/util.cpp @@ -3,9 +3,13 @@ #include "ipcl/utils/util.hpp" +#include // NOLINT [build/c++11] + namespace ipcl { #ifdef IPCL_USE_OMP +const int OMPUtilities::cpus = std::thread::hardware_concurrency(); +const int OMPUtilities::nodes = IPCL_NUM_NODES; const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads(); #endif // IPCL_USE_OMP From c74534cfdcbdc9869f04ec8cfb836377a59b12b7 Mon Sep 17 00:00:00 2001 From: sejunkim Date: Thu, 17 Nov 2022 21:57:22 -0800 Subject: [PATCH 07/11] Added cpu_parser --- ipcl/CMakeLists.txt | 1 + ipcl/include/ipcl/utils/parse_cpuinfo.hpp | 74 +++++++++++++++++++++++ ipcl/include/ipcl/utils/util.hpp | 15 +++++ ipcl/utils/parse_cpuinfo.cpp | 13 ++++ ipcl/utils/util.cpp | 2 + 5 files changed, 105 insertions(+) create mode 100644 ipcl/include/ipcl/utils/parse_cpuinfo.hpp create mode 100644 ipcl/utils/parse_cpuinfo.cpp diff --git a/ipcl/CMakeLists.txt b/ipcl/CMakeLists.txt index d6ec688..b3a9e07 100644 --- a/ipcl/CMakeLists.txt +++ b/ipcl/CMakeLists.txt @@ -12,6 +12,7 @@ set(IPCL_SRCS pri_key.cpp utils/context.cpp utils/util.cpp utils/common.cpp + utils/parse_cpuinfo.cpp ) if(IPCL_SHARED) diff --git a/ipcl/include/ipcl/utils/parse_cpuinfo.hpp b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp new file mode 100644 index 0000000..1cd11c7 --- /dev/null +++ b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp @@ -0,0 +1,74 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#ifndef IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_ +#define IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_ + +#include +#include +#include +#include +#include + +namespace ipcl { +// trim from start (in place) +static inline void ltrim(std::string& s) { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { + return !std::isspace(ch); + })); +} + +// trim from end (in place) +static inline void rtrim(std::string& s) { + s.erase(std::find_if(s.rbegin(), s.rend(), + [](unsigned char ch) { return !std::isspace(ch); }) + .base(), + s.end()); +} + +static inline void trim(std::string& s) { + ltrim(s); + rtrim(s); +} + +typedef struct { + int n_processors = 0; + int n_cores = 0; + int n_sockets = 0; +} linuxCPUInfo; + +static void parseCPUInfo(linuxCPUInfo& info) { + std::ifstream cpuinfo; + cpuinfo.exceptions(std::ifstream::badbit); + + try { + cpuinfo.open("/proc/cpuinfo", std::ios::in); + std::string line; + while (std::getline(cpuinfo, line)) { + std::stringstream ss(line); + std::string key, val; + if (std::getline(ss, key, ':') && std::getline(ss, val)) { + trim(key); + trim(val); + if (key == "processor") + info.n_processors++; + else if (key == "core id") + info.n_cores = std::max(info.n_cores, std::stoi(val)); + else if (key == "physical id") + info.n_sockets = std::max(info.n_sockets, std::stoi(val)); + } + } + info.n_sockets++; + info.n_cores = (info.n_cores + 1) * info.n_sockets; + } catch (const std::ifstream::failure& e) { + std::ostringstream log; + log << "\nFile: " << __FILE__ << "\nLine: " << __LINE__ << "\nError: " + << "cannot parse /proc/cpuinfo"; + throw std::runtime_error(log.str()); + } +} +linuxCPUInfo GetLinuxCPUInfo(void); + +} // namespace ipcl + +#endif // IPCL_INCLUDE_IPCL_UTILS_PARSE_CPUINFO_HPP_ diff --git a/ipcl/include/ipcl/utils/util.hpp b/ipcl/include/ipcl/utils/util.hpp index ed4b668..20159e8 100644 --- a/ipcl/include/ipcl/utils/util.hpp +++ b/ipcl/include/ipcl/utils/util.hpp @@ -6,6 +6,8 @@ #ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES #include + +#include "ipcl/utils/parse_cpuinfo.hpp" #endif // IPCL_RUNTIME_DETECT_CPU_FEATURES #include @@ -57,7 +59,17 @@ class OMPUtilities { } private: + static const int nodes; + static const int cpus; + static int getMaxThreads() { return IPCL_NUM_THREADS; } + static int getNodes() { +#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES + return n_sockets; +#else + return IPCL_NUM_SOCKETS; +#endif + } }; #endif // IPCL_USE_OMP @@ -76,6 +88,9 @@ static const bool has_rdseed = features.rdseed && !prefer_rdrand && !prefer_ipp_prng; static const bool has_rdrand = features.rdrnd && prefer_rdrand; +static const linuxCPUInfo cpuinfo = GetLinuxCPUInfo(); +static const int n_sockets = cpuinfo.n_sockets; +static const int n_processors = cpuinfo.n_processors; #endif // IPCL_RUNTIME_DETECT_CPU_FEATURES } // namespace ipcl diff --git a/ipcl/utils/parse_cpuinfo.cpp b/ipcl/utils/parse_cpuinfo.cpp new file mode 100644 index 0000000..2c1b135 --- /dev/null +++ b/ipcl/utils/parse_cpuinfo.cpp @@ -0,0 +1,13 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "ipcl/utils/parse_cpuinfo.hpp" + +#include +#include + +ipcl::linuxCPUInfo ipcl::GetLinuxCPUInfo(void) { + ipcl::linuxCPUInfo info; + ipcl::parseCPUInfo(info); + return info; +} diff --git a/ipcl/utils/util.cpp b/ipcl/utils/util.cpp index 1dc0030..1a017c0 100644 --- a/ipcl/utils/util.cpp +++ b/ipcl/utils/util.cpp @@ -7,6 +7,8 @@ namespace ipcl { #ifdef IPCL_USE_OMP const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads(); +const int OMPUtilities::nodes = OMPUtilities::getNodes(); +const int OMPUtilities::cpus = OMPUtilities::getCPUs(); #endif // IPCL_USE_OMP } // namespace ipcl From 745f02da33b13686c915ac4229cfdf45f740c113 Mon Sep 17 00:00:00 2001 From: sejunkim Date: Thu, 17 Nov 2022 22:20:25 -0800 Subject: [PATCH 08/11] Added runtime cpu nodes detection --- CMakeLists.txt | 4 ++- ipcl/include/ipcl/utils/util.hpp | 43 ++++++++++++++++---------------- ipcl/utils/util.cpp | 5 ++-- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6132981..79dcda3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,7 +103,6 @@ endif() if(IPCL_ENABLE_OMP) add_compile_definitions(IPCL_USE_OMP) ipcl_get_core_thread_count(num_cores num_threads num_sockets) - add_compile_definitions(IPCL_NUM_NODES=${num_sockets}) if(IPCL_THREAD_COUNT) # if thread_count is invalid, set to maximum threads if(IPCL_THREAD_COUNT GREATER num_threads) @@ -118,6 +117,9 @@ if(IPCL_DETECT_CPU_RUNTIME) add_compile_definitions(IPCL_RUNTIME_DETECT_CPU_FEATURES) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH};$ORIGIN/cpufeatures") else() + # set cpu socket count parsed from lscpu precompile + add_compile_definitions(IPCL_NUM_NODES=${num_sockets}) + # check whether cpu support avx512ifma instructions ipcl_detect_lscpu_flag("avx512ifma") if(IPCL_FOUND_avx512ifma) diff --git a/ipcl/include/ipcl/utils/util.hpp b/ipcl/include/ipcl/utils/util.hpp index a48393e..8794b45 100644 --- a/ipcl/include/ipcl/utils/util.hpp +++ b/ipcl/include/ipcl/utils/util.hpp @@ -43,6 +43,22 @@ inline void vec_size_check(const std::vector& v, const char* file, #define VEC_SIZE_CHECK(v) vec_size_check(v, __FILE__, __LINE__) +#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES +static const bool disable_avx512ifma = + (std::getenv("IPCL_DISABLE_AVX512IFMA") != nullptr); +static const bool prefer_rdrand = + (std::getenv("IPCL_PREFER_RDRAND") != nullptr); +static const bool prefer_ipp_prng = + (std::getenv("IPCL_PREFER_IPP_PRNG") != nullptr); +static const cpu_features::X86Features features = + cpu_features::GetX86Info().features; +static const bool has_avx512ifma = features.avx512ifma && !disable_avx512ifma; +static const bool has_rdseed = + features.rdseed && !prefer_rdrand && !prefer_ipp_prng; +static const bool has_rdrand = features.rdrnd && prefer_rdrand; + +#endif // IPCL_RUNTIME_DETECT_CPU_FEATURES + #ifdef IPCL_USE_OMP class OMPUtilities { public: @@ -59,17 +75,21 @@ class OMPUtilities { } private: + static const linuxCPUInfo cpuinfo; static const int nodes; static const int cpus; + static const linuxCPUInfo getLinuxCPUInfo() { return GetLinuxCPUInfo(); } + static int getNodes() { #ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES - return n_sockets; + return cpuinfo.n_sockets; #else return IPCL_NUM_SOCKETS; -#endif +#endif // IPCL_RUNTIME_DETECT_CPU_FEATURES } static int getMaxThreads() { + std::cout << "getNodes..." << cpuinfo.n_sockets << std::endl; #ifdef IPCL_NUM_THREADS return IPCL_NUM_THREADS; #else @@ -80,25 +100,6 @@ class OMPUtilities { #endif // IPCL_USE_OMP -#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES -static const bool disable_avx512ifma = - (std::getenv("IPCL_DISABLE_AVX512IFMA") != nullptr); -static const bool prefer_rdrand = - (std::getenv("IPCL_PREFER_RDRAND") != nullptr); -static const bool prefer_ipp_prng = - (std::getenv("IPCL_PREFER_IPP_PRNG") != nullptr); -static const cpu_features::X86Features features = - cpu_features::GetX86Info().features; -static const bool has_avx512ifma = features.avx512ifma && !disable_avx512ifma; -static const bool has_rdseed = - features.rdseed && !prefer_rdrand && !prefer_ipp_prng; -static const bool has_rdrand = features.rdrnd && prefer_rdrand; - -static const linuxCPUInfo cpuinfo = GetLinuxCPUInfo(); -static const int n_sockets = cpuinfo.n_sockets; -static const int n_processors = cpuinfo.n_processors; -#endif // IPCL_RUNTIME_DETECT_CPU_FEATURES - } // namespace ipcl #endif // IPCL_INCLUDE_IPCL_UTILS_UTIL_HPP_ diff --git a/ipcl/utils/util.cpp b/ipcl/utils/util.cpp index 66dbf50..8c9db98 100644 --- a/ipcl/utils/util.cpp +++ b/ipcl/utils/util.cpp @@ -8,11 +8,10 @@ namespace ipcl { #ifdef IPCL_USE_OMP +const linuxCPUInfo OMPUtilities::cpuinfo = OMPUtilities::getLinuxCPUInfo(); const int OMPUtilities::cpus = std::thread::hardware_concurrency(); -const int OMPUtilities::nodes = IPCL_NUM_NODES; -const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads(); const int OMPUtilities::nodes = OMPUtilities::getNodes(); -const int OMPUtilities::cpus = OMPUtilities::getCPUs(); +const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads(); #endif // IPCL_USE_OMP } // namespace ipcl From 82cc0a67fe88d09b026dea3dadc36bb206b6b1c0 Mon Sep 17 00:00:00 2001 From: sejunkim Date: Thu, 17 Nov 2022 22:29:59 -0800 Subject: [PATCH 09/11] Add ifdef for cpuinfo parser --- ipcl/include/ipcl/utils/util.hpp | 5 +++-- ipcl/utils/util.cpp | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ipcl/include/ipcl/utils/util.hpp b/ipcl/include/ipcl/utils/util.hpp index 8794b45..68b8091 100644 --- a/ipcl/include/ipcl/utils/util.hpp +++ b/ipcl/include/ipcl/utils/util.hpp @@ -75,12 +75,13 @@ class OMPUtilities { } private: +#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES static const linuxCPUInfo cpuinfo; + static const linuxCPUInfo getLinuxCPUInfo() { return GetLinuxCPUInfo(); } +#endif static const int nodes; static const int cpus; - static const linuxCPUInfo getLinuxCPUInfo() { return GetLinuxCPUInfo(); } - static int getNodes() { #ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES return cpuinfo.n_sockets; diff --git a/ipcl/utils/util.cpp b/ipcl/utils/util.cpp index 8c9db98..d0ffdfe 100644 --- a/ipcl/utils/util.cpp +++ b/ipcl/utils/util.cpp @@ -8,7 +8,9 @@ namespace ipcl { #ifdef IPCL_USE_OMP +#ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES const linuxCPUInfo OMPUtilities::cpuinfo = OMPUtilities::getLinuxCPUInfo(); +#endif // IPCL_RUNTIME_DETECT_CPU_FEATURES const int OMPUtilities::cpus = std::thread::hardware_concurrency(); const int OMPUtilities::nodes = OMPUtilities::getNodes(); const int OMPUtilities::MaxThreads = OMPUtilities::getMaxThreads(); From f121da2b147dee462b373210a9adc8c0b2d1bc5b Mon Sep 17 00:00:00 2001 From: sejunkim Date: Thu, 17 Nov 2022 22:33:09 -0800 Subject: [PATCH 10/11] Typo fix --- cmake/ipcl/ipcl-util.cmake | 2 +- ipcl/include/ipcl/utils/parse_cpuinfo.hpp | 8 ++++---- ipcl/include/ipcl/utils/util.hpp | 5 ++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake index 152d90b..d68dc48 100644 --- a/cmake/ipcl/ipcl-util.cmake +++ b/cmake/ipcl/ipcl-util.cmake @@ -139,6 +139,6 @@ function(ipcl_get_core_thread_count cores threads sockets) execute_process(COMMAND lscpu COMMAND grep Socket OUTPUT_VARIABLE output_sockets OUTPUT_STRIP_TRAILING_WHITESPACE) string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" output_sockets_list "${output_sockets}") list(GET output_sockets_list -1 n_sockets) - message(STATUS "* of sockets: ${n_sockets}") + message(STATUS "# of sockets: ${n_sockets}") set(${sockets} ${n_sockets} PARENT_SCOPE) endfunction() diff --git a/ipcl/include/ipcl/utils/parse_cpuinfo.hpp b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp index 1cd11c7..d0313c0 100644 --- a/ipcl/include/ipcl/utils/parse_cpuinfo.hpp +++ b/ipcl/include/ipcl/utils/parse_cpuinfo.hpp @@ -34,7 +34,7 @@ static inline void trim(std::string& s) { typedef struct { int n_processors = 0; int n_cores = 0; - int n_sockets = 0; + int n_nodes = 0; } linuxCPUInfo; static void parseCPUInfo(linuxCPUInfo& info) { @@ -55,11 +55,11 @@ static void parseCPUInfo(linuxCPUInfo& info) { else if (key == "core id") info.n_cores = std::max(info.n_cores, std::stoi(val)); else if (key == "physical id") - info.n_sockets = std::max(info.n_sockets, std::stoi(val)); + info.n_nodes = std::max(info.n_nodes, std::stoi(val)); } } - info.n_sockets++; - info.n_cores = (info.n_cores + 1) * info.n_sockets; + info.n_nodes++; + info.n_cores = (info.n_cores + 1) * info.n_nodes; } catch (const std::ifstream::failure& e) { std::ostringstream log; log << "\nFile: " << __FILE__ << "\nLine: " << __LINE__ << "\nError: " diff --git a/ipcl/include/ipcl/utils/util.hpp b/ipcl/include/ipcl/utils/util.hpp index 68b8091..a9f5ca6 100644 --- a/ipcl/include/ipcl/utils/util.hpp +++ b/ipcl/include/ipcl/utils/util.hpp @@ -84,13 +84,12 @@ class OMPUtilities { static int getNodes() { #ifdef IPCL_RUNTIME_DETECT_CPU_FEATURES - return cpuinfo.n_sockets; + return cpuinfo.n_nodes; #else - return IPCL_NUM_SOCKETS; + return IPCL_NUM_NODES; #endif // IPCL_RUNTIME_DETECT_CPU_FEATURES } static int getMaxThreads() { - std::cout << "getNodes..." << cpuinfo.n_sockets << std::endl; #ifdef IPCL_NUM_THREADS return IPCL_NUM_THREADS; #else From 1ad396a4e4af85613e0a062563d638dc56f0629e Mon Sep 17 00:00:00 2001 From: sejunkim Date: Thu, 17 Nov 2022 22:38:49 -0800 Subject: [PATCH 11/11] Updated sockets to nodes for consistency --- CMakeLists.txt | 6 +++--- cmake/ipcl/ipcl-util.cmake | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 79dcda3..8fc6cc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,7 +102,7 @@ endif() if(IPCL_ENABLE_OMP) add_compile_definitions(IPCL_USE_OMP) - ipcl_get_core_thread_count(num_cores num_threads num_sockets) + ipcl_get_core_thread_count(num_cores num_threads num_nodes) if(IPCL_THREAD_COUNT) # if thread_count is invalid, set to maximum threads if(IPCL_THREAD_COUNT GREATER num_threads) @@ -117,8 +117,8 @@ if(IPCL_DETECT_CPU_RUNTIME) add_compile_definitions(IPCL_RUNTIME_DETECT_CPU_FEATURES) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH};$ORIGIN/cpufeatures") else() - # set cpu socket count parsed from lscpu precompile - add_compile_definitions(IPCL_NUM_NODES=${num_sockets}) + # set cpu node count parsed from lscpu precompile + add_compile_definitions(IPCL_NUM_NODES=${num_nodes}) # check whether cpu support avx512ifma instructions ipcl_detect_lscpu_flag("avx512ifma") diff --git a/cmake/ipcl/ipcl-util.cmake b/cmake/ipcl/ipcl-util.cmake index d68dc48..65c1a74 100644 --- a/cmake/ipcl/ipcl-util.cmake +++ b/cmake/ipcl/ipcl-util.cmake @@ -115,7 +115,7 @@ function(ipcl_define_icp_variables OutVariable) PARENT_SCOPE) endfunction() -function(ipcl_get_core_thread_count cores threads sockets) +function(ipcl_get_core_thread_count cores threads nodes) include(ProcessorCount) # Get number threads @@ -136,9 +136,9 @@ function(ipcl_get_core_thread_count cores threads sockets) message(STATUS "# of physical cores: ${n_cores}") # check number of nodes - execute_process(COMMAND lscpu COMMAND grep Socket OUTPUT_VARIABLE output_sockets OUTPUT_STRIP_TRAILING_WHITESPACE) - string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" output_sockets_list "${output_sockets}") - list(GET output_sockets_list -1 n_sockets) - message(STATUS "# of sockets: ${n_sockets}") - set(${sockets} ${n_sockets} PARENT_SCOPE) + execute_process(COMMAND lscpu COMMAND grep Socket OUTPUT_VARIABLE output_nodes OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" output_nodes_list "${output_nodes}") + list(GET output_nodes_list -1 n_nodes) + message(STATUS "# of nodes: ${n_nodes}") + set(${nodes} ${n_nodes} PARENT_SCOPE) endfunction()