diff --git a/test/common.py b/test/common.py
index fd03bdedcbaf9..274a169fca6e6 100644
--- a/test/common.py
+++ b/test/common.py
@@ -1844,7 +1844,7 @@ def get_library(self, name, generated_libs, configure=['sh', './configure'], #
     if env_init is None:
       env_init = {}
     if make_args is None:
-      make_args = ['-j', str(shared.get_num_cores())]
+      make_args = ['-j', str(utils.get_num_cores())]
 
     build_dir = self.get_build_dir()
 
diff --git a/test/parallel_testsuite.py b/test/parallel_testsuite.py
index 20e113be96767..570c70a0e880b 100644
--- a/test/parallel_testsuite.py
+++ b/test/parallel_testsuite.py
@@ -441,4 +441,4 @@ def __init__(self, co):
 def num_cores():
   if NUM_CORES:
     return int(NUM_CORES)
-  return multiprocessing.cpu_count()
+  return utils.get_num_cores()
diff --git a/tools/js_optimizer.py b/tools/js_optimizer.py
index 2c99bf9ff8209..02beac71b45ea 100755
--- a/tools/js_optimizer.py
+++ b/tools/js_optimizer.py
@@ -244,7 +244,7 @@ def check_symbol_mapping(p):
 
   # if we are making source maps, we want our debug numbering to start from the
   # top of the file, so avoid breaking the JS into chunks
-  intended_num_chunks = round(shared.get_num_cores() * NUM_CHUNKS_PER_CORE)
+  intended_num_chunks = round(utils.get_num_cores() * NUM_CHUNKS_PER_CORE)
   chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks))
   chunks = chunkify(funcs, chunk_size)
 
diff --git a/tools/shared.py b/tools/shared.py
index 400c9c396d362..c5c927e3e5912 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -115,10 +115,6 @@ def run_process(cmd, check=True, input=None, *args, **kw):
   return ret
 
 
-def get_num_cores():
-  return int(os.environ.get('EMCC_CORES', os.cpu_count()))
-
-
 def returncode_to_str(code):
   assert code != 0
   if code < 0:
@@ -169,7 +165,7 @@ def get_finished_process():
       except subprocess.TimeoutExpired:
         pass
 
-  num_parallel_processes = get_num_cores()
+  num_parallel_processes = utils.get_num_cores()
   temp_files = get_temp_files()
   i = 0
   num_completed = 0
diff --git a/tools/system_libs.py b/tools/system_libs.py
index 8be52d381b33e..1d88c31a29dfd 100644
--- a/tools/system_libs.py
+++ b/tools/system_libs.py
@@ -159,7 +159,7 @@ def get_top_level_ninja_file():
 
 
 def run_ninja(build_dir):
-  cmd = ['ninja', '-C', build_dir, f'-j{shared.get_num_cores()}']
+  cmd = ['ninja', '-C', build_dir, f'-j{utils.get_num_cores()}']
   if shared.PRINT_SUBPROCS:
     cmd.append('-v')
   shared.check_call(cmd, env=clean_env())
@@ -538,7 +538,7 @@ def build_objects(self, build_dir):
     # Choose a chunk size that is large enough to avoid too many subprocesses
     # but not too large to avoid task starvation.
     # For now the heuristic is to split inputs by 2x number of cores.
-    chunk_size = max(1, len(objects) // (2 * shared.get_num_cores()))
+    chunk_size = max(1, len(objects) // (2 * utils.get_num_cores()))
     # Convert batches to commands.
     for cmd, srcs in batches.items():
       cmd = list(cmd)
diff --git a/tools/utils.py b/tools/utils.py
index 6bc0bae630584..36ffce1078daa 100644
--- a/tools/utils.py
+++ b/tools/utils.py
@@ -112,6 +112,20 @@ def delete_contents(dirname, exclude=None):
       delete_file(entry)
 
 
+def get_num_cores():
+  # Prefer `os.process_cpu_count` when available (3.13 and above) since
+  # it takes into account thread affinity.
+  # Fall back to `os.sched_getaffinity` where available and finally
+  # `os.cpu_count`, which should work everywhere.
+  if hasattr(os, 'process_cpu_count'):
+    cpu_count = os.process_cpu_count()
+  elif hasattr(os, 'sched_getaffinity'):
+    cpu_count = len(os.sched_getaffinity(0))
+  else:
+    cpu_count = os.cpu_count()
+  return int(os.environ.get('EMCC_CORES', cpu_count))
+
+
 # TODO(sbc): Replace with functools.cache, once we update to python 3.9
 memoize = functools.lru_cache(maxsize=None)
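
For illustration, a minimal usage sketch of the consolidated helper; the import path (from tools import utils) is an assumption, but the fallback order and the EMCC_CORES override mirror the get_num_cores() function added to tools/utils.py above.

import os

from tools import utils  # assumed import path for the helper added above

# With no override, the helper reports the CPUs actually usable by this
# process: os.process_cpu_count() on Python 3.13+, len(os.sched_getaffinity(0))
# where available, and os.cpu_count() otherwise.
print(utils.get_num_cores())

# EMCC_CORES, when set, takes precedence over the detected value.
os.environ['EMCC_CORES'] = '4'
assert utils.get_num_cores() == 4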