From 4a67c8ed7cbfb195df0c2e047d10560919b73bf5 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Thu, 10 Sep 2020 15:50:40 -0700
Subject: [PATCH 1/4] Set num threads properly for Docker

---
 ml-agents/mlagents/torch_utils/cpu_utils.py | 34 +++++++++++++++++++++
 ml-agents/mlagents/torch_utils/torch.py     |  4 ++-
 2 files changed, 37 insertions(+), 1 deletion(-)
 create mode 100644 ml-agents/mlagents/torch_utils/cpu_utils.py

diff --git a/ml-agents/mlagents/torch_utils/cpu_utils.py b/ml-agents/mlagents/torch_utils/cpu_utils.py
new file mode 100644
index 0000000000..8c6fcf15b5
--- /dev/null
+++ b/ml-agents/mlagents/torch_utils/cpu_utils.py
@@ -0,0 +1,34 @@
+from typing import Optional
+
+import os
+
+
+def get_num_threads_to_use() -> Optional[int]:
+    """
+    Gets the number of threads to use. For most problems, 4 is all you
+    need, but for smaller machines, we'd like to scale to less than that.
+    By default, PyTorch uses 1/2 of the available cores.
+    """
+    num_cpus = _get_num_cpus()
+    return max(min(num_cpus // 2, 4), 1) if num_cpus is not None else None
+
+
+def _get_num_cpus() -> Optional[int]:
+    """
+    Returns number of CPUs using cgroups if possible. This accounts
+    for Docker containers that are limited in cores.
+    """
+    try:
+        period = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
+        quota = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
+        num_cpus = quota // period
+        if num_cpus > 0:
+            return num_cpus
+    except FileNotFoundError:
+        pass
+    return os.cpu_count()
+
+
+def _read_in_integer_file(filename: str) -> int:
+    with open(filename) as f:
+        return int(f.readlines()[0])
diff --git a/ml-agents/mlagents/torch_utils/torch.py b/ml-agents/mlagents/torch_utils/torch.py
index 98463fa3b6..f2fd8d18aa 100644
--- a/ml-agents/mlagents/torch_utils/torch.py
+++ b/ml-agents/mlagents/torch_utils/torch.py
@@ -1,5 +1,7 @@
 import os
 
+from mlagents.torch_utils import cpu_utils
+
 # Detect availability of torch package here.
 # NOTE: this try/except is temporary until torch is required for ML-Agents.
 try:
@@ -7,7 +9,7 @@
     # Everywhere else is caught by the banned-modules setting for flake8
     import torch  # noqa I201
 
-    torch.set_num_interop_threads(2)
+    torch.set_num_threads(cpu_utils.get_num_threads_to_use())
     os.environ["KMP_BLOCKTIME"] = "0"
 
     # Known PyLint compatibility with PyTorch https://github.com/pytorch/pytorch/issues/701

From dab488344ab0ec1f1efd5a051f3eec97af996eea Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Thu, 10 Sep 2020 17:22:32 -0700
Subject: [PATCH 2/4] Pylint-friendly logic

---
 ml-agents/mlagents/torch_utils/cpu_utils.py | 22 ++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/ml-agents/mlagents/torch_utils/cpu_utils.py b/ml-agents/mlagents/torch_utils/cpu_utils.py
index 8c6fcf15b5..0d447424a9 100644
--- a/ml-agents/mlagents/torch_utils/cpu_utils.py
+++ b/ml-agents/mlagents/torch_utils/cpu_utils.py
@@ -18,17 +18,17 @@ def _get_num_cpus() -> Optional[int]:
     Returns number of CPUs using cgroups if possible. This accounts
     for Docker containers that are limited in cores.
     """
-    try:
-        period = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
-        quota = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
-        num_cpus = quota // period
-        if num_cpus > 0:
-            return num_cpus
-    except FileNotFoundError:
-        pass
-    return os.cpu_count()
+    period = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
+    quota = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
+    if period > 0 and quota > 0:
+        return int(quota // period)
+    else:
+        return os.cpu_count()
 
 
 def _read_in_integer_file(filename: str) -> int:
-    with open(filename) as f:
-        return int(f.readlines()[0])
+    try:
+        with open(filename) as f:
+            return int(f.readlines()[0])
+    except FileNotFoundError:
+        return -1

From 8aee80735fc2d1a33d2dee3fd009a488a2c9158e Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Fri, 11 Sep 2020 09:52:12 -0700
Subject: [PATCH 3/4] Use f.read().rstrip()

---
 ml-agents/mlagents/torch_utils/cpu_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml-agents/mlagents/torch_utils/cpu_utils.py b/ml-agents/mlagents/torch_utils/cpu_utils.py
index 0d447424a9..804ed12bfb 100644
--- a/ml-agents/mlagents/torch_utils/cpu_utils.py
+++ b/ml-agents/mlagents/torch_utils/cpu_utils.py
@@ -29,6 +29,6 @@ def _get_num_cpus() -> Optional[int]:
 def _read_in_integer_file(filename: str) -> int:
     try:
         with open(filename) as f:
-            return int(f.readlines()[0])
+            return int(f.read().rstrip())
     except FileNotFoundError:
         return -1

From d3ccff92d0d390425d6933e4a8409268cd2f7331 Mon Sep 17 00:00:00 2001
From: Ervin Teng
Date: Fri, 11 Sep 2020 11:27:13 -0700
Subject: [PATCH 4/4] Change function names

---
 ml-agents/mlagents/torch_utils/cpu_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ml-agents/mlagents/torch_utils/cpu_utils.py b/ml-agents/mlagents/torch_utils/cpu_utils.py
index 804ed12bfb..e0272fad51 100644
--- a/ml-agents/mlagents/torch_utils/cpu_utils.py
+++ b/ml-agents/mlagents/torch_utils/cpu_utils.py
@@ -9,11 +9,11 @@ def get_num_threads_to_use() -> Optional[int]:
     need, but for smaller machines, we'd like to scale to less than that.
     By default, PyTorch uses 1/2 of the available cores.
     """
-    num_cpus = _get_num_cpus()
+    num_cpus = _get_num_available_cpus()
     return max(min(num_cpus // 2, 4), 1) if num_cpus is not None else None
 
 
-def _get_num_cpus() -> Optional[int]:
+def _get_num_available_cpus() -> Optional[int]:
     """
     Returns number of CPUs using cgroups if possible. This accounts
     for Docker containers that are limited in cores.