Skip to content

Commit

Permalink
[Options] Add option for controlling parallel build with number of jo…
Browse files Browse the repository at this point in the history
…bs or memory reserved for each job (#230)

* add parallel_tune option

* minor style change

* fix typo

---------

Co-authored-by: Xin Li <lixin39@eco-13.syslab.sandbox>
Co-authored-by: Xin Li <xin@centml.ai>
  • Loading branch information
3 people committed May 24, 2023
1 parent e198cd1 commit 6152bc9
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
10 changes: 6 additions & 4 deletions python/hidet/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,15 @@ def build_job(args) -> Optional[Tuple[str, FuncType]]:
]
build_results = []
if parallel:
cpu_count = os.cpu_count()
max_jobs, mem_for_worker = option.get_parallel_tune()
max_jobs = cpu_count if max_jobs == -1 else min(max_jobs, cpu_count)
mem_for_worker *= 1024**3
# Set the affinity of current process. Some package such as numpy will change affinity of current process,
# which might limit the parallelism of compilation.
os.sched_setaffinity(0, range(os.cpu_count()))
os.sched_setaffinity(0, range(cpu_count))

# the maximum number of processes is limited by the number of cores and memory
mem_for_worker = 1.5 * 1024 * 1024 * 1024 # 1.5 GiB
num_workers = min(max(int(psutil.virtual_memory().available // mem_for_worker), 1), psutil.cpu_count())
num_workers = min(max(int(psutil.virtual_memory().available // mem_for_worker), 1), max_jobs)

_lazy_initialize_cuda()
for build_result in tqdm(
Expand Down
36 changes: 36 additions & 0 deletions python/hidet/option.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ def register_hidet_options():
description='Whether to build operators in parallel.',
choices=[True, False],
)
register_option(
name='parallel_tune',
type_hint='int, float',
default_value=(-1, 1.5),
description='The pair (max_parallel_jobs, mem_gb_per_job) that describe '
'the maximum number of parallel jobs and memory reserved for each job',
)
register_option(
name='save_lower_ir',
type_hint='bool',
Expand Down Expand Up @@ -476,6 +483,35 @@ def get_parallel_build() -> bool:
return OptionContext.current().get_option('parallel_build')


def parallel_tune(max_parallel_jobs: int = -1, mem_gb_per_job: float = 1.5):
    """
    Configure how aggressively tuning jobs are compiled in parallel.

    Sets the 'parallel_tune' option to the pair (max_parallel_jobs, mem_gb_per_job).

    Parameters
    ----------
    max_parallel_jobs: int
        Upper bound on the number of concurrent compilation jobs. The default -1
        means "use every available vcpu" (as reported by `os.cpu_count()`).
    mem_gb_per_job: float
        The minimum amount of memory (in GiB) reserved for each tuning job;
        defaults to 1.5 GiB.
    """
    settings = (max_parallel_jobs, mem_gb_per_job)
    OptionContext.current().set_option('parallel_tune', settings)


def get_parallel_tune() -> Tuple[int, float]:
    """
    Get the current value of the 'parallel_tune' option.

    Returns
    -------
    ret: Tuple[int, float]
        The pair (max_parallel_jobs, mem_gb_per_job): the maximum number of
        parallel tuning jobs and the minimum amount of memory (in GiB) reserved
        for each job.
    """
    return OptionContext.current().get_option('parallel_tune')


def save_lower_ir(enabled: bool = True):
"""
Whether to save the lower IR.
Expand Down

0 comments on commit 6152bc9

Please sign in to comment.