Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Options] Add option for controlling parallel build with number of jobs or memory reserved for each job #230

Merged
merged 10 commits into from
May 24, 2023
10 changes: 6 additions & 4 deletions python/hidet/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,15 @@ def build_job(args) -> Optional[Tuple[str, FuncType]]:
]
build_results = []
if parallel:
cpu_count = os.cpu_count()
max_jobs, mem_for_worker = option.get_parallel_tune()
max_jobs = cpu_count if max_jobs == -1 else min(max_jobs, cpu_count)
mem_for_worker *= 1024**3
# Set the affinity of current process. Some package such as numpy will change affinity of current process,
# which might limit the parallelism of compilation.
os.sched_setaffinity(0, range(os.cpu_count()))
os.sched_setaffinity(0, range(cpu_count))

# the maximum number of processes is limited by the number of cores and memory
mem_for_worker = 1.5 * 1024 * 1024 * 1024 # 1.5 GiB
num_workers = min(max(int(psutil.virtual_memory().available // mem_for_worker), 1), psutil.cpu_count())
num_workers = min(max(int(psutil.virtual_memory().available // mem_for_worker), 1), max_jobs)

_lazy_initialize_cuda()
for build_result in tqdm(
Expand Down
36 changes: 36 additions & 0 deletions python/hidet/option.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ def register_hidet_options():
description='Whether to build operators in parallel.',
choices=[True, False],
)
register_option(
name='parallel_tune',
type_hint='int, float',
default_value=(-1, 1.5),
description='The pair (max_parallel_jobs, mem_gb_per_job) that describe '
'the maximum number of parallel jobs and memory reserved for each job',
)
register_option(
name='save_lower_ir',
type_hint='bool',
Expand Down Expand Up @@ -476,6 +483,35 @@ def get_parallel_build() -> bool:
return OptionContext.current().get_option('parallel_build')


def parallel_tune(max_parallel_jobs: int = -1, mem_gb_per_job: float = 1.5):
    """
    Configure parallel tuning: cap the number of concurrent compilation jobs
    and reserve a minimum amount of memory (in GiB) for each job.

    Parameters
    ----------
    max_parallel_jobs: int
        The maximum number of parallel jobs allowed, default -1
        (the number of available vcpu returned by `os.cpu_count()`).
    mem_gb_per_job: float
        The minimum amount of memory (in GiB) reserved for each tuning job, default 1.5GiB.
    """
    # Stored as a single (jobs, mem_gb) pair under one option key.
    setting = (max_parallel_jobs, mem_gb_per_job)
    OptionContext.current().set_option('parallel_tune', setting)


def get_parallel_tune() -> Tuple[int, float]:
    """
    Get the current value of the 'parallel_tune' option.

    Returns
    -------
    ret: Tuple[int, float]
        The pair (max_parallel_jobs, mem_gb_per_job): the maximum number of
        parallel tuning jobs allowed and the minimum amount of memory (in GiB)
        reserved for each job.
    """
    return OptionContext.current().get_option('parallel_tune')


def save_lower_ir(enabled: bool = True):
"""
Whether to save the lower IR.
Expand Down