Skip to content

Commit

Permalink
add group_size/pool_size for lmp and fp (#127)
Browse files Browse the repository at this point in the history
Add group_size/pool_size for `lmp` and `fp`. A node can run up to
`group_size` tasks, and at most `pool_size` of those tasks run at the same
time.

---------

Co-authored-by: Sikai Yao <yaosk@dp.tech>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people committed Feb 7, 2023
1 parent 21cd921 commit 6d08187
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 2 deletions.
2 changes: 2 additions & 0 deletions dpgen2/superop/prep_run_fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def _prep_run_fp(
run_template_config = run_config.pop("template_config")
prep_executor = init_executor(prep_config.pop("executor"))
run_executor = init_executor(run_config.pop("executor"))
template_slice_config = run_config.pop("template_slice_config", {})

prep_fp = Step(
"prep-fp",
Expand Down Expand Up @@ -167,6 +168,7 @@ def _prep_run_fp(
input_parameter=["task_name"],
input_artifact=["task_path"],
output_artifact=["log", "labeled_data"],
**template_slice_config,
),
python_packages=upload_python_packages,
**run_template_config,
Expand Down
3 changes: 2 additions & 1 deletion dpgen2/superop/prep_run_lmp.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ def _prep_run_lmp(
run_template_config = run_config.pop("template_config")
prep_executor = init_executor(prep_config.pop("executor"))
run_executor = init_executor(run_config.pop("executor"))
template_slice_config = run_config.pop("template_slice_config", {})

prep_lmp = Step(
"prep-lmp",
Expand Down Expand Up @@ -168,6 +169,7 @@ def _prep_run_lmp(
input_parameter=["task_name"],
input_artifact=["task_path"],
output_artifact=["log", "traj", "model_devi", "plm_output"],
**template_slice_config,
),
python_packages=upload_python_packages,
**run_template_config,
Expand All @@ -184,7 +186,6 @@ def _prep_run_lmp(
argo_len(prep_lmp.outputs.parameters["task_names"]),
format=lmp_index_pattern,
),
# with_param=argo_range(argo_len(prep_lmp.outputs.parameters["task_names"])),
key=step_keys["run-lmp"],
executor=run_executor,
**run_config,
Expand Down
17 changes: 17 additions & 0 deletions dpgen2/utils/step_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,18 @@ def template_conf_args():
]


def template_slice_conf_args():
    """Build the argument schema for the ``template_slice_config`` section.

    Returns
    -------
    list
        Two optional integer ``Argument`` entries, ``group_size`` and
        ``pool_size`` (in that order), each defaulting to ``None``.
    """
    # Field name -> help text; insertion order fixes the order of the result.
    slice_option_docs = {
        "group_size": "The number of tasks running on a single node. It is efficient for a large number of short tasks.",
        "pool_size": "The number of tasks running at the same time on one node.",
    }
    return [
        Argument(option_name, int, optional=True, default=None, doc=option_doc)
        for option_name, option_doc in slice_option_docs.items()
    ]


def step_conf_args():
doc_template = "The configs passed to the PythonOPTemplate."
doc_template_slice = "The configs passed to the Slices."
doc_executor = "The executor of the step."
doc_continue_on_failed = "If continue the the step is failed (FatalError, TransientError, A certain number of retrial is reached...)."
doc_continue_on_num_success = "Only in the sliced OP case. Continue the workflow if a certain number of the sliced jobs are successful."
Expand All @@ -111,6 +121,13 @@ def step_conf_args():
default={"image": default_image},
doc=doc_template,
),
Argument(
"template_slice_config",
dict,
template_slice_conf_args(),
optional=True,
doc=doc_template_slice,
),
Argument(
"continue_on_failed",
bool,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ classifiers = [
dependencies = [
'numpy',
'dpdata',
'pydflow>=1.6.33',
'pydflow>=1.6.39',
'dargs>=0.3.1',
'scipy',
'lbg',
Expand Down

0 comments on commit 6d08187

Please sign in to comment.