Skip to content

Commit

Permalink
Fix resubmit, only reuse steps of the lowest level (#82)
Browse files Browse the repository at this point in the history
* resubmit use the op of the lowest level

* sync submit and showkey

* fix bug

* fix import bugs

* update doc

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
  • Loading branch information
wanghan-iapcm and Han Wang committed Oct 8, 2022
1 parent ac285de commit 8589a93
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 42 deletions.
56 changes: 23 additions & 33 deletions docs/quickcli.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,45 +42,35 @@ The artifacts can be downloaded on-the-fly with `-d` flag. Note that the existin
## Show the keys of steps

Each dpgen2 step is assigned a unique key. The keys of the finished steps can be checked with the `showkey` command:
```bash
$ dpgen2 watch input.json WFID
0 : init--scheduler
1 : init--id
2 : iter-000000--prep-train
3 -> 6 : iter-000000--run-train-0000 -> iter-000000--run-train-0003
7 : iter-000000--prep-run-train
8 : iter-000000--prep-lmp
9 -> 17 : iter-000000--run-lmp-000000 -> iter-000000--run-lmp-000008
18 : iter-000000--prep-run-lmp
19 : iter-000000--select-confs
20 : iter-000000--prep-fp
21 -> 24 : iter-000000--run-fp-000000 -> iter-000000--run-fp-000003
25 : iter-000000--prep-run-fp
26 : iter-000000--collect-data
27 : iter-000000--block
28 : iter-000000--scheduler
29 : iter-000000--id
30 : iter-000001--prep-train
31 -> 34 : iter-000001--run-train-0000 -> iter-000001--run-train-0003
35 : iter-000001--prep-run-train
36 : iter-000001--prep-lmp
37 -> 45 : iter-000001--run-lmp-000000 -> iter-000001--run-lmp-000008
46 : iter-000001--prep-run-lmp
47 : iter-000001--select-confs
48 : iter-000001--prep-fp
49 -> 52 : iter-000001--run-fp-000000 -> iter-000001--run-fp-000003
53 : iter-000001--prep-run-fp
54 : iter-000001--collect-data
55 : iter-000001--block
56 : iter-000001--scheduler
57 : iter-000001--id
```bash
$ dpgen2 showkey input.json WFID
0 : iter-000000--prep-train
1 -> 4 : iter-000000--run-train-0000 -> iter-000000--run-train-0003
5 : iter-000000--prep-lmp
6 -> 14 : iter-000000--run-lmp-000000 -> iter-000000--run-lmp-000008
15 : iter-000000--select-confs
16 : iter-000000--prep-fp
17 -> 20 : iter-000000--run-fp-000000 -> iter-000000--run-fp-000003
21 : iter-000000--collect-data
22 : iter-000000--scheduler
23 : iter-000000--id
24 : iter-000001--prep-train
25 -> 28 : iter-000001--run-train-0000 -> iter-000001--run-train-0003
29 : iter-000001--prep-lmp
30 -> 38 : iter-000001--run-lmp-000000 -> iter-000001--run-lmp-000008
39 : iter-000001--select-confs
40 : iter-000001--prep-fp
41 -> 44 : iter-000001--run-fp-000000 -> iter-000001--run-fp-000003
45 : iter-000001--collect-data
46 : iter-000001--scheduler
47 : iter-000001--id
```


## Resubmit a workflow

If a workflow stopped abnormally, one may submit a new workflow with some steps of the old workflow reused.
```bash
dpgen2 resubmit input.json WFID --reuse 0-49
dpgen2 resubmit input.json WFID --reuse 0-41
```
The steps 0-49 of workflow WFID will be reused in the new workflow. The indexes of the steps are printed by `dpgen2 showkey`. In the example, all the steps before `iter-000001--run-fp-000000` will be reused in the new workflow.
The steps 0-41 of workflow WFID (0 <= id < 41; note that 41 is not included) will be reused in the new workflow. The indexes of the steps are printed by `dpgen2 showkey`. In the example, all the steps before `iter-000001--run-fp-000000` will be reused in the new workflow.

10 changes: 4 additions & 6 deletions dpgen2/entrypoint/showkey.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,20 @@
from dflow import (
Workflow,
)
from dpgen2.entrypoint.submit import successful_step_keys
from dpgen2.entrypoint.submit import get_resubmit_keys
from dpgen2.utils import (
sort_slice_ops,
print_keys_in_nice_format,
workflow_config_from_dict,
print_keys_in_nice_format,
)


def showkey(
wf_id,
wf_config,
):
workflow_config_from_dict(wf_config)
wf = Workflow(id=wf_id)
all_step_keys = successful_step_keys(wf)
all_step_keys = sort_slice_ops(
all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
all_step_keys = get_resubmit_keys(wf)
prt_str = print_keys_in_nice_format(
all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
print(prt_str)
Expand Down
19 changes: 16 additions & 3 deletions dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
sort_slice_ops,
print_keys_in_nice_format,
workflow_config_from_dict,
matched_step_key,
)
from dpgen2.utils.step_config import normalize as normalize_step_dict
from dpgen2.entrypoint.submit_args import normalize as normalize_submit_args
Expand Down Expand Up @@ -463,6 +464,20 @@ def successful_step_keys(wf):
return all_step_keys


def get_resubmit_keys(
        wf,
):
    """Collect the step keys of a workflow that are offered for resubmission.

    The keys of the successful steps of `wf` are gathered with
    `successful_step_keys`, passed through `matched_step_key` together with
    the list of operator names below, and finally arranged by
    `sort_slice_ops` with respect to the sliced operators
    ('run-train', 'run-lmp', 'run-fp').

    Parameters
    ----------
    wf
        The workflow whose step keys are queried. It is handed unchanged to
        `successful_step_keys`.

    Returns
    -------
    The value returned by `sort_slice_ops` for the matched keys.
    """
    # NOTE(review): presumably `matched_step_key` keeps only the keys that
    # match one of these operator names, so that only lowest-level steps are
    # listed -- confirm against dpgen2.utils.
    reusable_op_names = [
        'prep-train', 'run-train', 'prep-lmp', 'run-lmp', 'select-confs',
        'prep-fp', 'run-fp', 'collect-data', 'scheduler', 'id',
    ]
    sliced_op_names = ['run-train', 'run-lmp', 'run-fp']

    succeeded_keys = successful_step_keys(wf)
    matched_keys = matched_step_key(succeeded_keys, reusable_op_names)
    return sort_slice_ops(matched_keys, sliced_op_names)


def resubmit_concurrent_learning(
wf_config,
wfid,
Expand All @@ -475,10 +490,8 @@ def resubmit_concurrent_learning(
context = wf_global_workflow(wf_config)

old_wf = Workflow(id=wfid)
all_step_keys = get_resubmit_keys(old_wf)

all_step_keys = successful_step_keys(old_wf)
all_step_keys = sort_slice_ops(
all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
if list_steps:
prt_str = print_keys_in_nice_format(
all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
Expand Down

0 comments on commit 8589a93

Please sign in to comment.