Fix resubmit, only reuse steps of the lowest level (#82)

* resubmit use the op of the lowest level
* sync submit and showkey
* fix bug
* fix import bugs
* update doc

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
deepmodeling · Oct 8, 2022 · 8589a93 · 8589a93
1 parent ac285de
commit 8589a93
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 42 deletions.
diff --git a/docs/quickcli.md b/docs/quickcli.md
@@ -42,45 +42,35 @@ The artifacts can be downloaded on-the-fly with `-d` flag. Note that the existin
 ## Show the keys of steps
 
 Each dpgen2 step is assigned a unique key. The keys of the finished steps can be checked with `showkey` command
-```bash                                                                                                                                                                              $ dpgen2 watch input.json WFID
-                   0 : init--scheduler
-                   1 : init--id
-                   2 : iter-000000--prep-train
-              3 -> 6 : iter-000000--run-train-0000 -> iter-000000--run-train-0003
-                   7 : iter-000000--prep-run-train
-                   8 : iter-000000--prep-lmp
-             9 -> 17 : iter-000000--run-lmp-000000 -> iter-000000--run-lmp-000008
-                  18 : iter-000000--prep-run-lmp
-                  19 : iter-000000--select-confs
-                  20 : iter-000000--prep-fp
-            21 -> 24 : iter-000000--run-fp-000000 -> iter-000000--run-fp-000003
-                  25 : iter-000000--prep-run-fp
-                  26 : iter-000000--collect-data
-                  27 : iter-000000--block
-                  28 : iter-000000--scheduler
-                  29 : iter-000000--id
-                  30 : iter-000001--prep-train
-            31 -> 34 : iter-000001--run-train-0000 -> iter-000001--run-train-0003
-                  35 : iter-000001--prep-run-train
-                  36 : iter-000001--prep-lmp
-            37 -> 45 : iter-000001--run-lmp-000000 -> iter-000001--run-lmp-000008
-                  46 : iter-000001--prep-run-lmp
-                  47 : iter-000001--select-confs
-                  48 : iter-000001--prep-fp
-            49 -> 52 : iter-000001--run-fp-000000 -> iter-000001--run-fp-000003
-                  53 : iter-000001--prep-run-fp
-                  54 : iter-000001--collect-data
-                  55 : iter-000001--block
-                  56 : iter-000001--scheduler
-                  57 : iter-000001--id
+```bash                                                                                                                                                                              $ dpgen2 showkey input.json WFID
+                   0 : iter-000000--prep-train
+              1 -> 4 : iter-000000--run-train-0000 -> iter-000000--run-train-0003
+                   5 : iter-000000--prep-lmp
+             6 -> 14 : iter-000000--run-lmp-000000 -> iter-000000--run-lmp-000008
+                  15 : iter-000000--select-confs
+                  16 : iter-000000--prep-fp
+            17 -> 20 : iter-000000--run-fp-000000 -> iter-000000--run-fp-000003
+                  21 : iter-000000--collect-data
+                  22 : iter-000000--scheduler
+                  23 : iter-000000--id
+                  24 : iter-000001--prep-train
+            25 -> 28 : iter-000001--run-train-0000 -> iter-000001--run-train-0003
+                  29 : iter-000001--prep-lmp
+            30 -> 38 : iter-000001--run-lmp-000000 -> iter-000001--run-lmp-000008
+                  39 : iter-000001--select-confs
+                  40 : iter-000001--prep-fp
+            41 -> 44 : iter-000001--run-fp-000000 -> iter-000001--run-fp-000003
+                  45 : iter-000001--collect-data
+                  46 : iter-000001--scheduler
+                  47 : iter-000001--id
 ```
 
 
 ## Resubmit a workflow
 
 If a workflow stopped abnormally, one may submit a new workflow with some steps of the old workflow reused. 
 ```bash
-dpgen2 resubmit input.json WFID --reuse 0-49
+dpgen2 resubmit input.json WFID --reuse 0-41
 ```
-The steps of workflow WDID 0-49 will be reused in the new workflow. The indexes of the steps are printed by `dpgen2 showkey`. In the example, all the steps before the `iter-000001--run-fp-000000` will be used in the new workflow.
+Steps 0-41 of workflow WFID (i.e. 0 <= id < 41; note that step 41 itself is not included) will be reused in the new workflow. The indexes of the steps are printed by `dpgen2 showkey`. In the example, all the steps before `iter-000001--run-fp-000000` will be reused in the new workflow.
 
diff --git a/dpgen2/entrypoint/showkey.py b/dpgen2/entrypoint/showkey.py
@@ -3,22 +3,20 @@
 from dflow import (
     Workflow,
 )
-from dpgen2.entrypoint.submit import successful_step_keys
+from dpgen2.entrypoint.submit import get_resubmit_keys
 from dpgen2.utils import (
-    sort_slice_ops,
-    print_keys_in_nice_format,
     workflow_config_from_dict,
+    print_keys_in_nice_format,
 )
 
+
 def showkey(
         wf_id,
         wf_config,
 ):
     workflow_config_from_dict(wf_config)    
     wf = Workflow(id=wf_id)
-    all_step_keys = successful_step_keys(wf)
-    all_step_keys = sort_slice_ops(
-        all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
+    all_step_keys = get_resubmit_keys(wf)
     prt_str = print_keys_in_nice_format(
         all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
     print(prt_str)

diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py
@@ -73,6 +73,7 @@
     sort_slice_ops,
     print_keys_in_nice_format,
     workflow_config_from_dict,
+    matched_step_key,
 )
 from dpgen2.utils.step_config import normalize as normalize_step_dict
 from dpgen2.entrypoint.submit_args import normalize as normalize_submit_args
@@ -463,6 +464,20 @@ def successful_step_keys(wf):
     return all_step_keys
 
 
+def get_resubmit_keys(
+        wf,
+):
+    all_step_keys = successful_step_keys(wf)
+    all_step_keys = matched_step_key(
+        all_step_keys,
+        ['prep-train', 'run-train', 'prep-lmp', 'run-lmp', 'select-confs', 
+         'prep-fp', 'run-fp', 'collect-data', 'scheduler', 'id'],
+    )
+    all_step_keys = sort_slice_ops(
+        all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
+    return all_step_keys
+
+
 def resubmit_concurrent_learning(
         wf_config,
         wfid,
@@ -475,10 +490,8 @@ def resubmit_concurrent_learning(
     context = wf_global_workflow(wf_config)
 
     old_wf = Workflow(id=wfid)
+    all_step_keys = get_resubmit_keys(old_wf)
 
-    all_step_keys = successful_step_keys(old_wf)
-    all_step_keys = sort_slice_ops(
-        all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)
     if list_steps:
         prt_str = print_keys_in_nice_format(
             all_step_keys, ['run-train', 'run-lmp', 'run-fp'],)