support finetune on pretrained models (#152)
WIP, building unittests.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
HuangJiameng and pre-commit-ci[bot] committed May 22, 2023
1 parent b816df3 commit 64363df
Showing 23 changed files with 707 additions and 27 deletions.
15 changes: 15 additions & 0 deletions dpgen2/entrypoint/args.py
@@ -1,3 +1,5 @@
import textwrap

import dargs
from dargs import (
Argument,
@@ -232,6 +234,16 @@ def input_args():
doc_type_map = 'The type map. e.g. ["Al", "Mg"]. Al and Mg will have type 0 and 1, respectively.'
doc_mass_map = "The mass map. e.g. [27., 24.]. Al and Mg will be set with mass 27. and 24. amu, respectively."
doc_mixed_type = "Use `deepmd/npy/mixed` format for storing training data."
doc_do_finetune = """Finetune the pretrained model before the first iteration. If set to True, an additional
finetune-step, based on a branch of "PrepRunDPTrain", is inserted before the dpgen_step. In the
finetune-step the internal flag finetune_mode is set to "finetune", so the SuperOP "PrepRunDPTrain"
acts as a "Finetune" operator: the pretrained model is finetuned in the train step and the training
template is updated afterwards. In the subsequent normal dpgen-step, the internal flag finetune_mode
is set to "train-init", which means --init-frz-model is used to train from the models of the
previous iteration. By default "do_finetune" is False and finetune_mode is "no", so nothing
related to finetuning is done.
"""
doc_do_finetune = textwrap.dedent(doc_do_finetune)
doc_init_data_prefix = "The prefix of initial data systems"
doc_init_sys = "The initial data systems"

@@ -246,6 +258,9 @@ def input_args():
doc=doc_init_data_prefix,
),
Argument("mixed_type", bool, optional=True, default=False, doc=doc_mixed_type),
Argument(
"do_finetune", bool, optional=True, default=False, doc=doc_do_finetune
),
Argument(
"init_data_sys", [list, str], optional=False, default=None, doc=doc_init_sys
),
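For reference, a minimal sketch of how this new option might appear in the "inputs" section of a dpgen2 input, written here as a Python dict; only "do_finetune" is introduced by this commit, and the data paths and "init_data_prefix" value are illustrative assumptions:

# hypothetical "inputs" excerpt; values are placeholders
inputs = {
    "type_map": ["Al", "Mg"],
    "mass_map": [27.0, 24.0],
    "mixed_type": False,
    "do_finetune": True,  # adds the finetune-step before the dpgen-step
    "init_data_prefix": "",  # assumed value
    "init_data_sys": ["path/to/init/system"],  # illustrative path
}

With "do_finetune" left at its default of False, the workflow is unchanged and finetune_mode stays "no".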
89 changes: 85 additions & 4 deletions dpgen2/entrypoint/submit.py
@@ -11,6 +11,8 @@
Dict,
List,
Optional,
Tuple,
Type,
Union,
)

@@ -305,14 +307,60 @@ def get_kspacing_kgamma_from_incar(

def make_optional_parameter(
mixed_type=False,
finetune_mode="no",
):
return {"data_mixed_type": mixed_type}
return {"data_mixed_type": mixed_type, "finetune_mode": finetune_mode}


def make_finetune_step(
config,
prep_train_config,
run_train_config,
upload_python_packages,
numb_models,
template_script,
train_config,
init_models,
init_data,
iter_data,
):
finetune_optional_parameter = {
"mixed_type": config["inputs"]["mixed_type"],
"finetune_mode": "finetune",
}

finetune_op = PrepRunDPTrain(
"finetune",
PrepDPTrain,
RunDPTrain,
prep_config=prep_train_config,
run_config=run_train_config,
upload_python_packages=upload_python_packages,
finetune=True,
)
finetune_step = Step(
"finetune-step",
template=finetune_op,
parameters={
"block_id": "finetune",
"numb_models": numb_models,
"template_script": template_script,
"train_config": train_config,
"run_optional_parameter": finetune_optional_parameter,
},
artifacts={
"init_models": init_models,
"init_data": init_data,
"iter_data": iter_data,
},
)
return finetune_step


def workflow_concurrent_learning(
config: Dict,
old_style: bool = False,
):
) -> Tuple[Step, Optional[Step]]:
default_config = (
normalize_step_dict(config.get("default_config", {}))
if old_style
@@ -493,10 +541,34 @@ def workflow_concurrent_learning(
else:
init_models = None

finetune_step = None
optional_parameter = make_optional_parameter(
config["inputs"]["mixed_type"],
)

if config["inputs"].get("do_finetune", False):

finetune_step = make_finetune_step(
config,
prep_train_config,
run_train_config,
upload_python_packages,
numb_models,
template_script,
train_config,
init_models,
init_data,
iter_data,
)

init_models = finetune_step.outputs.artifacts["models"]
template_script = finetune_step.outputs.parameters["template_script"]

optional_parameter = make_optional_parameter(
config["inputs"]["mixed_type"],
finetune_mode="train-init",
)

# here the scheduler is passed as input parameter to the concurrent_learning_op
dpgen_step = Step(
"dpgen-step",
@@ -517,7 +589,7 @@ def workflow_concurrent_learning(
"iter_data": iter_data,
},
)
return dpgen_step
return dpgen_step, finetune_step


def get_scheduler_ids(
@@ -603,7 +675,9 @@ def submit_concurrent_learning(

global_config_workflow(wf_config)

dpgen_step = workflow_concurrent_learning(wf_config, old_style=old_style)
dpgen_step, finetune_step = workflow_concurrent_learning(
wf_config, old_style=old_style
)

if reuse_step is not None and replace_scheduler:
scheduler_new = copy.deepcopy(
@@ -639,8 +713,15 @@ def submit_concurrent_learning(
"conf_selector",
selector,
)
wf_config["inputs"]["do_finetune"] = False
# finetune will not be done again if the old process is reused.

wf = Workflow(name="dpgen")

if wf_config["inputs"].get("do_finetune", False):
assert finetune_step is not None
wf.add(finetune_step)

wf.add(dpgen_step)

# for debug purpose, we may not really submit the wf
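To summarize the branching above, here is a sketch (not part of the diff) of the optional parameter each step ends up with:

# no finetuning (default): the dpgen-step receives
make_optional_parameter(config["inputs"]["mixed_type"])
#   -> {"data_mixed_type": ..., "finetune_mode": "no"}
# with do_finetune=True: the finetune-step gets the run-level parameter built in
# make_finetune_step, {"mixed_type": ..., "finetune_mode": "finetune"},
# and the dpgen-step that follows receives
make_optional_parameter(config["inputs"]["mixed_type"], finetune_mode="train-init")
#   -> {"data_mixed_type": ..., "finetune_mode": "train-init"}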
2 changes: 1 addition & 1 deletion dpgen2/exploration/task/conf_sampling_task_group.py
@@ -56,7 +56,7 @@ def set_conf(

def _sample_confs(
self,
):
) -> list:
confs = []
for ii in range(self.n_sample):
if len(self.conf_queue) == 0:
4 changes: 2 additions & 2 deletions dpgen2/exploration/task/lmp_template_task_group.py
@@ -81,7 +81,7 @@ def make_task(
templates.append(self.plm_template)
conts = self.make_cont(templates, self.revisions)
nconts = len(conts[0])
for cc, ii in itertools.product(confs, range(nconts)):
for cc, ii in itertools.product(confs, range(nconts)): # type: ignore
if not self.plm_set:
self.add_task(self._make_lmp_task(cc, conts[0][ii]))
else:
@@ -165,7 +165,7 @@ def revise_lmp_input_plm(lmp_lines, in_plm, out_plm="output.plumed"):


def revise_by_keys(lmp_lines, keys, values):
for kk, vv in zip(keys, values):
for kk, vv in zip(keys, values): # type: ignore
for ii in range(len(lmp_lines)):
lmp_lines[ii] = lmp_lines[ii].replace(kk, str(vv))
return lmp_lines
2 changes: 1 addition & 1 deletion dpgen2/exploration/task/npt_task_group.py
@@ -95,7 +95,7 @@ def make_task(
# clear all existing tasks
self.clear()
confs = self._sample_confs()
for cc, tt, pp in itertools.product(confs, self.temps, self.press):
for cc, tt, pp in itertools.product(confs, self.temps, self.press): # type: ignore
self.add_task(self._make_lmp_task(cc, tt, pp))
return self

2 changes: 2 additions & 0 deletions dpgen2/flow/dpgen_loop.py
@@ -66,12 +66,14 @@

cl_default_optional_parameter = {
"data_mixed_type": False,
"finetune_mode": "no",
}


def make_block_optional_parameter(cl_optional_parameter):
return {
"data_mixed_type": cl_optional_parameter["data_mixed_type"],
"finetune_mode": cl_optional_parameter["finetune_mode"],
}


2 changes: 1 addition & 1 deletion dpgen2/fp/vasp_input.py
@@ -117,7 +117,7 @@ def make_kspacing_kpoints(box, kspacing, kgamma):
rbox = _reciprocal_box(box)
kpoints = [
max(1, (np.ceil(2 * np.pi * np.linalg.norm(ii) / ks).astype(int)))
for ii, ks in zip(rbox, kspacing)
for ii, ks in zip(rbox, kspacing) # type: ignore
]
ret = _make_vasp_kpoints(kpoints, kgamma)
return ret
25 changes: 22 additions & 3 deletions dpgen2/op/run_dp_train.py
@@ -51,6 +51,7 @@ class RunDPTrain(OP):

default_optional_parameter = {
"mixed_type": False,
"finetune_mode": "no",
}

@classmethod
@@ -115,6 +116,7 @@ def execute(
On the failure of training or freezing. Human intervention needed.
"""
mixed_type = ip["optional_parameter"]["mixed_type"]
finetune_mode = ip["optional_parameter"]["finetune_mode"]
config = ip["config"] if ip["config"] is not None else {}
config = RunDPTrain.normalize_config(config)
task_name = ip["task_name"]
@@ -159,7 +161,9 @@ def execute(
train_dict, config, do_init_model, major_version
)

if RunDPTrain.skip_training(work_dir, train_dict, init_model, iter_data):
if RunDPTrain.skip_training(
work_dir, train_dict, init_model, iter_data, finetune_mode
):
return OPIO(
{
"script": work_dir / train_script_name,
@@ -181,14 +185,22 @@ def clean_before_quit():
json.dump(train_dict, fp, indent=4)

# train model
if do_init_model:
if do_init_model or finetune_mode == "train-init":
command = [
"dp",
"train",
"--init-frz-model",
str(init_model),
train_script_name,
]
elif finetune_mode == "finetune":
command = [
"dp",
"train",
train_script_name,
"--finetune",
str(init_model),
]
else:
command = ["dp", "train", train_script_name]
ret, out, err = run_command(command)
@@ -202,6 +214,9 @@ def clean_before_quit():
fplog.write("#=================== train std err ===================\n")
fplog.write(err)

if finetune_mode == "finetune" and os.path.exists("input_v2_compat.json"):
shutil.copy2("input_v2_compat.json", train_script_name)

# freeze model
ret, out, err = run_command(["dp", "freeze", "-o", "frozen_model.pb"])
if ret != 0:
@@ -280,8 +295,13 @@ def skip_training(
train_dict,
init_model,
iter_data,
finetune_mode,
):
# we have init model and no iter data, skip training
if finetune_mode is not None and (
finetune_mode == "train-init" or finetune_mode == "finetune"
):
return False
if (init_model is not None) and (iter_data is None or len(iter_data) == 0):
with set_directory(work_dir):
with open(train_script_name, "w") as fp:
@@ -345,7 +365,6 @@ def training_args():
doc_init_model_start_pref_v = (
"The start virial prefactor in loss when init-model"
)

return [
Argument(
"init_model_policy",
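For clarity, a sketch (not in the diff) of the dp train command that execute builds for each value of finetune_mode, assuming init_model points to the pretrained or previously frozen model and train_script_name is the generated training script:

# finetune_mode == "finetune": finetune from the pretrained model
command = ["dp", "train", train_script_name, "--finetune", str(init_model)]
# finetune_mode == "train-init" (or do_init_model is True): continue from a frozen model
command = ["dp", "train", "--init-frz-model", str(init_model), train_script_name]
# otherwise: train from scratch
command = ["dp", "train", train_script_name]

Note also that skip_training now always returns False when finetune_mode is "finetune" or "train-init", so these branches are never skipped.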
2 changes: 1 addition & 1 deletion dpgen2/op/select_confs.py
@@ -107,7 +107,7 @@ def validate_trajs(
)
rett = []
retm = []
for tt, mm in zip(trajs, model_devis):
for tt, mm in zip(trajs, model_devis): # type: ignore
if (tt is None and mm is not None) or (tt is not None and mm is None):
raise FatalError("trajs frame is {tt} while model_devis frame is {mm}")
elif tt is not None and mm is not None:
2 changes: 2 additions & 0 deletions dpgen2/superop/block.py
@@ -60,6 +60,7 @@

block_default_optional_parameter = {
"data_mixed_type": False,
"finetune_mode": "no",
}


@@ -72,6 +73,7 @@ def make_collect_data_optional_parameter(block_optional_parameter):
def make_run_dp_train_optional_parameter(block_optional_parameter):
return {
"mixed_type": block_optional_parameter["data_mixed_type"],
"finetune_mode": block_optional_parameter["finetune_mode"],
}


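Putting the plumbing together, a sketch (not part of the commit) of how finetune_mode is threaded from the concurrent-learning level down to RunDPTrain:

# concurrent-learning level (dpgen_loop.py)
cl_optional_parameter = {"data_mixed_type": False, "finetune_mode": "train-init"}
# block level (dpgen_loop.py)
block_optional_parameter = make_block_optional_parameter(cl_optional_parameter)
#   -> {"data_mixed_type": False, "finetune_mode": "train-init"}
# run level, consumed by RunDPTrain.execute (block.py)
run_optional_parameter = make_run_dp_train_optional_parameter(block_optional_parameter)
#   -> {"mixed_type": False, "finetune_mode": "train-init"}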
