refactor fp. easier to implement new fp methods (#98)

* refactor fp. easier to implement new fp methods
* fix bugs
* fix UT for latest pydflow
* define PrepFp and RunFp as ABC. Add abstractmethod to them. Add doc strs
* fix bugs
* add static abstract `args` method to RunFp. mv normalize_config to RunFp.
* add doc str for normalize_config

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
deepmodeling · Dec 19, 2022 · f7cb932 · f7cb932
1 parent e5d3b9f
commit f7cb932
Show file tree

Hide file tree

Showing 20 changed files with 677 additions and 397 deletions.
diff --git a/dpgen2/constants.py b/dpgen2/constants.py
@@ -12,14 +12,10 @@
 lmp_traj_name = 'traj.dump'
 lmp_log_name = 'log.lammps'
 lmp_model_devi_name = 'model_devi.out'
-vasp_index_pattern = '%06d'
-vasp_task_pattern = 'task.' + vasp_index_pattern
-vasp_conf_name = 'POSCAR'
-vasp_input_name = 'INCAR'
-vasp_pot_name = 'POTCAR'
-vasp_kp_name = 'KPOINTS'
-vasp_default_log_name = 'vasp.log'
-vasp_default_out_data_name = 'data'
+fp_index_pattern = '%06d'
+fp_task_pattern = 'task.' + fp_index_pattern
+fp_default_log_name = 'fp.log'
+fp_default_out_data_name = 'data'
 
 default_image = 'dptechnology/dpgen2:latest'
 default_host = '127.0.0.1:2746'
diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py
@@ -31,8 +31,6 @@
     RunDPTrain,
     PrepLmp,
     RunLmp,
-    PrepVasp,
-    RunVasp,
     SelectConfs,
     CollectData,
 )
@@ -45,9 +43,7 @@
 from dpgen2.flow import (
     ConcurrentLearning,
 )
-from dpgen2.fp import (
-    VaspInputs,
-)
+from dpgen2.fp import fp_styles
 from dpgen2.exploration.scheduler import (
     ExplorationScheduler,
     ConvergenceCheckStageScheduler,
@@ -135,11 +131,12 @@ def make_concurrent_learning_op (
         )
     else:
         raise RuntimeError(f'unknown explore_style {explore_style}')
-    if fp_style == 'vasp':
+
+    if fp_style in fp_styles.keys():        
         prep_run_fp_op = PrepRunFp(
-            "prep-run-vasp",
-            PrepVasp,
-            RunVasp,
+            f"prep-run-fp",
+            fp_styles[fp_style]['prep'],
+            fp_styles[fp_style]['run'],
             prep_config = prep_fp_config,
             run_config = run_fp_config,
             upload_python_packages = upload_python_packages,
@@ -344,16 +341,23 @@ def workflow_concurrent_learning(
     train_config = {} if old_style else config['train']['config']
     lmp_config = config.get('lmp_config', {}) if old_style else config['explore']['config']
     fp_config = config.get('fp_config', {}) if old_style else config['fp']['config']
-    kspacing, kgamma = get_kspacing_kgamma_from_incar(config['fp_incar'] if old_style else config['fp']['incar'])
-    fp_pp_files = config['fp_pp_files'] if old_style else config['fp']['pp_files']
-    incar_file = config['fp_incar'] if old_style else config['fp']['incar']
-    fp_inputs = VaspInputs(
-        kspacing = kspacing,
-        kgamma = kgamma,
-        incar_template_name = incar_file,
-        potcar_names = fp_pp_files,
-    )
+    if old_style:        
+        potcar_names = config['fp_pp_files']
+        incar_template_name = config['fp_incar']
+        kspacing, kgamma = get_kspacing_kgamma_from_incar(incar_template_name)
+        fp_inputs_config = {
+            'kspacing' : kspacing,
+            'kgamma' : kgamma,
+            'incar_template_name' : incar_template_name,
+            'potcar_names' : potcar_names,
+        }
+    else:
+        fp_inputs_config = config['fp']['inputs_config']
+    fp_inputs = fp_styles[fp_style]['inputs'](**fp_inputs_config)
+
     fp_config['inputs'] = fp_inputs
+    fp_config['run'] = config['fp']['run_config']
+
     init_data_prefix = config.get('init_data_prefix') if old_style else config['inputs']['init_data_prefix']
     init_data = config['init_data_sys'] if old_style else config['inputs']['init_data_sys']
     if init_data_prefix is not None:

diff --git a/dpgen2/entrypoint/submit_args.py b/dpgen2/entrypoint/submit_args.py
@@ -7,11 +7,11 @@
 from dflow.plugins.lebesgue import LebesgueExecutor
 from dpgen2.op.run_dp_train import RunDPTrain
 from dpgen2.op.run_lmp import RunLmp
-from dpgen2.op.run_vasp import RunVasp
 from dpgen2.utils import (
     step_conf_args,
     normalize_step_dict,
 )
+from dpgen2.fp import fp_styles
 
 def dp_train_args():
     doc_numb_models = "Number of models trained for evaluating the model deviation"
@@ -63,24 +63,41 @@ def variant_explore():
         Argument("lmp", dict, lmp_args()),
     ], doc=doc)
 
-def vasp_args():
-    doc_config = "Configuration of vasp runs"
+
+def fp_args(inputs, run):
+    doc_inputs_config = "Configuration for preparing vasp inputs"
+    doc_run_config = "Configuration for running vasp tasks"
     doc_task_max = "Maximum number of vasp tasks for each iteration"
-    doc_pp_files = 'The pseudopotential files set by a dict, e.g. {"Al" : "path/to/the/al/pp/file", "Mg" : "path/to/the/mg/pp/file"}'
-    doc_incar = "The path to the template incar file"
 
     return [
-        Argument("config", dict, RunVasp.vasp_args(), optional=True, default=RunVasp.normalize_config({}), doc=doc_config),
+        Argument("inputs_config", dict, 
+                 inputs.args(),
+                 optional=False,
+                 doc=doc_inputs_config,
+                 ),
+        Argument("run_config", dict,
+                 run.args(),
+                 optional=False,
+                 doc=doc_run_config,
+                 ),
         Argument("task_max", int, optional=True, default=10, doc=doc_task_max),
-        Argument("pp_files", dict, optional=False, doc=doc_pp_files),
-        Argument("incar", str, optional=False, doc=doc_pp_files),
     ]
 
+
 def variant_fp():
     doc = "the type of the fp"
-    return Variant("type", [
-        Argument("vasp", dict, vasp_args()),
-    ], doc=doc)
+
+    fp_list = []
+    for kk in fp_styles.keys():
+        fp_list.append(
+            Argument(
+                kk,
+                dict, 
+                fp_args(fp_styles[kk]['inputs'], fp_styles[kk]['run']),
+            ))
+
+    return Variant("type", fp_list, doc=doc)
+
 
 def input_args():
     doc_type_map = 'The type map. e.g. ["Al", "Mg"]. Al and Mg will have type 0 and 1, respectively.'

diff --git a/dpgen2/fp/__init__.py b/dpgen2/fp/__init__.py
@@ -1 +1,13 @@
-from .vasp import VaspInputs
+from .vasp import (
+    VaspInputs,
+    PrepVasp,
+    RunVasp,
+)
+
+fp_styles = {
+    "vasp" :  {
+        "inputs" : VaspInputs,
+        "prep" : PrepVasp,
+        "run" : RunVasp,
+    }
+}
diff --git a/dpgen2/op/prep_vasp.py → dpgen2/fp/prep_fp.py b/dpgen2/op/prep_vasp.py → dpgen2/fp/prep_fp.py
@@ -1,3 +1,4 @@
+from abc import ABC, abstractmethod
 import dpdata
 from dflow.python import (
     OP,
@@ -13,25 +14,21 @@
     Set, 
     Dict,
     Union,
+    Any,
 )
 from pathlib import Path
-from dpgen2.fp.vasp import VaspInputs
 from dpgen2.utils import (
     set_directory,
 )
 from dpgen2.constants import (
-    vasp_task_pattern,
-    vasp_conf_name,
-    vasp_input_name,
-    vasp_pot_name,
-    vasp_kp_name,
+    fp_task_pattern,
 )
 
-class PrepVasp(OP):
-    r"""Prepares the working directories for VASP tasks.
+class PrepFp(OP, ABC):
+    r"""Prepares the working directories for first-principles (FP) tasks.
 
     A list of (same length as ip["confs"]) working directories
-    containing all files needed to start VASP tasks will be
+    containing all files needed to start FP tasks will be
     created. The paths of the directories will be returned as
     `op["task_paths"]`. The identities of the tasks are returned as
     `op["task_names"]`.
@@ -53,6 +50,23 @@ def get_output_sign(cls):
             "task_paths" : Artifact(List[Path]),
         })
 
+    @abstractmethod
+    def prep_task(
+            self,
+            conf_frame: dpdata.System,
+            inputs: Any,
+    ):
+        r"""Define how one FP task is prepared.
+
+        Parameters
+        ----------
+        conf_frame : dpdata.System
+            One frame of configuration in the dpdata format.
+        inputs: Any
+            The object that handles all other input files of the task.
+            For example, pseudopotential file, k-point file and so on.
+        """
+        pass
 
     @OP.exec_sign_check
     def execute(
@@ -66,16 +80,16 @@ def execute(
         ip : dict
             Input dict with components:
 
-            - `config` : (`dict`) Should have `config['inputs']`, which is of type `VaspInputs` and definites the VASP inputs
-            - `confs` : (`Artifact(List[Path])`) Configurations for the VASP tasks. Stored in folders as deepmd/npy format. Can be parsed as dpdata.MultiSystems. 
+            - `config` : (`dict`) Should have `config['inputs']`, which defines the input files of the FP task.
+            - `confs` : (`Artifact(List[Path])`) Configurations for the FP tasks. Stored in folders as deepmd/npy format. Can be parsed as dpdata.MultiSystems. 
         
         Returns
         -------
         op : dict 
             Output dict with components:
 
             - `task_names`: (`List[str]`) The name of tasks. Will be used as the identities of the tasks. The names of different tasks are different.
-            - `task_paths`: (`Artifact(List[Path])`) The parepared working paths of the tasks. Contains all input files needed to start the VASP. The order fo the Paths should be consistent with `op["task_names"]`
+            - `task_paths`: (`Artifact(List[Path])`) The prepared working paths of the tasks. Contains all input files needed to start the FP. The order of the Paths should be consistent with `op["task_names"]`
         """
 
         inputs = ip['config']['inputs']
@@ -107,22 +121,11 @@ def execute(
     def _exec_one_frame(
             self,
             idx,
-            vasp_inputs : VaspInputs,
+            inputs,
             conf_frame : dpdata.System,
     ) -> Tuple[str, Path]:
-        task_name = vasp_task_pattern % idx
+        task_name = fp_task_pattern % idx
         task_path = Path(task_name)
         with set_directory(task_path):
-            conf_frame.to('vasp/poscar', vasp_conf_name)
-            Path(vasp_input_name).write_text(
-                vasp_inputs.incar_template
-            )
-            # fix the case when some element have 0 atom, e.g. H0O2
-            tmp_frame = dpdata.System(vasp_conf_name, fmt='vasp/poscar')
-            Path(vasp_pot_name).write_text(
-                vasp_inputs.make_potcar(tmp_frame['atom_names'])
-            )
-            Path(vasp_kp_name).write_text(
-                vasp_inputs.make_kpoints(conf_frame['cells'][0])
-            )
+            self.prep_task(conf_frame, inputs)
         return task_name, task_path