Improve the document (#175)
- provide cross links when it is applicable
- fix bugs
- remove duplicated contents

---------

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
wanghan-iapcm and Han Wang committed Oct 13, 2023
1 parent 2bcc460 commit 7794611
Showing 12 changed files with 183 additions and 72 deletions.
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -16,7 +16,7 @@ DPGEN2 is the 2nd generation of the Deep Potential GENerator.
   cli
   input.md
   submit_args
-   dpgen2_configs
+   .. dpgen2_configs
.. _developer-guide:

83 changes: 43 additions & 40 deletions docs/input.md
@@ -40,7 +40,7 @@ This section provides the inputs to start a dpgen2 workflow. An example for the
],
}
```
The key `"init_data_sys"` provides the initial training data to kick-off the training of deep potential (DP) models.
The key {dargs:argument}`"init_data_sys" <inputs/init_data_sys>` provides the initial training data to kick-off the training of deep potential (DP) models.
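
For orientation, a minimal sketch of what such an `inputs` section may look like (paths and values here are illustrative placeholders, not taken from this commit):
```json
"inputs": {
    "type_map": ["Al", "Mg"],
    "mass_map": [27.0, 24.0],
    "init_data_prefix": "",
    "init_data_sys": [
        "path/to/init/data/system"
    ]
}
```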


### Training
@@ -51,15 +51,13 @@ This section defines how a model is trained.
"type" : "dp",
"numb_models" : 4,
"config" : {},
"template_script" : {
"_comment" : "omitted content of tempalte script"
},
"template_script" : "/path/to/the/template/input.json",
"_comment" : "all"
}
```
The `"type" : "dp"` tell the traning method is `"dp"`, i.e. calling [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit) to train DP models.
The `"config"` key defines the training configs, see [the full documentation](rundptrainargs).
The `"template_script"` provides the template training script in `json` format.
The `"type" : "dp"` tell the traning method is {dargs:argument}`"dp" <train>`, i.e. calling [DeePMD-kit](https://github.com/deepmodeling/deepmd-kit) to train DP models.
The `"config"` key defines the training configs, see {ref}`the full documentation<train[dp]/config>`.
The {dargs:argument}`"template_script" <train[dp]/template_script>` provides the template training script in `json` format.


### Exploration
@@ -71,23 +69,28 @@ This section defines how the configuration space is explored.
"config" : {
"command": "lmp -var restart 0"
},
"convergence": {
"type" : "fixed-levels",
"conv_accuracy" : 0.9,
"level_f_lo": 0.05,
"level_f_hi": 0.50,
"_comment" : "all"
},
"max_numb_iter" : 5,
"conv_accuracy" : 0.9,
"fatal_at_max" : false,
"f_trust_lo": 0.05,
"f_trust_hi": 0.50,
"configurations": [
         {
+            "type": "alloy",
             "lattice" : ["fcc", 4.57],
-            "replicate" : [3, 3, 3],
+            "replicate" : [2, 2, 2],
             "numb_confs" : 30,
             "concentration" : [[1.0, 0.0], [0.5, 0.5], [0.0, 1.0]]
+        },
+        {
+            "type" : "file",
+            "prefix": "/file/prefix",
+            "files" : ["relpath/to/confs/*"],
+            "fmt" : "deepmd/npy"
         }
],
"stages": [
@@ -99,7 +102,7 @@ This section defines how the configuration space is explored.
"conf_idx": [0], "n_sample" : 3
},
{
"_comment" : "stage 1, task group 0",
"_comment" : "stage 0, task group 1",
"type" : "lmp-template",
"lmp" : "template.lammps", "plm" : "template.plumed",
"trj_freq" : 10, "revisions" : {"V_NSTEPS" : [40], "V_TEMP" : [150, 200]},
@@ -113,20 +116,20 @@ This section defines how the configuration space is explored.
"ensemble": "npt", "nsteps": 50, "press": [1e0], "temps": [50, 100, 200], "trj_freq": 10,
"conf_idx": [1], "n_sample" : 3
}
-        ],
-    ],
+        ]
+    ]
}
```
The `"type" : "lmp"` means that configurations are explored by LAMMPS DPMD runs.
The `"config"` key defines the lmp configs, see [the full documentation](runlmpargs).
The `"configurations"` provides the initial configurations (coordinates of atoms and the simulation cell) of the DPMD simulations. It is a list. The elements of the list can be
The {dargs:argument}`"type" : "lmp"<explore>` means that configurations are explored by LAMMPS DPMD runs.
The {dargs:argument}`"config"<explore[lmp]/config>` key defines the lmp configs.
The {dargs:argument}`"configurations"<explore[lmp]/configurations>` provides the initial configurations (coordinates of atoms and the simulation cell) of the DPMD simulations. It is a list. The elements of the list are `dict`s that defines how the configurations are generated

-- `list[str]`: The strings provides the path to the configuration files.
-- `dict`: Automatic alloy configuration generator. See [the detailed doc](alloy_configs) of the allowed keys.
+- Automatic alloy configuration generator. See {ref}`the detailed doc<explore[lmp]/configurations[alloy]>` for the allowed keys.
+- Configurations loaded from files. See {ref}`the detailed doc<explore[lmp]/configurations[file]>` for the allowed keys.

The `"stages"` defines the exploration stages. It is of type `list[list[dict]]`. The outer `list` enumerate the exploration stages, the inner list enumerate the task groups of the stage. Each `dict` defines a stage. See [the full documentation of the target group](task_group_configs) for writting task groups.
The {dargs:argument}`"stages"<explore[lmp]/stages>` defines the exploration stages. It is of type `list[list[dict]]`. The outer `list` enumerate the exploration stages, the inner list enumerate the task groups of the stage. Each `dict` defines a stage. See {ref}`the full documentation of the task group<task_group_sec>` for writting task groups.

`"n_sample"` tells the number of confgiruations randomly sampled from the set picked by `"conf_idx"` from `configurations` for each exploration task. All configurations has the equal possibility to be sampled. The default value of `"n_sample"` is `null`, in this case all picked configurations are sampled. In the example, we have 3 samples for stage 0 task group 0 and 2 thermodynamic states (NVT, T=50 and 100K), then the task group has 3x2=6 NVT DPMD tasks.
The {dargs:argument}`"n_sample"<task_group[lmp-md]/n_sample>` tells the number of confgiruations randomly sampled from the set picked by {dargs:argument}`"conf_idx"<task_group[lmp-md]/conf_idx>` from {dargs:argument}`"configurations"<explore[lmp]/configurations>` for each exploration task. All configurations has the equal possibility to be sampled. The default value of `"n_sample"` is `null`, in this case all picked configurations are sampled. In the example, we have 3 samples for stage 0 task group 0 and 2 thermodynamic states (NVT, T=50 and 100K), then the task group has 3x2=6 NVT DPMD tasks.


### FP
@@ -135,25 +138,27 @@ This section defines the first-principles (FP) calculation.

```json
"fp" : {
"type" : "vasp",
"config" : {
"type": "vasp",
"task_max": 2,
"run_config": {
"command": "source /opt/intel/oneapi/setvars.sh && mpirun -n 16 vasp_std"
},
"task_max": 2,
"pp_files": {"Al" : "vasp/POTCAR.Al", "Mg" : "vasp/POTCAR.Mg"},
"incar": "vasp/INCAR",
"_comment" : "all"
"inputs_config": {
"pp_files": {"Al" : "vasp/POTCAR.Al", "Mg" : "vasp/POTCAR.Mg"},
"kspacing": 0.32,
"incar": "vasp/INCAR"
}
}
```
The `"type" : "vasp"` means that first-principles are VASP calculations.
The `"config"` key defines the vasp configs, see [the full documentation](runvaspargs).
The `"task_max"` key defines the maximal number of vasp calculations in each dpgen2 iteration.
The `"pp_files"` and `"incar"` keys provides the pseudopotential files and the template incar file.
The {dargs:argument}`"type" : "vasp"<fp>` means that first-principles are VASP calculations.
The {dargs:argument}`"run_config"<fp[vasp]/run_config>` key defines the configs for running VASP tasks.
The {dargs:argument}`"task_max"<fp[vasp]/task_max>` key defines the maximal number of vasp calculations in each dpgen2 iteration.
The {dargs:argument}`"pp_files"<fp[vasp]/inputs_config/pp_files>`, {dargs:argument}`"kspacing"<fp[vasp]/inputs_config/kspacing>` and {dargs:argument}`"incar"<fp[vasp]/inputs_config/incar>` keys provides the pseudopotential files, spacing for kspace sampling and the template incar file, respectively.


### Configuration of dflow step

-The execution units of the dpgen2 are the dflow `Step`s. How each step is executed is defined by the `"step_configs"`.
+The execution units of dpgen2 are dflow `Step`s. How each step is executed is defined by the {dargs:argument}`"step_configs"<step_configs>`.
```json
"step_configs":{
"prep_train_config" : {
Expand Down Expand Up @@ -188,14 +193,12 @@ The execution units of the dpgen2 are the dflow `Step`s. How each step is execut
```
The configs for prepare training, run training, prepare exploration, run exploration, prepare fp, run fp, select configurations, collect data and concurrent learning steps are given correspondingly.
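
For illustration (not part of this commit), a single entry of `"step_configs"` may look like the following sketch, reusing only keys that appear elsewhere in this document; the image name is a placeholder:
```json
"run_train_config" : {
    "template_config" : {
        "image" : "dpgen2:x.x.x"
    },
    "_comment" : "all"
}
```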

-The readers are refered to [this page](stepconfigargs) for a full documentation of the step configs.
-
-Any of the config in the `step_configs` can be ommitted. If so, the configs of the step is set to the default step configs, which is provided by the following section, for example,
+Any of the configs in the {dargs:argument}`"step_configs"<step_configs>` can be omitted. If so, the config of the step is set to the default step config, which is provided by the following section, for example,
```json
"default_step_config" : {
"template_config" : {
"image" : "dpgen2:x.x.x"
}
},
```
-The way of writing the `default_step_config` is the same as any step config in the `step_configs`. One may refer to [this page](stepconfigargs) for full documentation.
+The way of writing the {dargs:argument}`"default_step_config"<default_step_config>` is the same as any step config in the {dargs:argument}`"step_configs"<step_configs>`.
13 changes: 12 additions & 1 deletion docs/submit_args.rst
@@ -1,9 +1,20 @@
-.. _submitargs:

Arguments of the submit script
==============================
+.. note::
+   One can load, modify, and export the input file by using our effective web-based tool `DP-GUI <https://deepmodeling.com/dpgui/input/dpgen2-submit>`_ online or hosted using the :ref:`command line interface <cli>` :code:`dpgen2 gui`. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file.
+
+.. _submitargs:
.. dargs::
   :module: dpgen2.entrypoint.args
   :func: submit_args


+.. _task_group_sec:
+
+Task group definition
+---------------------
+
+.. dargs::
+   :module: dpgen2.exploration.task
+   :func: task_group_args
17 changes: 15 additions & 2 deletions dpgen2/conf/alloy_conf.py
@@ -95,12 +95,25 @@ def generate(
            ms.append(ss)
        return ms

+    @staticmethod
+    def doc() -> str:
+        from dpgen2.entrypoint.args import (
+            make_link,
+        )
+
+        return f"Generate alloys with {make_link('a certain lattice or user provided structure', 'explore[lmp]/configurations[alloy]/lattice')}, the elements randomly occupying the lattice with {make_link('user provided probability', 'explore[lmp]/configurations[alloy]/concentration')}."

     @staticmethod
     def args() -> List[Argument]:
+        from dpgen2.entrypoint.args import (
+            make_link,
+        )
+
+        link_to_type_map = make_link("type_map", "inputs/type_map")
         doc_numb_confs = "The number of configurations to generate"
-        doc_lattice = 'The lattice. Should be a list providing [ "lattice_type", lattice_const ], or a list providing [ "/path/to/dpdata/system", "fmt" ]. The two styles are distinguished by the type of the second element.'
+        doc_lattice = 'The lattice. Should be a list providing [ "lattice_type", lattice_const ], or a list providing [ "/path/to/dpdata/system", "fmt" ]. The two styles are distinguished by the type of the second element. Currently "lattice_type" can be "bcc", "fcc", "hcp", "sc" or "diamond".'
         doc_replicate = "The number of replicates in each direction"
-        doc_concentration = "The concentration of each element. If None all elements have the same concentration"
+        doc_concentration = f"The concentration of each element. `List[List[float]]` or `List[float]` or `None`. If `List[float]`, the concentrations of each element. The length of the list should be the same as the {link_to_type_map}. If `List[List[float]]`, a list of concentrations (`List[float]`) is randomly picked from the list. If `None`, the elements are assumed to be of equal concentration."
         doc_cell_pert_frac = "The fraction of cell perturbation"
         doc_atom_pert_dist = "The distance of atomic position perturbation"

4 changes: 4 additions & 0 deletions dpgen2/conf/file_conf.py
@@ -81,6 +81,10 @@ def generate_mixed(
        ms.from_deepmd_npy_mixed(self.files[0], fmt="deepmd/npy/mixed", labeled=False)  # type: ignore
        return ms

+    @staticmethod
+    def doc() -> str:
+        return "Generate configurations from user provided file(s). The file(s) are assumed to be loadable by `dpdata`."

    @staticmethod
    def args() -> List[Argument]:
        doc_files = "The paths to the configuration files. Wildcards are supported."
67 changes: 44 additions & 23 deletions dpgen2/entrypoint/args.py
@@ -31,6 +31,13 @@
)


+def make_link(content, ref_key):
+    raw_anchor = dargs.dargs.RAW_ANCHOR
+    return (
+        f"`{content} <{ref_key}_>`_" if not raw_anchor else f"`{content} <#{ref_key}>`_"
+    )


def dp_dist_train_args():
    doc_config = "Configuration of training"
    doc_template_script = "File names of the template training script. It can be a `List[str]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `str`, the models share the same template training script. "
@@ -95,10 +102,17 @@ def variant_train():


def variant_conv():
-    doc = "the type of the convergence check"
+    doc = "the type of the candidate selection and convergence check method."
     var_list = []
     for kk in conv_styles.keys():
-        var_list.append(Argument(kk, dict, conv_styles[kk].args()))
+        var_list.append(
+            Argument(
+                kk,
+                dict,
+                conv_styles[kk].args(),
+                doc=conv_styles[kk].doc(),
+            )
+        )
     return Variant(
         "type",
         var_list,
Expand All @@ -107,10 +121,17 @@ def variant_conv():


def variant_conf():
-    doc = "the type of the configuration generator"
+    doc = "the type of the initial configuration generator."
     var_list = []
     for kk in conf_styles.keys():
-        var_list.append(Argument(kk, dict, conf_styles[kk].args()))
+        var_list.append(
+            Argument(
+                kk,
+                dict,
+                conf_styles[kk].args(),
+                doc=conf_styles[kk].doc(),
+            )
+        )
     return Variant(
         "type",
         var_list,
@@ -128,7 +149,12 @@ def lmp_args():
     doc_convergence = "The method of convergence check."
     doc_configuration_prefix = "The path prefix of lmp initial configurations"
     doc_configuration = "A list of initial configurations."
-    doc_stages = "A list of exploration stages."
+    doc_stages = (
+        "The definition of exploration stages of type `List[List[ExplorationTaskGroup]]`. "
+        "The outer list provides the enumeration of the exploration stages. "
+        "Then each stage is defined by a list of exploration task groups. "
+        "Each task group is described in :ref:`the task group definition<task_group_sec>` "
+    )

    return [
        Argument(
@@ -156,13 +182,6 @@ def lmp_args():
             optional=False,
             doc=doc_convergence,
         ),
-        Argument(
-            "configuration_prefix",
-            str,
-            optional=True,
-            default=None,
-            doc=doc_configuration_prefix,
-        ),
         Argument(
             "configurations",
             list,
@@ -178,11 +197,12 @@ def variant_explore():


def variant_explore():
-    doc = "the type of the exploration"
+    doc = "The type of the exploration"
+    doc_lmp = "The exploration by LAMMPS simulations"
     return Variant(
         "type",
         [
-            Argument("lmp", dict, lmp_args()),
+            Argument("lmp", dict, lmp_args(), doc=doc_lmp),
         ],
         doc=doc,
     )
@@ -232,15 +252,16 @@ def input_args():
     doc_type_map = 'The type map. e.g. ["Al", "Mg"]. Al and Mg will have type 0 and 1, respectively.'
     doc_mass_map = "The mass map. e.g. [27., 24.]. Al and Mg will be set with mass 27. and 24. amu, respectively."
     doc_mixed_type = "Use `deepmd/npy/mixed` format for storing training data."
-    doc_do_finetune = """Finetune the pretrained model before the first iteration. If it is set to True, then an additional step, finetune-step,
-    which is based on a branch of "PrepRunDPTrain," will be added before the dpgen_step. In the
-    finetune-step, the internal flag finetune_mode is set to "finetune," which means SuperOP "PrepRunDPTrain"
-    is now used as the "Finetune." In this step, we finetune the pretrained model in the train step and modify
-    the template after training. After that, in the normal dpgen-step, the flag do_finetune is set as "train-init,"
-    which means we use --init-frz-model to train based on models from the previous iteration. The "do_finetune" flag
-    is set to False by default, while the internal flag finetune_mode is set to "no," which means anything related
-    to finetuning will not be done.
-    """
+    doc_do_finetune = (
+        "Finetune the pretrained model before the first iteration. If it is set to True, then an additional step, finetune-step, "
+        'which is based on a branch of "PrepRunDPTrain," will be added before the dpgen_step. In the '
+        'finetune-step, the internal flag finetune_mode is set to "finetune," which means SuperOP "PrepRunDPTrain" '
+        'is now used as the "Finetune." In this step, we finetune the pretrained model in the train step and modify '
+        'the template after training. After that, in the normal dpgen-step, the flag do_finetune is set as "train-init," '
+        'which means we use `--init-frz-model` to train based on models from the previous iteration. The "do_finetune" flag '
+        'is set to False by default, while the internal flag finetune_mode is set to "no," which means anything related '
+        "to finetuning will not be done."
+    )
     doc_do_finetune = textwrap.dedent(doc_do_finetune)
     doc_init_data_prefix = "The prefix of initial data systems"
     doc_init_sys = "The initial data systems"
21 changes: 21 additions & 0 deletions dpgen2/exploration/report/report_adaptive_lower.py
@@ -128,6 +128,27 @@ def __init__(
        self.fmt_flt = "%.4f"
        self.header_str = "#" + self.fmt_str % print_tuple

+    @staticmethod
+    def doc() -> str:
+        def make_class_doc_link(key):
+            from dpgen2.entrypoint.args import (
+                make_link,
+            )
+
+            return make_link(key, f"explore[lmp]/convergence[adaptive-lower]/{key}")
+
+        numb_candi_f_link = make_class_doc_link("numb_candi_f")
+        rate_candi_f_link = make_class_doc_link("rate_candi_f")
+        numb_candi_v_link = make_class_doc_link("numb_candi_v")
+        rate_candi_v_link = make_class_doc_link("rate_candi_v")
+        numb_candi_s = f"{numb_candi_f_link} or {numb_candi_v_link}"
+        rate_candi_s = f"{rate_candi_f_link} or {rate_candi_v_link}"
+        level_f_hi_link = make_class_doc_link("level_f_hi")
+        level_v_hi_link = make_class_doc_link("level_v_hi")
+        conv_tolerance_link = make_class_doc_link("conv_tolerance")
+        n_checked_steps_link = make_class_doc_link("n_checked_steps")
+        return f"The method of adaptively adjusting the lower trust levels. In each iteration, a number (set by {numb_candi_s}) or a ratio (set by {rate_candi_s}) of configurations with a model deviation lower than the higher trust level ({level_f_hi_link}, {level_v_hi_link}) are treated as candidates. The lowest model deviation of the candidates is treated as the lower trust level. If the lower trust level does not change significantly (controlled by {conv_tolerance_link}) within {n_checked_steps_link}, the stage is treated as converged."

    @staticmethod
    def args() -> List[Argument]:
        doc_level_f_hi = "The higher trust level of force model deviation"
