From b7843512d818f0a0cd66c16de2bf48499fb7e501 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 9 May 2022 03:42:29 -0400 Subject: [PATCH 01/25] add link to DP-GUI (#724) --- doc/run-mdata.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/run-mdata.rst b/doc/run-mdata.rst index a7b21c75c..4f92fc4fc 100644 --- a/doc/run-mdata.rst +++ b/doc/run-mdata.rst @@ -1,4 +1,6 @@ dpgen run machine parameters ============================ +.. note:: + One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file. .. include:: run-mdata-auto.rst From f66c2a79ebcc4a1ba5263f2013593f7b7c1056ba Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 23 May 2022 05:29:24 -0400 Subject: [PATCH 02/25] add api to doc (#736) --- .gitignore | 1 + doc/conf.py | 28 ++++++++++++++++++++++++++-- doc/index.rst | 1 + 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 1aa647f54..936bfd426 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ dbconfig.json .idea/* _build tests/generator/calypso_test_path +doc/api/ diff --git a/doc/conf.py b/doc/conf.py index 476146c6a..9d5ecc006 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -43,14 +43,15 @@ "sphinx_rtd_theme", 'myst_parser', 'sphinx.ext.autosummary', + 'sphinx.ext.viewcode', ] # Tell sphinx what the primary language being documented is. -primary_domain = 'cpp' +primary_domain = 'py' # Tell sphinx what the pygments highlight language should be. 
-highlight_language = 'cpp' +highlight_language = 'py' # myst_heading_anchors = 4 @@ -81,9 +82,32 @@ autosummary_generate = True master_doc = 'index' +intersphinx_mapping = { + "python": ("https://docs.python.org/", None), + "dargs": ("https://docs.deepmodeling.com/projects/dargs/en/latest/", None), + "dpdata": ("https://docs.deepmodeling.com/projects/dpdata/en/latest/", None), + "dpdispatcher": ("https://docs.deepmodeling.com/projects/dpdispatcher/en/latest/", None), + "ase": ("https://wiki.fysik.dtu.dk/ase/", None), + "numpy": ("https://docs.scipy.org/doc/numpy/", None), + "pamatgen": ("https://pymatgen.org/", None), + "monty": ("https://guide.materialsvirtuallab.org/monty/", None), + "paramiko": ("https://docs.paramiko.org/en/stable/", None), + "custodian": ("https://cloudcustodian.io/docs/", None), + "GromacsWrapper": ("https://gromacswrapper.readthedocs.io/en/latest/", None), +} + + +def run_apidoc(_): + from sphinx.ext.apidoc import main + sys.path.append(os.path.join(os.path.dirname(__file__), '..')) + cur_dir = os.path.abspath(os.path.dirname(__file__)) + module = os.path.join(cur_dir, "..", "dpgen") + main(['-M', '--tocfile', 'api', '-H', 'DP-GEN API', '-o', os.path.join(cur_dir, "api"), module, '--force']) + def generate_arginfo(app): subprocess.check_output((sys.executable, "gen_arginfo.py"), universal_newlines=True) def setup(app): + app.connect('builder-inited', run_apidoc) app.connect('builder-inited', generate_arginfo) diff --git a/doc/index.rst b/doc/index.rst index aaf253448..1ad29e4e7 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -27,6 +27,7 @@ DPGEN's documentation :caption: Contribution Guild README.md + api/api * :ref:`genindex` * :ref:`modindex` From e3671b04fc8d4d00bf85364cec2220d064c9f72d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 30 May 2022 20:40:23 -0400 Subject: [PATCH 03/25] add DPRc implementation (#729) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add amber 
model_devi_engine and amber/corr fp_style (and make the code more like a 💩 hill) * fix show error * use new dpdispatcher * update new dpdispatcher * update for new dpdispatcher * support dpdispatcher * fix bug when using new dpdispatcher * update for new dpdispatcher init_bulk * update examples for new dpdispatcher * fix install * fix typo use LooseVersion Co-authored-by: Jinzhe Zeng * update LooseVersion * update dependency dpdispatcher min version * kk * support hybrid descriptor (fix #458) (#459) * support multiple param7 files * hdf5 & other improvements * disable sort * kk * migrate parameters into parm file * fix the issue to detect the list in the yaml * append ref to sander command * bugfix * skip the first "active learning" line * bugfix * separate low level and high level mdin * add quote to dpamber command * Revert "skip the first "active learning" line" This reverts commit 3ac402668383d920ab22bba75034f7701d519fb7. * only skip first active line if not restarting * generate sub models * fix the bug of sys_counter * set the seed of type_embedding network * sync with devel * sync with devel * make symlink symlink * fix * fix merge issue * rewrite simplify; support hdf5 as initial training data * bugfix * fix typo * revert changes to simplify; going to submit in another PR * revert training * revert changes * fix bug * add comments * add comments * add tests * fix typo * add an example * add netCDF4 to require * raise RuntimeError when model_devi_engine is unknown Co-authored-by: Yuan Fengbo Co-authored-by: felix5572 Co-authored-by: Yuan Fengbo (袁奉博) <757627927@qq.com> --- dpgen/generator/run.py | 479 +++++++++++++++++++++++--- examples/run/dprc/README.md | 5 + examples/run/dprc/generator.yaml | 215 ++++++++++++ setup.py | 2 +- tests/generator/amber/init_-1.20.rst7 | 0 tests/generator/amber/ml.mdin | 77 +++++ tests/generator/context.py | 1 + tests/generator/param-amber.json | 54 +++ tests/generator/test_make_fp.py | 21 ++ tests/generator/test_make_md.py | 23 
+- tests/generator/test_post_fp.py | 19 + 11 files changed, 854 insertions(+), 42 deletions(-) create mode 100644 examples/run/dprc/README.md create mode 100644 examples/run/dprc/generator.yaml create mode 100644 tests/generator/amber/init_-1.20.rst7 create mode 100644 tests/generator/amber/ml.mdin create mode 100644 tests/generator/param-amber.json diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index dc7a91d3b..d06c137b3 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -27,6 +27,7 @@ import subprocess as sp import scipy.constants as pc from collections import Counter +from collections.abc import Iterable from distutils.version import LooseVersion from typing import List from numpy.linalg import norm @@ -838,7 +839,8 @@ def make_model_devi (iter_index, ss = sys_configs[idx] for ii in ss : cur_systems += glob.glob(ii) - cur_systems.sort() + # cur_systems should not be sorted, as we may add specific constrict to the similutions + #cur_systems.sort() cur_systems = [os.path.abspath(ii) for ii in cur_systems] conf_systems.append (cur_systems) @@ -898,6 +900,12 @@ def make_model_devi (iter_index, system.to_lammps_lmp(os.path.join(conf_path, lmp_name)) elif model_devi_engine == "gromacs": pass + elif model_devi_engine == "amber": + # Jinzhe's specific Amber version + conf_name = make_model_devi_conf_name(sys_idx[sys_counter], conf_counter) + rst7_name = conf_name + '.rst7' + # link restart file + os.symlink(cc, os.path.join(conf_path, rst7_name)) conf_counter += 1 sys_counter += 1 @@ -913,6 +921,8 @@ def make_model_devi (iter_index, _make_model_devi_native(iter_index, jdata, mdata, conf_systems) elif model_devi_engine == "gromacs": _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems) + elif model_devi_engine == "amber": + _make_model_devi_amber(iter_index, jdata, mdata, conf_systems) elif model_devi_engine == "calypso": _make_model_devi_native_calypso(iter_index,model_devi_jobs, calypso_run_opt_path) # generate input.dat 
automatic in each iter else: @@ -1269,7 +1279,165 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): conf_counter += 1 sys_counter += 1 +def _make_model_devi_amber(iter_index: int, jdata: dict, mdata: dict, conf_systems: list): + """Make amber's MD inputs. + + Parameters + ---------- + iter_index : int + iter index + jdata : dict + run parameters. The following parameters will be used in this method: + model_devi_jobs : list[dict] + The list including the dict for information of each cycle: + sys_idx : list[int] + list of systems to run + trj_freq : int + freq to dump trajectory + low_level : str + low level method + cutoff : float + cutoff radius of the DPRc model + parm7_prefix : str + The path prefix to AMBER PARM7 files + parm7 : list[str] + List of paths to AMBER PARM7 files. Each file maps to a system. + mdin_prefix : str + The path prefix to AMBER mdin files + mdin : list[str] + List of paths to AMBER mdin files. Each files maps to a system. + The following keywords will be replaced by the actual value: + @freq@ : freq to dump trajectory + @nstlim@ : total time step to run + @qm_region@ : AMBER mask of the QM region + @qm_theory@ : The QM theory, such as DFTB2 + @qm_charge@ : The total charge of the QM theory, such as -2 + @rcut@ : cutoff radius of the DPRc model + @GRAPH_FILE0@, @GRAPH_FILE1@, ... : graph files + qm_region : list[str] + AMBER mask of the QM region. Each mask maps to a system. + qm_charge : list[int] + Charge of the QM region. Each charge maps to a system. + nsteps : list[int] + The number of steps to run. Each number maps to a system. + r : list[list[float]] or list[list[list[float]]] + Constrict values for the enhanced sampling. The first dimension maps to systems. + The second dimension maps to confs in each system. The third dimension is the + constrict value. It can be a single float for 1D or list of floats for nD. + disang_prefix : str + The path prefix to disang prefix. 
+ disang : list[str] + List of paths to AMBER disang files. Each file maps to a sytem. + The keyword RVAL will be replaced by the constrict values, or RVAL1, RVAL2, ... + for an nD system. + mdata : dict + machine parameters. Nothing will be used in this method. + conf_systems : list + conf systems + + References + ---------- + .. [1] Development of Range-Corrected Deep Learning Potentials for Fast, Accurate Quantum + Mechanical/Molecular Mechanical Simulations of Chemical Reactions in Solution, + Jinzhe Zeng, Timothy J. Giese, Şölen Ekesan, and Darrin M. York, Journal of Chemical + Theory and Computation 2021 17 (11), 6993-7009 + + inputs: restart (coords), param, mdin, graph, disang (optional) + """ + model_devi_jobs = jdata['model_devi_jobs'] + if (iter_index >= len(model_devi_jobs)) : + return False + cur_job = model_devi_jobs[iter_index] + sys_idx = expand_idx(cur_job['sys_idx']) + if (len(sys_idx) != len(list(set(sys_idx)))) : + raise RuntimeError("system index should be uniq") + + iter_name = make_iter_name(iter_index) + train_path = os.path.join(iter_name, train_name) + train_path = os.path.abspath(train_path) + work_path = os.path.join(iter_name, model_devi_name) + # parm7 - list + parm7 = jdata['parm7'] + parm7_prefix = jdata.get("parm7_prefix", "") + parm7 = [os.path.join(parm7_prefix, pp) for pp in parm7] + + # link parm file + for ii, pp in enumerate(parm7): + os.symlink(pp, os.path.join(work_path, 'qmmm%d.parm7'%ii)) + # TODO: consider writing input in json instead of a given file + # mdin + mdin = jdata['mdin'] + mdin_prefix = jdata.get("mdin_prefix", "") + mdin = [os.path.join(mdin_prefix, pp) for pp in mdin] + + qm_region = jdata['qm_region'] + qm_charge = jdata['qm_charge'] + nsteps = jdata['nsteps'] + + for ii, pp in enumerate(mdin): + with open(pp) as f, open(os.path.join(work_path, 'init%d.mdin'%ii), 'w') as fw: + mdin_str = f.read() + # freq, nstlim, qm_region, qm_theory, qm_charge, rcut, graph + mdin_str = mdin_str.replace("@freq@", 
str(cur_job.get('trj_freq', 50))) \ + .replace("@nstlim@", str(nsteps[ii])) \ + .replace("@qm_region@", qm_region[ii]) \ + .replace("@qm_charge@", str(qm_charge[ii])) \ + .replace("@qm_theory@", jdata['low_level']) \ + .replace("@rcut@", str(jdata['cutoff'])) + models = sorted(glob.glob(os.path.join(train_path, "graph.*.pb"))) + task_model_list = [] + for ii in models: + task_model_list.append(os.path.join('..', os.path.basename(ii))) + # graph + for jj, mm in enumerate(task_model_list): + # replace graph + mdin_str = mdin_str.replace("@GRAPH_FILE%d@" % jj, mm) + fw.write(mdin_str) + # disang - list + disang = jdata['disang'] + disang_prefix = jdata.get("disang_prefix", "") + disang = [os.path.join(disang_prefix, pp) for pp in disang] + + for sys_counter, ss in enumerate(conf_systems): + for idx_cc, cc in enumerate(ss) : + task_counter = idx_cc + conf_counter = idx_cc + + task_name = make_model_devi_task_name(sys_idx[sys_counter], task_counter) + conf_name = make_model_devi_conf_name(sys_idx[sys_counter], conf_counter) + task_path = os.path.join(work_path, task_name) + # create task path + create_path(task_path) + # link restart file + loc_conf_name = 'init.rst7' + os.symlink(os.path.join(os.path.join('..','confs'), conf_name + ".rst7"), + os.path.join(task_path, loc_conf_name) ) + cwd_ = os.getcwd() + # chdir to task path + os.chdir(task_path) + + # reaction coordinates of umbrella sampling + # TODO: maybe consider a better name instead of `r`? + if 'r' in jdata: + r=jdata['r'][sys_idx[sys_counter]][conf_counter] + # r can either be a float or a list of float (for 2D coordinates) + if not isinstance(r, Iterable) or isinstance(r, str): + r = [r] + # disang file should include RVAL, RVAL2, ... + with open(disang[sys_idx[sys_counter]]) as f, open('TEMPLATE.disang', 'w') as fw: + tl = f.read() + for ii, rr in enumerate(r): + if isinstance(rr, Iterable) and not isinstance(rr, str): + raise RuntimeError("rr should not be iterable! 
sys: %d rr: %s r: %s" % (sys_idx[sys_counter], str(rr), str(r))) + tl = tl.replace("RVAL"+str(ii+1), str(rr)) + if len(r) == 1: + tl = tl.replace("RVAL", str(r[0])) + fw.write(tl) + + with open('job.json', 'w') as fp: + json.dump(cur_job, fp, indent = 4) + os.chdir(cwd_) def run_md_model_devi (iter_index, jdata, @@ -1353,7 +1521,17 @@ def run_md_model_devi (iter_index, forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, ref_filename, type_filename, "input.json", "job.json" ] if ndx_filename: forward_files.append(ndx_filename) backward_files = ["%s.tpr" % deffnm, "%s.log" %deffnm , traj_filename, 'model_devi.out', "traj", "traj_deepmd" ] - + elif model_devi_engine == "amber": + commands = [( + "TASK=$(basename $(pwd)) && " + "SYS1=${TASK:5:3} && " + "SYS=$((10#$SYS1)) && " + )+ model_devi_exec + ( + " -O -p ../qmmm$SYS.parm7 -c init.rst7 -i ../init$SYS.mdin -o rc.mdout -r rc.rst7 -x rc.nc -inf rc.mdinfo -ref init.rst7" + )] + forward_files = ['init.rst7', 'TEMPLATE.disang'] + backward_files = ['rc.mdout', 'rc.nc', 'rc.rst7', 'TEMPLATE.dumpave'] + model_names.extend(["qmmm*.parm7", "init*.mdin"]) cwd = os.getcwd() @@ -1731,7 +1909,7 @@ def _make_fp_vasp_inner (modd_path, charges_recorder = [] # record charges for each fp_task charges_map = jdata.get("sys_charges", []) - cluster_cutoff = jdata['cluster_cutoff'] if jdata.get('use_clusters', False) else None + cluster_cutoff = jdata.get('cluster_cutoff', None) model_devi_adapt_trust_lo = jdata.get('model_devi_adapt_trust_lo', False) model_devi_f_avg_relative = jdata.get('model_devi_f_avg_relative', False) # skip save *.out if detailed_report_make_fp is False, default is True @@ -1752,40 +1930,78 @@ def _trust_limitation_check(sys_idx, lim): modd_system_glob = os.path.join(modd_path, 'task.' 
+ ss + '.*') modd_system_task = glob.glob(modd_system_glob) modd_system_task.sort() - - # convert global trust limitations to local ones - f_trust_lo_sys = _trust_limitation_check(ss, f_trust_lo) - f_trust_hi_sys = _trust_limitation_check(ss, f_trust_hi) - v_trust_lo_sys = _trust_limitation_check(ss, v_trust_lo) - v_trust_hi_sys = _trust_limitation_check(ss, v_trust_hi) - - # assumed e -> v - if not model_devi_adapt_trust_lo: - fp_rest_accurate, fp_candidate, fp_rest_failed, counter \ - = _select_by_model_devi_standard( - modd_system_task, - f_trust_lo_sys, f_trust_hi_sys, - v_trust_lo_sys, v_trust_hi_sys, - cluster_cutoff, - model_devi_engine, - model_devi_skip, - model_devi_f_avg_relative = model_devi_f_avg_relative, - detailed_report_make_fp = detailed_report_make_fp, - ) + if model_devi_engine in ('lammps', 'gromacs', 'calypso'): + # convert global trust limitations to local ones + f_trust_lo_sys = _trust_limitation_check(ss, f_trust_lo) + f_trust_hi_sys = _trust_limitation_check(ss, f_trust_hi) + v_trust_lo_sys = _trust_limitation_check(ss, v_trust_lo) + v_trust_hi_sys = _trust_limitation_check(ss, v_trust_hi) + + # assumed e -> v + if not model_devi_adapt_trust_lo: + fp_rest_accurate, fp_candidate, fp_rest_failed, counter \ + = _select_by_model_devi_standard( + modd_system_task, + f_trust_lo_sys, f_trust_hi_sys, + v_trust_lo_sys, v_trust_hi_sys, + cluster_cutoff, + model_devi_engine, + model_devi_skip, + model_devi_f_avg_relative = model_devi_f_avg_relative, + detailed_report_make_fp = detailed_report_make_fp, + ) + else: + numb_candi_f = jdata.get('model_devi_numb_candi_f', 10) + numb_candi_v = jdata.get('model_devi_numb_candi_v', 0) + perc_candi_f = jdata.get('model_devi_perc_candi_f', 0.) + perc_candi_v = jdata.get('model_devi_perc_candi_v', 0.) 
+ fp_rest_accurate, fp_candidate, fp_rest_failed, counter, f_trust_lo_ad, v_trust_lo_ad \ + = _select_by_model_devi_adaptive_trust_low( + modd_system_task, + f_trust_hi_sys, numb_candi_f, perc_candi_f, + v_trust_hi_sys, numb_candi_v, perc_candi_v, + model_devi_skip = model_devi_skip, + model_devi_f_avg_relative = model_devi_f_avg_relative, + ) + dlog.info("system {0:s} {1:9s} : f_trust_lo {2:6.3f} v_trust_lo {3:6.3f}".format(ss, 'adapted', f_trust_lo_ad, v_trust_lo_ad)) + elif model_devi_engine == "amber": + counter = Counter() + counter['candidate'] = 0 + counter['failed'] = 0 + counter['accurate'] = 0 + fp_rest_accurate = [] + fp_candidate = [] + fp_rest_failed = [] + for tt in modd_system_task : + cc = 0 + with open(os.path.join(tt, "rc.mdout")) as f: + skip_first = False + first_active = True + for line in f: + if line.startswith(" ntx = 1"): + skip_first = True + if line.startswith("Active learning frame written with max. frc. std.:"): + if skip_first and first_active: + first_active = False + continue + model_devi = float(line.split()[-2]) * dpdata.unit.EnergyConversion("kcal_mol", "eV").value() + if model_devi < f_trust_lo: + # accurate + if detailed_report_make_fp: + fp_rest_accurate.append([tt, cc]) + counter['accurate'] += 1 + elif model_devi > f_trust_hi: + # failed + if detailed_report_make_fp: + fp_rest_failed.append([tt, cc]) + counter['failed'] += 1 + else: + # candidate + fp_candidate.append([tt, cc]) + counter['candidate'] += 1 + cc += 1 else: - numb_candi_f = jdata.get('model_devi_numb_candi_f', 10) - numb_candi_v = jdata.get('model_devi_numb_candi_v', 0) - perc_candi_f = jdata.get('model_devi_perc_candi_f', 0.) - perc_candi_v = jdata.get('model_devi_perc_candi_v', 0.) 
- fp_rest_accurate, fp_candidate, fp_rest_failed, counter, f_trust_lo_ad, v_trust_lo_ad \ - = _select_by_model_devi_adaptive_trust_low( - modd_system_task, - f_trust_hi_sys, numb_candi_f, perc_candi_f, - v_trust_hi_sys, numb_candi_v, perc_candi_v, - model_devi_skip = model_devi_skip, - model_devi_f_avg_relative = model_devi_f_avg_relative, - ) - dlog.info("system {0:s} {1:9s} : f_trust_lo {2:6.3f} v_trust_lo {3:6.3f}".format(ss, 'adapted', f_trust_lo_ad, v_trust_lo_ad)) + raise RuntimeError('unknown model_devi_engine', model_devi_engine) # print a report fp_sum = sum(counter.values()) @@ -1847,6 +2063,7 @@ def _trust_limitation_check(sys_idx, lim): model_devi_engine = jdata.get("model_devi_engine", "lammps") count_bad_box = 0 count_bad_cluster = 0 + fp_candidate = sorted(fp_candidate[:numb_task]) for cc in range(numb_task) : tt = fp_candidate[cc][0] ii = fp_candidate[cc][1] @@ -1858,6 +2075,9 @@ def _trust_limitation_check(sys_idx, lim): elif model_devi_engine == "gromacs": conf_name = os.path.join(conf_name, str(ii) + '.gromacstrj') ffmt = 'lammps/dump' + elif model_devi_engine == "amber": + conf_name = os.path.join(tt, "rc.nc") + rst_name = os.path.abspath(os.path.join(tt, "init.rst7")) elif model_devi_engine == "calypso": conf_name = os.path.join(conf_name, str(ii) + '.poscar') ffmt = 'vasp/poscar' @@ -1897,15 +2117,36 @@ def _trust_limitation_check(sys_idx, lim): cwd = os.getcwd() os.chdir(fp_task_path) if cluster_cutoff is None: - if model_devi_engine != 'calypso': + if model_devi_engine in ("lammps", "gromacs"): os.symlink(os.path.relpath(conf_name), 'conf.dump') os.symlink(os.path.relpath(job_name), 'job.json') - else: + elif model_devi_engine == "amber": + # read and write with ase + from ase.io.netcdftrajectory import NetCDFTrajectory, write_netcdftrajectory + if cc > 0 and tt == fp_candidate[cc-1][0]: + # same MD task, use the same file + pass + else: + # not the same file + if cc > 0: + # close the old file + netcdftraj.close() + netcdftraj = 
NetCDFTrajectory(conf_name) + # write nc file + write_netcdftrajectory('rc.nc', netcdftraj[ii]) + if cc >= numb_task - 1: + netcdftraj.close() + # link restart since it's necessary to start Amber + os.symlink(os.path.relpath(rst_name), 'init.rst7') + os.symlink(os.path.relpath(job_name), 'job.json') + elif model_devi_engine == "calypso": os.symlink(os.path.relpath(conf_name), 'POSCAR') fjob = open('job.json','w+') fjob.write('{"model_devi_engine":"calypso"}') fjob.close() #os.system('touch job.json') + else: + raise RuntimeError('unknown model_devi_engine', model_devi_engine) else: os.symlink(os.path.relpath(poscar_name), 'POSCAR') np.save("atom_pref", new_system.data["atom_pref"]) @@ -1932,7 +2173,7 @@ def _trust_limitation_check(sys_idx, lim): dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt='gromacs/gro', charge=charges_recorder[idx]) else: dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt='gromacs/gro', charge=None) - elif model_devi_engine == 'calypso': + elif model_devi_engine in ("amber", 'calypso'): pass else: raise RuntimeError("unknown model_devi engine", model_devi_engine) @@ -2482,6 +2723,99 @@ def make_fp_pwmat (iter_index, # 2, create pwmat input _make_fp_pwmat_input(iter_index, jdata) +def make_fp_amber_diff(iter_index: int, jdata: dict): + """Run amber twice to calculate high-level and low-level potential, + and then generate difference between them. + + Besides AMBER, one needs to install `dpamber` package, which is avaiable at + https://github.com/njzjz/dpamber + + Currently, it should be used with the AMBER model_devi driver. + + Parameters + ---------- + iter_index : int + iter index + jdata : dict + Run parameters. The following parameters are used in this method: + mdin_prefix : str + The path prefix to AMBER mdin files + qm_region : list[str] + AMBER mask of the QM region. Each mask maps to a system. + qm_charge : list[int] + Charge of the QM region. Each charge maps to a system. 
+ high_level : str + high level method + low_level : str + low level method + fp_params : dict + This parameters includes: + high_level_mdin : str + High-level AMBER mdin file. %qm_theory%, %qm_region%, + and %qm_charge% will be replace. + low_level_mdin : str + Low-level AMBER mdin file. %qm_theory%, %qm_region%, + and %qm_charge% will be replace. + parm7_prefix : str + The path prefix to AMBER PARM7 files + parm7 : list[str] + List of paths to AMBER PARM7 files. Each file maps to a system. + + References + ---------- + .. [1] Development of Range-Corrected Deep Learning Potentials for Fast, Accurate Quantum + Mechanical/Molecular Mechanical Simulations of Chemical Reactions in Solution, + Jinzhe Zeng, Timothy J. Giese, Şölen Ekesan, and Darrin M. York, Journal of Chemical + Theory and Computation 2021 17 (11), 6993-7009 + """ + # make config + fp_tasks = _make_fp_vasp_configs(iter_index, jdata) + # make amber input + cwd = os.getcwd() + # link two mdin files and param7 + os.chdir(os.path.join(fp_tasks[0], "..")) + mdin_prefix = jdata.get('mdin_prefix', '') + low_level_mdin = jdata['fp_params']['low_level_mdin'] + low_level_mdin = os.path.join(mdin_prefix, low_level_mdin) + high_level_mdin = jdata['fp_params']['high_level_mdin'] + high_level_mdin = os.path.join(mdin_prefix, high_level_mdin) + with open(low_level_mdin) as f: + low_level_mdin_str = f.read() + with open(high_level_mdin) as f: + high_level_mdin_str = f.read() + + qm_region = jdata['qm_region'] + high_level = jdata['high_level'] + low_level = jdata['low_level'] + qm_charge = jdata['qm_charge'] + # qm_theory qm_region qm_charge + for ii, _ in enumerate(qm_region): + mdin_new_str = low_level_mdin_str.replace("%qm_theory%", low_level) \ + .replace("%qm_region%", qm_region[ii]) \ + .replace("%qm_charge%", str(qm_charge[ii])) + with open('low_level%d.mdin'%ii, 'w') as f: + f.write(mdin_new_str) + + mdin_new_str = high_level_mdin_str.replace("%qm_theory%", high_level) \ + .replace("%qm_region%", 
qm_region[ii]) \ + .replace("%qm_charge%", str(qm_charge[ii])) + with open('high_level%d.mdin'%ii, 'w') as f: + f.write(mdin_new_str) + + parm7 = jdata['parm7'] + parm7_prefix = jdata.get("parm7_prefix", "") + parm7 = [os.path.join(parm7_prefix, pp) for pp in parm7] + for ii, pp in enumerate(parm7): + os.symlink(pp, "qmmm%d.parm7"%ii) + + rst7_prefix = jdata.get("sys_configs_prefix", "") + for ii, ss in enumerate(jdata['sys_configs']): + os.symlink(os.path.join(rst7_prefix, ss[0]), "init%d.rst7"%ii) + + with open("qm_region", 'w') as f: + f.write("\n".join(qm_region)) + os.chdir(cwd) + def make_fp (iter_index, jdata, mdata) : @@ -2501,6 +2835,8 @@ def make_fp (iter_index, make_fp_cp2k(iter_index, jdata) elif fp_style == "pwmat" : make_fp_pwmat(iter_index, jdata) + elif fp_style == "amber/diff": + make_fp_amber_diff(iter_index, jdata) else : raise RuntimeError ("unsupported fp style") # Copy user defined forward_files @@ -2605,6 +2941,26 @@ def run_fp_inner (iter_index, fp_tasks.sort() if len(fp_tasks) == 0 : return + + fp_style = jdata['fp_style'] + if fp_style == 'amber/diff': + # firstly get sys_idx + fp_command = ( + "TASK=$(basename $(pwd)) && " + "SYS1=${TASK:5:3} && " + "SYS=$((10#$SYS1)) && " + 'QM_REGION=$(awk "NR==$SYS+1" ../qm_region) &&' + ) + fp_command + ( + " -O -p ../qmmm$SYS.parm7 -c ../init$SYS.rst7 -i ../low_level$SYS.mdin -o low_level.mdout -r low_level.rst7 " + "-x low_level.nc -y rc.nc -frc low_level.mdfrc -inf low_level.mdinfo && " + ) + fp_command + ( + " -O -p ../qmmm$SYS.parm7 -c ../init$SYS.rst7 -i ../high_level$SYS.mdin -o high_level.mdout -r high_level.rst7 " + "-x high_level.nc -y rc.nc -frc high_level.mdfrc -inf high_level.mdinfo && " + ) + ( + "dpamber corr --cutoff %f --parm7_file ../qmmm$SYS.parm7 --nc rc.nc --hl high_level --ll low_level --qm_region \"$QM_REGION\"") % ( + jdata['cutoff'], + ) + fp_run_tasks = fp_tasks # for ii in fp_tasks : @@ -2653,7 +3009,8 @@ def run_fp (iter_index, jdata, mdata) : fp_style = jdata['fp_style'] - 
fp_pp_files = jdata['fp_pp_files'] + fp_pp_files = jdata.get('fp_pp_files', []) + if fp_style == "vasp" : forward_files = ['POSCAR', 'INCAR', 'POTCAR','KPOINTS'] backward_files = ['fp.log','OUTCAR','vasprun.xml'] @@ -2699,6 +3056,16 @@ def run_fp (iter_index, forward_files = ['atom.config', 'etot.input'] + fp_pp_files backward_files = ['REPORT', 'OUT.MLMD', 'output'] run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, _pwmat_check_fin, log_file = 'output') + elif fp_style == 'amber/diff': + forward_files = ['rc.nc'] + backward_files = [ + 'low_level.mdfrc', 'low_level.mdout', + 'high_level.mdfrc', 'high_level.mdout', + 'output', 'dataset' + ] + forward_common_files = ['low_level*.mdin', 'high_level*.mdin', 'qmmm*.parm7', 'qm_region', 'init*.rst7'] + run_fp_inner(iter_index, jdata, mdata, forward_files, backward_files, None, log_file = 'output', + forward_common_files=forward_common_files) else : raise RuntimeError ("unsupported fp style") @@ -3096,6 +3463,36 @@ def post_fp_pwmat (iter_index, raise RuntimeError("find too many unsuccessfully terminated jobs") +def post_fp_amber_diff(iter_index, jdata): + model_devi_jobs = jdata['model_devi_jobs'] + assert (iter_index < len(model_devi_jobs)) + + iter_name = make_iter_name(iter_index) + work_path = os.path.join(iter_name, fp_name) + fp_tasks = glob.glob(os.path.join(work_path, 'task.*')) + fp_tasks.sort() + if len(fp_tasks) == 0 : + return + + system_index = [] + for ii in fp_tasks : + system_index.append(os.path.basename(ii).split('.')[1]) + system_index.sort() + set_tmp = set(system_index) + system_index = list(set_tmp) + system_index.sort() + + for ss in system_index : + sys_output = glob.glob(os.path.join(work_path, "task.%s.*"%ss)) + sys_output.sort() + all_sys=dpdata.MultiSystems() + for oo in sys_output : + sys=dpdata.MultiSystems().from_deepmd_npy(os.path.join(oo, 'dataset')) + all_sys.append(sys) + sys_data_path = os.path.join(work_path, 'data.%s'%ss) + all_sys.to_deepmd_raw(sys_data_path) + 
all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output), prec=np.float64) + def post_fp (iter_index, jdata) : fp_style = jdata['fp_style'] @@ -3114,6 +3511,8 @@ def post_fp (iter_index, post_fp_cp2k(iter_index, jdata) elif fp_style == 'pwmat' : post_fp_pwmat(iter_index, jdata) + elif fp_style == 'amber/diff': + post_fp_amber_diff(iter_index, jdata) else : raise RuntimeError ("unsupported fp style") # clean traj diff --git a/examples/run/dprc/README.md b/examples/run/dprc/README.md new file mode 100644 index 000000000..016e27850 --- /dev/null +++ b/examples/run/dprc/README.md @@ -0,0 +1,5 @@ +## Deep Potential Range Correction (DPRc) + +This example shows how to train a DPRc model. + +For details, refer to [10.1021/acs.jctc.1c00201](https://doi.org/10.1021/acs.jctc.1c00201). diff --git a/examples/run/dprc/generator.yaml b/examples/run/dprc/generator.yaml new file mode 100644 index 000000000..cab076ff4 --- /dev/null +++ b/examples/run/dprc/generator.yaml @@ -0,0 +1,215 @@ +type_map: + - C + - H + - O + - "N" + - P + - HW + - OW +init_data_sys: + - /path/to/initial_data +# qm_region: the Amber mask of the QM region in the parm file +qm_region: + - ":1" +# qm_charge: the charge of the QM region +qm_charge: + - 0 +# parm7: absolute path to parm7 file +parm7: + - /path/to/MON.parm7 +# mdin: absolute path to mdin file +mdin: + - /path/to/ml.mdin +# disang: absolute path to disang template file +disang: + - /path/to/mon.disang +# sys_configs: abosulute path to initial configurations +sys_configs: + - - /path/to/init_-1.20.rst7 + - /path/to/init_-0.80.rst7 + - /path/to/init_-0.40.rst7 + - /path/to/init_0.00.rst7 +# r: reaction coordinates to replace RVAL in the template; can be a list for 2D +r: + - - -1.20 + - -0.80 + - -0.40 + - 0.00 +numb_models: 4 +# same as the simplify +default_training_param: + model: + # type_map: same as above + type_map: + - C + - H + - O + - "N" + - P + - HW + - OW + descriptor: + type: hybrid + list: + # QM-QM descriptor + - type: se_a + 
# sel: refer https://docs.deepmodeling.org/projects/deepmd/en/latest/model/sel.html to obatin the proper sel + # In a DPRc model, sel of MM atoms for the QM-QM descriptor should be set to zero. + sel: + - 6 + - 7 + - 6 + - 1 + - 1 + - 0 + - 0 + rcut_smth: 1.0 + rcut: 9.0 + neuron: + - 25 + - 50 + - 100 + resnet_dt: false + axis_neuron: 12 + # exclude_types: In the QM-QM descriptor, MM-MM and QM-MM descriptor should be disabled. + # >>> from itertools import combinations_with_replacement, product + # >>> qm = range(5) + # >>> mm = range(5,7) + # >>> list(map(list,list(combinations_with_replacement(mm,2))+list(product(qm,mm)))) + exclude_types: [[5, 5], [5, 6], [6, 6], [0, 5], [0, 6], [1, 5], [1, 6], [2, 5], [2, 6], [3, 5], [3, 6], [4, 5], [4, 6]] + # set_davg_zero: In a DPRc model, it must be set to true. + set_davg_zero: true + type_one_side: true + precision: "float32" + # QM-MM descriptor + - type: se_a + # *1.2 for MM atoms + sel: + - 6 + - 7 + - 6 + - 1 + - 1 + - 88 + - 44 + rcut_smth: 1.0 + rcut: 6.0 + neuron: + - 25 + - 50 + - 100 + resnet_dt: false + axis_neuron: 12 + # exclude_types: In the QM-MM descriptor, QM-QM and MM-MM descriptor should be disabled. 
+ # >>> from itertools import combinations_with_replacement, product + # >>> qm = range(5) + # >>> mm = range(5,7) + # >>> list(map(list,list(combinations_with_replacement(qm,2))+list(combinations_with_replacement(mm,2)))) + exclude_types: [[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [1, 1], [1, 2], [1, 3], [1, 4], [2, 2], [2, 3], [2, 4], [3, 3], [3, 4], [4, 4], [5, 5], [5, 6], [6, 6]] + set_davg_zero: true + type_one_side: true + precision: "float32" + fitting_net: + neuron: + - 240 + - 240 + - 240 + resnet_dt: true + # atom_ener: In a DPRc model, the atom_ener of MM atoms should be set to 0 + atom_ener: + - null + - null + - null + - null + - null + - 0.0 + - 0.0 + precision: "float32" + # learning_rate: from 1.e-3 to 5.e-8 + learning_rate: + type: exp + start_lr: 1.e-3 + decay_steps: 400 + stop_lr: 5.e-8 + loss: + start_pref_e: 0.02 + limit_pref_e: 1 + start_pref_f: 1000 + limit_pref_f: 1 + start_pref_v: 0 + limit_pref_v: 0 + training: + numb_steps: 400000 + disp_file: lcurve.out + disp_freq: 100 + save_freq: 1000 + disp_training: true + time_training: true + profiling: false + profiling_file: timeline.json +dp_compress: true + +model_devi_f_trust_lo: 0.08 +model_devi_f_trust_hi: 0.25 +training_reuse_iter: 2 + + +# nsteps: the number of steps to run +nsteps: +- 10000 +model_devi_jobs: + - sys_idx: [0] + trj_freq: 40 + #1 + - sys_idx: [0] + trj_freq: 40 + #2 + - sys_idx: [0] + trj_freq: 40 + #3 + - sys_idx: [0] + trj_freq: 40 + #4 + - sys_idx: [0] + trj_freq: 40 + #5 + - sys_idx: [0] + trj_freq: 40 + #6 + - sys_idx: [0] + trj_freq: 40 + #7 + - sys_idx: [0] + trj_freq: 40 + #8 + - sys_idx: [0] + trj_freq: 40 + #9 + - sys_idx: [0] + trj_freq: 40 + #10 + - sys_idx: [0] + trj_freq: 40 + +# fp_task_max: the maximum fp tasks to calculate +fp_task_max: 1000 +fp_task_min: 10 +fp_params: + low_level_mdin: /path/to/low_level.mdin + high_level_mdin: /path/to/high_level.mdin + +low_level: MNDOD +high_level: PBE0 +# cutoff: QM-MM cutoff radius +cutoff: 6. 
+ +# do not change the value below +sys_format: amber/rst7 +init_multi_systems: true +model_devi_clean_traj: false +model_devi_engine: amber +model_devi_skip: 0 +shuffle_poscar: false +fp_style: amber/diff +detailed_report_make_fp: true +use_clusters: true diff --git a/setup.py b/setup.py index 3a86a8bd7..ee5617bba 100755 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ with open(path.join('dpgen', '_date.py'), 'w') as fp : fp.write('date = \'%s\'' % today) -install_requires=['numpy>=1.14.3', 'dpdata>=0.2.6', 'pymatgen>=2019.1.13', 'ase', 'monty>2.0.0', 'paramiko', 'custodian','GromacsWrapper>=0.8.0', 'dpdispatcher>=0.3.11'] +install_requires=['numpy>=1.14.3', 'dpdata>=0.2.6', 'pymatgen>=2019.1.13', 'ase', 'monty>2.0.0', 'paramiko', 'custodian','GromacsWrapper>=0.8.0', 'dpdispatcher>=0.3.11', 'netCDF4'] setuptools.setup( name=NAME, diff --git a/tests/generator/amber/init_-1.20.rst7 b/tests/generator/amber/init_-1.20.rst7 new file mode 100644 index 000000000..e69de29bb diff --git a/tests/generator/amber/ml.mdin b/tests/generator/amber/ml.mdin new file mode 100644 index 000000000..dcbf237e5 --- /dev/null +++ b/tests/generator/amber/ml.mdin @@ -0,0 +1,77 @@ +title +&cntrl +! IO ======================================= + irest = 1 ! 0 = start, 1 = restart + ntx = 5 ! 1 = start, 5 = restart + ntxo = 1 ! read/write rst as formatted file + iwrap = 1 ! wrap crds to unit cell + ioutfm = 1 ! write mdcrd as netcdf + imin = 0 + ntmin = 1 + ntpr = @freq@ + ntwr = @freq@ + ntwx = @freq@ + ntwf = 0 +! DYNAMICS ================================= + nstlim = @nstlim@ ! number of time steps + dt = 0.001 ! ps/step + ntb = 1 ! 1=NVT periodic, 2=NPT periodic, 0=no box +! TEMPERATURE ============================== + temp0 = 298 ! target temp + gamma_ln = 5.0 ! Langevin collision freq + ntt = 3 ! thermostat (3=Langevin) +! PRESSURE ================================ + ntp = 0 ! 0=no scaling, 1=isotropic, 2=anisotropic +! SHAKE ==================================== + ntc = 2 ! 
1=no shake, 2=HX constrained, 3=all constrained +noshakemask = "@qm_region@" ! do not shake these + ntf = 1 ! 1=cpt all bond E, 2=ignore HX bond E, 3=ignore all bond E +! MISC ===================================== + cut = 9.0 + ifqnt = 1 + ig = -1 + nmropt = 1 +/ + +&wt +type='DUMPFREQ', istep1=25 +&end +&wt + type='END', +&end +DISANG=TEMPLATE.disang +DUMPAVE=TEMPLATE.dumpave + +&ewald + dsum_tol = 1.e-6 +/ + +&qmmm + qm_theory = '@qm_theory@' + qmmask = '@qm_region@' + qmcharge = @qm_charge@ + spin = 1 + qmshake = 0 + qm_ewald = 1 + qmmm_switch = 1 + scfconv = 1.e-10 + verbosity = 0 + tight_p_conv = 1 + diag_routine = 0 + pseudo_diag = 1 + dftb_maxiter = 100 +/ + +&ml + iml=1 + mlmask="@qm_region@" + rcut = @rcut@ + intrafile="" + alskip = @freq@ + althresh = 1.e-6 + interfile(1)="@GRAPH_FILE0@" + interfile(2)="@GRAPH_FILE1@" + interfile(3)="@GRAPH_FILE2@" + interfile(4)="@GRAPH_FILE3@" +/ + diff --git a/tests/generator/context.py b/tests/generator/context.py index 3ee10b9c8..9b5c23c58 100644 --- a/tests/generator/context.py +++ b/tests/generator/context.py @@ -26,6 +26,7 @@ param_pwmat_file = 'param-pyridine-pwmat.json' param_abacus_file = 'param-pyridine-abacus.json' param_abacus_post_file = 'param-methane-abacus.json' +param_amber_file = "param-amber.json" def my_file_cmp(test, f0, f1): with open(f0) as fp0 : diff --git a/tests/generator/param-amber.json b/tests/generator/param-amber.json new file mode 100644 index 000000000..69df44ed3 --- /dev/null +++ b/tests/generator/param-amber.json @@ -0,0 +1,54 @@ +{ + "model_devi_jobs": [ + { + "sys_idx": [0] + } + ], + "use_multi_systems": true, + "model_devi_engine": "amber", + "fp_style": "amber/diff", + "numb_models": 4, + "mdin_prefix": "amber", + "parm7_prefix": "amber", + "sys_prefix": "amber", + "disang_prefix": "amber", + "sys_configs": [ + [ + "init_-1.20.rst7" + ] + ], + "disang": [ + "init_-1.20.disang" + ], + "parm7": [ + "MON.parm7" + ], + "mdin": [ + "ml.mdin" + ], + "qm_region": [ + ":1" + ], + 
"qm_charge": [ + 0 + ], + "r": [ + [-1.20] + ], + "nsteps": [ + 1000 + ], + "type_map": ["C", "H", "O", "N", "P", "HW", "OW"], + "shuffle_poscar": false, + "low_level": "DFTB2", + "high_level": "PBE0", + "cutoff": 6.0, + "fp_task_max": 1000, + "model_devi_skip": 0, + "model_devi_f_trust_lo": 0.10, + "model_devi_f_trust_hi": 0.25, + "fp_params": { + "low_level_mdin": "ml.mdin", + "high_level_mdin": "ml.mdin" + } +} \ No newline at end of file diff --git a/tests/generator/test_make_fp.py b/tests/generator/test_make_fp.py index f706d8ea4..5df81692f 100644 --- a/tests/generator/test_make_fp.py +++ b/tests/generator/test_make_fp.py @@ -18,6 +18,7 @@ from .context import param_gaussian_file from .context import param_cp2k_file from .context import param_cp2k_file_exinput +from .context import param_amber_file from .context import ref_cp2k_file_input from .context import ref_cp2k_file_exinput from .context import machine_file @@ -584,6 +585,26 @@ def test_make_fp_abacus(self): _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) shutil.rmtree('iter.000000') +class TestMakeFPAMBERDiff(unittest.TestCase): + def test_make_fp_amber_diff(self): + setUpModule() + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + with open(param_amber_file, 'r') as fp: + jdata = json.load(fp) + jdata['mdin_prefix'] = os.path.abspath(jdata['mdin_prefix']) + task_dir = os.path.join('iter.%06d' % 0, + '01.model_devi', + 'task.%03d.%06d' % (0, 0)) + os.makedirs(task_dir, exist_ok = True) + with open(os.path.join(task_dir, "rc.mdout"), 'w') as f: + f.write("Active learning frame written with max. frc. 
std.: 3.29037 kcal/mol/A") + import ase + from ase.io.netcdftrajectory import write_netcdftrajectory + write_netcdftrajectory(os.path.join(task_dir, 'rc.nc'), ase.Atoms("C", positions=np.zeros((1, 3)))) + make_fp(0, jdata, {}) + + class TestMakeFPSIESTA(unittest.TestCase): def test_make_fp_siesta(self): setUpModule() diff --git a/tests/generator/test_make_md.py b/tests/generator/test_make_md.py index 02678e30b..e41afeeae 100644 --- a/tests/generator/test_make_md.py +++ b/tests/generator/test_make_md.py @@ -2,6 +2,7 @@ import dpdata import numpy as np import unittest +from pathlib import Path from dpgen.generator.run import parse_cur_job_sys_revmat @@ -10,7 +11,7 @@ from .context import make_model_devi from .context import parse_cur_job from .context import parse_cur_job_revmat -from .context import param_file +from .context import param_file, param_amber_file from .context import machine_file from .context import my_file_cmp from .context import setUpModule @@ -498,5 +499,25 @@ def test_revise_by_key(self): self.assertEqual(tmp, "ddd") +class TestMakeMDAMBER(unittest.TestCase): + def tearDown(self): + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + + def test_make_model_devi (self) : + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + with open (param_amber_file, 'r') as fp : + jdata = json.load (fp) + with open (machine_file, 'r') as fp: + mdata = json.load (fp) + jdata['sys_prefix'] = os.path.abspath(jdata['sys_prefix']) + _make_fake_models(0, jdata['numb_models']) + make_model_devi(0, jdata, mdata) + _check_pb(self, 0) + _check_confs(self, 0, jdata) + _check_traj_dir(self, 0) + + if __name__ == '__main__': unittest.main() diff --git a/tests/generator/test_post_fp.py b/tests/generator/test_post_fp.py index f0028db92..30f30b747 100644 --- a/tests/generator/test_post_fp.py +++ b/tests/generator/test_post_fp.py @@ -20,6 +20,7 @@ from .context import param_siesta_file from .context import param_gaussian_file from .context import 
param_cp2k_file +from .context import param_amber_file from .context import machine_file from .context import setUpModule from .comp_sys import test_atom_names @@ -261,5 +262,23 @@ def setUp(self): self.system_2 = dpdata.LabeledSystem('iter.000000/02.fp/data.000', fmt = 'deepmd/raw') +class TestPostAmberDiff(unittest.TestCase, CompLabeledSys): + def setUp(self): + self.places = 5 + self.e_places = 5 + self.f_places = 5 + self.v_places = 5 + + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + ms = dpdata.MultiSystems(dpdata.LabeledSystem(os.path.join('data', 'deepmd'), fmt="deepmd/raw")) + ms.to_deepmd_npy(os.path.join('iter.000000', '02.fp', 'task.000.000000', 'dataset')) + self.system_1 = list(ms.systems.values())[0] + with open (param_amber_file, 'r') as fp : + jdata = json.load (fp) + post_fp(0, jdata) + self.system_2 = list(dpdata.MultiSystems().from_deepmd_raw('iter.000000/02.fp/data.000').systems.values())[0] + + if __name__ == '__main__': unittest.main() From 80f2cf7df6fbd41ab2a054637fa828c69c5cbe08 Mon Sep 17 00:00:00 2001 From: LiangWenshuo1118 <73432685+LiangWenshuo1118@users.noreply.github.com> Date: Tue, 31 May 2022 08:41:01 +0800 Subject: [PATCH 04/25] Create Performing the Run process.md (#714) * Create Performing the Run process.md Description of the output files in DP-GEN run process. * Create Specify the Run process.md Description of an example of param.json. 
* Update and rename Specify the Run process.md to example-of-param.md * Update and rename Performing the Run process.md to Overview-of-the-run-process.md * Update and rename Overview-of-the-run-process.md to overview-of-the-run-process.md Specify the format of the code * Update example-of-param.md Specify the format of code * Update example-of-param.md * Update example-of-param.md * Create example-of-machine * Rename example-of-machine to example-of-machine.md * add param.rst param.rst generated by dargs * Update param.rst * updata dpgen run param parameters * Update param.rst * Update param.rst * Update example-of-param.md * Update overview-of-the-run-process.md * Update example-of-machine.md * Update example-of-machine.md * Update overview-of-the-run-process.md * Create run-process.rst * Update index.rst * Update param.rst * Update param.rst * Update index.rst --- doc/index.rst | 41 ++- doc/run/example-of-machine.md | 118 +++++++ doc/run/example-of-param.md | 128 +++++++ doc/run/overview-of-the-run-process.md | 65 ++++ doc/run/param.rst | 446 +++++++++++++++++++++++++ doc/run/run-process.rst | 9 + 6 files changed, 805 insertions(+), 2 deletions(-) create mode 100644 doc/run/example-of-machine.md create mode 100644 doc/run/example-of-param.md create mode 100644 doc/run/overview-of-the-run-process.md create mode 100644 doc/run/param.rst create mode 100644 doc/run/run-process.rst diff --git a/doc/index.rst b/doc/index.rst index 1ad29e4e7..341ce5d79 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -2,13 +2,50 @@ DPGEN's documentation ========================== -.. _parameters:: +.. _overview:: .. toctree:: :maxdepth: 2 - :caption: Parameters + :caption: Overview + + +.. _installation:: +.. toctree:: + :maxdepth: 2 + :caption: Installation + + +.. _run:: + +.. toctree:: + :maxdepth: 2 + :caption: Run + + run/run-process.rst + run/param.rst run-mdata.rst + +.. _init:: + +.. toctree:: + :maxdepth: 2 + :caption: Init + + +.. _autotest:: + +.. 
toctree:: + :maxdepth: 2 + :caption: Autotest + + +.. _simplify:: + +.. toctree:: + :maxdepth: 2 + :caption: Simplify + .. _tutorial: diff --git a/doc/run/example-of-machine.md b/doc/run/example-of-machine.md new file mode 100644 index 000000000..569f85026 --- /dev/null +++ b/doc/run/example-of-machine.md @@ -0,0 +1,118 @@ +# Example of machine.json + +## DPDispatcher Update Note + +DPDispatcher has updated and the api of machine.json is changed. DP-GEN will use the new DPDispatcher if the value of key "api_version" in machine.json is equal to or large than 1.0. And for now, DPDispatcher is maintained on a separate repo (https://github.com/deepmodeling/dpdispatcher). Please check the documents (https://deepmd.readthedocs.io/projects/dpdispatcher/en/latest/) for more information about the new DPDispatcher. + +DP-GEN will use the old DPDispatcher if the key "api_version" is not specified in machine.json or the "api_version" is smaller than 1.0. This gurantees that the old machine.json still works. + +## New DPDispatcher + +Each iteration in the run process of DP-GEN is composed of three steps: exploration, labeling, and training. Accordingly, machine.json is composed of three parts: train, model_devi, and fp. Each part is a list of dicts. Each dict can be considered as an independent environment for calculation. + +In this section, we will show you how to perform train task at a local workstation, model_devi task at a local Slurm cluster, and fp task at a remote PBS cluster using the new DPDispatcher. For each task, three types of keys are needed: +- Command: provides the command used to execute each step. +- Machine: specifies the machine environment (local workstation, local or remote cluster, or cloud server). +- Resources: specify the number of groups, nodes, CPU, and GPU; enable the virtual environment. + +### Performing train task at a local workstation + +In this example, we perform the `train` task on a local workstation. 
+ +```json +"train": [ + { + "command": "dp", + "machine": { + "batch_type": "Shell", + "context_type": "local", + "local_root": "./", + "remote_root": "/home/user1234/work_path" + }, + "resources": { + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 1, + "group_size": 1, + "source_list": ["/home/user1234/deepmd.env"] + } + } + ], +``` + +The "command" for the train task in the DeePMD-kit is "dp". + +In machine parameters, "batch_type" specifies the type of job scheduling system. If there is no job scheduling system, we can use the "Shell" to perform the task. "context_type" specifies the method of data transfer, and "local" means copying and moving data via local file storage systems (e.g. cp, mv, etc.). In DP-GEN, the paths of all tasks are automatically located and set by the software, and therefore "local_root" is always set to "./". The input file for each task will be sent to the "remote_root" and the task will be performed there, so we need to make sure that the path exists. + +In the resources parameter, "number_node", "cpu_per_node", and "gpu_per_node" specify the number of nodes, the number of CPUs, and the number of GPUs required for a task respectively. "group_size", which needs to be highlighted, specifies how many tasks will be packed into a group. In the training tasks, we need to train 4 models. If we only have one GPU, we can set the "group_size" to 4. If "group_size" is set to 1, 4 models will be trained on one GPU at the same time, as there is no job scheduling system. Finally, the environment variables can be activated by "source_list". In this example, "source /home/user1234/deepmd.env" is executed before "dp" to load the environment variables necessary to perform the training task. + +### Perform model_devi task at a local Slurm cluster + +In this example, we perform the model_devi task at a local Slurm workstation. 
+ +```json +"model_devi": [ + { + "command": "lmp", + "machine": { + "context_type": "local", + "batch_type": "Slurm", + "local_root": "./", + "remote_root": "/home/user1234/work_path" + }, + "resources": { + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 1, + "queue_name": "QueueGPU", + "custom_flags" : ["#SBATCH --mem=32G"], + "group_size": 10, + "source_list": ["/home/user1234/lammps.env"] + } + } +], +``` + +The "command" for the model_devi task in the LAMMPS is "lmp". + +In the machine parameter, we specify the type of job scheduling system by changing the "batch_type" to "Slurm". + +In the resources parameter, we specify the name of the queue to which the task is submitted by adding "queue_name". We can add additional lines to the calculation script via the "custom_flags". In the model_devi steps, there are frequently many short tasks, so we usually pack multiple tasks (e.g. 10) into a group for submission. Other parameters are similar to that of the local workstation. + +### Perform fp task in a remote PBS cluster + +In this example, we perform the fp task at a remote PBS cluster that can be accessed via SSH. + +```json +"fp": [ + { + "command": "mpirun -n 32 vasp_std", + "machine": { + "context_type": "SSHContext", + "batch_type": "PBS", + "local_root": "./", + "remote_root": "/home/user1234/work_path", + "remote_profile": { + "hostname": "39.xxx.xx.xx", + "username": "user1234" + } + }, + "resources": { + "number_node": 1, + "cpu_per_node": 32, + "gpu_per_node": 0, + "queue_name": "QueueCPU", + "group_size": 5, + "source_list": ["/home/user1234/vasp.env"] + } + } +], +``` + +VASP code is used for fp task and mpi is used for parallel computing, so "mpirun -n 32" is added to specify the number of parallel threads. + +In the machine parameter, "context_type" is modified to "SSHContext" and "batch_type" is modified to "PBS". It is worth noting that "remote_root" should be set to an accessible path on the remote PBS cluster. 
"remote_profile" is added to specify the information used to connect the remote cluster, including hostname, username, port, etc. + +In the resources parameter, we set "gpu_per_node" to 0 since it is cost-effective to use the CPU for VASP calculations. + +Explicit descriptions of keys in machine.json will be given in the following section. diff --git a/doc/run/example-of-param.md b/doc/run/example-of-param.md new file mode 100644 index 000000000..0490d971f --- /dev/null +++ b/doc/run/example-of-param.md @@ -0,0 +1,128 @@ +# Example-of-param.json + +We have provided different examples of param.json in dpgen/examples/run/. In this section, we give a description of the param.json, taking dpgen/examples/run/dp2.x-lammps-vasp/param_CH4_deepmd-kit-2.0.1.json as an example. This is a param.json for a gas-phase methane molecule. Here, DeePMD-kit (v2.x), LAMMPS and VASP codes are used for training, exploration and labeling respectively. + +## basics + +The basics related keys in param.json are given as follows + +```json + "type_map": [ + "H", + "C" + ], + "mass_map": [ + 1, + 12 + ], +``` + +The basics related keys specify the basic information about the system. "type_map" gives the atom types, i.e. "H" and "C". "mass_map" gives the standard atom weights, i.e. "1" and "12". + +## data + +The data related keys in param.json are given as follows + +```json + "init_data_prefix": "....../init/", + "init_data_sys": [ + "CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd" + ], + + "sys_configs_prefix": "....../init/", + "sys_configs": [ + [ + "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00000*/POSCAR" + ], + [ + "CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00001*/POSCAR" + ] + ], +``` + +The data related keys specify the init data for training initial DP models and structures used for model_devi calculations. "init_data_prefix" and "init_data_sys" specify the location of the init data. 
"sys_configs_prefix" and "sys_configs" specify the location of the structures. + +Here, the init data is provided at "...... /init/CH4.POSCAR.01x01x01/02.md/sys-0004-0001/deepmd". These structures are divided into two groups and provided at "....../init/CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00000*/POSCAR" and "....../init/CH4.POSCAR.01x01x01/01.scale_pert/sys-0004-0001/scale*/00001*/POSCAR". + +## training + +The training related keys in param.json are given as follows + +```json + "numb_models": 4, + "train_param": "input.json", + "default_training_param": { + }, +``` +The training related keys specify the details of training tasks. "numb_models" specifies the number of models to be trained. "default_training_param" specifies the training parameters for `deepmd-kit`. + +Here, 4 DP models will be trained in `00.train`. A detailed explanation of training parameters can be found in DeePMD-kit’s documentation (https://docs.deepmodeling.com/projects/deepmd/en/master/). + +## exploration + +The exploration related keys in param.json are given as follows + +```json + "model_devi_dt": 0.002, + "model_devi_skip": 0, + "model_devi_f_trust_lo": 0.05, + "model_devi_f_trust_hi": 0.15, + "model_devi_clean_traj": true, + "model_devi_jobs": [ + { + "sys_idx": [ + 0 + ], + "temps": [ + 100 + ], + "press": [ + 1.0 + ], + "trj_freq": 10, + "nsteps": 300, + "ensemble": "nvt", + "_idx": "00" + }, + { + "sys_idx": [ + 1 + ], + "temps": [ + 100 + ], + "press": [ + 1.0 + ], + "trj_freq": 10, + "nsteps": 3000, + "ensemble": "nvt", + "_idx": "01" + } + ], +``` +The exploration related keys specify the details of exploration tasks. "model_devi_dt" specifies timestep for MD simulation. "model_devi_skip" specifies the number of structures skipped for saving in each MD. "model_devi_f_trust_lo" and "model_devi_f_trust_hi" specify the lower and upper bound of model_devi of forces for the selection. "model_devi_clean_traj" specifies whether to clean traj folders in MD. 
If type of model_devi_clean_traj is boolean type then it denotes whether to clean traj folders in MD since they are too large. In "model_devi_jobs", "sys_idx" specifies the group of structures used for model_devi calculations, "temps" specifies the temperature (K) in MD, "press" specifies the pressure (Bar) in MD, "trj_freq" specifies the frequency of trajectory saved in MD, "nsteps" specifies the running steps of MD, "ensemble" specifies the ensemble used in MD, and "_idx" specifies the index of iteration.
+
+Here, MD simulations are performed at the temperature of 100 K and the pressure of 1.0 Bar with an integrator time of 2 fs under the nvt ensemble. Two iterations are set in "model_devi_jobs". MD simulations are run for 300 and 3000 time steps with the first and second groups of structures in "sys_configs" in 00 and 01 iterations. We choose to save all structures generated in MD simulations and have set `"trj_freq"` as 10, so 30 and 300 structures are saved in 00 and 01 iterations. If the "max_devi_f" of saved structure falls between 0.05 and 0.15, DP-GEN will treat the structure as a candidate. We choose to clean traj folders in MD since they are too large. If you want to save the most recent n iterations of traj folders, you can set "model_devi_clean_traj" to be an integer.
+
+## labeling
+
+The labeling related keys in param.json are given as follows
+
+```json
+    "fp_style": "vasp",
+    "shuffle_poscar": false,
+    "fp_task_max": 20,
+    "fp_task_min": 1,
+    "fp_pp_path": "....../methane/",
+    "fp_pp_files": [
+        "POTCAR"
+    ],
+    "fp_incar": "....../INCAR_methane"
+```
+
+The labeling related keys specify the details of labeling tasks. "fp_style" specifies software for First Principles. "fp_task_max" and "fp_task_min" specify the maximum and minimum of structures to be calculated in `02.fp` of each iteration. "fp_pp_path" and "fp_pp_files" specify the location of the pseudo-potential file to be used for 02.fp. "fp_incar" specifies input file for VASP.
INCAR must specify KSPACING and KGAMMA. + +Here, a minimum of 1 and a maximum of 20 structures will be labeled using the VASP code with the INCAR provided at "....../INCAR_methane" and POTCAR provided at "....../methane/POTCAR" in each iteration. Note that the order of elements in POTCAR should correspond to the order in `type_map`. + +All the keys of the DP-GEN are explained in detail in the section Parameters. diff --git a/doc/run/overview-of-the-run-process.md b/doc/run/overview-of-the-run-process.md new file mode 100644 index 000000000..478cc0e27 --- /dev/null +++ b/doc/run/overview-of-the-run-process.md @@ -0,0 +1,65 @@ +# Overview of the Run process + +The run process contains a series of successive iterations, undertaken in order such as heating the system to certain temperatures. Each iteration is composed of three steps: exploration, labeling, and training. Accordingly, there are three sub-folders: 00.train, 01.model_devi, and 02.fp in each iteration. + +00.train: DP-GEN will train several (default 4) models based on initial and generated data. The only difference between these models is the random seed for neural network initialization. + +01.model_devi : represent for model-deviation. DP-GEN will use models obtained from 00.train to run Molecular Dynamics(default LAMMPS). Larger deviation for structure properties (default is the force of atoms) means less accuracy of the models. Using this criterion, a few structures will be selected and put into the next stage 02.fp for more accurate calculation based on First Principles. + +02.fp : Selected structures will be calculated by first-principles methods(default VASP). DP-GEN will obtain some new data and put them together with initial data and data generated in previous iterations. After that, new training will be set up and DP-GEN will enter the next iteration! 
+ +In the run process of the DP-GEN, we need to specify the basic information about the system, the initial data, and details of the training, exploration, and labeling tasks. In addition, we need to specify the software, machine environment, and computing resource and enable the process of job generation, submission, query, and collection automatically. We can perform the run process as we expect by specifying the keywords in param.json and machine.json, and they will be introduced in detail in the following sections. + +Here, we give a general description of the run process. We can execute the run process of DP-GEN easily by: + +```sh +dpgen run param.json machine.json +``` + +The following files or folders will be created and upgraded by codes: + +- iter.00000x contains the main results that DP-GEN generates in the first iteration. +- record.dpgen records the current stage of the run process. +- dpgen.log includes time and iteration information. + +When the first iteration is completed, the folder structure of iter.000000 is like this: + +```sh +$ ls iter.000000 +00.train 01.model_devi 02.fp +``` + +In folder iter.000000/ 00.train: + +- Folder 00x contains the input and output files of the DeePMD-kit, in which a model is trained. +- graph.00x.pb is the model DeePMD-kit generates. The only difference between these models is the random seed for neural network initialization. + +In folder iter.000000/ 01.model_devi: + +- Folder confs contains the initial configurations for LAMMPS MD converted from POSCAR you set in "sys_configs" of param.json. +- Folder task.000.00000x contains the input and output files of the LAMMPS. In folder task.000.00000x, file model_devi.out records the model deviation of concerned labels, energy and force in MD. It serves as the criterion for selecting which structures and doing first-principle calculations. 
+
+In folder iter.000000/ 02.fp:
+
+- candidate.shuffle.000.out records which structures will be selected from last step 01.model_devi. There are always far more candidates than the maximum you expect to calculate at one time. In this condition, DP-GEN will randomly choose up to `"fp_task_max"` structures and form the folder task.*.
+- rest_accurate.shuffle.000.out records the other structures where our model is accurate ("max_devi_f" is less than `"model_devi_f_trust_lo"`, no need to calculate any more),
+- rest_failed.shuffled.000.out records the other structures where our model is too inaccurate (larger than `"model_devi_f_trust_hi"`, there may be some error).
+- data.000: After first-principle calculations, DP-GEN will collect these data and change them into the format DeePMD-kit needs. In the next iteration's 00.train, these data will be trained together as well as the initial data.
+
+DP-GEN identifies the stage of the run process by a record file, record.dpgen, which will be created and upgraded by codes. Each line contains two numbers: the first is the index of iteration, and the second, ranging from 0 to 9, records which stage in each iteration is currently running.
+
+| Index of iterations | Stage in each iteration | Process |
+|:---------------------|:----------------------------|:-----------------|
+| 0 | 0 | make_train |
+| 0 | 1 | run_train |
+| 0 | 2 | post_train |
+| 0 | 3 | make_model_devi |
+| 0 | 4 | run_model_devi |
+| 0 | 5 | post_model_devi |
+| 0 | 6 | make_fp |
+| 0 | 7 | run_fp |
+| 0 | 8 | post_fp |
+
+0,1,2 correspond to make_train, run_train, post_train. DP-GEN will write scripts in make_train, run the task by specific machine in run_train and collect result in post_train. The records for model_devi and fp stage follow similar rules.
+
+If the process of DP-GEN stops for some reason, DP-GEN will automatically recover the main process by record.dpgen.
You may also change it manually for your purpose, such as removing the last iterations and recovering from one checkpoint.
diff --git a/doc/run/param.rst b/doc/run/param.rst
new file mode 100644
index 000000000..749c4e956
--- /dev/null
+++ b/doc/run/param.rst
@@ -0,0 +1,446 @@
+=============================
+dpgen run param parameters
+=============================
+
+type_map:
+    | type: ``list``
+    | argument path: ``type_map``
+
+    Atom types.
+
+mass_map:
+    | type: ``list``
+    | argument path: ``mass_map``
+
+    Standard atom weights.
+
+use_ele_temp:
+    | type: ``int``
+    | argument path: ``use_ele_temp``
+
+    Currently only supports fp_style vasp.
+
+    - 0: no electron temperature.
+
+    - 1: electron temperature as frame parameter.
+
+    - 2: electron temperature as atom parameter.
+
+init_data_prefix:
+    | type: ``str``, optional
+    | argument path: ``init_data_prefix``
+
+    Prefix of initial data directories.
+
+init_data_sys:
+    | type: ``list``
+    | argument path: ``init_data_sys``
+
+    Directories of initial data. You may use either absolute or relative path here.
+
+sys_format:
+    | type: ``str``
+    | argument path: ``sys_format``
+
+    Format of initial data. It will be vasp/poscar if not set.
+
+init_multi_systems:
+    | type: ``bool``, optional
+    | argument path: ``init_multi_systems``
+
+    If set to true, init_data_sys directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as initial data systems.
+
+init_batch_size:
+    | type: ``str``, optional
+    | argument path: ``init_batch_size``
+
+    Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size multiplied by the number of atoms of the structure should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.
+
+sys_configs_prefix:
+    | type: ``str``, optional
+    | argument path: ``sys_configs_prefix``
+
+    Prefix of sys_configs.
+
+sys_configs:
+    | type: ``str``
+    | argument path: ``sys_configs``
+
+    Containing directories of structures to be explored in iterations. Wildcard characters are supported here.
+
+sys_batch_size:
+    | type: ``list``, optional
+    | argument path: ``sys_batch_size``
+
+    Each number is the batch_size for training of corresponding system in sys_configs. If set to auto, batch size will be 32 divided by number of atoms.
+
+numb_models:
+    | type: ``int``
+    | argument path: ``numb_models``
+
+    Number of models to be trained in 00.train. 4 is recommended.
+
+training_iter0_model_path:
+    | type: ``list``, optional
+    | argument path: ``training_iter0_model_path``
+
+    The model used to init the first iter training. Number of elements should be equal to numb_models.
+
+training_init_model:
+    | type: ``bool``, optional
+    | argument path: ``training_init_model``
+
+    Iteration > 0, the model parameters will be initialized from the model trained at the previous iteration. Iteration == 0, the model parameters will be initialized from training_iter0_model_path.
+
+default_training_param:
+    | type: ``dict``
+    | argument path: ``default_training_param``
+
+    Training parameters for deepmd-kit in 00.train. You can find instructions from here: (https://github.com/deepmodeling/deepmd-kit).
+
+dp_compress:
+    | type: ``bool``, optional, default: ``False``
+    | argument path: ``dp_compress``
+
+    Use dp compress to compress the model.
+
+model_devi_dt:
+    | type: ``float``
+    | argument path: ``model_devi_dt``
+
+    Timestep for MD. 0.002 is recommended.
+
+model_devi_skip:
+    | type: ``int``
+    | argument path: ``model_devi_skip``
+
+    Number of structures skipped for fp in each MD.
+
+model_devi_f_trust_lo:
+    | type: ``list`` | ``float``
+    | argument path: ``model_devi_f_trust_lo``
+
+    Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.
+ +model_devi_f_trust_hi: + | type: ``list`` | ``float`` + | argument path: ``model_devi_f_trust_hi`` + + Upper bound of forces for the selection. If list, should be set for each index in sys_configs, respectively. + +model_devi_v_trust_lo: + | type: ``list`` | ``float`` + | argument path: ``model_devi_v_trust_lo`` + + Lower bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x. + +model_devi_v_trust_hi: + | type: ``list`` | ``float`` + | argument path: ``model_devi_v_trust_hi`` + + Upper bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x. + +model_devi_adapt_trust_lo: + | type: ``bool``, optional + | argument path: ``model_devi_adapt_trust_lo`` + + Adaptively determines the lower trust levels of force and virial. This option should be used together with model_devi_numb_candi_f, model_devi_numb_candi_v and optionally with model_devi_perc_candi_f and model_devi_perc_candi_v. dpgen will make two sets: + + - 1. From the frames with force model deviation lower than model_devi_f_trust_hi, select max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames) frames with largest force model deviation. + + - 2. From the frames with virial model deviation lower than model_devi_v_trust_hi, select max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames) frames with largest virial model deviation. + + The union of the two sets is made as candidate dataset. + +model_devi_numb_candi_f: + | type: ``int``, optional + | argument path: ``model_devi_numb_candi_f`` + + See model_devi_adapt_trust_lo. + +model_devi_numb_candi_v: + | type: ``int``, optional + | argument path: ``model_devi_numb_candi_v`` + + See model_devi_adapt_trust_lo. + +model_devi_perc_candi_f: + | type: ``float``, optional + | argument path: ``model_devi_perc_candi_f`` + + See model_devi_adapt_trust_lo. 
+ +model_devi_perc_candi_v: + | type: ``float``, optional + | argument path: ``model_devi_perc_candi_v`` + + See model_devi_adapt_trust_lo. + +model_devi_f_avg_relative: + | type: ``bool``, optional + | argument path: ``model_devi_f_avg_relative`` + + Normalized the force model deviations by the RMS force magnitude along the trajectory. This key should not be used with use_relative. + +model_devi_clean_traj: + | type: ``bool`` | ``int`` + | argument path: ``model_devi_clean_traj`` + + If type of model_devi_clean_traj is bool type then it denote whether to clean traj folders in MD since they are too large. If it is Int type, then the most recent n iterations of traj folders will be retained, others will be removed. + +model_devi_nopbc: + | type: ``bool`` + | argument path: ``model_devi_nopbc`` + + Assume open boundary condition in MD simulations. + +model_devi_activation_func: + | type: ``list``, optional + | argument path: ``model_devi_activation_func`` + + Set activation functions for models, length of the list should be the same as numb_models, and two elements in the list of string respectively assign activation functions to the embedding and fitting nets within each model. Backward compatibility: the orginal "list of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the list should be the same as numb_models. + +model_devi_jobs: + | type: ``dict`` | ``list`` + | argument path: ``model_devi_jobs`` + + Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations + + sys_idx: + | type: ``list`` + | argument path: ``model_devi_jobs/sys_idx`` + + Systems to be selected as the initial structure of MD and be explored. The index corresponds exactly to the sys_configs. + + temps: + | type: ``list`` + | argument path: ``model_devi_jobs/temps`` + + Temperature (K) in MD. 
+ + press: + | type: ``list`` + | argument path: ``model_devi_jobs/press`` + + Pressure (Bar) in MD. + + trj_freq: + | type: ``int`` + | argument path: ``model_devi_jobs/trj_freq`` + + Frequecy of trajectory saved in MD. + + nsteps: + | type: ``int`` + | argument path: ``model_devi_jobs/nsteps`` + + Running steps of MD. + + ensembles: + | type: ``str`` + | argument path: ``model_devi_jobs/ensembles`` + + Determining which ensemble used in MD, options include “npt” and “nvt”. + + neidelay: + | type: ``int``, optional + | argument path: ``model_devi_jobs/neidelay`` + + Delay building until this many steps since last build. + + taut: + | type: ``float`` | ``str``, optional + | argument path: ``model_devi_jobs/taut`` + + Coupling time of thermostat (ps). + + taup: + | type: ``float`` | ``str``, optional + | argument path: ``model_devi_jobs/taup`` + + Coupling time of barostat (ps). + +fp_style: + | type: ``dict`` + | argument path: ``fp_style`` + + Software for First Principles. Options include “vasp”, “pwscf”, “siesta” and “gaussian” up to now. + + + Depending on the value of *fp_style*, different sub args are accepted. + + fp_style: + | type: ``str`` (flag key) + | argument path: ``fp_style/fp_style`` + | possible choices: vasp, gaussian, siesta, cp2k + + The code used for fp tasks. + + + When *fp_style* is set to ``vasp``: + + fp_pp_path: + | type: ``str`` + | argument path: ``fp_style[vasp]/fp_pp_path`` + + Directory of psuedo-potential file to be used for 02.fp exists. + + fp_pp_files: + | type: ``list`` + | argument path: ``fp_style[vasp]/fp_pp_files`` + + Psuedo-potential file to be used for 02.fp. Note that the order of elements should correspond to the order in type_map. + + fp_incar: + | type: ``str`` + | argument path: ``fp_style[vasp]/fp_incar`` + + Input file for VASP. INCAR must specify KSPACING and KGAMMA. + + fp_aniso_kspacing: + | type: ``list`` + | argument path: ``fp_style[vasp]/fp_aniso_kspacing`` + + Set anisotropic kspacing. 
Usually useful for 1-D or 2-D materials. Only support VASP. If it is setting the KSPACING key in INCAR will be ignored. + + cvasp: + | type: ``bool`` + | argument path: ``fp_style[vasp]/cvasp`` + + If cvasp is true, DP-GEN will use Custodian to help control VASP calculation. + + + When *fp_style* is set to ``gaussian``: + + use_clusters: + | type: ``bool`` + | argument path: ``fp_style[gaussian]/use_clusters`` + + If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x. + + cluster_cutoff: + | type: ``float`` + | argument path: ``fp_style[gaussian]/cluster_cutoff`` + + The cutoff radius of clusters if use_clusters is set to true. + + fp_params: + | type: ``dict`` + | argument path: ``fp_style[gaussian]/fp_params`` + + Parameters for Gaussian calculation. + + doc_keywords: + | type: ``str`` | ``list`` + | argument path: ``fp_style[gaussian]/fp_params/doc_keywords`` + + Keywords for Gaussian input. + + multiplicity: + | type: ``int`` | ``str`` + | argument path: ``fp_style[gaussian]/fp_params/multiplicity`` + + Spin multiplicity for Gaussian input. If set to auto, the spin multiplicity will be detected automatically. If set to frag, the "fragment=N" method will be used. + + nproc: + | type: ``int`` + | argument path: ``fp_style[gaussian]/fp_params/nproc`` + + The number of processors for Gaussian input. + + + When *fp_style* is set to ``siesta``: + + use_clusters: + | type: ``bool`` + | argument path: ``fp_style[siesta]/use_clusters`` + + If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x. + + cluster_cutoff: + | type: ``float`` + | argument path: ``fp_style[siesta]/cluster_cutoff`` + + The cutoff radius of clusters if use_clusters is set to true. + + fp_params: + | type: ``dict`` + | argument path: ``fp_style[siesta]/fp_params`` + + Parameters for siesta calculation. 
+ + ecut: + | type: ``int`` + | argument path: ``fp_style[siesta]/fp_params/ecut`` + + Define the plane wave cutoff for grid. + + ediff: + | type: ``float`` + | argument path: ``fp_style[siesta]/fp_params/ediff`` + + Tolerance of Density Matrix. + + kspacing: + | type: ``float`` + | argument path: ``fp_style[siesta]/fp_params/kspacing`` + + Sample factor in Brillouin zones. + + mixingweight: + | type: ``float`` + | argument path: ``fp_style[siesta]/fp_params/mixingweight`` + + Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing). + + NumberPulay: + | type: ``int`` + | argument path: ``fp_style[siesta]/fp_params/NumberPulay`` + + Controls the Pulay convergence accelerator. + + + When *fp_style* is set to ``cp2k``: + + user_fp_params: + | type: ``dict`` + | argument path: ``fp_style[cp2k]/user_fp_params`` + + Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. + + external_input_path: + | type: ``str`` + | argument path: ``fp_style[cp2k]/external_input_path`` + + Conflict with key:user_fp_params, use the template input provided by user, some rules should be followed, read the following text in detail. + +fp_task_max: + | type: ``int`` + | argument path: ``fp_task_max`` + + Maximum of structures to be calculated in 02.fp of each iteration. + +fp_task_min: + | type: ``int`` + | argument path: ``fp_task_min`` + + Minimum of structures to be calculated in 02.fp of each iteration. + +fp_accurate_threshold: + | type: ``float``, optional + | argument path: ``fp_accurate_threshold`` + + If the accurate ratio is larger than this number, no fp calculation will be performed, i.e. fp_task_max = 0. 
+ +fp_accurate_soft_threshold: + | type: ``float``, optional + | argument path: ``fp_accurate_soft_threshold`` + + If the accurate ratio is between this number and fp_accurate_threshold, the fp_task_max linearly decays to zero. + +fp_cluster_vacuum: + | type: ``float``, optional + | argument path: ``fp_cluster_vacuum`` + + If the vacuum size is smaller than this value, this cluster will not be choosen for labeling. + diff --git a/doc/run/run-process.rst b/doc/run/run-process.rst new file mode 100644 index 000000000..cb98d7982 --- /dev/null +++ b/doc/run/run-process.rst @@ -0,0 +1,9 @@ +.. _run:: + +.. toctree:: + :maxdepth: 2 + :caption: Run Process + + overview-of-the-run-process.md + example-of-param.md + example-of-machine.md From 60238992104933493737855f9a64fb23b854b19d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 9 Jun 2022 00:51:49 -0400 Subject: [PATCH 05/25] refactor simplify (#730) * refactor simplify 1. Used `dp model-devi` to calculate model deviation, instead of local calculation. Supported by deepmodeling/deepmd-kit#1618, released in v2.1.1. So the version earlier than 2.1.1 is not supported any more. 2. Assumed all systems are MultiSystems. 3. Removed energy model deviation support * expand path when getting multisystems * let `make_train` and `run_train` expand paths * load numpy array instead * use dpdata to get nframes * fix tests * update README --- README.md | 6 +- dpgen/generator/run.py | 70 +++---- dpgen/simplify/simplify.py | 381 +++++++++---------------------------- dpgen/util.py | 22 +++ 4 files changed, 140 insertions(+), 339 deletions(-) diff --git a/README.md b/README.md index b59725ec9..c833ed059 100644 --- a/README.md +++ b/README.md @@ -499,9 +499,8 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key | **use_ele_temp** | int | 0 | Currently only support fp_style vasp. 0(default): no electron temperature. 1: eletron temperature as frame parameter. 2: electron temperature as atom parameter. 
| *#Data* | init_data_prefix | String | "/sharedext4/.../data/" | Prefix of initial data directories - | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. + | ***init_data_sys*** | List of string|["CH4.POSCAR.01x01x01/.../deepmd"] |Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories. | ***sys_format*** | String | "vasp/poscar" | Format of initial data. It will be `vasp/poscar` if not set. - | init_multi_systems | Boolean | false | If set to `true`, `init_data_sys` directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. | init_batch_size | String of integer | [8] | Each number is the batch_size of corresponding system for training in `init_data_sys`. One recommended rule for setting the `sys_batch_size` and `init_batch_size` is that `batch_size` mutiply number of atoms ot the stucture should be larger than 32. If set to `auto`, batch size will be 32 divided by number of atoms. | | sys_configs_prefix | String | "/sharedext4/.../data/" | Prefix of `sys_configs` | **sys_configs** | List of list of string | [
["/sharedext4/.../POSCAR"],
["....../POSCAR"]
] | Containing directories of structures to be explored in iterations.Wildcard characters are supported here. | @@ -1086,7 +1085,6 @@ Here is an example of `param.json` for QM7 dataset: }, "_comment": "that's all" }, - "use_clusters": true, "fp_style": "gaussian", "shuffle_poscar": false, "fp_task_max": 1000, @@ -1109,7 +1107,7 @@ Here is an example of `param.json` for QM7 dataset: } ``` -Here `pick_data` is the data to simplify and currently only supports `MultiSystems` containing `System` with `deepmd/npy` format, and `use_clusters` should always be `true`. `init_pick_number` and `iter_pick_number` are the numbers of picked frames. `e_trust_lo`, `e_trust_hi` mean the range of the deviation of the frame energy, and `f_trust_lo` and `f_trust_hi` mean the range of the max deviation of atomic forces in a frame. `fp_style` can only be `gaussian` currently. Other parameters are as the same as those of generator. +Here `pick_data` is the directory to data to simplify where the program recursively detects systems `System` with `deepmd/npy` format. `init_pick_number` and `iter_pick_number` are the numbers of picked frames. `e_trust_lo`, `e_trust_hi` mean the range of the deviation of the frame energy, and `f_trust_lo` and `f_trust_hi` mean the range of the max deviation of atomic forces in a frame. `fp_style` can only be `gaussian` currently. Other parameters are as the same as those of generator. 
## Set up machine diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index d06c137b3..1bd196cc6 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -61,7 +61,7 @@ from dpgen.generator.lib.ele_temp import NBandsEsti from dpgen.remote.decide_machine import convert_mdata from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission -from dpgen.util import sepline +from dpgen.util import sepline, expand_sys_str from dpgen import ROOT_PATH from pymatgen.io.vasp import Incar,Kpoints,Potcar from dpgen.auto_test.lib.vasp import make_kspacing_kpoints @@ -288,13 +288,10 @@ def make_train (iter_index, # make sure all init_data_sys has the batch size -- for the following `zip` assert (len(init_data_sys_) <= len(init_batch_size_)) for ii, ss in zip(init_data_sys_, init_batch_size_) : - if jdata.get('init_multi_systems', False): - for single_sys in os.listdir(os.path.join(work_path, 'data.init', ii)): - init_data_sys.append(os.path.join('..', 'data.init', ii, single_sys)) - init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii, single_sys))) - else: - init_data_sys.append(os.path.join('..', 'data.init', ii)) - init_batch_size.append(detect_batch_size(ss, os.path.join(work_path, 'data.init', ii))) + sys_paths = expand_sys_str(os.path.join(init_data_prefix, ii)) + for single_sys in sys_paths: + init_data_sys.append(os.path.normpath(os.path.join('..', 'data.init', ii, os.path.relpath(single_sys, os.path.join(init_data_prefix, ii))))) + init_batch_size.append(detect_batch_size(ss, single_sys)) old_range = None if iter_index > 0 : for ii in range(iter_index) : @@ -308,25 +305,16 @@ def make_train (iter_index, sys_batch_size = ["auto" for aa in range(len(sys_list))] for jj in fp_data_sys : sys_idx = int(jj.split('.')[-1]) - if jdata.get('use_clusters', False): - nframes = 0 - for sys_single in os.listdir(jj): - tmp_box = np.loadtxt(os.path.join(jj, sys_single, 'box.raw')) - tmp_box = 
np.reshape(tmp_box, [-1,9]) - nframes += tmp_box.shape[0] - if nframes < fp_task_min : - log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj)) - continue - for sys_single in os.listdir(jj): - init_data_sys.append(os.path.join('..', 'data.iters', jj, sys_single)) - init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], os.path.join(jj, sys_single))) - else: - nframes = dpdata.System(jj, 'deepmd/npy').get_nframes() - if nframes < fp_task_min : - log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj)) - continue - init_data_sys.append(os.path.join('..', 'data.iters', jj)) - init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], jj)) + sys_paths = expand_sys_str(jj) + nframes = 0 + for sys_single in sys_paths: + nframes += dpdata.LabeledSystem(sys_single, fmt="deepmd/npy").get_nframes() + if nframes < fp_task_min : + log_task('nframes (%d) in data sys %s is too small, skip' % (nframes, jj)) + continue + for sys_single in sys_paths: + init_data_sys.append(os.path.normpath(os.path.join('..', 'data.iters', sys_single))) + init_batch_size.append(detect_batch_size(sys_batch_size[sys_idx], sys_single)) # establish tasks jinput = jdata['default_training_param'] try: @@ -568,25 +556,17 @@ def run_train (iter_index, os.chdir(work_path) fp_data = glob.glob(os.path.join('data.iters', 'iter.*', '02.fp', 'data.*')) for ii in init_data_sys : - if jdata.get('init_multi_systems', False): - for single_sys in os.listdir(os.path.join(ii)): - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) - else: - trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) + sys_paths = expand_sys_str(ii) + for single_sys in sys_paths: + trans_comm_data += 
glob.glob(os.path.join(single_sys, 'set.*')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc')) for ii in fp_data : - if jdata.get('use_clusters', False): - for single_sys in os.listdir(os.path.join(ii)): - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, single_sys, 'nopbc')) - else: - trans_comm_data += glob.glob(os.path.join(ii, 'set.*')) - trans_comm_data += glob.glob(os.path.join(ii, 'type*.raw')) - trans_comm_data += glob.glob(os.path.join(ii, 'nopbc')) + sys_paths = expand_sys_str(ii) + for single_sys in sys_paths: + trans_comm_data += glob.glob(os.path.join(single_sys, 'set.*')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'type*.raw')) + trans_comm_data += glob.glob(os.path.join(single_sys, 'nopbc')) os.chdir(cwd) try: diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 982db3114..529401519 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -9,6 +9,7 @@ 02: fp (optional, if the original dataset do not have fp data, same as generator) """ import logging +import warnings import queue import os import json @@ -21,7 +22,7 @@ from dpgen import dlog from dpgen import SHORT_CMD -from dpgen.util import sepline +from dpgen.util import sepline, expand_sys_str from distutils.version import LooseVersion from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission from dpgen.generator.run import make_train, run_train, post_train, run_fp, post_fp, fp_name, model_devi_name, train_name, train_task_fmt, sys_link_fp_vasp_pp, make_fp_vasp_incar, make_fp_vasp_kp, make_fp_vasp_cp_cvasp, data_system_fmt, model_devi_task_fmt, fp_task_fmt @@ -38,17 +39,6 @@ sys_name_fmt = 'sys.' 
+ data_system_fmt sys_name_pattern = 'sys.[0-9]*[0-9]' -def expand_sys_str(root_dir): - matches = [] - for root, dirnames, filenames in os.walk(root_dir, followlinks=True): - for filename in fnmatch.filter(filenames, 'type.raw'): - matches.append(root) - matches.sort() - dirnames = [os.path.basename(ii) for ii in matches] - if (len(list(set(dirnames))) != len(matches)) : - raise RuntimeError('duplicated system name: it is highly recommend to place all systems in the same level of directory and has different names') - return matches - def get_system_cls(jdata): if jdata.get("labeled", False): @@ -58,28 +48,12 @@ def get_system_cls(jdata): def get_multi_system(path, jdata): system = get_system_cls(jdata) + system_paths = expand_sys_str(path) systems = dpdata.MultiSystems( - *[system(os.path.join(path, s), fmt='deepmd/npy') for s in os.listdir(path)]) - return systems - - -def get_systems(path, jdata): - system_cls = get_system_cls(jdata) - system_paths = expand_sys_str(path) - systems = {} - for ii in system_paths: - systems[os.path.basename(ii)] = system_cls(ii, fmt='deepmd/npy') + *[system(s, fmt='deepmd/npy') for s in system_paths]) return systems -def get_system_idx(path): - system_paths = expand_sys_str(path) - sys_idx_map = {} - for idx,ii in enumerate(system_paths): - sys_idx_map[os.path.basename(ii)] = idx - return sys_idx_map - - def init_model(iter_index, jdata, mdata): training_init_model = jdata.get('training_init_model', False) if not training_init_model: @@ -111,20 +85,13 @@ def init_pick(iter_index, jdata, mdata): """pick up init data from dataset randomly""" pick_data = jdata['pick_data'] init_pick_number = jdata['init_pick_number'] - use_clusters = jdata.get('use_clusters', False) # use MultiSystems with System # TODO: support System and LabeledSystem # TODO: support other format - if use_clusters: - systems = get_multi_system(pick_data, jdata) - else: - systems = get_systems(pick_data, jdata) + systems = get_multi_system(pick_data, jdata) # label 
the system labels = [] - if use_clusters: - items = systems.systems.items() - else: - items = systems.items() + items = systems.systems.items() for key, system in items: labels.extend([(key, j) for j in range(len(system))]) @@ -146,48 +113,18 @@ def init_pick(iter_index, jdata, mdata): _init_dump_selected_frames(systems, labels, rest_idx, sys_data_path, jdata) -def _add_system(systems, key, system): - if key in systems.keys(): - systems[key].append(system) - else: - systems[key] = system - return systems - - def _init_dump_selected_frames(systems, labels, selc_idx, sys_data_path, jdata): - pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) - if use_clusters: - selc_systems = dpdata.MultiSystems() - for j in selc_idx: - sys_name, sys_id = labels[j] - selc_systems.append(systems[sys_name][sys_id]) - selc_systems.to_deepmd_raw(sys_data_path) - selc_systems.to_deepmd_npy(sys_data_path, set_size=selc_idx.size) - else: - selc_systems = {} - for j in selc_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, systems[sys_name][sys_id]) - sys_idx_map = get_system_idx(pick_data) - for kk in selc_systems.keys(): - sub_path = os.path.join(sys_data_path, sys_name_fmt % sys_idx_map[kk]) - selc_systems[kk].to_deepmd_raw(sub_path) - selc_systems[kk].to_deepmd_npy(sub_path, set_size=selc_idx.size) - with open(os.path.join(sys_data_path, 'sys_idx_map.json'), 'w') as fp: - json.dump(sys_idx_map, fp, indent=4) - -def _dump_system_dict(systems, path): - for kk in systems: - sub_path = os.path.join(path, sys_name_fmt % (int(kk))) - systems[kk].to_deepmd_raw(sub_path) - systems[kk].to_deepmd_npy(sub_path, set_size=systems[kk].get_nframes()) + selc_systems = dpdata.MultiSystems() + for j in selc_idx: + sys_name, sys_id = labels[j] + selc_systems.append(systems[sys_name][sys_id]) + selc_systems.to_deepmd_raw(sys_data_path) + selc_systems.to_deepmd_npy(sys_data_path, set_size=selc_idx.size) def make_model_devi(iter_index, 
jdata, mdata): """calculate the model deviation of the rest idx""" pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, model_devi_name) create_path(work_path) @@ -203,25 +140,7 @@ def make_model_devi(iter_index, jdata, mdata): rest_data_path = os.path.join(last_iter_name, model_devi_name, rest_data_name) if not os.path.exists(rest_data_path): return False - if use_clusters: - for jj, subsystem in enumerate(os.listdir(rest_data_path)): - task_name = "task." + model_devi_task_fmt % (0, jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - os.symlink(os.path.abspath(os.path.join(rest_data_path, subsystem)), - os.path.abspath(os.path.join(task_path, rest_data_name))) - else: - rest_data_path = os.path.abspath(rest_data_path) - sys_path = glob.glob(os.path.join(rest_data_path, sys_name_pattern)) - cwd = os.getcwd() - for ii in sys_path: - task_name = "task." + model_devi_task_fmt % (int(os.path.basename(ii).split('.')[1]), 0) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - os.chdir(task_path) - os.symlink(os.path.relpath(ii), rest_data_name) - os.chdir(cwd) - os.chdir(cwd) + os.symlink(os.path.abspath(rest_data_path), os.path.join(work_path, rest_data_name + ".old")) return True @@ -231,43 +150,28 @@ def run_model_devi(iter_index, jdata, mdata): work_path = os.path.join(iter_name, model_devi_name) # generate command commands = [] - tasks = glob.glob(os.path.join(work_path, "task.*")) - run_tasks = [os.path.basename(ii) for ii in tasks] + run_tasks = ["."] # get models models = glob.glob(os.path.join(work_path, "graph*pb")) model_names = [os.path.basename(ii) for ii in models] task_model_list = [] for ii in model_names: - task_model_list.append(os.path.join('..', ii)) - # get max data size - data_size = max([len(dpdata.System(os.path.join( - task, rest_data_name), fmt="deepmd/npy")) for task in tasks]) + 
task_model_list.append(os.path.join('.', ii)) # models commands = [] - detail_file_names = [] - for ii, mm in enumerate(task_model_list): - detail_file_name = "{prefix}-{ii}".format( - prefix=detail_file_name_prefix, - ii=ii, - ) - # TODO: support 0.x? - command = "{python} -m deepmd test -m {model} -s {system} -n {numb_test} -d {detail_file}".format( - python=mdata['python_test_path'], - model=mm, - system=rest_data_name, - numb_test=data_size, - detail_file=detail_file_name, - ) - commands.append(command) - detail_file_names.append(detail_file_name) + detail_file_name = detail_file_name_prefix + command = "{dp} model-devi -m {model} -s {system} -o {detail_file}".format( + dp=mdata.get('model_devi_command', 'dp'), + model=" ".join(task_model_list), + system=rest_data_name + ".old", + detail_file=detail_file_name, + ) + commands = [command] # submit - try: - model_devi_group_size = mdata['model_devi_group_size'] - except Exception: - model_devi_group_size = 1 + model_devi_group_size = mdata.get('model_devi_group_size', 1) - forward_files = [rest_data_name] - backward_files = sum([[pf+".e.out", pf+".f.out", pf+".v.out"] for pf in detail_file_names], []) + forward_files = [rest_data_name + ".old"] + backward_files = [detail_file_name] api_version = mdata.get('api_version', '0.9') if LooseVersion(api_version) < LooseVersion('1.0'): @@ -303,102 +207,50 @@ def run_model_devi(iter_index, jdata, mdata): def post_model_devi(iter_index, jdata, mdata): """calculate the model deviation""" - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, model_devi_name) - tasks = glob.glob(os.path.join(work_path, "task.*")) - tasks.sort() - - e_trust_lo = jdata['e_trust_lo'] - e_trust_hi = jdata['e_trust_hi'] - f_trust_lo = jdata['f_trust_lo'] - f_trust_hi = jdata['f_trust_hi'] - - if use_clusters: - sys_accurate = dpdata.MultiSystems() - sys_candinate = dpdata.MultiSystems() - sys_failed = dpdata.MultiSystems() - 
else: - sys_accurate = {} - sys_candinate = {} - sys_failed = {} - all_names = set() - - for task in tasks: - if not use_clusters: - sys_name = os.path.basename(task).split('.')[1] - all_names.add(sys_name) - # e.out - details_e = glob.glob(os.path.join(task, "{}-*.e.out".format(detail_file_name_prefix))) - e_all = np.array([np.loadtxt(detail_e, ndmin=2)[:, 1] for detail_e in details_e]) - e_std = np.std(e_all, axis=0) - n_frame = e_std.size - - # f.out - details_f = glob.glob(os.path.join(task, "{}-*.f.out".format(detail_file_name_prefix))) - f_all = np.array([np.loadtxt(detail_f, ndmin=2)[:, 3:6].reshape((n_frame, -1, 3)) for detail_f in details_f]) - # (n_model, n_frame, n_atom, 3) - f_std = np.std(f_all, axis=0) - # (n_frame, n_atom, 3) - f_std = np.linalg.norm(f_std, axis=2) - # (n_frame, n_atom) - f_std = np.max(f_std, axis=1) - # (n_frame,) - - system_cls = get_system_cls(jdata) - for subsys, e_devi, f_devi in zip(system_cls(os.path.join(task, rest_data_name), fmt='deepmd/npy'), e_std, f_std): - if (e_devi < e_trust_hi and e_devi >= e_trust_lo) or (f_devi < f_trust_hi and f_devi >= f_trust_lo) : - if use_clusters: + + f_trust_lo = jdata['model_devi_f_trust_lo'] + f_trust_hi = jdata['model_devi_f_trust_hi'] + + sys_accurate = dpdata.MultiSystems() + sys_candinate = dpdata.MultiSystems() + sys_failed = dpdata.MultiSystems() + + sys_entire = dpdata.MultiSystems().from_deepmd_npy(os.path.join(work_path, rest_data_name + ".old")) + + detail_file_name = detail_file_name_prefix + with open(os.path.join(work_path, detail_file_name)) as f: + for line in f: + if line.startswith("# data.rest.old"): + name = (line.split()[1]).split("/")[-1] + elif line.startswith("#"): + pass + else: + idx = int(line.split()[0]) + f_devi = float(line.split()[4]) + subsys = sys_entire[name][idx] + if f_trust_lo <= f_devi < f_trust_hi: sys_candinate.append(subsys) - else: - sys_candinate = _add_system(sys_candinate, sys_name, subsys) - elif (e_devi >= e_trust_hi ) or (f_devi >= f_trust_hi 
): - if use_clusters: + elif f_devi >= f_trust_hi: sys_failed.append(subsys) - else: - sys_failed = _add_system(sys_failed, sys_name, subsys) - elif (e_devi < e_trust_lo and f_devi < f_trust_lo ): - if use_clusters: + elif f_devi < f_trust_lo: sys_accurate.append(subsys) else: - sys_accurate = _add_system(sys_accurate, sys_name, subsys) - else: - raise RuntimeError('reach a place that should NOT be reached...') - if use_clusters: - counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()} - fp_sum = sum(counter.values()) - for cc_key, cc_value in counter.items(): - dlog.info("{0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) - else: - all_names = list(all_names) - all_names.sort() - counter = {"candidate": 0, "accurate": 0, "failed": 0} - for kk in all_names: - sys_counter = {"candidate": 0, "accurate": 0, "failed": 0} - if kk in sys_candinate.keys(): - sys_counter['candidate'] += sys_candinate[kk].get_nframes() - if kk in sys_accurate.keys(): - sys_counter['accurate'] += sys_accurate[kk].get_nframes() - if kk in sys_failed.keys(): - sys_counter['failed'] += sys_failed[kk].get_nframes() - fp_sum = sum(sys_counter.values()) - for cc_key, cc_value in sys_counter.items(): - if fp_sum != 0: - dlog.info("sys{0:s} {1:9s} : {2:6d} in {3:6d} {4:6.2f} %".format(kk, cc_key, cc_value, fp_sum, cc_value/fp_sum*100)) - else: - dlog.info("sys{0:s} {1:9s} : {2:6d} in {3:6d} {4:6.2f} %".format(kk, cc_key, cc_value, fp_sum, 0*100)) - for ii in ['candidate', 'accurate', 'failed']: - counter[ii] += sys_counter[ii] + raise RuntimeError('reach a place that should NOT be reached...') + + counter = {"candidate": sys_candinate.get_nframes(), "accurate": sys_accurate.get_nframes(), "failed": sys_failed.get_nframes()} + fp_sum = sum(counter.values()) + for cc_key, cc_value in counter.items(): + dlog.info("{0:9s} : {1:6d} in {2:6d} {3:6.2f} %".format(cc_key, cc_value, fp_sum, 
cc_value/fp_sum*100)) if counter['candidate'] == 0 and counter['failed'] > 0: raise RuntimeError('no candidate but still have failed cases, stop. You may want to refine the training or to increase the trust level hi') # label the candidate system labels = [] - if use_clusters: - items = sys_candinate.systems.items() - else: - items = sys_candinate.items() + items = sys_candinate.systems.items() + for key, system in items: labels.extend([(key, j) for j in range(len(system))]) # candinate: pick up randomly @@ -412,112 +264,61 @@ def post_model_devi(iter_index, jdata, mdata): (counter['candidate'], len(pick_idx), float(len(pick_idx))/counter['candidate']*100., len(rest_idx), float(len(rest_idx))/counter['candidate']*100.)) # dump the picked candinate data - if use_clusters: - picked_systems = dpdata.MultiSystems() - for j in pick_idx: - sys_name, sys_id = labels[j] - picked_systems.append(sys_candinate[sys_name][sys_id]) - sys_data_path = os.path.join(work_path, picked_data_name) - picked_systems.to_deepmd_raw(sys_data_path) - picked_systems.to_deepmd_npy(sys_data_path, set_size=iter_pick_number) - else: - selc_systems = {} - for j in pick_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, sys_candinate[sys_name][sys_id]) - sys_data_path = os.path.join(work_path, picked_data_name) - _dump_system_dict(selc_systems, sys_data_path) + picked_systems = dpdata.MultiSystems() + for j in pick_idx: + sys_name, sys_id = labels[j] + picked_systems.append(sys_candinate[sys_name][sys_id]) + sys_data_path = os.path.join(work_path, picked_data_name) + picked_systems.to_deepmd_raw(sys_data_path) + picked_systems.to_deepmd_npy(sys_data_path, set_size=iter_pick_number) + # dump the rest data (not picked candinate data and failed data) - if use_clusters: - rest_systems = dpdata.MultiSystems() - for j in rest_idx: - sys_name, sys_id = labels[j] - rest_systems.append(sys_candinate[sys_name][sys_id]) - rest_systems += sys_failed - sys_data_path = 
os.path.join(work_path, rest_data_name) - rest_systems.to_deepmd_raw(sys_data_path) + rest_systems = dpdata.MultiSystems() + for j in rest_idx: + sys_name, sys_id = labels[j] + rest_systems.append(sys_candinate[sys_name][sys_id]) + rest_systems += sys_failed + sys_data_path = os.path.join(work_path, rest_data_name) + rest_systems.to_deepmd_raw(sys_data_path) + if rest_idx.size: rest_systems.to_deepmd_npy(sys_data_path, set_size=rest_idx.size) - else: - selc_systems = {} - for j in rest_idx: - sys_name, sys_id = labels[j] - selc_systems = _add_system(selc_systems, sys_name, sys_candinate[sys_name][sys_id]) - for kk in sys_failed.keys(): - selc_systems = _add_system(selc_systems, kk, sys_failed[kk]) - sys_data_path = os.path.join(work_path, rest_data_name) - _dump_system_dict(selc_systems, sys_data_path) + # dump the accurate data -- to another directory - if use_clusters: - sys_data_path = os.path.join(work_path, accurate_data_name) - sys_accurate.to_deepmd_raw(sys_data_path) - sys_accurate.to_deepmd_npy(sys_data_path, set_size=sys_accurate.get_nframes()) - else: - sys_data_path = os.path.join(work_path, accurate_data_name) - _dump_system_dict(sys_accurate, sys_data_path) + sys_data_path = os.path.join(work_path, accurate_data_name) + sys_accurate.to_deepmd_raw(sys_data_path) + sys_accurate.to_deepmd_npy(sys_data_path, set_size=sys_accurate.get_nframes()) def make_fp_labeled(iter_index, jdata): dlog.info("already labeled, skip make_fp and link data directly") pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, fp_name) create_path(work_path) picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name) - if use_clusters: - os.symlink(os.path.abspath(picked_data_path), os.path.abspath( - os.path.join(work_path, "task." + data_system_fmt % 0))) - os.symlink(os.path.abspath(picked_data_path), os.path.abspath( - os.path.join(work_path, "data." 
+ data_system_fmt % 0))) - else: - picked_data_path = os.path.abspath(picked_data_path) - sys_path = glob.glob(os.path.join(picked_data_path, sys_name_pattern)) - cwd = os.getcwd() - os.chdir(work_path) - for ii in sys_path: - sys_idx = os.path.basename(ii).split('.')[1] - data_dir = 'data.' + data_system_fmt % int(sys_idx) - task_dir = 'task.' + data_system_fmt % int(sys_idx) - os.symlink(os.path.relpath(ii), data_dir) - os.symlink(os.path.relpath(ii), task_dir) - os.chdir(cwd) + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "task." + data_system_fmt % 0))) + os.symlink(os.path.abspath(picked_data_path), os.path.abspath( + os.path.join(work_path, "data." + data_system_fmt % 0))) def make_fp_configs(iter_index, jdata): pick_data = jdata['pick_data'] - use_clusters = jdata.get('use_clusters', False) iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, fp_name) create_path(work_path) picked_data_path = os.path.join(iter_name, model_devi_name, picked_data_name) - if use_clusters: - systems = get_multi_system(picked_data_path, jdata) - jj = 0 - for system in systems: - for subsys in system: - task_name = "task." + fp_task_fmt % (0, jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - subsys.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) - jj += 1 - else: - picked_data_path = os.path.abspath(picked_data_path) - sys_path = glob.glob(os.path.join(picked_data_path, sys_name_pattern)) - for ii in sys_path: - tmp_sys = dpdata.System(ii, fmt = 'deepmd/npy') - sys_idx = os.path.basename(ii).split('.')[1] - jj = 0 - for ss in tmp_sys: - task_name = "task." 
+ fp_task_fmt % (int(sys_idx), jj) - task_path = os.path.join(work_path, task_name) - create_path(task_path) - ss.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) - job = {} - with open(os.path.join(task_path, 'job.json'), 'w') as fp: - json.dump(job, fp, indent=4) - jj += 1 + systems = get_multi_system(picked_data_path, jdata) + jj = 0 + for system in systems: + for subsys in system: + task_name = "task." + fp_task_fmt % (0, jj) + task_path = os.path.join(work_path, task_name) + create_path(task_path) + subsys.to('vasp/poscar', os.path.join(task_path, 'POSCAR')) + jj += 1 def make_fp_gaussian(iter_index, jdata): diff --git a/dpgen/util.py b/dpgen/util.py index aa805e7e5..9491cdc30 100644 --- a/dpgen/util.py +++ b/dpgen/util.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # coding: utf-8 +from typing import Union, List +from pathlib import Path from dpgen import dlog @@ -25,3 +27,23 @@ def box_center(ch='',fill=' ',sp="|"): ''' strs=ch.center(Len,fill) dlog.info(sp+strs[1:len(strs)-1:]+sp) + + +def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: + """Recursively iterate over directories taking those that contain `type.raw` file. + + Parameters + ---------- + root_dir : Union[str, Path] + starting directory + + Returns + ------- + List[str] + list of string pointing to system directories + """ + root_dir = Path(root_dir) + matches = [str(d) for d in root_dir.rglob("*") if (d / "type.raw").is_file()] + if (root_dir / "type.raw").is_file(): + matches.append(str(root_dir)) + return matches From c25cea3aac2e84f86f481afb5663d6bc87252bbc Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 9 Jun 2022 00:53:16 -0400 Subject: [PATCH 06/25] supports non-list mdata (#748) * supports non-list mdata The mdata of a task is a list of a single dict. This "list" looks useless and not clear enough. So this commit supports using the dict without a list. Note that old parameters are still supported, so no breaking changes are made. The "list" is just removed from all examples. 
Both list and non-list parameters are in the unittest. * fix typo --- README.md | 26 +++++++------------ doc/run/example-of-machine.md | 11 +++----- dpgen/remote/decide_machine.py | 13 +++++++--- .../machine/DeePMD-kit-1.x/machine-ali.json | 13 ++++------ .../machine-lsf-slurm-cp2k.json | 13 ++++------ .../DeePMD-kit-1.x/machine-slurm-qe.json | 12 +++------ .../DeePMD-kit-2.x/lebesgue_v2_machine.json | 11 ++++---- tests/tools/machine_fp_single2.json | 14 ++++++++++ tests/tools/test_convert_mdata.py | 10 +++++-- 9 files changed, 65 insertions(+), 58 deletions(-) create mode 100644 tests/tools/machine_fp_single2.json diff --git a/README.md b/README.md index c833ed059..fb280e226 100644 --- a/README.md +++ b/README.md @@ -1137,7 +1137,7 @@ an example of new dpgen's machine.json ```json { "api_version": "1.0", - "train": [ + "train": { "command": "dp", "machine": { @@ -1161,9 +1161,8 @@ an example of new dpgen's machine.json "para_deg": 3, "source_list": ["/home/user1234/deepmd.1.2.4.env"] } - } - ], - "model_devi":[ + }, + "model_devi": { "command": "lmp", "machine":{ @@ -1184,9 +1183,8 @@ an example of new dpgen's machine.json "group_size": 5, "source_list": ["/home/user1234/deepmd.1.2.4.env"] } - } - ], - "fp":[ + }, + "fp": { "command": "vasp_std", "machine":{ @@ -1208,7 +1206,6 @@ an example of new dpgen's machine.json "source_list": ["~/vasp.env"] } } - ] } ``` note1: the key "local_root" in dpgen's machine.json is always `./` @@ -1220,7 +1217,7 @@ When switching into a new machine, you may modifying the `MACHINE`, according to An example for `MACHINE` is: ```json { - "train": [ + "train": { "machine": { "batch": "slurm", @@ -1243,9 +1240,8 @@ An example for `MACHINE` is: "qos": "data" }, "command": "USERPATH/dp" - } - ], - "model_devi": [ + }, + "model_devi": { "machine": { "batch": "slurm", @@ -1269,9 +1265,8 @@ An example for `MACHINE` is: }, "command": "lmp_serial", "group_size": 1 - } - ], - "fp": [ + }, + "fp": { "machine": { "batch": "slurm", @@ -1298,7 
+1293,6 @@ An example for `MACHINE` is: "command": "vasp_gpu", "group_size": 1 } - ] } ``` Following table illustrates which key is needed for three types of machine: `train`,`model_devi` and `fp`. Each of them is a list of dicts. Each dict can be considered as an independent environmnet for calculation. diff --git a/doc/run/example-of-machine.md b/doc/run/example-of-machine.md index 569f85026..247c50e4f 100644 --- a/doc/run/example-of-machine.md +++ b/doc/run/example-of-machine.md @@ -20,7 +20,7 @@ In this section, we will show you how to perform train task at a local workstati In this example, we perform the `train` task on a local workstation. ```json -"train": [ +"train": { "command": "dp", "machine": { @@ -36,8 +36,7 @@ In this example, we perform the `train` task on a local workstation. "group_size": 1, "source_list": ["/home/user1234/deepmd.env"] } - } - ], + }, ``` The "command" for the train task in the DeePMD-kit is "dp". @@ -51,7 +50,7 @@ In the resources parameter, "number_node", "cpu_per_node", and "gpu_per_node" sp In this example, we perform the model_devi task at a local Slurm workstation. ```json -"model_devi": [ +"model_devi": { "command": "lmp", "machine": { @@ -70,7 +69,6 @@ In this example, we perform the model_devi task at a local Slurm workstation. "source_list": ["/home/user1234/lammps.env"] } } -], ``` The "command" for the model_devi task in the LAMMPS is "lmp". @@ -84,7 +82,7 @@ In the resources parameter, we specify the name of the queue to which the task i In this example, we perform the fp task at a remote PBS cluster that can be accessed via SSH. ```json -"fp": [ +"fp": { "command": "mpirun -n 32 vasp_std", "machine": { @@ -106,7 +104,6 @@ In this example, we perform the fp task at a remote PBS cluster that can be acce "source_list": ["/home/user1234/vasp.env"] } } -], ``` VASP code is used for fp task and mpi is used for parallel computing, so "mpirun -n 32" is added to specify the number of parallel threads. 
diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index 31691f322..c551be44b 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -36,11 +36,18 @@ def convert_mdata(mdata, task_types=["train", "model_devi", "fp"]): ''' for task_type in task_types: if task_type in mdata: - for key, item in mdata[task_type][0].items(): + if isinstance(mdata[task_type], dict): + task_data = mdata[task_type] + elif isinstance(mdata[task_type], (list, tuple)): + task_data = mdata[task_type][0] + else: + raise TypeError("mdata/%s should be dict or list!" % task_type) + for key, item in task_data.items(): if "comments" not in key: mdata[task_type + "_" + key] = item - group_size = mdata[task_type][0]["resources"].get("group_size", 1) - if group_size == 1: group_size = mdata[task_type][0].get("group_size", 1) + group_size = task_data["resources"].get("group_size", 1) + if group_size == 1: + group_size = task_data.get("group_size", 1) mdata[task_type + "_" + "group_size"] = group_size return mdata diff --git a/examples/machine/DeePMD-kit-1.x/machine-ali.json b/examples/machine/DeePMD-kit-1.x/machine-ali.json index a2a338af4..e78fc9dd4 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-ali.json +++ b/examples/machine/DeePMD-kit-1.x/machine-ali.json @@ -1,5 +1,5 @@ { - "train": [ + "train": { "machine": { "batch": "shell", @@ -34,10 +34,9 @@ }, "command": "/root/deepmd-kit/bin/dp", "group_size": 2 - } - ], + }, - "model_devi": [ + "model_devi": { "machine": { "batch": "shell", @@ -71,10 +70,9 @@ }, "command": "/root/deepmd-kit/bin/lmp", "group_size": 2 - } - ], + }, - "fp": [ + "fp": { "machine": { "batch": "shell", @@ -108,7 +106,6 @@ "command": "mpirun -n 16 /root/deepmd-pkg/vasp.5.4.4/bin/vasp_std", "group_size": 1 } - ] } diff --git a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json index b56d022ec..4fb5845ee 100644 --- 
a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json +++ b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json @@ -1,5 +1,5 @@ { - "train": [ + "train": { "machine": { "machine_type": "slurm", @@ -25,9 +25,8 @@ "submit_wait_time": 60 }, "python_path": "/share/apps/deepmd/compress/bin/python3.8" - } - ], - "model_devi": [ + }, + "model_devi": { "machine": { "machine_type": "slurm", @@ -54,9 +53,8 @@ }, "command": "lmp_mpi", "group_size": 5 - } - ], - "fp": [ + }, + "fp": { "machine": { "machine_type": "lsf", @@ -87,5 +85,4 @@ "command": "cp2k.popt -i input.inp", "group_size": 50 } - ] } diff --git a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json index 22a3fdbbd..5f15303d6 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json +++ b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json @@ -1,7 +1,7 @@ { "_comment" : "This is an example of DP-GEN on Slurm", "_comment" : "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", - "train" :[ + "train" : { "_comment" : "Specify the installed path of DeePMD-kit", "_comment" : "The version of DeePMD-kit should be 1.*", @@ -49,10 +49,9 @@ "time_limit": "23:0:0", "_comment": "that's all" } - } - ], + }, - "model_devi": [ + "model_devi": { "machine": { "machine_type": "slurm", @@ -81,10 +80,8 @@ "command": "lmp_serial", "_comment" : "DP-GEN will put 5 tasks together in one submitting script.", "group_size": 5 - } - ], + }, "fp": - [ { "machine": { "machine_type": "slurm", @@ -113,5 +110,4 @@ "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input", "group_size": 1 } - ] } diff --git a/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json b/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json index 6b9ead467..0ecba4fa6 100644 --- a/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json +++ b/examples/machine/DeePMD-kit-2.x/lebesgue_v2_machine.json @@ -1,7 +1,7 @@ { "api_version": "1.0", 
"deepmd_version": "2.0.1", - "train" :[ + "train" : { "command": "dp", "machine": { @@ -34,9 +34,9 @@ "queue_name": "GPU", "group_size": 1 } - }], + }, "model_devi": - [{ + { "command": "lmp -i input.lammps -v restart 0", "machine": { "batch_type": "DpCloudServer", @@ -68,9 +68,9 @@ "queue_name": "GPU", "group_size": 5 } - }], + }, "fp": - [{ + { "command": "mpirun -n 16 vasp_std", "machine": { "batch_type": "DpCloudServer", @@ -104,5 +104,4 @@ "source_list": ["/opt/intel/oneapi/setvars.sh"] } } - ] } diff --git a/tests/tools/machine_fp_single2.json b/tests/tools/machine_fp_single2.json new file mode 100644 index 000000000..8c2212927 --- /dev/null +++ b/tests/tools/machine_fp_single2.json @@ -0,0 +1,14 @@ +{ + "fp": + { + "command": "vasp_std", + "machine":{ + "batch_type": "PBS" + }, + "resources": { + "group_size" : 8 + }, + "_comments" : "In user_forward_files, define input files to be uploaded.", + "user_forward_files" : ["vdw_kernel.bindat"] + } +} \ No newline at end of file diff --git a/tests/tools/test_convert_mdata.py b/tests/tools/test_convert_mdata.py index 5458b0faa..5dc1b944e 100644 --- a/tests/tools/test_convert_mdata.py +++ b/tests/tools/test_convert_mdata.py @@ -6,12 +6,18 @@ __package__ = 'tools' from dpgen.remote.decide_machine import convert_mdata from .context import setUpModule -machine_file = 'machine_fp_single.json' + class TestConvertMdata(unittest.TestCase): + machine_file = 'machine_fp_single.json' + def test_convert_mdata (self): - mdata = json.load(open(machine_file)) + mdata = json.load(open(self.machine_file)) mdata = convert_mdata(mdata, ["fp"]) self.assertEqual(mdata["fp_command"], "vasp_std") self.assertEqual(mdata["fp_group_size"], 8) self.assertEqual(mdata["fp_machine"]["batch_type"], "PBS") self.assertEqual(mdata["fp_user_forward_files"], ["vdw_kernel.bindat"]) + + +class TestConvertMdata2(TestConvertMdata): + machine_file = 'machine_fp_single2.json' From 828024b4696b2e06dfa72d2694aae541f2003e3e Mon Sep 17 00:00:00 2001 From: 
Jinzhe Zeng Date: Thu, 9 Jun 2022 00:53:45 -0400 Subject: [PATCH 07/25] upgrade all tasks to dpdispatcher (#749) * upgrade all tasks to dpdispatcher This commit upgrades init_reaction and init_surf to use dpdispatcher * fix method args * fix typo * change the variable name from `work_dir` to `work_path` --- dpgen/data/reaction.py | 38 ++++++++-------- dpgen/data/surf.py | 11 ++--- dpgen/dispatcher/Dispatcher.py | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 22 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index b9574d525..0abfeb965 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -7,7 +7,7 @@ output: data """ -import argparse +import warnings import glob import json import os @@ -15,7 +15,8 @@ import dpdata from dpgen import dlog -from dpgen.dispatcher.Dispatcher import make_dispatcher +from dpgen.dispatcher.Dispatcher import make_submission_compat +from dpgen.remote.decide_machine import convert_mdata from dpgen.generator.run import create_path, make_fp_task_name from dpgen.util import sepline @@ -73,14 +74,15 @@ def make_lmp(jdata): return lmp_string -def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): +def run_reaxff(jdata, mdata, log_file="reaxff_log"): work_path = reaxff_path reaxff_command = "{} -in {}".format(mdata["reaxff_command"], lmp_path) run_tasks = glob.glob(os.path.join(work_path, 'task.*')) run_tasks.sort() run_tasks = [os.path.basename(ii) for ii in run_tasks] - dispatcher.run_jobs(mdata['reaxff_resources'], + make_submission_compat(mdata['reaxff_machine'], + mdata['reaxff_resources'], [reaxff_command], work_path, run_tasks, @@ -89,7 +91,8 @@ def run_reaxff(jdata, mdata, dispatcher, log_file="reaxff_log"): [ff_path, data_init_path, control_path, lmp_path], [trj_path], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def link_trj(jdata): @@ -102,7 +105,7 @@ def link_trj(jdata): os.path.join(task_path, 
trj_path))) -def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): +def run_build_dataset(jdata, mdata, log_file="build_log"): work_path = build_path build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], @@ -119,7 +122,8 @@ def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): run_tasks.sort() run_tasks = [os.path.basename(ii) for ii in run_tasks] - dispatcher.run_jobs(mdata['build_resources'], + make_submission_compat(mdata['build_machine'], + mdata['build_resources'], [build_command], work_path, run_tasks, @@ -128,7 +132,8 @@ def run_build_dataset(jdata, mdata, dispatcher, log_file="build_log"): [trj_path], [f"dataset_{dataset_name}_gjf"], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def link_fp_input(): @@ -146,7 +151,6 @@ def link_fp_input(): def run_fp(jdata, mdata, - dispatcher, log_file="output", forward_common_files=[]): fp_command = mdata['fp_command'] @@ -162,7 +166,8 @@ def run_fp(jdata, run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] - dispatcher.run_jobs(mdata['fp_resources'], + make_submission_compat(mdata['fp_machine'], + mdata['fp_resources'], [fp_command], work_path, run_tasks, @@ -171,7 +176,8 @@ def run_fp(jdata, ["input"], [log_file], outlog=log_file, - errlog=log_file) + errlog=log_file, + api_version=mdata.get("api_version", "0.9")) def convert_data(jdata): @@ -198,6 +204,7 @@ def gen_init_reaction(args): with open(args.MACHINE, "r") as fp: mdata = json.load(fp) + mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" iter_rec = -1 numb_task = 7 @@ -213,18 +220,15 @@ def gen_init_reaction(args): elif ii == 0: link_reaxff(jdata) elif ii == 1: - dispatcher = make_dispatcher(mdata["reaxff_machine"]) - run_reaxff(jdata, mdata, dispatcher) + run_reaxff(jdata, mdata) elif ii == 2: 
link_trj(jdata) elif ii == 3: - dispatcher = make_dispatcher(mdata["build_machine"]) - run_build_dataset(jdata, mdata, dispatcher) + run_build_dataset(jdata, mdata) elif ii == 4: link_fp_input() elif ii == 5: - dispatcher = make_dispatcher(mdata["fp_machine"]) - run_fp(jdata, mdata, dispatcher) + run_fp(jdata, mdata) elif ii == 6: convert_data(jdata) with open(record, "a") as frec: diff --git a/dpgen/data/surf.py b/dpgen/data/surf.py index bc31b6705..543f02bc8 100644 --- a/dpgen/data/surf.py +++ b/dpgen/data/surf.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import time +import warnings import os,json,shutil,re,glob,argparse import numpy as np import subprocess as sp @@ -12,7 +12,7 @@ from dpgen import dlog from dpgen import ROOT_PATH from dpgen.remote.decide_machine import convert_mdata -from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher +from dpgen.dispatcher.Dispatcher import make_submission_compat #-----PMG--------- from pymatgen.io.vasp import Poscar from pymatgen.core import Structure, Element @@ -565,15 +565,16 @@ def run_vasp_relax(jdata, mdata): run_tasks = [ii.replace(work_dir+"/", "") for ii in relax_run_tasks] #dlog.info(run_tasks) - dispatcher = make_dispatcher(mdata['fp_machine'], mdata['fp_resources'], work_dir, run_tasks, fp_group_size) - dispatcher.run_jobs(fp_resources, + make_submission_compat(mdata['fp_machine'], + fp_resources, [fp_command], work_dir, run_tasks, fp_group_size, forward_common_files, forward_files, - backward_files) + backward_files, + api_version=mdata.get("api_version", "0.9")) def gen_init_surf(args): try: diff --git a/dpgen/dispatcher/Dispatcher.py b/dpgen/dispatcher/Dispatcher.py index 29bea5669..abbe493b8 100644 --- a/dpgen/dispatcher/Dispatcher.py +++ b/dpgen/dispatcher/Dispatcher.py @@ -1,5 +1,6 @@ from distutils.version import LooseVersion import os,sys,time,random,json,glob +import warnings from typing import List from dpdispatcher import Task, Submission, Resources, Machine from 
dpgen.dispatcher.LocalContext import LocalSession @@ -406,3 +407,81 @@ def mdata_arginfo() -> List[Argument]: return [ command_arginfo, machine_arginfo, resources_arginfo, ] + + +def make_submission_compat( + machine: dict, + resources: dict, + commands: List[str], + work_path: str, + run_tasks: List[str], + group_size: int, + forward_common_files: List[str], + forward_files: List[str], + backward_files: List[str], + outlog: str="log", + errlog: str="err", + api_version: str="0.9", + ) -> None: + """Make submission with compatibility of both dispatcher API v0 and v1. + + If `api_version` is less than 1.0, use `make_dispatcher`. If + `api_version` is large than 1.0, use `make_submission`. + + Parameters + ---------- + machine : dict + machine dict + resources : dict + resource dict + commands : list[str] + list of commands + work_path : str + working directory + run_tasks : list[str] + list of paths to running tasks + group_size : int + group size + forward_common_files : list[str] + forwarded common files shared for all tasks + forward_files : list[str] + forwarded files for each task + backward_files : list[str] + backwarded files for each task + outlog : str, default=log + path to log from stdout + errlog : str, default=err + path to log from stderr + api_version : str, default=0.9 + API version. 1.0 is recommended + """ + if LooseVersion(api_version) < LooseVersion('1.0'): + warnings.warn(f"the dpdispatcher will be updated to new version." + f"And the interface may be changed. 
Please check the documents for more details") + dispatcher = make_dispatcher(machine, resources, work_dir, run_tasks, group_size) + dispatcher.run_jobs(resources, + commands, + work_path, + run_tasks, + group_size, + forward_common_files, + forward_files, + backward_files, + outlog=outlog, + errlog=errlog) + + elif LooseVersion(api_version) >= LooseVersion('1.0'): + submission = make_submission( + machine, + resources, + commands=commands, + work_path=work_path, + run_tasks=run_tasks, + group_size=group_size, + forward_common_files=forward_common_files, + forward_files=forward_files, + backward_files=backward_files, + outlog=outlog, + errlog=errlog) + submission.run_submission() + From 7a5dc0c7fec46a2dcdc49cd758a42c88cfd53462 Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Sun, 12 Jun 2022 12:43:44 +0800 Subject: [PATCH 08/25] update: add a new option of absolute volume in /dpgen/dpgen/auto_test/EOS.py (#741) * update: add a new option of absolute volume in ./dpgen/auto_test/EOS.py * update: add doc in /dpgen/doc/toymodels/ * update: change the description for eos, change the doc in /dpgen/doc/toymodels/ * update: change the notice of absolute volume from print into dlog.info --- doc/toymodels/JiamengHuang_pr.md | 9 +++++++++ dpgen/auto_test/EOS.py | 15 +++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 doc/toymodels/JiamengHuang_pr.md diff --git a/doc/toymodels/JiamengHuang_pr.md b/doc/toymodels/JiamengHuang_pr.md new file mode 100644 index 000000000..3b1210228 --- /dev/null +++ b/doc/toymodels/JiamengHuang_pr.md @@ -0,0 +1,9 @@ +A new parameter "vol_abs" is added. 
If you want to use absolute volume to get EOS, you can add + + "vol_abs": true, + +in the "eos" part of property.json +if it's not mentioned, "False" is set defaultly +when you are using absolute volume, there will be a notation in the last line of output during "make" process, which is like + +treat vol_start and vol_end as absolute volume diff --git a/dpgen/auto_test/EOS.py b/dpgen/auto_test/EOS.py index 4f332198b..f824dba8e 100644 --- a/dpgen/auto_test/EOS.py +++ b/dpgen/auto_test/EOS.py @@ -24,6 +24,8 @@ def __init__(self, self.vol_start = parameter['vol_start'] self.vol_end = parameter['vol_end'] self.vol_step = parameter['vol_step'] + parameter['vol_abs'] = parameter.get('vol_abs', False) + self.vol_abs = parameter['vol_abs'] parameter['cal_type'] = parameter.get('cal_type', 'relaxation') self.cal_type = parameter['cal_type'] default_cal_setting = {"relax_pos": True, @@ -117,6 +119,10 @@ def make_confs(self, else: print('gen eos from ' + str(self.vol_start) + ' to ' + str(self.vol_end) + ' by every ' + str(self.vol_step)) + if self.vol_abs : + dlog.info('treat vol_start and vol_end as absolute volume') + else : + dlog.info('treat vol_start and vol_end as relative volume') equi_contcar = os.path.join(path_to_equi, 'CONTCAR') if not os.path.exists(equi_contcar): raise RuntimeError("please do relaxation first") @@ -138,8 +144,13 @@ def make_confs(self, task_list.append(output_task) os.symlink(os.path.relpath(equi_contcar), 'POSCAR.orig') # scale = (vol / vol_to_poscar) ** (1. / 3.) - scale = vol ** (1. / 3.) - eos_params = {'volume': vol * vol_to_poscar, 'scale': scale} + + if self.vol_abs : + scale = (vol / vol_to_poscar) ** (1. / 3.) + eos_params = {'volume': vol, 'scale': scale} + else : + scale = vol ** (1. / 3.) 
+ eos_params = {'volume': vol * vol_to_poscar, 'scale': scale} dumpfn(eos_params, 'eos.json', indent=4) self.parameter['scale2equi'].append(scale) # 06/22 vasp.poscar_scale('POSCAR.orig', 'POSCAR', scale) From 800fb519681b9dc00b07627ebde4f0ce3dfa20b0 Mon Sep 17 00:00:00 2001 From: Liu Renxi <75369672+Liu-RX@users.noreply.github.com> Date: Tue, 14 Jun 2022 14:05:50 +0800 Subject: [PATCH 09/25] fix a bug in make_abacus_scf_input (#754) Co-authored-by: LiuRenxi --- README.md | 2 +- dpgen/generator/lib/abacus_scf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fb280e226..f26fb51df 100644 --- a/README.md +++ b/README.md @@ -570,7 +570,7 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key | **user_fp_params** | Dict | |Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. | **external_input_path** | String | | Conflict with key:user_fp_params, use the template input provided by user, some rules should be followed, read the following text in detail. | *fp_style == ABACUS* -| **user_fp_params** | Dict | |Parameters for ABACUS INPUT. find detail [Here](https://github.com/deepmodeling/abacus-develop/blob/develop/docs/input-main.md#out-descriptor). If `deepks_model` is set, the model file should be in the pseudopotential directory. +| **user_fp_params** | Dict | |Parameters for ABACUS INPUT. find detail [Here](https://github.com/deepmodeling/abacus-develop/blob/develop/docs/input-main.md#out-descriptor). If `deepks_model` is set, the model file should be in the pseudopotential directory. You can also set `KPT` file by adding `k_points` that corresponds to a list of six integers in this dictionary. | **fp_orb_files** | List | |List of atomic orbital files. The files should be in pseudopotential directory. | **fp_dpks_descriptor** | String | |DeePKS descriptor file name. 
The file should be in pseudopotential directory. diff --git a/dpgen/generator/lib/abacus_scf.py b/dpgen/generator/lib/abacus_scf.py index 256eb1d9d..1a9882979 100644 --- a/dpgen/generator/lib/abacus_scf.py +++ b/dpgen/generator/lib/abacus_scf.py @@ -83,7 +83,7 @@ def make_abacus_scf_input(fp_params): ret += "deepks_scf %d\n" % fp_params["deepks_scf"] elif key == "deepks_model": ret += "deepks_model %s\n" % fp_params["deepks_model"] - else: + elif key != "k_points": # "k_points key is used to generate KPT file." ret += "%s %s\n" % (key, str(fp_params[key])) return ret From aca0d3229408b3082693149bd888b9527db85684 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 14 Jun 2022 02:17:46 -0400 Subject: [PATCH 10/25] init_reaction: fix compatibility with new dpdispatcher (#755) fix compatibility as the key was changed in the dpdispatcher --- dpgen/data/reaction.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 0abfeb965..5e900f9de 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -107,6 +107,9 @@ def link_trj(jdata): def run_build_dataset(jdata, mdata, log_file="build_log"): work_path = build_path + # compatible with new dpdispatcher and old dpgen.dispatcher + build_ntasks = mdata["build_resources"].get("cpu_per_node", mdata["build_resources"]["task_per_node"]) + fp_ntasks = mdata["fp_resources"].get("cpu_per_node", mdata["fp_resources"]["task_per_node"]) build_command = "{cmd} -n {dataset_name} -a {type_map} -d {lammpstrj} -c {cutoff} -s {dataset_size} -k \"{qmkeywords}\" --nprocjob {nprocjob} --nproc {nproc}".format( cmd=mdata["build_command"], type_map=" ".join(jdata["type_map"]), @@ -114,8 +117,8 @@ def run_build_dataset(jdata, mdata, log_file="build_log"): cutoff=jdata["cutoff"], dataset_size=jdata["dataset_size"], qmkeywords=jdata["qmkeywords"], - nprocjob=mdata["fp_resources"]["task_per_node"], - nproc=mdata["build_resources"]["task_per_node"], + 
nprocjob=fp_ntasks, + nproc=build_ntasks, dataset_name=dataset_name ) run_tasks = glob.glob(os.path.join(work_path, 'task.*')) From bace57e624dbdc0e40b3d5e65cc5967db16af234 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 19 Jun 2022 12:09:51 -0400 Subject: [PATCH 11/25] generate machine parameter docs for simplify and init (#751) * generate machine parameter page for simplify and init * switching to new dargs directive --- doc/conf.py | 1 + doc/index.rst | 5 +++++ doc/init/init-bulk-mdata.rst | 6 ++++++ doc/init/init-reaction-mdata.rst | 6 ++++++ doc/init/init-surf-mdata.rst | 6 ++++++ doc/simplify/simplify-mdata.rst | 6 ++++++ dpgen/arginfo.py | 35 +++++++++++++++++++++++++++++++ dpgen/data/arginfo.py | 36 ++++++++++++++++++++++++++++++++ dpgen/generator/arginfo.py | 16 ++------------ dpgen/simplify/arginfo.py | 13 ++++++++++++ 10 files changed, 116 insertions(+), 14 deletions(-) create mode 100644 doc/init/init-bulk-mdata.rst create mode 100644 doc/init/init-reaction-mdata.rst create mode 100644 doc/init/init-surf-mdata.rst create mode 100644 doc/simplify/simplify-mdata.rst create mode 100644 dpgen/arginfo.py create mode 100644 dpgen/data/arginfo.py create mode 100644 dpgen/simplify/arginfo.py diff --git a/doc/conf.py b/doc/conf.py index 9d5ecc006..99dce21b5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -40,6 +40,7 @@ extensions = [ 'deepmodeling_sphinx', + 'dargs.sphinx', "sphinx_rtd_theme", 'myst_parser', 'sphinx.ext.autosummary', diff --git a/doc/index.rst b/doc/index.rst index 341ce5d79..6eea4d95b 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -32,6 +32,9 @@ DPGEN's documentation :maxdepth: 2 :caption: Init + init/init-bulk-mdata + init/init-surf-mdata + init/init-reaction-mdata .. _autotest:: @@ -46,6 +49,8 @@ DPGEN's documentation :maxdepth: 2 :caption: Simplify + simplify/simplify-mdata + .. 
_tutorial: diff --git a/doc/init/init-bulk-mdata.rst b/doc/init/init-bulk-mdata.rst new file mode 100644 index 000000000..b3098e906 --- /dev/null +++ b/doc/init/init-bulk-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_bulk machine parameters +================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_bulk_mdata_arginfo diff --git a/doc/init/init-reaction-mdata.rst b/doc/init/init-reaction-mdata.rst new file mode 100644 index 000000000..2fe35a0d8 --- /dev/null +++ b/doc/init/init-reaction-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_reaction machine parameters +====================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_reaction_mdata_arginfo diff --git a/doc/init/init-surf-mdata.rst b/doc/init/init-surf-mdata.rst new file mode 100644 index 000000000..35e8e322f --- /dev/null +++ b/doc/init/init-surf-mdata.rst @@ -0,0 +1,6 @@ +dpgen init_surf machine parameters +================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_surf_mdata_arginfo diff --git a/doc/simplify/simplify-mdata.rst b/doc/simplify/simplify-mdata.rst new file mode 100644 index 000000000..995fc90f8 --- /dev/null +++ b/doc/simplify/simplify-mdata.rst @@ -0,0 +1,6 @@ +dpgen simplify machine parameters +================================= + +.. dargs:: + :module: dpgen.simplify.arginfo + :func: simplify_mdata_arginfo diff --git a/dpgen/arginfo.py b/dpgen/arginfo.py new file mode 100644 index 000000000..3f657942a --- /dev/null +++ b/dpgen/arginfo.py @@ -0,0 +1,35 @@ +from typing import Tuple + +from dargs import Argument + +from dpgen.dispatcher.Dispatcher import mdata_arginfo + + +def general_mdata_arginfo(name: str, tasks: Tuple[str]) -> Argument: + """Generate arginfo for general mdata. + + Parameters + ---------- + name : str + mdata name + tasks : tuple[str] + tuple of task keys, e.g. 
("train", "model_devi", "fp") + + Returns + ------- + Argument + arginfo + """ + + doc_api_version = "Please set to 1.0" + doc_run_mdata = "machine.json file" + arg_api_version = Argument("api_version", str, optional=False, doc=doc_api_version) + + sub_fields = [arg_api_version] + doc_mdata = "Parameters of command, machine, and resources for %s" + for task in tasks: + sub_fields.append(Argument( + task, dict, optional=False, sub_fields=mdata_arginfo(), + doc=doc_mdata % task, + )) + return Argument(name, dict, sub_fields=sub_fields, doc=doc_run_mdata) diff --git a/dpgen/data/arginfo.py b/dpgen/data/arginfo.py new file mode 100644 index 000000000..d5814c036 --- /dev/null +++ b/dpgen/data/arginfo.py @@ -0,0 +1,36 @@ +from dargs import Argument + +from dpgen.arginfo import general_mdata_arginfo + + +def init_bulk_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_bulk mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_bulk_mdata", ("fp",)) + + +def init_surf_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_surf mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_surf_mdata", ("fp",)) + + +def init_reaction_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_reaction mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("init_reaction_mdata", ("reaxff", "build", "fp")) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index f8815862d..cb2fb887b 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -1,6 +1,6 @@ from dargs import Argument -from dpgen.dispatcher.Dispatcher import mdata_arginfo +from dpgen.arginfo import general_mdata_arginfo def run_mdata_arginfo() -> Argument: """Generate arginfo for dpgen run mdata. 
@@ -10,16 +10,4 @@ def run_mdata_arginfo() -> Argument: Argument arginfo """ - - doc_api_version = "Please set to 1.0" - doc_run_mdata = "machine.json file" - arg_api_version = Argument("api_version", str, optional=False, doc=doc_api_version) - - sub_fields = [arg_api_version] - doc_mdata = "Parameters of command, machine, and resources for %s" - for task in ("train", "model_devi", "fp"): - sub_fields.append(Argument( - task, dict, optional=False, sub_fields=mdata_arginfo(), - doc=doc_mdata % task, - )) - return Argument("run_mdata", dict, sub_fields=sub_fields, doc=doc_run_mdata) + return general_mdata_arginfo("run_mdata", ("train", "model_devi", "fp")) diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py new file mode 100644 index 000000000..0fbfe606e --- /dev/null +++ b/dpgen/simplify/arginfo.py @@ -0,0 +1,13 @@ +from dargs import Argument + +from dpgen.arginfo import general_mdata_arginfo + +def simplify_mdata_arginfo() -> Argument: + """Generate arginfo for dpgen simplify mdata. + + Returns + ------- + Argument + arginfo + """ + return general_mdata_arginfo("simplify_mdata", ("train", "model_devi", "fp")) From 9083b1d795a8c2dcfd6c3c002705b00b96506dea Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 20 Jun 2022 23:17:34 -0400 Subject: [PATCH 12/25] add auto cli docs (#759) * add auto cli docs * fix typo * fix package name... 
* forgot to return parser * add the blank line --- doc/conf.py | 1 + doc/index.rst | 2 ++ doc/overview/cli.rst | 7 +++++++ doc/requirements.txt | 1 + dpgen/main.py | 16 +++++++++++++--- 5 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 doc/overview/cli.rst diff --git a/doc/conf.py b/doc/conf.py index 99dce21b5..97b4b206b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -45,6 +45,7 @@ 'myst_parser', 'sphinx.ext.autosummary', 'sphinx.ext.viewcode', + 'sphinxarg.ext', ] diff --git a/doc/index.rst b/doc/index.rst index 6eea4d95b..eaa229813 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -7,6 +7,8 @@ DPGEN's documentation .. toctree:: :maxdepth: 2 :caption: Overview + + overview/cli .. _installation:: diff --git a/doc/overview/cli.rst b/doc/overview/cli.rst new file mode 100644 index 000000000..e57f1b064 --- /dev/null +++ b/doc/overview/cli.rst @@ -0,0 +1,7 @@ +Command line interface +====================== + +.. argparse:: + :module: dpgen.main + :func: main_parser + :prog: dpgen diff --git a/doc/requirements.txt b/doc/requirements.txt index 33ad28e39..0ae5c76f1 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -2,6 +2,7 @@ sphinx>=4.0.2 recommonmark sphinx_rtd_theme sphinx_markdown_tables +sphinx-argparse myst-parser deepmodeling_sphinx . diff --git a/dpgen/main.py b/dpgen/main.py index 6dcdc4ccd..c93c41ef4 100644 --- a/dpgen/main.py +++ b/dpgen/main.py @@ -30,9 +30,14 @@ __email__ = "" -def main(): - info() - print("Description\n------------") +def main_parser() -> argparse.ArgumentParser: + """Returns parser for `dpgen` command. + + Returns + ------- + argparse.ArgumentParser + parser for `dpgen` command + """ parser = argparse.ArgumentParser(description=""" dpgen is a convenient script that uses DeepGenerator to prepare initial data, drive DeepMDkit and analyze results. 
This script works based on @@ -156,8 +161,13 @@ def main(): help="parameter file, json format") parser_db.set_defaults(func=db_run) + return parser +def main(): + info() + print("Description\n------------") + parser = main_parser() try: import argcomplete argcomplete.autocomplete(parser) From f5c317228d1673f9428edbd5453c7b3a41aaf330 Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Wed, 29 Jun 2022 10:01:41 +0800 Subject: [PATCH 13/25] correct the wrong spelling of 'failure' (#764) --- dpgen/dispatcher/DispatcherList.py | 6 +++--- examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dpgen/dispatcher/DispatcherList.py b/dpgen/dispatcher/DispatcherList.py index 085ae5d9a..22b77fd50 100644 --- a/dpgen/dispatcher/DispatcherList.py +++ b/dpgen/dispatcher/DispatcherList.py @@ -45,7 +45,7 @@ def run_jobs(self, mark_failure = False, outlog = 'log', errlog = 'err'): - ratio_failure = self.mdata_resources.get("ratio_failue", 0) + ratio_failure = self.mdata_resources.get("ratio_failure", 0) while True: if self.check_all_dispatchers_finished(ratio_failure): self.clean() @@ -188,7 +188,7 @@ def make_dispatcher(self, ii): # Base - def check_dispatcher_status(self, ii, allow_failue=False): + def check_dispatcher_status(self, ii, allow_failure=False): '''catch running dispatcher exception if no exception occured, check finished''' if self.dispatcher_list[ii]["dispatcher_status"] == "running": @@ -198,7 +198,7 @@ def check_dispatcher_status(self, ii, allow_failue=False): clean = self.mdata_resources.get("clean", False) try: # avoid raising ssh exception in download proceess - finished = self.dispatcher_list[ii]["dispatcher"].all_finished(self.dispatcher_list[ii]["entity"].job_handler, allow_failue, clean) + finished = self.dispatcher_list[ii]["dispatcher"].all_finished(self.dispatcher_list[ii]["entity"].job_handler, allow_failure, clean) if finished: 
self.dispatcher_list[ii]["dispatcher_status"] = "finished" except Exception: diff --git a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json index 3de59661f..e2db8d254 100644 --- a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json +++ b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json @@ -64,7 +64,7 @@ }, "resources": { "allow_failure": true, - "ratio_failue": 0.05, + "ratio_failure": 0.05, "task_per_node": 16, "with_mpi": true, "_comment" : "Load the intel compiler.", From 66d856cd9862e9b7becef596c4d8a8cd504cee06 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 1 Jul 2022 01:22:16 -0400 Subject: [PATCH 14/25] upgrade machine examples to new dpdispatcher (#762) --- .../DeePMD-kit-1.0/machine-local-4GPU.json | 165 +++++++------ .../machine/DeePMD-kit-1.x/machine-local.json | 103 ++++---- .../machine-lsf-slurm-cp2k.json | 178 +++++++------- .../DeePMD-kit-1.x/machine-pbs-gaussian.json | 163 +++++++------ .../DeePMD-kit-1.x/machine-slurm-qe.json | 223 +++++++++--------- 5 files changed, 442 insertions(+), 390 deletions(-) diff --git a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json index e2db8d254..e0e6bfca0 100644 --- a/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json +++ b/examples/machine/DeePMD-kit-1.0/machine-local-4GPU.json @@ -1,79 +1,90 @@ { - "_comment" : "This is an example of DP-GEN on Local device running with 4 GPUs", - "_comment" : "Last updated on 2021.5.9 for DP-GEN 0.9.2 by Ke XU", - "train": [ - { - "_comment" : "Specify the installed path of DeePMD-kit", - "_comment" : "The version of DeePMD-kit should be 1.*", - "command": "/home/user/anaconda3/bin/dp", - "_comment" : "Specify machine settings", - "machine": { - "_comment" : "Supported batches include slurm, pbs, shell, lsf.", - "batch": "shell", - "work_path": "/tmp/dpwork", - "_comment": "that's all" - }, - "resources":{ - "_comment" : "The 
number of nodes.", - "numb_node": 1, - "_comment" : "If you choose to run with multiple GPUs simultaneously, just ignore numb_gpu.", - "numb_gpu": 0, - "_comment" : "The number of CPUs.", - "task_per_node": 4, - "_comment" : "The number of GPUs that can be used for each task.", - "manual_cuda_devices": 4, - "_comment" : "The number of tasks that can be run in each GPU.", - "manual_cuda_multiplicity":1, - "_comment" : "Allow the multi-GPU task running.", - "cuda_multi_task": true, - "module_list": [], - "_comment" : "Environment to be activated. This will generate source xxx/psxevars.sh in scripts. ", - "source_list": ["/opt/intel/parallel_studio_xe_2020/psxevars.sh"] - }, - "_comment" : "DP-GEN will put 4 tasks together in one submitting script.", - "group_size": 4 - } - ], - - "model_devi": [ - { - "machine": { - "batch": "shell", - "work_path": "/tmp/dpwork" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 0, - "task_per_node": 4, - "manual_cuda_devices": 4, - "manual_cuda_multiplicity":1, - "cuda_multi_task": true, - "source_list": [], - "module_list": [] - }, - "command": "/home/user/Soft/Deepmd/lammps-stable_29Oct2020/src/lmp_mpi", - "group_size": 4 - } - ], - - "fp": [ - { - "machine": { - "batch": "shell", - "work_path": "/tmp/dpwork" - }, - "resources": { - "allow_failure": true, - "ratio_failure": 0.05, - "task_per_node": 16, - "with_mpi": true, - "_comment" : "Load the intel compiler.", - "source_list": ["/opt/intel/parallel_studio_xe_2020/psxevars.sh"], - "envs": {"PATH" : "/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH"}, - "_comment" : "This will generate export PATH=/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH in scripts;" - }, - "command": "vasp_std", - "group_size": 1 - } - ] + "_comment" : "This is an example of DP-GEN on Local device running with 4 GPUs", + "_comment": "Last updated on 2021.5.9 for DP-GEN 0.9.2 by Ke XU", + "train": { + "_comment" : "Specify the installed path of DeePMD-kit", + "command": "/home/user/anaconda3/bin/dp", 
+ "_comment" : "Specify machine settings", + "machine": { + "_comment": "Supported batches include slurm, pbs, shell, lsf.", + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "module_list": [], + "_comment": "Environment to be activated. This will generate source xxx/psxevars.sh in scripts. ", + "source_list": [ + "/opt/intel/parallel_studio_xe_2020/psxevars.sh" + ], + "batch_type": "shell", + "_comment": "DP-GEN will put 4 tasks together in one submitting script.", + "group_size": 4, + "_comment" : "The number of nodes.", + "number_node": 1, + "_comment" : "The number of CPUs.", + "cpu_per_node": 4, + "_comment" : "If you choose to run with multiple GPUs simultaneously, just ignore numb_gpu.", + "gpu_per_node": 0, + "kwargs": {}, + "strategy": { + "_comment" : "Allow the multi-GPU task running.", + "if_cuda_multi_devices": true + }, + "para_deg": 4, + "queue_name": "" + } + }, + "model_devi": { + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [], + "batch_type": "shell", + "group_size": 4, + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 0, + "kwargs": {}, + "strategy": { + "if_cuda_multi_devices": true + }, + "para_deg": 4, + "queue_name": "" + }, + "command": "/home/user/Soft/Deepmd/lammps-stable_29Oct2020/src/lmp_mpi" + }, + "fp": { + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/tmp/dpwork", + "local_root": "./" + }, + "resources": { + "_comment" : "Load the intel compiler.", + "source_list": [ + "/opt/intel/parallel_studio_xe_2020/psxevars.sh" + ], + "_comment": "This will generate export PATH=/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH in scripts;", + "envs": { + "PATH": "/home/user/Soft/VASP/vasp.5.4.4-allbak/bin:$PATH" + }, + "batch_type": "shell", + "group_size": 1, + "cpu_per_node": 16, + 
"kwargs": {}, + "queue_name": "", + "number_node": 1, + "gpu_per_node": 1 + }, + "command": "mpirun -n 16 vasp_std || :" + }, + "api_version": "1.0" } diff --git a/examples/machine/DeePMD-kit-1.x/machine-local.json b/examples/machine/DeePMD-kit-1.x/machine-local.json index a266f712b..c8134d750 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-local.json +++ b/examples/machine/DeePMD-kit-1.x/machine-local.json @@ -1,42 +1,65 @@ { - "_comment": "training on localhost ", - "_comment" : "This is for DeePMD-kit 1.*", - "train_command" : "/home/wanghan/local/deepmd/1.*/dp", - "train_machine": { - "batch": "shell", - "work_path" : "/home/wanghan/tmp/subs/" - }, - "train_resources": { - "envs": { - } - }, - - - "_comment": "model_devi on localhost ", - "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010", - "model_devi_group_size": 5, - "model_devi_machine": { - "batch": "shell", - "_comment" : "If lazy_local is true, calculations are done directly in current folders.", - "lazy_local" : true - }, - "model_devi_resources": { - }, - - "_comment": "fp on localhost ", - "fp_command": "/home/wanghan/local/bin/vasp_std", - "fp_group_size": 2, - "fp_machine": { - "batch": "shell", - "work_path" : "/home/wanghan/tmp/subs/", - "_comment" : "that's all" - }, - "fp_resources": { - "module_list": ["mpi"], - "task_per_node":4, - "with_mpi": true, - "_comment": "that's all" + "api_version": "1.0", + "train": { + "_comment": "training on localhost", + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/home/wanghan/tmp/subs/", + "local_root": "./" + }, + "resources": { + "envs": {}, + "batch_type": "shell", + "group_size": 1, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1 + }, + "command": "/home/wanghan/local/deepmd/1.*/dp" }, - - "_comment": " that's all " -} + "model_devi": { + "_comment": "model devi on localhost", + "machine": { + "_comment": "If lazy_local, calculations are done directly in 
current folders.", + "batch_type": "shell", + "context_type": "lazylocal", + "local_root": "./" + }, + "resources": { + "batch_type": "shell", + "group_size": 5, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1 + }, + "command": "/home/wanghan/local/bin/lmp_mpi_010" + }, + "fp": { + "_comment": "fp on localhost", + "machine": { + "batch_type": "shell", + "context_type": "local", + "remote_root": "/home/wanghan/tmp/subs/", + "local_root": "./" + }, + "resources": { + "module_list": [ + "mpi" + ], + "_comment": "that's all", + "batch_type": "shell", + "group_size": 2, + "cpu_per_node": 4, + "kwargs": {}, + "queue_name": "", + "number_node": 1, + "gpu_per_node": 1 + }, + "command": "mpirun -n 4 /home/wanghan/local/bin/vasp_std" + } +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json index 4fb5845ee..348609c1e 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json +++ b/examples/machine/DeePMD-kit-1.x/machine-lsf-slurm-cp2k.json @@ -1,88 +1,98 @@ { - "train": - { - "machine": { - "machine_type": "slurm", - "hostname": "210.34.15.205", - "port": 22, - "username": "ybzhuang", - "work_path": "/home/ybzhuang/workdir" - }, - "resources": { - "numb_gpu": 1, - "numb_node": 1, - "task_per_node": 1, - "partition": "gpu", - "job_name": "train", - "qos":"emergency", - "exclude_list": [], - "source_list": [ - ], - "module_list": [ - "deepmd/1.2" - ], - "time_limit": "96:0:0", - "submit_wait_time": 60 - }, - "python_path": "/share/apps/deepmd/compress/bin/python3.8" + "train": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "210.34.15.205", + "port": 22, + "username": "ybzhuang" + }, + "remote_root": "/home/ybzhuang/workdir", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "deepmd/1.2" + ], + "batch_type": "slurm", 
+ "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1, + "queue_name": "emergency", + "custom_flags": [ + "#SBATCH -t 96:0:0" + ], + "kwargs": {}, + "wait_time": 60, + "group_size": 1 + }, + "command": "/share/apps/deepmd/compress/bin/python3.8-m deepmd" }, - "model_devi": - { - "machine": { - "machine_type": "slurm", - "hostname": "210.34.15.205", - "port": 22, - "username": "ybzhuang", - "work_path": "/home/ybzhuang/workdir" - }, - "resources": { - "numb_gpu": 1, - "numb_node": 1, - "task_per_node": 1, - "partition": "gpu", - "job_name": "md", - "qos":"emergency", - "exclude_list": [], - "source_list": [ - ], - "module_list": [ - "deepmd/1.2" - ], - "time_limit": "96:0:0", - "submit_wait_time": 60 - }, - "command": "lmp_mpi", - "group_size": 5 + "model_devi": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "210.34.15.205", + "port": 22, + "username": "ybzhuang" + }, + "remote_root": "/home/ybzhuang/workdir", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "deepmd/1.2" + ], + "batch_type": "slurm", + "group_size": 5, + "number_node": 1, + "cpu_per_node": 1, + "gpu_per_node": 1, + "queue_name": "emergency", + "custom_flags": [ + "#SBATCH -t 96:0:0" + ], + "kwargs": {}, + "wait_time": 60 + }, + "command": "lmp_mpi" }, - "fp": - { - "machine": { - "machine_type": "lsf", - "hostname": "localhost", - "port": 6666, - "username": "ybzhuang", - "work_path": "/data/ybzhuang/methane-dpgen/dpgen-tutorial-2020-08-23/dpgen-tutorial-mathane/workpath" - }, - "resources": { - "cvasp": false, - "task_per_node": 32, - "numb_node": 1, - "node_cpu": 32, - "exclude_list": [], - "with_mpi": true, - "source_list": [ - ], - "module_list": [ - "intel/17.5.239", - "mpi/intel/2017.5.239", - "gcc/5.5.0", - "cp2k/7.1" + "fp": { + "machine": { + "batch_type": "lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "port": 6666, + "username": "ybzhuang" + }, + 
"remote_root": "/data/ybzhuang/methane-dpgen/dpgen-tutorial-2020-08-23/dpgen-tutorial-mathane/workpath", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "intel/17.5.239", + "mpi/intel/2017.5.239", + "gcc/5.5.0", + "cp2k/7.1" + ], + "batch_type": "lsf", + "group_size": 50, + "number_node": 1, + "cpu_per_node": 32, + "queue_name": "53-medium", + "custom_flags": [ + "#BSUB -W 12:00:00" ], - "time_limit": "12:00:00", - "partition": "53-medium", - "_comment": "that's Bel" - }, - "command": "cp2k.popt -i input.inp", - "group_size": 50 - } -} + "kwargs": {}, + "gpu_per_node": 1 + }, + "command": "mpirun -n 32 cp2k.popt -i input.inp" + }, + "api_version": "1.0" +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json index 6893471c5..daa743dcc 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json +++ b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json @@ -1,79 +1,88 @@ { - "_comment": "training on localhost ", - "train_command": "/gpfs/home/tzhu/anaconda3/envs/python3.6/bin/dp", - "train_machine": { - "machine_type": "lsf", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 22, - "username": "tzhu", - "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "train_resources": { - "source_list": [ "activate deepmd" ], - "envs": { - "KMP_BLOCKTIME": 0, - "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" - }, - "numb_gpu": 1, - "numb_node": 1, - "node_cpu": 0, - "partition": "newgpu", - "job_name": "dpgen_jzzeng", - "with_mpi": false, - "time_limit": false, - "_comment": "that's all" - }, - - - "_comment": "model_devi on localhost ", - "model_devi_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2", - "model_devi_group_size": 1, - "model_devi_machine": { - "machine_type": "lsf", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 22, - 
"username": "tzhu", - "work_path" : "/gpfs/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "model_devi_resources": { - "envs": { - "KMP_BLOCKTIME": 0 - }, - "source_list": [ "activate deepmd" ], - "numb_gpu": 1, - "numb_node": 1, - "node_cpu": 0, - "time_limit": false, - "partition": "newgpu", - "job_name": "dpgen_jzzeng", - "with_mpi": true, - "task_per_node": 1, - "_comment": "that's all" - }, - - "_comment": "fp on lsf //localhost ", - "fp_command": "/public/home/tzhu/g16/g16 < input", - "fp_group_size": 1, - "fp_machine": { - "machine_type": "pbs", - "hostname" : "xxx.xxx.xxx.xxx", - "port" : 2323, - "username": "tzhu", - "work_path" : "/public/home/tzhu/jzzeng/dpgen_workdir", - "_comment" : "that's all" - }, - "fp_resources": { - "node_cpu":28, - "numb_node": 1, - "job_name": "dpgen_jzzeng", - "task_per_node": 28, - "with_mpi": false, - "time_limit": "10:00:00", - "allow_failure": true, - "partition": "small", - "_comment": "that's all" + "api_version": "1.0", + "train": { + "machine": { + "batch_type": "lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "tzhu" + }, + "remote_root": "/gpfs/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "source_list": [ + "activate deepmd" + ], + "envs": { + "KMP_BLOCKTIME": 0, + "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0" + }, + "batch_type": "lsf", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 0, + "gpu_per_node": 1, + "queue_name": "newgpu", + "kwargs": {} + }, + "command": "/gpfs/home/tzhu/anaconda3/envs/python3.6/bin/dp" }, - "_comment": " that's all " -} + "model_devi": { + "machine": { + "batch_type": "lsf", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "tzhu" + }, + "remote_root": "/gpfs/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "envs": { + "KMP_BLOCKTIME": 0 + }, + "source_list": [ + 
"activate deepmd" + ], + "batch_type": "lsf", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 0, + "gpu_per_node": 1, + "queue_name": "newgpu", + "kwargs": {} + }, + "command": "mpirun -n 0 /gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2" + }, + "fp": { + "machine": { + "batch_type": "pbs", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 2323, + "username": "tzhu" + }, + "remote_root": "/public/home/tzhu/jzzeng/dpgen_workdir", + "local_root": "./" + }, + "resources": { + "batch_type": "pbs", + "group_size": 1, + "number_node": 1, + "cpu_per_node": 28, + "queue_name": "small", + "custom_flags": [ + "#PBS -l walltime=10:00:00" + ], + "kwargs": {}, + "gpu_per_node": 1 + }, + "command": "/public/home/tzhu/g16/g16 < input || :" + } +} \ No newline at end of file diff --git a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json index 5f15303d6..2ff5b4a4b 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json +++ b/examples/machine/DeePMD-kit-1.x/machine-slurm-qe.json @@ -1,113 +1,112 @@ - { - "_comment" : "This is an example of DP-GEN on Slurm", - "_comment" : "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", - "train" : - { - "_comment" : "Specify the installed path of DeePMD-kit", - "_comment" : "The version of DeePMD-kit should be 1.*", - "command": "PATH_TO_DEEPMD/dp", - "_comment" : "Specify machine settings", - "machine": { - "_comment" : "Supported batches include slurm, pbs, shell, lsf.", - "batch": "slurm", - "_comment" : "If your jobs are executed on a local workstation, you can let hostname be localhost.", - "_comment" : "Otherwise you should provide the IP of machine you want to connect via ssh.", - "hostname": "localhost", - "_comment" : "The port for connection, most common settings is 22", - "port": 22, - "_comment" : "Specify your username. Sometimes you may need specify password. 
Exactly the name of key is password. ", - "username": "USERNAME", - "_comment" : "Specify where you want your job executes, all of tasks will be sent to work_path on this machine.", - "_comment" : "You should alwasy make sure that directory of work_path exits. ", - "work_path": "PATH_TO_WORK", - "_comment": "that's all" - }, - "resources": { - "_comment" : "The number of nodes. This will generate #SBATCH -N 1 in your script. ", - "numb_node": 1, - "_comment" : "The number of GPU cards. #SBATCH --gres=gpu:1", - "numb_gpu": 1, - "_comment" : "The number of CPUs. #SBATCH -n 4", - "task_per_node": 4, - "_comment" : "Partition. #SBATCH -p all", - "partition": "all", - "_comment" : "Memory limit. #SBATCH --mem=16G", - "mem_limit": 16, - "_comment" : "Nodelist to be excluded. #SBATCH --exclude=gpu06,gpu07", - "exclude_list": [ - "gpu06", - "gpu07" - ], - "_comment" : "Environment to be activated. This will generate source PATH/train_new.env . ", - "source_list": [ - "PATH/train_new.env" - ], - "_comment" : " Module is a common tools on HPC clustes to manage softwares for multiple users.", - "_comment" : "Modules to be loaded. This will generate module load intel", - "module_list": ["intel"], - "_comment" : "Time limit. ", - "time_limit": "23:0:0", - "_comment": "that's all" - } +{ + "_comment": "Last updated on 2021.4.30 for DP-GEN 0.9.2 by Yuzhi Zhang", + "train": { + "_comment" : "Specify the installed path of DeePMD-kit", + "command": "PATH_TO_DEEPMD/dp", + "_comment" : "Specify machine settings", + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "_comment" : "The port for connection, most common settings is 22", + "port": 22, + "_comment" : "Specify your username.", + "username": "USERNAME" + }, + "_comment" : "You should alwasy make sure that directory of work_path exits. ", + "remote_root": "PATH_TO_WORK", + "local_root": "./" + }, + "resources": { + "_comment" : "Environment to be activated. 
This will generate source PATH/train_new.env . ", + "source_list": [ + "PATH/train_new.env" + ], + "_comment" : " Module is a common tools on HPC clustes to manage softwares for multiple users.", + "_comment" : "Modules to be loaded. This will generate module load intel", + "module_list": [ + "intel" + ], + "batch_type": "slurm", + "_comment" : "The number of nodes. This will generate #SBATCH -N 1 in your script. ", + "number_node": 1, + "_comment" : "The number of CPUs. #SBATCH -n 4", + "cpu_per_node": 4, + "_comment" : "The number of GPU cards. #SBATCH --gres=gpu:1", + "gpu_per_node": 1, + "queue_name": "all", + "custom_flags": [ + "#SBATCH -t 23:0:0", + "#SBATCH --mem=16G", + "#SBATCH --exclude=gpu06,gpu07" + ], + "kwargs": {}, + "group_size": 1 + } }, - - "model_devi": - { - "machine": { - "machine_type": "slurm", - "hostname": "localhost", - "port": 22, - "username": "USERNAME", - "work_path": "PATH_TO_WORK", - "_comment": "that's all" - }, - "resources": { - "numb_node": 1, - "numb_gpu": 1, - "task_per_node": 4, - "partition": "all", - "mem_limit": 16, - "exclude_list": [ - - ], - "source_list": [ - "PATH/lmp_new.env" - ], - "module_list": [], - "time_limit": "23:0:0", - "_comment": "that's all" - }, - "command": "lmp_serial", - "_comment" : "DP-GEN will put 5 tasks together in one submitting script.", - "group_size": 5 - }, - "fp": - { - "machine": { - "machine_type": "slurm", - "hostname": "xxx.xxx.xxx.xxx", - "port": 22, - "username": "USERNAME", - "work_path": "PATH_TO_WORK" - }, - "resources": { - "task_per_node": 8, - "numb_gpu": 0, - "exclude_list": [], - "_comment" : "If you set with_mpi to true, the defaulted parallelling command of Slurm, srun, will be appended as prefix.", - "_comment" : "If you do not want this, you can set with_mpi to false, and specify parallelling command yourself. ", - "_comment" : "Notice that in json format, the upper/lower case is strict. 
You should write true instead of True and false instead of False", - "with_mpi": false, - "source_list": [ - ], - "module_list": [ - "mpich/3.2.1-intel-2017.1" - ], - "time_limit": "120:0:0", - "partition": "C032M0128G", - "_comment": "that's all" - }, - "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input", - "group_size": 1 - } -} + "model_devi": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "localhost", + "port": 22, + "username": "USERNAME" + }, + "remote_root": "PATH_TO_WORK", + "local_root": "./" + }, + "resources": { + "source_list": [ + "PATH/lmp_new.env" + ], + "module_list": [], + "batch_type": "slurm", + "_comment": "DP-GEN will put 5 tasks together in one submitting script.", + "group_size": 5, + "number_node": 1, + "cpu_per_node": 4, + "gpu_per_node": 1, + "queue_name": "all", + "custom_flags": [ + "#SBATCH -t 23:0:0", + "#SBATCH --mem=16G", + "#SBATCH --exclude=" + ], + "kwargs": {} + }, + "command": "lmp_serial" + }, + "fp": { + "machine": { + "batch_type": "slurm", + "context_type": "ssh", + "remote_profile": { + "hostname": "xxx.xxx.xxx.xxx", + "port": 22, + "username": "USERNAME" + }, + "remote_root": "PATH_TO_WORK", + "local_root": "./" + }, + "resources": { + "source_list": [], + "module_list": [ + "mpich/3.2.1-intel-2017.1" + ], + "batch_type": "slurm", + "group_size": 1, + "cpu_per_node": 8, + "gpu_per_node": 0, + "queue_name": "C032M0128G", + "custom_flags": [ + "#SBATCH -t 120:0:0" + ], + "kwargs": {}, + "number_node": 1 + }, + "command": "mpirun -n 8 /gpfs/share/home/1600017784/yuzhi/soft/QE-mpi/PW/src/pw.x < input" + }, + "api_version": "1.0" +} \ No newline at end of file From 2e82464b38759c65673e309430b7547d04c46fd8 Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Fri, 1 Jul 2022 13:23:55 +0800 Subject: [PATCH 15/25] =?UTF-8?q?fix=20=E2=80=98post=5Ffp=5Fcp2k=E2=80=99,?= 
=?UTF-8?q?=20add=20param=20rfailed=20(#765)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix ‘post_fp_cp2k’, add param rfailed * Update run.py --- dpgen/generator/run.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 1bd196cc6..f716e2266 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -3342,7 +3342,10 @@ def post_fp_gaussian (iter_index, def post_fp_cp2k (iter_index, - jdata): + jdata, + rfailed=None): + + ratio_failed = rfailed if rfailed else jdata.get('ratio_failed',0.10) model_devi_jobs = jdata['model_devi_jobs'] assert (iter_index < len(model_devi_jobs)) @@ -3373,7 +3376,7 @@ def post_fp_cp2k (iter_index, all_sys = None for oo in sys_output : _sys = dpdata.LabeledSystem(oo, fmt = 'cp2k/output') - _sys.check_type_map(type_map = jdata['type_map']) + #_sys.check_type_map(type_map = jdata['type_map']) if all_sys is None: all_sys = _sys else: @@ -3385,8 +3388,12 @@ def post_fp_cp2k (iter_index, sys_data_path = os.path.join(work_path, 'data.%s'%ss) all_sys.to_deepmd_raw(sys_data_path) all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) - dlog.info("failed frame number: %s "%(tcount-icount)) - dlog.info("total frame number: %s "%tcount) + + rfail=float(tcount - icount)/float(tcount) + dlog.info("failed frame: %6d in %6d %6.2f %% " % (tcount - icount, tcount, rfail * 100.)) + + if rfail>ratio_failed: + raise RuntimeError("find too many unsuccessfully terminated jobs. Too many FP tasks are not converged. 
Please check your files in directories \'iter.*.*/02.fp/task.*.*/.\'") def post_fp_pwmat (iter_index, From 48f2abed5289e86955f562492792c0e8e2f4575a Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 5 Jul 2022 10:53:18 -0400 Subject: [PATCH 16/25] add init-reaction docs and args (#758) * add init-reaction docs and args The commit includes: - add docs - add detailed arguments - add strict argument checking - strict check the init-reaction example * init-reaction -> init_reaction * fix links * add the example to doc * add Geom=PrintInputOrient to keywords --- doc/index.rst | 2 ++ doc/init/init-reaction-jdata.rst | 6 +++++ doc/init/init-reaction.md | 21 ++++++++++++++++ dpgen/data/arginfo.py | 43 +++++++++++++++++++++++++++++++- dpgen/data/reaction.py | 6 ++++- dpgen/util.py | 23 +++++++++++++++++ examples/init/reaction.json | 2 +- tests/test_check_examples.py | 32 ++++++++++++++++++++++++ 8 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 doc/init/init-reaction-jdata.rst create mode 100644 doc/init/init-reaction.md create mode 100644 tests/test_check_examples.py diff --git a/doc/index.rst b/doc/index.rst index eaa229813..aaca55d73 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -36,6 +36,8 @@ DPGEN's documentation init/init-bulk-mdata init/init-surf-mdata + init/init-reaction + init/init-reaction-jdata init/init-reaction-mdata .. _autotest:: diff --git a/doc/init/init-reaction-jdata.rst b/doc/init/init-reaction-jdata.rst new file mode 100644 index 000000000..253cae682 --- /dev/null +++ b/doc/init/init-reaction-jdata.rst @@ -0,0 +1,6 @@ +dpgen init_reaction parameters +====================================== + +.. 
dargs:: + :module: dpgen.data.arginfo + :func: init_reaction_jdata_arginfo diff --git a/doc/init/init-reaction.md b/doc/init/init-reaction.md new file mode 100644 index 000000000..c7366dcbf --- /dev/null +++ b/doc/init/init-reaction.md @@ -0,0 +1,21 @@ +# init_reaction + +`dpgen init_reaction` is a workflow to initilize data for reactive systems of small gas-phase molecules. The workflow was introduced in the "Initialization" section of [Energy & Fuels, 2021, 35 (1), 762–769](https://10.1021/acs.energyfuels.0c03211). + +To start the workflow, one needs a box containing reactive systems. The following packages are required for each of the step: +- Exploring: [LAMMPS](https://github.com/lammps/lammps) +- Sampling: [MDDatasetBuilder](https://github.com/tongzhugroup/mddatasetbuilder) +- Labeling: [Gaussian](https://gaussian.com/) + +The Exploring step uses LAMMPS [pair_style reaxff](https://docs.lammps.org/latest/pair_reaxff.html) to run a short ReaxMD NVT MD simulation. In the Sampling step, molecular clusters are taken and k-means clustering algorithm is applied to remove the redundancy, which is described in [Nature Communications, 11, 5713 (2020)](https://doi.org/10.1038/s41467-020-19497-z). The Labeling step calculates energies and forces using the Gaussian package. + +An example of `reaction.json` is given below: + +```{literalinclude} ../../examples/init/reaction.json +:language: json +:linenos: +``` + +For detailed parameters, see [parametes](init-reaction-jdata.rst) and [machine parameters](init-reaction-mdata.rst). + +The genereated data can be used to continue DP-GEN concurrent learning workflow. Read [Energy & Fuels, 2021, 35 (1), 762–769](https://10.1021/acs.energyfuels.0c03211) for details. 
diff --git a/dpgen/data/arginfo.py b/dpgen/data/arginfo.py index d5814c036..fea20ae65 100644 --- a/dpgen/data/arginfo.py +++ b/dpgen/data/arginfo.py @@ -1,4 +1,4 @@ -from dargs import Argument +from dargs import Argument, ArgumentEncoder from dpgen.arginfo import general_mdata_arginfo @@ -34,3 +34,44 @@ def init_reaction_mdata_arginfo() -> Argument: arginfo """ return general_mdata_arginfo("init_reaction_mdata", ("reaxff", "build", "fp")) + + +def init_reaction_jdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_reaction jdata. + + Returns + ------- + Argument + dpgen init_reaction jdata arginfo + """ + doc_init_reaction = "Generate initial data for reactive systems for small gas-phase molecules, from a ReaxFF NVT MD trajectory." + doc_type_map = "Type map, which should match types in the initial data. e.g. [\"C\", \"H\", \"O\"]" + doc_reaxff = "Parameters for ReaxFF NVT MD." + doc_data = "Path to initial LAMMPS data file. The atom_style should be charge." + doc_ff = "Path to ReaxFF force field file. Available in the lammps/potentials directory." + doc_control = "Path to ReaxFF control file." + doc_temp = "Target Temperature for the NVT MD simulation. Unit: K." + doc_dt = "Real time for every time step. Unit: fs." + doc_tau_t = "Time to determine how rapidly the temperature. Unit: fs." + doc_dump_frep = "Frequency of time steps to collect trajectory." + doc_nstep = "Total steps to run the ReaxFF MD simulation." + doc_cutoff = "Cutoff radius to take clusters from the trajectory. Note that only a complete molecule or free radical will be taken." + doc_dataset_size = "Collected dataset size for each bond type." + doc_qmkeywords = "Gaussian keywords for first-principle calculations. e.g. force mn15/6-31g** Geom=PrintInputOrient. Note that \"force\" job is necessary to collect data. Geom=PrintInputOrient should be used when there are more than 50 atoms in a cluster." 
+ + return Argument("init_reaction_jdata", dict, [ + Argument("type_map", list, doc=doc_type_map), + Argument("reaxff", dict, [ + Argument("data", str, doc=doc_data), + Argument("ff", str, doc=doc_ff), + Argument("control", str, doc=doc_control), + Argument("temp", [float, int], doc=doc_temp), + Argument("dt", [float, int], doc=doc_dt), + Argument("tau_t", [float, int], doc=doc_tau_t), + Argument("dump_freq", int, doc=doc_dump_frep), + Argument("nstep", int, doc=doc_nstep), + ], doc=doc_reaxff), + Argument("cutoff", float, doc=doc_cutoff), + Argument("dataset_size", int, doc=doc_dataset_size), + Argument("qmkeywords", str, doc=doc_qmkeywords), + ], doc=doc_init_reaction) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 5e900f9de..51be3b111 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -18,7 +18,8 @@ from dpgen.dispatcher.Dispatcher import make_submission_compat from dpgen.remote.decide_machine import convert_mdata from dpgen.generator.run import create_path, make_fp_task_name -from dpgen.util import sepline +from dpgen.util import sepline, normalize +from .arginfo import init_reaction_jdata_arginfo reaxff_path = "00.reaxff" build_path = "01.build" @@ -207,6 +208,9 @@ def gen_init_reaction(args): with open(args.MACHINE, "r") as fp: mdata = json.load(fp) + jdata_arginfo = init_reaction_jdata_arginfo() + jdata = normalize(jdata_arginfo, jdata) + mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" iter_rec = -1 diff --git a/dpgen/util.py b/dpgen/util.py index 9491cdc30..bab822a83 100644 --- a/dpgen/util.py +++ b/dpgen/util.py @@ -3,6 +3,8 @@ from typing import Union, List from pathlib import Path +from dargs import Argument + from dpgen import dlog """ @@ -47,3 +49,24 @@ def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: if (root_dir / "type.raw").is_file(): matches.append(str(root_dir)) return matches + +def normalize(arginfo: Argument, data: dict, strict_check: bool = True) -> dict: + 
"""Normalize and check input data. + + Parameters + ---------- + arginfo : dargs.Argument + argument information + data : dict + input data + strict_check : bool, default=True + strict check data or not + + Returns + ------- + dict + normalized data + """ + data = arginfo.normalize_value(data, trim_pattern="_*") + arginfo.check_value(data, strict=strict_check) + return data diff --git a/examples/init/reaction.json b/examples/init/reaction.json index 46e327a9f..12322333c 100644 --- a/examples/init/reaction.json +++ b/examples/init/reaction.json @@ -15,5 +15,5 @@ }, "cutoff": 3.5, "dataset_size": 100, - "qmkeywords": "b3lyp/6-31g** force" + "qmkeywords": "b3lyp/6-31g** force Geom=PrintInputOrient" } \ No newline at end of file diff --git a/tests/test_check_examples.py b/tests/test_check_examples.py new file mode 100644 index 000000000..102e98490 --- /dev/null +++ b/tests/test_check_examples.py @@ -0,0 +1,32 @@ +"""This module ensures input in the examples directory +could pass the argument checking. 
+""" +import unittest +import json +from pathlib import Path + +from dpgen.util import normalize +from dpgen.data.arginfo import ( + init_reaction_jdata_arginfo, +) + +init_reaction_jdata = init_reaction_jdata_arginfo() + +# directory of examples +p_examples = Path(__file__).parent.parent / "examples" + +# input_files : tuple[tuple[Argument, Path]] +# tuple of example list +input_files = ( + (init_reaction_jdata, p_examples / "init" / "reaction.json"), +) + + +class TestExamples(unittest.TestCase): + def test_arguments(self): + for arginfo, fn in input_files: + fn = str(fn) + with self.subTest(fn=fn): + with open(fn) as f: + data = json.load(f) + normalize(arginfo, data) From fd07f1966a48c609136bbcabf74409c729b34ecb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 5 Jul 2022 10:54:03 -0400 Subject: [PATCH 17/25] generate run arg doc from scripts (#747) * generate run arg doc from scripts This commit generates run argument documentations from scripts at runtime, so we can work on the script to improve the docs. 
* remove `init_multi_systems` see #730 * set repeat=True for model_devi_jobs * switch to dargs directive * switch for mdata * add extension * the current model_devi_args is indeed lammps arguments * fix error * fix error --- doc/conf.py | 4 - doc/gen_arginfo.py | 5 - doc/index.rst | 6 +- doc/requirements.txt | 1 + doc/{run-mdata.rst => run/mdata.rst} | 4 +- doc/run/param.rst | 445 +-------------------------- dpgen/generator/arginfo.py | 299 +++++++++++++++++- 7 files changed, 308 insertions(+), 456 deletions(-) delete mode 100644 doc/gen_arginfo.py rename doc/{run-mdata.rst => run/mdata.rst} (81%) diff --git a/doc/conf.py b/doc/conf.py index 97b4b206b..a6ebe43ed 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -107,9 +107,5 @@ def run_apidoc(_): main(['-M', '--tocfile', 'api', '-H', 'DP-GEN API', '-o', os.path.join(cur_dir, "api"), module, '--force']) -def generate_arginfo(app): - subprocess.check_output((sys.executable, "gen_arginfo.py"), universal_newlines=True) - def setup(app): app.connect('builder-inited', run_apidoc) - app.connect('builder-inited', generate_arginfo) diff --git a/doc/gen_arginfo.py b/doc/gen_arginfo.py deleted file mode 100644 index e51d44dc8..000000000 --- a/doc/gen_arginfo.py +++ /dev/null @@ -1,5 +0,0 @@ -from dpgen.generator.arginfo import run_mdata_arginfo - -run_mdata_doc = run_mdata_arginfo().gen_doc() -with open('run-mdata-auto.rst', 'w') as f: - f.write(run_mdata_doc) diff --git a/doc/index.rst b/doc/index.rst index aaca55d73..acd30efbd 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -26,7 +26,7 @@ DPGEN's documentation run/run-process.rst run/param.rst - run-mdata.rst + run/mdata.rst .. _init:: @@ -63,14 +63,14 @@ DPGEN's documentation :caption: Tutorial :glob: - toymodels/* + Tutorials .. _Contribution: .. 
toctree:: :maxdepth: 2 - :caption: Contribution Guild + :caption: Contribution Guide README.md api/api diff --git a/doc/requirements.txt b/doc/requirements.txt index 0ae5c76f1..fc51a9264 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -5,4 +5,5 @@ sphinx_markdown_tables sphinx-argparse myst-parser deepmodeling_sphinx +dargs>=0.3.1 . diff --git a/doc/run-mdata.rst b/doc/run/mdata.rst similarity index 81% rename from doc/run-mdata.rst rename to doc/run/mdata.rst index 4f92fc4fc..cad09bec8 100644 --- a/doc/run-mdata.rst +++ b/doc/run/mdata.rst @@ -3,4 +3,6 @@ dpgen run machine parameters .. note:: One can load, modify, and export the input file by using our effective web-based tool `DP-GUI `_. All parameters below can be set in DP-GUI. By clicking "SAVE JSON", one can download the input file. -.. include:: run-mdata-auto.rst +.. dargs:: + :module: dpgen.generator.arginfo + :func: run_mdata_arginfo diff --git a/doc/run/param.rst b/doc/run/param.rst index 749c4e956..592f8f31c 100644 --- a/doc/run/param.rst +++ b/doc/run/param.rst @@ -2,445 +2,6 @@ dpgen run param parameters ============================= -type_map: - | type: ``list`` - | argument path: ``type_map`` - - Atom types. - -mass_map: - | type: ``list`` - | argument path: ``mass_map`` - - Standard atom weights. - -use_ele_temp: - | type: ``int`` - | argument path: ``use_ele_temp`` - - Currently only support fp_style vasp. - - - 0: no electron temperature. - - - 1: eletron temperature as frame parameter. - - - 2: electron temperature as atom parameter. - -init_data_prefix: - | type: ``str``, optional - | argument path: ``init_data_prefix`` - - Prefix of initial data directories. - -init_data_prefix: - | type: ``list`` - | argument path: ``init_data_prefix`` - - Directories of initial data. You may use either absolute or relative path here. - -sys_format: - | type: ``str`` - | argument path: ``sys_format`` - - Format of initial data. It will be vasp/poscar if not set. 
- -init_multi_systems: - | type: ``bool``, optional - | argument path: ``init_multi_systems`` - - If set to true, init_data_sys directories should contain sub-directories of various systems. DP-GEN will regard all of these sub-directories as inital data systems. - -init_batch_size: - | type: ``str``, optional - | argument path: ``init_batch_size`` - - Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms. - -sys_configs_prefix: - | type: ``str``, optional - | argument path: ``sys_configs_prefix`` - - Prefix of sys_configs. - -sys_configs: - | type: ``str`` - | argument path: ``sys_configs`` - - Containing directories of structures to be explored in iterations.Wildcard characters are supported here. - -sys_batch_size: - | type: ``list``, optional - | argument path: ``sys_batch_size`` - - Each number is the batch_size for training of corresponding system in sys_configs. If set to auto, batch size will be 32 divided by number of atoms. - -numb_models: - | type: ``int`` - | argument path: ``numb_models`` - - Number of models to be trained in 00.train. 4 is recommend. - -training_iter0_model_path: - | type: ``list``, optional - | argument path: ``training_iter0_model_path`` - - The model used to init the first iter training. Number of element should be equal to numb_models. - -training_init_model: - | type: ``bool``, optional - | argument path: ``training_init_model`` - - Iteration > 0, the model parameters will be initilized from the model trained at the previous iteration. Iteration == 0, the model parameters will be initialized from training_iter0_model_path. - -default_training_param: - | type: ``dict`` - | argument path: ``default_training_param`` - - Training parameters for deepmd-kit in 00.train. 
You can find instructions from here: (https://github.com/deepmodeling/deepmd-kit). - -dp_compress: - | type: ``bool``, optional, default: ``False`` - | argument path: ``dp_compress`` - - Use dp compress to compress the model. - -model_devi_dt: - | type: ``float`` - | argument path: ``model_devi_dt`` - - Timestep for MD. 0.002 is recommend. - -model_devi_skip: - | type: ``int`` - | argument path: ``model_devi_skip`` - - Number of structures skipped for fp in each MD. - -model_devi_f_trust_lo: - | type: ``list`` | ``float`` - | argument path: ``model_devi_f_trust_lo`` - - Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively. - -model_devi_f_trust_hi: - | type: ``list`` | ``float`` - | argument path: ``model_devi_f_trust_hi`` - - Upper bound of forces for the selection. If list, should be set for each index in sys_configs, respectively. - -model_devi_v_trust_lo: - | type: ``list`` | ``float`` - | argument path: ``model_devi_v_trust_lo`` - - Lower bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x. - -model_devi_v_trust_hi: - | type: ``list`` | ``float`` - | argument path: ``model_devi_v_trust_hi`` - - Upper bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x. - -model_devi_adapt_trust_lo: - | type: ``bool``, optional - | argument path: ``model_devi_adapt_trust_lo`` - - Adaptively determines the lower trust levels of force and virial. This option should be used together with model_devi_numb_candi_f, model_devi_numb_candi_v and optionally with model_devi_perc_candi_f and model_devi_perc_candi_v. dpgen will make two sets: - - - 1. From the frames with force model deviation lower than model_devi_f_trust_hi, select max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames) frames with largest force model deviation. - - - 2. 
From the frames with virial model deviation lower than model_devi_v_trust_hi, select max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames) frames with largest virial model deviation. - - The union of the two sets is made as candidate dataset. - -model_devi_numb_candi_f: - | type: ``int``, optional - | argument path: ``model_devi_numb_candi_f`` - - See model_devi_adapt_trust_lo. - -model_devi_numb_candi_v: - | type: ``int``, optional - | argument path: ``model_devi_numb_candi_v`` - - See model_devi_adapt_trust_lo. - -model_devi_perc_candi_f: - | type: ``float``, optional - | argument path: ``model_devi_perc_candi_f`` - - See model_devi_adapt_trust_lo. - -model_devi_perc_candi_v: - | type: ``float``, optional - | argument path: ``model_devi_perc_candi_v`` - - See model_devi_adapt_trust_lo. - -model_devi_f_avg_relative: - | type: ``bool``, optional - | argument path: ``model_devi_f_avg_relative`` - - Normalized the force model deviations by the RMS force magnitude along the trajectory. This key should not be used with use_relative. - -model_devi_clean_traj: - | type: ``bool`` | ``int`` - | argument path: ``model_devi_clean_traj`` - - If type of model_devi_clean_traj is bool type then it denote whether to clean traj folders in MD since they are too large. If it is Int type, then the most recent n iterations of traj folders will be retained, others will be removed. - -model_devi_nopbc: - | type: ``bool`` - | argument path: ``model_devi_nopbc`` - - Assume open boundary condition in MD simulations. - -model_devi_activation_func: - | type: ``list``, optional - | argument path: ``model_devi_activation_func`` - - Set activation functions for models, length of the list should be the same as numb_models, and two elements in the list of string respectively assign activation functions to the embedding and fitting nets within each model. 
Backward compatibility: the orginal "list of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the list should be the same as numb_models. - -model_devi_jobs: - | type: ``dict`` | ``list`` - | argument path: ``model_devi_jobs`` - - Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations - - sys_idx: - | type: ``list`` - | argument path: ``model_devi_jobs/sys_idx`` - - Systems to be selected as the initial structure of MD and be explored. The index corresponds exactly to the sys_configs. - - temps: - | type: ``list`` - | argument path: ``model_devi_jobs/temps`` - - Temperature (K) in MD. - - press: - | type: ``list`` - | argument path: ``model_devi_jobs/press`` - - Pressure (Bar) in MD. - - trj_freq: - | type: ``int`` - | argument path: ``model_devi_jobs/trj_freq`` - - Frequecy of trajectory saved in MD. - - nsteps: - | type: ``int`` - | argument path: ``model_devi_jobs/nsteps`` - - Running steps of MD. - - ensembles: - | type: ``str`` - | argument path: ``model_devi_jobs/ensembles`` - - Determining which ensemble used in MD, options include “npt” and “nvt”. - - neidelay: - | type: ``int``, optional - | argument path: ``model_devi_jobs/neidelay`` - - Delay building until this many steps since last build. - - taut: - | type: ``float`` | ``str``, optional - | argument path: ``model_devi_jobs/taut`` - - Coupling time of thermostat (ps). - - taup: - | type: ``float`` | ``str``, optional - | argument path: ``model_devi_jobs/taup`` - - Coupling time of barostat (ps). - -fp_style: - | type: ``dict`` - | argument path: ``fp_style`` - - Software for First Principles. Options include “vasp”, “pwscf”, “siesta” and “gaussian” up to now. - - - Depending on the value of *fp_style*, different sub args are accepted. 
- - fp_style: - | type: ``str`` (flag key) - | argument path: ``fp_style/fp_style`` - | possible choices: vasp, gaussian, siesta, cp2k - - The code used for fp tasks. - - - When *fp_style* is set to ``vasp``: - - fp_pp_path: - | type: ``str`` - | argument path: ``fp_style[vasp]/fp_pp_path`` - - Directory of psuedo-potential file to be used for 02.fp exists. - - fp_pp_files: - | type: ``list`` - | argument path: ``fp_style[vasp]/fp_pp_files`` - - Psuedo-potential file to be used for 02.fp. Note that the order of elements should correspond to the order in type_map. - - fp_incar: - | type: ``str`` - | argument path: ``fp_style[vasp]/fp_incar`` - - Input file for VASP. INCAR must specify KSPACING and KGAMMA. - - fp_aniso_kspacing: - | type: ``list`` - | argument path: ``fp_style[vasp]/fp_aniso_kspacing`` - - Set anisotropic kspacing. Usually useful for 1-D or 2-D materials. Only support VASP. If it is setting the KSPACING key in INCAR will be ignored. - - cvasp: - | type: ``bool`` - | argument path: ``fp_style[vasp]/cvasp`` - - If cvasp is true, DP-GEN will use Custodian to help control VASP calculation. - - - When *fp_style* is set to ``gaussian``: - - use_clusters: - | type: ``bool`` - | argument path: ``fp_style[gaussian]/use_clusters`` - - If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x. - - cluster_cutoff: - | type: ``float`` - | argument path: ``fp_style[gaussian]/cluster_cutoff`` - - The cutoff radius of clusters if use_clusters is set to true. - - fp_params: - | type: ``dict`` - | argument path: ``fp_style[gaussian]/fp_params`` - - Parameters for Gaussian calculation. - - doc_keywords: - | type: ``str`` | ``list`` - | argument path: ``fp_style[gaussian]/fp_params/doc_keywords`` - - Keywords for Gaussian input. - - multiplicity: - | type: ``int`` | ``str`` - | argument path: ``fp_style[gaussian]/fp_params/multiplicity`` - - Spin multiplicity for Gaussian input. 
If set to auto, the spin multiplicity will be detected automatically. If set to frag, the "fragment=N" method will be used. - - nproc: - | type: ``int`` - | argument path: ``fp_style[gaussian]/fp_params/nproc`` - - The number of processors for Gaussian input. - - - When *fp_style* is set to ``siesta``: - - use_clusters: - | type: ``bool`` - | argument path: ``fp_style[siesta]/use_clusters`` - - If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x. - - cluster_cutoff: - | type: ``float`` - | argument path: ``fp_style[siesta]/cluster_cutoff`` - - The cutoff radius of clusters if use_clusters is set to true. - - fp_params: - | type: ``dict`` - | argument path: ``fp_style[siesta]/fp_params`` - - Parameters for siesta calculation. - - ecut: - | type: ``int`` - | argument path: ``fp_style[siesta]/fp_params/ecut`` - - Define the plane wave cutoff for grid. - - ediff: - | type: ``float`` - | argument path: ``fp_style[siesta]/fp_params/ediff`` - - Tolerance of Density Matrix. - - kspacing: - | type: ``float`` - | argument path: ``fp_style[siesta]/fp_params/kspacing`` - - Sample factor in Brillouin zones. - - mixingweight: - | type: ``float`` - | argument path: ``fp_style[siesta]/fp_params/mixingweight`` - - Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing). - - NumberPulay: - | type: ``int`` - | argument path: ``fp_style[siesta]/fp_params/NumberPulay`` - - Controls the Pulay convergence accelerator. - - - When *fp_style* is set to ``cp2k``: - - user_fp_params: - | type: ``dict`` - | argument path: ``fp_style[cp2k]/user_fp_params`` - - Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input. 
- - external_input_path: - | type: ``str`` - | argument path: ``fp_style[cp2k]/external_input_path`` - - Conflict with key:user_fp_params, use the template input provided by user, some rules should be followed, read the following text in detail. - -fp_task_max: - | type: ``int`` - | argument path: ``fp_task_max`` - - Maximum of structures to be calculated in 02.fp of each iteration. - -fp_task_min: - | type: ``int`` - | argument path: ``fp_task_min`` - - Minimum of structures to be calculated in 02.fp of each iteration. - -fp_accurate_threshold: - | type: ``float``, optional - | argument path: ``fp_accurate_threshold`` - - If the accurate ratio is larger than this number, no fp calculation will be performed, i.e. fp_task_max = 0. - -fp_accurate_soft_threshold: - | type: ``float``, optional - | argument path: ``fp_accurate_soft_threshold`` - - If the accurate ratio is between this number and fp_accurate_threshold, the fp_task_max linearly decays to zero. - -fp_cluster_vacuum: - | type: ``float``, optional - | argument path: ``fp_cluster_vacuum`` - - If the vacuum size is smaller than this value, this cluster will not be choosen for labeling. - +.. dargs:: + :module: dpgen.generator.arginfo + :func: run_jdata_arginfo diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index cb2fb887b..a9f05d429 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -1,7 +1,9 @@ -from dargs import Argument +from typing import List +from dargs import Argument, Variant from dpgen.arginfo import general_mdata_arginfo + def run_mdata_arginfo() -> Argument: """Generate arginfo for dpgen run mdata. @@ -11,3 +13,298 @@ def run_mdata_arginfo() -> Argument: arginfo """ return general_mdata_arginfo("run_mdata", ("train", "model_devi", "fp")) + +# basics +def basic_args() -> List[Argument]: + doc_type_map = 'Atom types.' + doc_mass_map = 'Standard atom weights.' + doc_use_ele_temp = 'Currently only support fp_style vasp. \n\n\ +- 0: no electron temperature. 
\n\n\ +- 1: eletron temperature as frame parameter. \n\n\ +- 2: electron temperature as atom parameter.' + + return [ + Argument("type_map", list, optional=False, doc=doc_type_map), + Argument("mass_map", list, optional=False, doc=doc_mass_map), + Argument("use_ele_temp", int, optional=False, + default=0, doc=doc_use_ele_temp), + ] + + +def data_args() -> List[Argument]: + doc_init_data_prefix = 'Prefix of initial data directories.' + doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories.' + doc_sys_format = 'Format of initial data. It will be vasp/poscar if not set.' + doc_init_batch_size = 'Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.' + doc_sys_configs_prefix = 'Prefix of sys_configs.' + doc_sys_configs = 'Containing directories of structures to be explored in iterations.Wildcard characters are supported here.' + doc_sys_batch_size = 'Each number is the batch_size for training of corresponding system in sys_configs. If set to auto, batch size will be 32 divided by number of atoms.' 
+ + return [ + Argument("init_data_prefix", str, optional=True, + doc=doc_init_data_prefix), + Argument("init_data_sys", list, + optional=False, doc=doc_init_data_sys), + Argument("sys_format", str, optional=False, doc=doc_sys_format), + Argument("init_batch_size", str, optional=True, + doc=doc_init_batch_size), + Argument("sys_configs_prefix", str, optional=True, + doc=doc_sys_configs_prefix), + Argument("sys_configs", str, optional=False, doc=doc_sys_configs), + Argument("sys_batch_size", list, optional=True, + doc=doc_sys_batch_size), + ] + +# Training + + +def training_args() -> List[Argument]: + doc_numb_models = 'Number of models to be trained in 00.train. 4 is recommend.' + doc_training_iter0_model_path = 'The model used to init the first iter training. Number of element should be equal to numb_models.' + doc_training_init_model = 'Iteration > 0, the model parameters will be initilized from the model trained at the previous iteration. Iteration == 0, the model parameters will be initialized from training_iter0_model_path.' + doc_default_training_param = 'Training parameters for deepmd-kit in 00.train. You can find instructions from here: (https://github.com/deepmodeling/deepmd-kit).' + doc_dp_compress = 'Use dp compress to compress the model.' + + return [ + Argument("numb_models", int, optional=False, doc=doc_numb_models), + Argument("training_iter0_model_path", list, optional=True, + doc=doc_training_iter0_model_path), + Argument("training_init_model", bool, optional=True, + doc=doc_training_init_model), + Argument("default_training_param", dict, optional=False, + doc=doc_default_training_param), + Argument("dp_compress", bool, optional=True, + default=False, doc=doc_dp_compress), + ] + + +# Exploration +def model_devi_jobs_args() -> List[Argument]: + # this may be not correct + doc_sys_idx = 'Systems to be selected as the initial structure of MD and be explored. The index corresponds exactly to the sys_configs.' + doc_temps = 'Temperature (K) in MD.' 
+ doc_press = 'Pressure (Bar) in MD.' + doc_trj_freq = 'Frequecy of trajectory saved in MD.' + doc_nsteps = 'Running steps of MD.' + doc_ensembles = 'Determining which ensemble used in MD, options include “npt” and “nvt”.' + doc_neidelay = 'delay building until this many steps since last build.' + doc_taut = 'Coupling time of thermostat (ps).' + doc_taup = 'Coupling time of barostat (ps).' + + args = [ + Argument("sys_idx", list, optional=False, doc=doc_sys_idx), + Argument("temps", list, optional=False, doc=doc_temps), + Argument("press", list, optional=False, doc=doc_press), + Argument("trj_freq", int, optional=False, doc=doc_trj_freq), + Argument("nsteps", int, optional=False, doc=doc_nsteps), + Argument("ensembles", str, optional=False, doc=doc_ensembles), + Argument("neidelay", int, optional=True, doc=doc_neidelay), + Argument("taut", float, optional=True, doc=doc_taut), + Argument("taup", float, optional=True, doc=doc_taup), + ] + + doc_model_devi_jobs = 'Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations' + return Argument("model_devi_jobs", list, args, [], repeat=True, doc=doc_model_devi_jobs) + + +def model_devi_lmp_args() -> List[Argument]: + doc_model_devi_dt = 'Timestep for MD. 0.002 is recommend.' + doc_model_devi_skip = 'Number of structures skipped for fp in each MD.' + doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.' + doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.' + doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' + doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. 
If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' + doc_model_devi_adapt_trust_lo = 'Adaptively determines the lower trust levels of force and virial. This option should be used together with model_devi_numb_candi_f, model_devi_numb_candi_v and optionally with model_devi_perc_candi_f and model_devi_perc_candi_v. dpgen will make two sets:\n\n\ +- 1. From the frames with force model deviation lower than model_devi_f_trust_hi, select max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames) frames with largest force model deviation. \n\n\ +- 2. From the frames with virial model deviation lower than model_devi_v_trust_hi, select max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames) frames with largest virial model deviation. \n\n\ +The union of the two sets is made as candidate dataset.' + doc_model_devi_numb_candi_f = 'See model_devi_adapt_trust_lo.' + doc_model_devi_numb_candi_v = 'See model_devi_adapt_trust_lo.' + doc_model_devi_perc_candi_f = 'See model_devi_adapt_trust_lo.' + doc_model_devi_perc_candi_v = 'See model_devi_adapt_trust_lo.' + doc_model_devi_f_avg_relative = 'Normalized the force model deviations by the RMS force magnitude along the trajectory. This key should not be used with use_relative.' + doc_model_devi_clean_traj = 'If type of model_devi_clean_traj is bool type then it denote whether to clean traj folders in MD since they are too large. If it is Int type, then the most recent n iterations of traj folders will be retained, others will be removed.' + doc_model_devi_nopbc = 'Assume open boundary condition in MD simulations.' + doc_model_devi_activation_func = 'Set activation functions for models, length of the list should be the same as numb_models, and two elements in the list of string respectively assign activation functions to the embedding and fitting nets within each model. 
Backward compatibility: the orginal "list of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the list should be the same as numb_models.' + + return [ + model_devi_jobs_args(), + Argument("model_devi_dt", float, + optional=False, doc=doc_model_devi_dt), + Argument("model_devi_skip", int, optional=False, + doc=doc_model_devi_skip), + Argument("model_devi_f_trust_lo", [ + float, list], optional=False, doc=doc_model_devi_f_trust_lo), + Argument("model_devi_f_trust_hi", [ + float, list], optional=False, doc=doc_model_devi_f_trust_hi), + Argument("model_devi_v_trust_lo", [ + float, list], optional=False, doc=doc_model_devi_v_trust_lo), + Argument("model_devi_v_trust_hi", [ + float, list], optional=False, doc=doc_model_devi_v_trust_hi), + Argument("model_devi_adapt_trust_lo", bool, optional=True, + doc=doc_model_devi_adapt_trust_lo), + Argument("model_devi_numb_candi_f", int, optional=True, + doc=doc_model_devi_numb_candi_f), + Argument("model_devi_numb_candi_v", int, optional=True, + doc=doc_model_devi_numb_candi_v), + Argument("model_devi_perc_candi_f", float, + optional=True, doc=doc_model_devi_perc_candi_f), + Argument("model_devi_perc_candi_v", float, + optional=True, doc=doc_model_devi_perc_candi_v), + Argument("model_devi_f_avg_relative", bool, optional=True, + doc=doc_model_devi_f_avg_relative), + Argument("model_devi_clean_traj", [ + bool, int], optional=False, doc=doc_model_devi_clean_traj), + Argument("model_devi_nopbc", bool, optional=False, + doc=doc_model_devi_nopbc), + Argument("model_devi_activation_func", list, optional=True, + doc=doc_model_devi_activation_func), + ] + + +def model_devi_args() -> List[Variant]: + doc_model_devi_engine = "Engine for the model deviation task." 
+ return [Variant("model_devi_engine", [ + Argument("lammps", dict, model_devi_lmp_args(), doc="LAMMPS"), + ], default_tag="lammps", optional=True, doc=doc_model_devi_engine)] + + +# Labeling +# vasp +def fp_style_vasp_args() -> List[Argument]: + doc_fp_pp_path = 'Directory of psuedo-potential file to be used for 02.fp exists.' + doc_fp_pp_files = 'Psuedo-potential file to be used for 02.fp. Note that the order of elements should correspond to the order in type_map.' + doc_fp_incar = 'Input file for VASP. INCAR must specify KSPACING and KGAMMA.' + doc_fp_aniso_kspacing = 'Set anisotropic kspacing. Usually useful for 1-D or 2-D materials. Only support VASP. If it is setting the KSPACING key in INCAR will be ignored.' + doc_cvasp = 'If cvasp is true, DP-GEN will use Custodian to help control VASP calculation.' + + return [ + Argument("fp_pp_path", str, optional=False, doc=doc_fp_pp_path), + Argument("fp_pp_files", list, optional=False, doc=doc_fp_pp_files), + Argument("fp_incar", str, optional=False, doc=doc_fp_incar), + Argument("fp_aniso_kspacing", list, optional=False, + doc=doc_fp_aniso_kspacing), + Argument("cvasp", bool, optional=False, doc=doc_cvasp), + ] + + +# gaussian +def fp_style_gaussian_args() -> List[Argument]: + doc_keywords = 'Keywords for Gaussian input.' + doc_multiplicity = 'Spin multiplicity for Gaussian input. If set to auto, the spin multiplicity will be detected automatically. If set to frag, the "fragment=N" method will be used.' + doc_nproc = 'The number of processors for Gaussian input.' + + args = [ + Argument("doc_keywords", [str or list], + optional=False, doc=doc_keywords), + Argument("multiplicity", [int or str], + optional=False, doc=doc_multiplicity), + Argument("nproc", int, optional=False, doc=doc_nproc), + ] + + doc_use_clusters = 'If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x.' + doc_cluster_cutoff = 'The cutoff radius of clusters if use_clusters is set to true.' 
+ doc_fp_params_gaussian = 'Parameters for Gaussian calculation.' + + return [ + Argument("use_clusters", bool, optional=False, doc=doc_use_clusters), + Argument("cluster_cutoff", float, + optional=False, doc=doc_cluster_cutoff), + Argument("fp_params", dict, args, [], + optional=False, doc=doc_fp_params_gaussian), + ] + +# siesta +def fp_style_siesta_args() -> List[Argument]: + doc_ecut = 'Define the plane wave cutoff for grid.' + doc_ediff = 'Tolerance of Density Matrix.' + doc_kspacing = 'Sample factor in Brillouin zones.' + doc_mixingweight = 'Proportion a of output Density Matrix to be used for the input Density Matrix of next SCF cycle (linear mixing).' + doc_NumberPulay = 'Controls the Pulay convergence accelerator.' + + args = [ + Argument("ecut", int, optional=False, doc=doc_ecut), + Argument("ediff", float, optional=False, doc=doc_ediff), + Argument("kspacing", float, optional=False, doc=doc_kspacing), + Argument("mixingweight", float, optional=False, doc=doc_mixingweight), + Argument("NumberPulay", int, optional=False, doc=doc_NumberPulay), + ] + + doc_use_clusters = 'If set to true, clusters will be taken instead of the whole system. This option does not work with DeePMD-kit 0.x.' + doc_cluster_cutoff = 'The cutoff radius of clusters if use_clusters is set to true.' + doc_fp_params_siesta = 'Parameters for siesta calculation.' + + return [ + Argument("use_clusters", bool, optional=False, doc=doc_use_clusters), + Argument("cluster_cutoff", float, + optional=False, doc=doc_cluster_cutoff), + Argument("fp_params", dict, args, [], + optional=False, doc=doc_fp_params_siesta), + ] + +# cp2k + + +def fp_style_cp2k_args() -> List[Argument]: + doc_user_fp_params = 'Parameters for cp2k calculation. find detail in manual.cp2k.org. only the kind section must be set before use. we assume that you have basic knowledge for cp2k input.' 
+ doc_external_input_path = 'Conflict with key:user_fp_params, use the template input provided by user, some rules should be followed, read the following text in detail.' + + return [ + Argument("user_fp_params", dict, optional=False, + doc=doc_user_fp_params), + Argument("external_input_path", str, optional=False, + doc=doc_external_input_path), + ] + + +def fp_style_variant_type_args() -> Variant: + doc_fp_style = 'The style of fp.' + + return Variant("fp_style", [Argument("vasp", dict, fp_style_vasp_args()), + Argument("gaussian", dict, + fp_style_gaussian_args()), + Argument("siesta", dict, + fp_style_siesta_args()), + Argument("cp2k", dict, fp_style_cp2k_args())], + optional=False, + doc=doc_fp_style) + + +def fp_args() -> List[Argument]: + doc_fp_style = 'Software for First Principles. Options include “vasp”, “pwscf”, “siesta” and “gaussian” up to now.' + doc_fp_task_max = 'Maximum of structures to be calculated in 02.fp of each iteration.' + doc_fp_task_min = 'Minimum of structures to be calculated in 02.fp of each iteration.' + doc_fp_accurate_threshold = 'If the accurate ratio is larger than this number, no fp calculation will be performed, i.e. fp_task_max = 0.' + doc_fp_accurate_soft_threshold = 'If the accurate ratio is between this number and fp_accurate_threshold, the fp_task_max linearly decays to zero.' + doc_fp_cluster_vacuum = 'If the vacuum size is smaller than this value, this cluster will not be choosen for labeling.' 
+ + return [ + Argument('fp_style', dict, [], + [fp_style_variant_type_args()], + optional=False, + doc=doc_fp_style), + Argument("fp_task_max", int, optional=False, doc=doc_fp_task_max), + Argument("fp_task_min", int, optional=False, doc=doc_fp_task_min), + Argument("fp_accurate_threshold", float, + optional=True, doc=doc_fp_accurate_threshold), + Argument("fp_accurate_soft_threshold", float, + optional=True, doc=doc_fp_accurate_soft_threshold), + Argument("fp_cluster_vacuum", float, + optional=True, doc=doc_fp_cluster_vacuum), + ] + + +def run_jdata_arginfo() -> Argument: + """Argument information for dpgen run mdata. + + Returns + ------- + Argument + argument information + """ + doc_run_jdata = "param.json file" + return Argument("run_jdata", + dict, + sub_fields=basic_args() + data_args() + training_args() + fp_args(), + sub_variants=model_devi_args(), + doc=doc_run_jdata) From 25cb29f685b886edb674db3b1f447625ad1b7e3d Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Tue, 5 Jul 2022 22:54:20 +0800 Subject: [PATCH 18/25] Fix simplify (#767) * Update simplify.py * Update simplify.py Co-authored-by: Han Wang --- dpgen/simplify/simplify.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 529401519..a8d19d18d 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -260,7 +260,10 @@ def post_model_devi(iter_index, jdata, mdata): np.random.shuffle(idx) pick_idx = idx[:iter_pick_number] rest_idx = idx[iter_pick_number:] - dlog.info("total candidate {0:6d} picked {1:6d} ({2:6.2f} %) rest {3:6d} ({4:6.2f} % )".format\ + if(counter['candidate'] == 0) : + dlog.info("no candidate") + else : + dlog.info("total candidate {0:6d} picked {1:6d} ({2:6.2f} %) rest {3:6d} ({4:6.2f} % )".format\ (counter['candidate'], len(pick_idx), float(len(pick_idx))/counter['candidate']*100., len(rest_idx), 
float(len(rest_idx))/counter['candidate']*100.)) # dump the picked candinate data From 32b72b4e6a34e0503b0dac45f13472fabb28c9ab Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Wed, 6 Jul 2022 07:50:17 +0800 Subject: [PATCH 19/25] for sys_entire, set labeled=False (#768) * Update simplify.py for sys_entire, set labeled=False * get labeled from jdata * Update simplify.py --- dpgen/simplify/simplify.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index a8d19d18d..9e8fc3632 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -216,8 +216,9 @@ def post_model_devi(iter_index, jdata, mdata): sys_accurate = dpdata.MultiSystems() sys_candinate = dpdata.MultiSystems() sys_failed = dpdata.MultiSystems() - - sys_entire = dpdata.MultiSystems().from_deepmd_npy(os.path.join(work_path, rest_data_name + ".old")) + + labeled = jdata.get("labeled", False) + sys_entire = dpdata.MultiSystems().from_deepmd_npy(os.path.join(work_path, rest_data_name + ".old"), labeled=labeled) detail_file_name = detail_file_name_prefix with open(os.path.join(work_path, detail_file_name)) as f: From adb261df2a6269a3806120c08ad672b2b7b3f416 Mon Sep 17 00:00:00 2001 From: HuangJiameng <105633685+HuangJiameng@users.noreply.github.com> Date: Thu, 7 Jul 2022 12:43:38 +0800 Subject: [PATCH 20/25] fix the error when tcount == 0 (#769) --- dpgen/generator/run.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index f716e2266..43ff1bba3 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -3154,8 +3154,12 @@ def post_fp_vasp (iter_index, else: raise RuntimeError('invalid setting of use_ele_temp ' + str(use_ele_temp)) - rfail=float(icount)/float(tcount) - dlog.info("failed frame: %6d in %6d %6.2f %% " % (icount, tcount, rfail * 100.)) + if(tcount == 0) : + rfail = 
0.0 + dlog.info("failed frame: %6d in %6d " % (icount, tcount)) + else : + rfail=float(icount)/float(tcount) + dlog.info("failed frame: %6d in %6d %6.2f %% " % (icount, tcount, rfail * 100.)) if rfail>ratio_failed: raise RuntimeError("find too many unsuccessfully terminated jobs. Too many FP tasks are not converged. Please check your input parameters (e.g. INCAR) or configuration (e.g. POSCAR) in directories \'iter.*.*/02.fp/task.*.*/.\'") @@ -3389,8 +3393,12 @@ def post_fp_cp2k (iter_index, all_sys.to_deepmd_raw(sys_data_path) all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) - rfail=float(tcount - icount)/float(tcount) - dlog.info("failed frame: %6d in %6d %6.2f %% " % (tcount - icount, tcount, rfail * 100.)) + if(tcount == 0) : + rfail = 0.0 + dlog.info("failed frame: %6d in %6d " % (tcount - icount, tcount)) + else : + rfail=float(tcount - icount)/float(tcount) + dlog.info("failed frame: %6d in %6d %6.2f %% " % (tcount - icount, tcount, rfail * 100.)) if rfail>ratio_failed: raise RuntimeError("find too many unsuccessfully terminated jobs. Too many FP tasks are not converged. Please check your files in directories \'iter.*.*/02.fp/task.*.*/.\'") From 7416dd3f4e4ab0a0bc7c1e5374c05ec943c4b992 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 9 Jul 2022 06:19:02 -0400 Subject: [PATCH 21/25] fix fp style argument (#788) The way to organize fp style argument is incorrect before. --- dpgen/generator/arginfo.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index a9f05d429..4cc055ad7 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -258,7 +258,7 @@ def fp_style_cp2k_args() -> List[Argument]: def fp_style_variant_type_args() -> Variant: - doc_fp_style = 'The style of fp.' + doc_fp_style = 'Software for First Principles.' 
return Variant("fp_style", [Argument("vasp", dict, fp_style_vasp_args()), Argument("gaussian", dict, @@ -271,7 +271,6 @@ def fp_style_variant_type_args() -> Variant: def fp_args() -> List[Argument]: - doc_fp_style = 'Software for First Principles. Options include “vasp”, “pwscf”, “siesta” and “gaussian” up to now.' doc_fp_task_max = 'Maximum of structures to be calculated in 02.fp of each iteration.' doc_fp_task_min = 'Minimum of structures to be calculated in 02.fp of each iteration.' doc_fp_accurate_threshold = 'If the accurate ratio is larger than this number, no fp calculation will be performed, i.e. fp_task_max = 0.' @@ -279,10 +278,6 @@ def fp_args() -> List[Argument]: doc_fp_cluster_vacuum = 'If the vacuum size is smaller than this value, this cluster will not be choosen for labeling.' return [ - Argument('fp_style', dict, [], - [fp_style_variant_type_args()], - optional=False, - doc=doc_fp_style), Argument("fp_task_max", int, optional=False, doc=doc_fp_task_max), Argument("fp_task_min", int, optional=False, doc=doc_fp_task_min), Argument("fp_accurate_threshold", float, @@ -306,5 +301,5 @@ def run_jdata_arginfo() -> Argument: return Argument("run_jdata", dict, sub_fields=basic_args() + data_args() + training_args() + fp_args(), - sub_variants=model_devi_args(), + sub_variants=model_devi_args() + [fp_style_variant_type_args()], doc=doc_run_jdata) From 32a9996956b21d89f78db2da8e15fc6693778c04 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 9 Jul 2022 06:20:04 -0400 Subject: [PATCH 22/25] add simplify jdata docs; fix and check example (#787) * add simplify jdata docs; fix and check example Fix #779. 
* `use_ele_temp` should be optional * `sys_format` should be optional * `sys_configs` should be list * `labeled` should be optional * `use_clusters` and `cluster_cutoff` are optional * fix keyword option * add to toc --- doc/index.rst | 1 + doc/simplify/simplify-jdata.rst | 6 ++ dpgen/generator/arginfo.py | 14 ++--- dpgen/simplify/arginfo.py | 107 +++++++++++++++++++++++++++++++- examples/simplify/qm7.json | 11 +--- tests/test_check_examples.py | 5 ++ 6 files changed, 127 insertions(+), 17 deletions(-) create mode 100644 doc/simplify/simplify-jdata.rst diff --git a/doc/index.rst b/doc/index.rst index acd30efbd..65161739d 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -53,6 +53,7 @@ DPGEN's documentation :maxdepth: 2 :caption: Simplify + simplify/simplify-jdata simplify/simplify-mdata diff --git a/doc/simplify/simplify-jdata.rst b/doc/simplify/simplify-jdata.rst new file mode 100644 index 000000000..520c889ab --- /dev/null +++ b/doc/simplify/simplify-jdata.rst @@ -0,0 +1,6 @@ +dpgen simplify parameters +========================= + +.. dargs:: + :module: dpgen.simplify.arginfo + :func: simplify_jdata_arginfo diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 4cc055ad7..9357ee912 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -26,7 +26,7 @@ def basic_args() -> List[Argument]: return [ Argument("type_map", list, optional=False, doc=doc_type_map), Argument("mass_map", list, optional=False, doc=doc_mass_map), - Argument("use_ele_temp", int, optional=False, + Argument("use_ele_temp", int, optional=True, default=0, doc=doc_use_ele_temp), ] @@ -34,7 +34,7 @@ def basic_args() -> List[Argument]: def data_args() -> List[Argument]: doc_init_data_prefix = 'Prefix of initial data directories.' doc_init_data_sys = 'Directories of initial data. You may use either absolute or relative path here. Systems will be detected recursively in the directories.' - doc_sys_format = 'Format of initial data. 
It will be vasp/poscar if not set.' + doc_sys_format = 'Format of initial data.' doc_init_batch_size = 'Each number is the batch_size of corresponding system for training in init_data_sys. One recommended rule for setting the sys_batch_size and init_batch_size is that batch_size mutiply number of atoms ot the stucture should be larger than 32. If set to auto, batch size will be 32 divided by number of atoms.' doc_sys_configs_prefix = 'Prefix of sys_configs.' doc_sys_configs = 'Containing directories of structures to be explored in iterations.Wildcard characters are supported here.' @@ -45,12 +45,12 @@ def data_args() -> List[Argument]: doc=doc_init_data_prefix), Argument("init_data_sys", list, optional=False, doc=doc_init_data_sys), - Argument("sys_format", str, optional=False, doc=doc_sys_format), + Argument("sys_format", str, optional=True, default='vasp/poscar', doc=doc_sys_format), Argument("init_batch_size", str, optional=True, doc=doc_init_batch_size), Argument("sys_configs_prefix", str, optional=True, doc=doc_sys_configs_prefix), - Argument("sys_configs", str, optional=False, doc=doc_sys_configs), + Argument("sys_configs", list, optional=False, doc=doc_sys_configs), Argument("sys_batch_size", list, optional=True, doc=doc_sys_batch_size), ] @@ -195,7 +195,7 @@ def fp_style_gaussian_args() -> List[Argument]: doc_nproc = 'The number of processors for Gaussian input.' args = [ - Argument("doc_keywords", [str or list], + Argument("keywords", [str or list], optional=False, doc=doc_keywords), Argument("multiplicity", [int or str], optional=False, doc=doc_multiplicity), @@ -207,9 +207,9 @@ def fp_style_gaussian_args() -> List[Argument]: doc_fp_params_gaussian = 'Parameters for Gaussian calculation.' 
return [ - Argument("use_clusters", bool, optional=False, doc=doc_use_clusters), + Argument("use_clusters", bool, optional=True, default=False, doc=doc_use_clusters), Argument("cluster_cutoff", float, - optional=False, doc=doc_cluster_cutoff), + optional=True, doc=doc_cluster_cutoff), Argument("fp_params", dict, args, [], optional=False, doc=doc_fp_params_gaussian), ] diff --git a/dpgen/simplify/arginfo.py b/dpgen/simplify/arginfo.py index 0fbfe606e..c325c5628 100644 --- a/dpgen/simplify/arginfo.py +++ b/dpgen/simplify/arginfo.py @@ -1,10 +1,113 @@ -from dargs import Argument +from typing import List +from dargs import Argument, Variant from dpgen.arginfo import general_mdata_arginfo +from dpgen.generator.arginfo import ( + basic_args, + data_args, + training_args, + fp_style_vasp_args, + fp_style_gaussian_args, +) + + +def general_simplify_arginfo() -> Argument: + """General simplify arginfo. + + Returns + ------- + Argument + arginfo + """ + doc_labeled = "If true, the initial data is labeled." + doc_pick_data = "Path to the directory with the pick data with the deepmd/npy format. Systems are detected recursively." + doc_init_pick_number = "The number of initial pick data." + doc_iter_pick_number = "The number of pick data in each iteration." + doc_model_devi_f_trust_lo = "The lower bound of forces for the selection for the model deviation." + doc_model_devi_f_trust_hi = "The higher bound of forces for the selection for the model deviation." 
+ + return [ + Argument("labeled", bool, optional=True, default=False, doc=doc_labeled), + Argument("pick_data", str, doc=doc_pick_data), + Argument("init_pick_number", int, doc=doc_init_pick_number), + Argument("iter_pick_number", int, doc=doc_iter_pick_number), + Argument("model_devi_f_trust_lo", float, optional=False, doc=doc_model_devi_f_trust_lo), + Argument("model_devi_f_trust_hi", float, optional=False, doc=doc_model_devi_f_trust_hi), + ] + + +def fp_style_variant_type_args() -> Variant: + """Generate variant for fp style variant type. + + Returns + ------- + Variant + variant for fp style + """ + doc_fp_style = 'Software for First Principles, if `labeled` is false. Options include “vasp”, “gaussian” up to now.' + doc_fp_style_none = 'No fp.' + doc_fp_style_vasp = 'VASP.' + doc_fp_style_gaussian = 'Gaussian. The command should be set as `g16 < input`.' + + return Variant("fp_style", [ + Argument("none", dict, doc=doc_fp_style_none), + # simplify use the same fp method as run + Argument("vasp", dict, fp_style_vasp_args(), doc=doc_fp_style_vasp), + Argument("gaussian", dict, fp_style_gaussian_args(), + doc=doc_fp_style_gaussian), + ], + optional=True, + default_tag="none", + doc=doc_fp_style) + + +def fp_args() -> List[Argument]: + """Generate arginfo for fp. + + Returns + ------- + List[Argument] + arginfo + """ + doc_fp_task_max = 'Maximum of structures to be calculated in 02.fp of each iteration.' + doc_fp_task_min = 'Minimum of structures to be calculated in 02.fp of each iteration.' + + return [ + Argument("fp_task_max", int, optional=True, doc=doc_fp_task_max), + Argument("fp_task_min", int, optional=True, doc=doc_fp_task_min), + ] + + +def simplify_jdata_arginfo() -> Argument: + """Generate arginfo for dpgen simplify jdata. + + Returns + ------- + Argument + arginfo + """ + doc_run_jdata = "Parameters for simplify.json, the first argument of `dpgen simplify`." 
+ return Argument("simplify_jdata", + dict, + sub_fields=[ + *basic_args(), + # TODO: we may remove sys_configs; it is required in train method + *data_args(), + *general_simplify_arginfo(), + # simplify use the same training method as run + *training_args(), + *fp_args(), + ], + sub_variants=[ + fp_style_variant_type_args(), + ], + doc=doc_run_jdata, + ) + def simplify_mdata_arginfo() -> Argument: """Generate arginfo for dpgen simplify mdata. - + Returns ------- Argument diff --git a/examples/simplify/qm7.json b/examples/simplify/qm7.json index 648c589e7..131a903a6 100644 --- a/examples/simplify/qm7.json +++ b/examples/simplify/qm7.json @@ -16,11 +16,11 @@ "pick_data": "/scratch/jz748/simplify/qm7", "init_data_prefix": "", "init_data_sys": [], + "sys_configs": [null], "sys_batch_size": [ "auto" ], "numb_models": 4, - "train_param": "input.json", "default_training_param": { "model": { "type_map": [ @@ -92,11 +92,8 @@ }, "use_clusters": true, "fp_style": "gaussian", - "shuffle_poscar": false, "fp_task_max": 1000, "fp_task_min": 10, - "fp_pp_path": "/home/jzzeng/", - "fp_pp_files": [], "fp_params": { "keywords": "mn15/6-31g** force nosymm scf(maxcyc=512)", "nproc": 28, @@ -105,9 +102,7 @@ }, "init_pick_number":100, "iter_pick_number":100, - "e_trust_lo":1e10, - "e_trust_hi":1e10, - "f_trust_lo":0.25, - "f_trust_hi":0.45, + "model_devi_f_trust_lo":0.10, + "model_devi_f_trust_hi":0.30, "_comment": " that's all " } diff --git a/tests/test_check_examples.py b/tests/test_check_examples.py index 102e98490..61dbbc977 100644 --- a/tests/test_check_examples.py +++ b/tests/test_check_examples.py @@ -9,8 +9,12 @@ from dpgen.data.arginfo import ( init_reaction_jdata_arginfo, ) +from dpgen.simplify.arginfo import ( + simplify_jdata_arginfo, +) init_reaction_jdata = init_reaction_jdata_arginfo() +simplify_jdata = simplify_jdata_arginfo() # directory of examples p_examples = Path(__file__).parent.parent / "examples" @@ -19,6 +23,7 @@ # tuple of example list input_files = ( 
(init_reaction_jdata, p_examples / "init" / "reaction.json"), + (simplify_jdata, p_examples / "simplify" / "qm7.json"), ) From 2a7e1b80f9a63661b5d1b2447f809bff389bed14 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Mon, 11 Jul 2022 01:05:48 -0400 Subject: [PATCH 23/25] remove `train_param` from examples, which was deprecated in #68 (#789) --- README.md | 2 -- doc/run/example-of-param.md | 1 - dpgen/generator/ch4/param.json | 1 - examples/run/deprecated/dp0.12-lammps-cp2k/CH4/param_CH4.json | 1 - examples/run/deprecated/param-h2oscan-vasp.json | 1 - examples/run/deprecated/param-mg-vasp-ucloud.json | 1 - examples/run/deprecated/param-mg-vasp.json | 1 - examples/run/deprecated/param-pyridine-pwscf.json | 1 - examples/run/dp1.x_lammps_gaussian/dodecane/dodecane.json | 1 - tests/generator/param-mg-vasp-diy.json | 1 - tests/generator/param-mg-vasp-old.json | 1 - tests/generator/param-mg-vasp.json | 1 - tests/generator/param-mgo-cp2k-exinput.json | 1 - tests/generator/param-pyridine-cp2k.json | 1 - tests/generator/param-pyridine-gaussian.json | 1 - tests/generator/param-pyridine-pwmat.json | 1 - tests/generator/param-pyridine-pwscf-old.json | 1 - tests/generator/param-pyridine-pwscf.json | 1 - tests/generator/param-pyridine-siesta.json | 1 - 19 files changed, 20 deletions(-) diff --git a/README.md b/README.md index f26fb51df..76126cad3 100644 --- a/README.md +++ b/README.md @@ -373,7 +373,6 @@ In `PARAM`, you can specialize the task as you expect. 
"_comment": " that's all ", "numb_models": 4, - "train_param": "input.json", "default_training_param": { "model": { "type_map": [ @@ -1015,7 +1014,6 @@ Here is an example of `param.json` for QM7 dataset: "auto" ], "numb_models": 4, - "train_param": "input.json", "default_training_param": { "model": { "type_map": [ diff --git a/doc/run/example-of-param.md b/doc/run/example-of-param.md index 0490d971f..3eef51fc1 100644 --- a/doc/run/example-of-param.md +++ b/doc/run/example-of-param.md @@ -50,7 +50,6 @@ The training related keys in param.json are given as follows ```json "numb_models": 4, - "train_param": "input.json", "default_training_param": { }, ``` diff --git a/dpgen/generator/ch4/param.json b/dpgen/generator/ch4/param.json index 96008265f..0a71c295f 100644 --- a/dpgen/generator/ch4/param.json +++ b/dpgen/generator/ch4/param.json @@ -24,7 +24,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/examples/run/deprecated/dp0.12-lammps-cp2k/CH4/param_CH4.json b/examples/run/deprecated/dp0.12-lammps-cp2k/CH4/param_CH4.json index 86cdfe11c..4089e7326 100644 --- a/examples/run/deprecated/dp0.12-lammps-cp2k/CH4/param_CH4.json +++ b/examples/run/deprecated/dp0.12-lammps-cp2k/CH4/param_CH4.json @@ -30,7 +30,6 @@ ], "_comment": " that's all ", "numb_models": 4, - "train_param": "input.json", "default_training_param": { "_comment": "that's all", "use_smooth": true, diff --git a/examples/run/deprecated/param-h2oscan-vasp.json b/examples/run/deprecated/param-h2oscan-vasp.json index 389727892..cf296dbef 100644 --- a/examples/run/deprecated/param-h2oscan-vasp.json +++ b/examples/run/deprecated/param-h2oscan-vasp.json @@ -107,7 +107,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/examples/run/deprecated/param-mg-vasp-ucloud.json 
b/examples/run/deprecated/param-mg-vasp-ucloud.json index f96ddd190..9d3c3cf61 100644 --- a/examples/run/deprecated/param-mg-vasp-ucloud.json +++ b/examples/run/deprecated/param-mg-vasp-ucloud.json @@ -55,7 +55,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/examples/run/deprecated/param-mg-vasp.json b/examples/run/deprecated/param-mg-vasp.json index 12ecd909b..6829142b4 100644 --- a/examples/run/deprecated/param-mg-vasp.json +++ b/examples/run/deprecated/param-mg-vasp.json @@ -55,7 +55,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/examples/run/deprecated/param-pyridine-pwscf.json b/examples/run/deprecated/param-pyridine-pwscf.json index cd6634595..a224b33f2 100644 --- a/examples/run/deprecated/param-pyridine-pwscf.json +++ b/examples/run/deprecated/param-pyridine-pwscf.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/examples/run/dp1.x_lammps_gaussian/dodecane/dodecane.json b/examples/run/dp1.x_lammps_gaussian/dodecane/dodecane.json index c2147f4ca..a0b799e8b 100644 --- a/examples/run/dp1.x_lammps_gaussian/dodecane/dodecane.json +++ b/examples/run/dp1.x_lammps_gaussian/dodecane/dodecane.json @@ -9,7 +9,6 @@ ], "sys_format":"lammps/lmp", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "model":{ "type_map": ["C","H"], diff --git a/tests/generator/param-mg-vasp-diy.json b/tests/generator/param-mg-vasp-diy.json index b7fffc41b..cc2071c31 100644 --- a/tests/generator/param-mg-vasp-diy.json +++ b/tests/generator/param-mg-vasp-diy.json @@ -17,7 +17,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", 
"default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-mg-vasp-old.json b/tests/generator/param-mg-vasp-old.json index cd521bbc3..8b85990f0 100644 --- a/tests/generator/param-mg-vasp-old.json +++ b/tests/generator/param-mg-vasp-old.json @@ -17,7 +17,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-mg-vasp.json b/tests/generator/param-mg-vasp.json index f2ecdcf8f..9693f4c1f 100644 --- a/tests/generator/param-mg-vasp.json +++ b/tests/generator/param-mg-vasp.json @@ -17,7 +17,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-mgo-cp2k-exinput.json b/tests/generator/param-mgo-cp2k-exinput.json index 85a286f4d..88093ace4 100644 --- a/tests/generator/param-mgo-cp2k-exinput.json +++ b/tests/generator/param-mgo-cp2k-exinput.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-pyridine-cp2k.json b/tests/generator/param-pyridine-cp2k.json index c2fd07eb0..fba915b8e 100644 --- a/tests/generator/param-pyridine-cp2k.json +++ b/tests/generator/param-pyridine-cp2k.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-pyridine-gaussian.json b/tests/generator/param-pyridine-gaussian.json index d11e1df48..ecd0a9d96 100644 --- a/tests/generator/param-pyridine-gaussian.json +++ b/tests/generator/param-pyridine-gaussian.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", 
"default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-pyridine-pwmat.json b/tests/generator/param-pyridine-pwmat.json index cc2e59b39..803068699 100644 --- a/tests/generator/param-pyridine-pwmat.json +++ b/tests/generator/param-pyridine-pwmat.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-pyridine-pwscf-old.json b/tests/generator/param-pyridine-pwscf-old.json index 579a66e10..210a3325d 100644 --- a/tests/generator/param-pyridine-pwscf-old.json +++ b/tests/generator/param-pyridine-pwscf-old.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-pyridine-pwscf.json b/tests/generator/param-pyridine-pwscf.json index 3e852bf04..d3c69078a 100644 --- a/tests/generator/param-pyridine-pwscf.json +++ b/tests/generator/param-pyridine-pwscf.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, diff --git a/tests/generator/param-pyridine-siesta.json b/tests/generator/param-pyridine-siesta.json index dfdf7384f..afffcddf0 100644 --- a/tests/generator/param-pyridine-siesta.json +++ b/tests/generator/param-pyridine-siesta.json @@ -25,7 +25,6 @@ "_comment": " 00.train ", "numb_models": 4, - "train_param": "input.json", "default_training_param" : { "_comment": " model parameters", "use_smooth": true, From 8ed4e2fc14507de45387f9e801373e802b62530d Mon Sep 17 00:00:00 2001 From: Futaki Haduki <812556867@qq.com> Date: Mon, 11 Jul 2022 13:10:49 +0800 Subject: [PATCH 24/25] fix bug for sys_idx dependent trust level and add option for model_devi_job (#786) * adapt different trust 
level for different sys_configs * fix #746; add iter dependent trust level * add document for #786 * some fix accroding to review * fix unexpected 0 bug * fix document not supporting typing bug * change value for trust level in unittest --- README.md | 11 +- dpgen/generator/arginfo.py | 30 +++-- dpgen/generator/run.py | 28 +++-- tests/generator/context.py | 1 + .../generator/param-mg-vasp-multi-trust.json | 111 ++++++++++++++++++ tests/generator/test_make_fp.py | 48 ++++++++ 6 files changed, 207 insertions(+), 22 deletions(-) create mode 100644 tests/generator/param-mg-vasp-multi-trust.json diff --git a/README.md b/README.md index 76126cad3..f760e2a79 100644 --- a/README.md +++ b/README.md @@ -513,10 +513,10 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key | *#Exploration* | **model_devi_dt** | Float | 0.002 (recommend) | Timestep for MD | | **model_devi_skip** | Integer | 0 | Number of structures skipped for fp in each MD -| **model_devi_f_trust_lo** | Float or List of float | 0.05 | Lower bound of forces for the selection. If List, should be set for each index in `sys_configs`, respectively. | -| **model_devi_f_trust_hi** | Float or List of float | 0.15 | Upper bound of forces for the selection. If List, should be set for each index in `sys_configs`, respectively. | -| **model_devi_v_trust_lo** | Float or List of float | 1e10 | Lower bound of virial for the selection. If List, should be set for each index in `sys_configs`, respectively. Should be used with DeePMD-kit v2.x. | -| **model_devi_v_trust_hi** | Float or List of float | 1e10 | Upper bound of virial for the selection. If List, should be set for each index in `sys_configs`, respectively. Should be used with DeePMD-kit v2.x. | +| **model_devi_f_trust_lo** | Float or List of float or Dict[str, float] | 0.05 | Lower bound of forces for the selection. If List, should be set for each index in `sys_configs`, respectively. 
| +| **model_devi_f_trust_hi** | Float or List of float or Dict[str, float] | 0.15 | Upper bound of forces for the selection. If List, should be set for each index in `sys_configs`, respectively. | +| **model_devi_v_trust_lo** | Float or List of float or Dict[str, float] | 1e10 | Lower bound of virial for the selection. If List, should be set for each index in `sys_configs`, respectively. Should be used with DeePMD-kit v2.x. | +| **model_devi_v_trust_hi** | Float or List of float or Dict[str, float] | 1e10 | Upper bound of virial for the selection. If List, should be set for each index in `sys_configs`, respectively. Should be used with DeePMD-kit v2.x. | | model_devi_adapt_trust_lo | Boolean | False | Adaptively determines the lower trust levels of force and virial. This option should be used together with `model_devi_numb_candi_f`, `model_devi_numb_candi_v` and optionally with `model_devi_perc_candi_f` and `model_devi_perc_candi_v`. `dpgen` will make two sets: 1. From the frames with force model deviation lower than `model_devi_f_trust_hi`, select `max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames)` frames with largest force model deviation. 2. From the frames with virial model deviation lower than `model_devi_v_trust_hi`, select `max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames)` frames with largest virial model deviation. The union of the two sets is made as candidate dataset| | model_devi_numb_candi_f | Int | 10 | See `model_devi_adapt_trust_lo`.| | model_devi_numb_candi_v | Int | 0 | See `model_devi_adapt_trust_lo`.| @@ -535,7 +535,8 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key | **model_devi_jobs["ensembles"]** | String | "nvt" | Determining which ensemble used in MD, **options** include “npt” and “nvt”. 
| | model_devi_jobs["neidelay"] | Integer | "10" | delay building until this many steps since last build | | model_devi_jobs["taut"] | Float | "0.1" | Coupling time of thermostat (ps) | -| model_devi_jobs["taup"] | Float | "0.5" | Coupling time of barostat (ps) +| model_devi_jobs["taup"] | Float | "0.5" | Coupling time of barostat (ps) | +| model_devi_jobs["model_devi_f_trust_lo"]
model_devi_jobs["model_devi_f_trust_hi"]
model_devi_jobs["model_devi_v_trust_lo"]
model_devi_jobs["model_devi_v_trust_hi"] | Float or Dict[str, float] | See global model_devi config above like **model_devi_f_trust_lo**. For dict, should be set for each index in sys_idx, respectively. | | *#Labeling* | **fp_style** | string | "vasp" | Software for First Principles. **Options** include “vasp”, “pwscf”, “siesta” and “gaussian” up to now. | | **fp_task_max** | Integer | 20 | Maximum of structures to be calculated in `02.fp` of each iteration. | diff --git a/dpgen/generator/arginfo.py b/dpgen/generator/arginfo.py index 9357ee912..a7bd283f3 100644 --- a/dpgen/generator/arginfo.py +++ b/dpgen/generator/arginfo.py @@ -1,4 +1,4 @@ -from typing import List +from typing import Dict, List from dargs import Argument, Variant from dpgen.arginfo import general_mdata_arginfo @@ -90,6 +90,10 @@ def model_devi_jobs_args() -> List[Argument]: doc_neidelay = 'delay building until this many steps since last build.' doc_taut = 'Coupling time of thermostat (ps).' doc_taup = 'Coupling time of barostat (ps).' + doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If dict, should be set for each index in sys_idx, respectively.' + doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If dict, should be set for each index in sys_idx, respectively.' + doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If dict, should be set for each index in sys_idx, respectively. Should be used with DeePMD-kit v2.x.' + doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. If dict, should be set for each index in sys_idx, respectively. Should be used with DeePMD-kit v2.x.' 
args = [ Argument("sys_idx", list, optional=False, doc=doc_sys_idx), @@ -101,6 +105,14 @@ def model_devi_jobs_args() -> List[Argument]: Argument("neidelay", int, optional=True, doc=doc_neidelay), Argument("taut", float, optional=True, doc=doc_taut), Argument("taup", float, optional=True, doc=doc_taup), + Argument("model_devi_f_trust_lo", [ + float, dict], optional=False, doc=doc_model_devi_f_trust_lo), + Argument("model_devi_f_trust_hi", [ + float, dict], optional=False, doc=doc_model_devi_f_trust_hi), + Argument("model_devi_v_trust_lo", [ + float, dict], optional=False, doc=doc_model_devi_v_trust_lo), + Argument("model_devi_v_trust_hi", [ + float, dict], optional=False, doc=doc_model_devi_v_trust_hi), ] doc_model_devi_jobs = 'Settings for exploration in 01.model_devi. Each dict in the list corresponds to one iteration. The index of model_devi_jobs exactly accord with index of iterations' @@ -110,10 +122,10 @@ def model_devi_jobs_args() -> List[Argument]: def model_devi_lmp_args() -> List[Argument]: doc_model_devi_dt = 'Timestep for MD. 0.002 is recommend.' doc_model_devi_skip = 'Number of structures skipped for fp in each MD.' - doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.' - doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If list, should be set for each index in sys_configs, respectively.' - doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' - doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. If list, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' + doc_model_devi_f_trust_lo = 'Lower bound of forces for the selection. If list or dict, should be set for each index in sys_configs, respectively.' 
+ doc_model_devi_f_trust_hi = 'Upper bound of forces for the selection. If list or dict, should be set for each index in sys_configs, respectively.' + doc_model_devi_v_trust_lo = 'Lower bound of virial for the selection. If list or dict, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' + doc_model_devi_v_trust_hi = 'Upper bound of virial for the selection. If list or dict, should be set for each index in sys_configs, respectively. Should be used with DeePMD-kit v2.x.' doc_model_devi_adapt_trust_lo = 'Adaptively determines the lower trust levels of force and virial. This option should be used together with model_devi_numb_candi_f, model_devi_numb_candi_v and optionally with model_devi_perc_candi_f and model_devi_perc_candi_v. dpgen will make two sets:\n\n\ - 1. From the frames with force model deviation lower than model_devi_f_trust_hi, select max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames) frames with largest force model deviation. \n\n\ - 2. From the frames with virial model deviation lower than model_devi_v_trust_hi, select max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames) frames with largest virial model deviation. 
\n\n\ @@ -134,13 +146,13 @@ def model_devi_lmp_args() -> List[Argument]: Argument("model_devi_skip", int, optional=False, doc=doc_model_devi_skip), Argument("model_devi_f_trust_lo", [ - float, list], optional=False, doc=doc_model_devi_f_trust_lo), + float, list, dict], optional=False, doc=doc_model_devi_f_trust_lo), Argument("model_devi_f_trust_hi", [ - float, list], optional=False, doc=doc_model_devi_f_trust_hi), + float, list, dict], optional=False, doc=doc_model_devi_f_trust_hi), Argument("model_devi_v_trust_lo", [ - float, list], optional=False, doc=doc_model_devi_v_trust_lo), + float, list, dict], optional=False, doc=doc_model_devi_v_trust_lo), Argument("model_devi_v_trust_hi", [ - float, list], optional=False, doc=doc_model_devi_v_trust_hi), + float, list, dict], optional=False, doc=doc_model_devi_v_trust_hi), Argument("model_devi_adapt_trust_lo", bool, optional=True, doc=doc_model_devi_adapt_trust_lo), Argument("model_devi_numb_candi_f", int, optional=True, diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index 43ff1bba3..69305229c 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -1902,6 +1902,8 @@ def _make_fp_vasp_inner (modd_path, def _trust_limitation_check(sys_idx, lim): if isinstance(lim, list): sys_lim = lim[sys_idx] + elif isinstance(lim, dict): + sys_lim = lim[str(sys_idx)] else: sys_lim = lim return sys_lim @@ -1912,10 +1914,10 @@ def _trust_limitation_check(sys_idx, lim): modd_system_task.sort() if model_devi_engine in ('lammps', 'gromacs', 'calypso'): # convert global trust limitations to local ones - f_trust_lo_sys = _trust_limitation_check(ss, f_trust_lo) - f_trust_hi_sys = _trust_limitation_check(ss, f_trust_hi) - v_trust_lo_sys = _trust_limitation_check(ss, v_trust_lo) - v_trust_hi_sys = _trust_limitation_check(ss, v_trust_hi) + f_trust_lo_sys = _trust_limitation_check(int(ss), f_trust_lo) + f_trust_hi_sys = _trust_limitation_check(int(ss), f_trust_hi) + v_trust_lo_sys = _trust_limitation_check(int(ss), 
v_trust_lo) + v_trust_hi_sys = _trust_limitation_check(int(ss), v_trust_hi) # assumed e -> v if not model_devi_adapt_trust_lo: @@ -2453,10 +2455,6 @@ def _make_fp_vasp_configs(iter_index, jdata): fp_task_max = jdata['fp_task_max'] model_devi_skip = jdata['model_devi_skip'] - v_trust_lo = jdata.get('model_devi_v_trust_lo', 1e10) - v_trust_hi = jdata.get('model_devi_v_trust_hi', 1e10) - f_trust_lo = jdata['model_devi_f_trust_lo'] - f_trust_hi = jdata['model_devi_f_trust_hi'] type_map = jdata['type_map'] iter_name = make_iter_name(iter_index) work_path = os.path.join(iter_name, fp_name) @@ -2469,6 +2467,20 @@ def _make_fp_vasp_configs(iter_index, cur_job = json.load(open(os.path.join(modd_path, 'cur_job.json'), 'r')) if 'task_min' in cur_job : task_min = cur_job['task_min'] + else: + cur_job = {} + # support iteration dependent trust levels + v_trust_lo = cur_job.get('model_devi_v_trust_lo', jdata.get('model_devi_v_trust_lo', 1e10)) + v_trust_hi = cur_job.get('model_devi_v_trust_hi', jdata.get('model_devi_v_trust_hi', 1e10)) + if cur_job.get('model_devi_f_trust_lo') is not None: + f_trust_lo = cur_job.get('model_devi_f_trust_lo') + else: + f_trust_lo = jdata['model_devi_f_trust_lo'] + if cur_job.get('model_devi_f_trust_hi') is not None: + f_trust_hi = cur_job.get('model_devi_f_trust_hi') + else: + f_trust_hi = jdata['model_devi_f_trust_hi'] + # make configs fp_tasks = _make_fp_vasp_inner(modd_path, work_path, model_devi_skip, diff --git a/tests/generator/context.py b/tests/generator/context.py index 9b5c23c58..f16ea89a1 100644 --- a/tests/generator/context.py +++ b/tests/generator/context.py @@ -27,6 +27,7 @@ param_abacus_file = 'param-pyridine-abacus.json' param_abacus_post_file = 'param-methane-abacus.json' param_amber_file = "param-amber.json" +param_multiple_trust_file = 'param-mg-vasp-multi-trust.json' def my_file_cmp(test, f0, f1): with open(f0) as fp0 : diff --git a/tests/generator/param-mg-vasp-multi-trust.json b/tests/generator/param-mg-vasp-multi-trust.json 
new file mode 100644 index 000000000..acd7e1865 --- /dev/null +++ b/tests/generator/param-mg-vasp-multi-trust.json @@ -0,0 +1,111 @@ +{ + "type_map": ["Mg", "Al"], + "mass_map": [24, 27], + + "init_data_prefix": "data", + "init_data_sys": ["deepmd" + ], + "init_batch_size": [16], + "sys_configs": [ + ["data/mg.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000000/POSCAR"], + ["data/mg.fcc.02x02x02/01.scale_pert/sys-0032/scale*/000001/POSCAR"] + ], + "_comment": "0 1 2 3", + "_comment": "4 5 6 7", + "sys_batch_size": [1, 1 + ], + + "_comment": " 00.train ", + "numb_models": 4, + "train_param": "input.json", + "default_training_param" : { + "_comment": " model parameters", + "use_smooth": true, + "sel_a": [90], + "rcut_smth": 2.00, + "rcut": 6.00, + "filter_neuron": [25, 50, 100], + "filter_resnet_dt": false, + "n_axis_neuron": 12, + "n_neuron": [240, 240, 240], + "resnet_dt": true, + "coord_norm": true, + "type_fitting_net": false, + + "_comment": " traing controls", + "systems": [], + "set_prefix": "set", + "stop_batch": 400000, + "batch_size": 1, + "start_lr": 0.002, + "decay_steps": 2000, + "decay_rate": 0.95, + "seed": 0, + + "start_pref_e": 0.02, + "limit_pref_e": 2, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0.0, + "limit_pref_v": 0.0, + + "_comment": " display and restart", + "_comment": " frequencies counted in batch", + "disp_file": "lcurve.out", + "disp_freq": 2000, + "numb_test": 10, + "save_freq": 20000, + "save_ckpt": "model.ckpt", + "load_ckpt": "model.ckpt", + "disp_training": true, + "time_training": true, + "profiling": false, + "profiling_file": "timeline.json", + + "_comment": "that's all" + }, + + "_comment": " 01.model_devi ", + "_comment": "model_devi_skip: the first x of the recorded frames", + "model_devi_dt": 0.002, + "model_devi_skip": 0, + "model_devi_f_trust_lo": [0.000, 0.050], + "model_devi_f_trust_hi": {"1": 0.100, "0": 0.150}, + "model_devi_e_trust_lo": 1e10, + "model_devi_e_trust_hi": 1e10, + "model_devi_clean_traj": 
false, + "model_devi_jobs": [ + {"sys_idx": [0, 1], "temps": [50,100], "press": [1.0,2.0], "trj_freq": 10, "nsteps": 1000, "ensemble": "npt", "_idx": "00"} + ], + + "_comment": " 02.fp ", + "fp_style": "vasp", + "shuffle_poscar": false, + "fp_task_max": 100, + "fp_task_min": 10, + "fp_pp_path": ".", + "fp_pp_files": ["vasp/potcars/POTCAR.mg", "vasp/potcars/POTCAR.al"], + "_comment": " user provided vasp script ", + "user_fp_params": { + "PREC": "A", + "ENCUT": 600, + "ISYM": 0, + "ALGO": "fast", + "EDIFF": 1e-05, + "LREAL": "A", + "NPAR": 1, + "KPAR": 1, + "NELMIN": 4, + "ISIF": 2, + "ISMEAR": 1, + "SIGMA": 0.25, + "IBRION": -1, + "NSW": 0, + "LWAVE": false, + "LCHARG": false, + "PSTRESS": 0, + "KSPACING": 0.16, + "KGAMMA": false + }, + "_comment": " that's all " +} diff --git a/tests/generator/test_make_fp.py b/tests/generator/test_make_fp.py index 5df81692f..65a371114 100644 --- a/tests/generator/test_make_fp.py +++ b/tests/generator/test_make_fp.py @@ -23,6 +23,7 @@ from .context import ref_cp2k_file_exinput from .context import machine_file from .context import param_diy_file +from .context import param_multiple_trust_file from .context import make_kspacing_kpoints from .context import my_file_cmp from .context import setUpModule @@ -345,6 +346,16 @@ def _check_potcar(testCase, idx, fp_pp_path, fp_pp_files) : def _check_sel(testCase, idx, fp_task_max, flo, fhi): + + def _trust_limitation_check(sys_idx, lim): + if isinstance(lim, list): + sys_lim = lim[sys_idx] + elif isinstance(lim, dict): + sys_lim = lim[str(sys_idx)] + else: + sys_lim = lim + return sys_lim + fp_path = os.path.join('iter.%06d' % idx, '02.fp') candi_files = glob.glob(os.path.join(fp_path, 'candidate.shuffled.*.out')) candi_files.sort() @@ -358,6 +369,8 @@ def _check_sel(testCase, idx, fp_task_max, flo, fhi): f_idx.append(ii.split()[1]) md_task = md_task[:fp_task_max] f_idx = f_idx[:fp_task_max] + flo = _trust_limitation_check(int(sidx), flo) + fhi = _trust_limitation_check(int(sidx), fhi) for 
tt,ff in zip(md_task, f_idx): md_value = np.loadtxt(os.path.join(tt, 'model_devi.out')) fvalue = md_value[int(ff)][4] @@ -795,6 +808,41 @@ def test_make_fp_vasp_ele_temp(self): # checked elsewhere # _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) shutil.rmtree('iter.000000') + + def test_make_fp_vasp_multiple_trust_level(self): + # Verify if sys_idx dependent trust level could be read. + setUpModule() + if os.path.isdir('iter.000000') : + shutil.rmtree('iter.000000') + with open (param_multiple_trust_file, 'r') as fp : + jdata = json.load (fp) + fp.close() + with open (machine_file, 'r') as fp: + mdata = json.load (fp) + fp.close() + md_descript = [] + ele_temp = [] + nsys = 2 + nmd = 3 + n_frame = 10 + for ii in range(nsys) : + tmp = [] + for jj in range(nmd) : + tmp.append(np.arange(0, 0.29, 0.29/10)) + md_descript.append(tmp) + ele_temp.append([np.random.random() * 100000] * nmd) + atom_types = [0, 1, 0, 1] + type_map = jdata['type_map'] + _make_fake_md(0, md_descript, atom_types, type_map, ele_temp = ele_temp) + make_fp(0, jdata, {}) + _check_sel(self, 0, jdata['fp_task_max'], jdata['model_devi_f_trust_lo'], jdata['model_devi_f_trust_hi']) + _check_poscars(self, 0, jdata['fp_task_max'], jdata['type_map']) + _check_incar_ele_temp(self, 0, ele_temp) + _check_kpoints_exists(self, 0) + _check_kpoints(self,0) + # checked elsewhere + # _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) + shutil.rmtree('iter.000000') class TestMakeFPGaussian(unittest.TestCase): From 43931d22c835cff1b7f20f21cce6964b044f399d Mon Sep 17 00:00:00 2001 From: Yuzhi Zhang <529133328@qq.com> Date: Mon, 11 Jul 2022 14:27:43 +0800 Subject: [PATCH 25/25] Add netcdf4 dependency to conda recipe (#792) * Change mv to cp when downloading softlinks. * Add explanations relevant to POTCAR's examples. * Fix CH4's vasp-md INCAR example. * Fix bug in make_submission of Auto-test * Add ase, custodian, GromacsWrapper in conda packages. 
* Fix bug of fp_task_min * Add gromacswrapper to conda recipe * Revert "Fix bug of fp_task_min" This reverts commit 8d7f8a77748a2d1509d91e184b9cbde1fe603807. * Add numkit recipe * Add `convert_mdata` definition to fix simplify bug. * Add netCDF4's dependency to conda recipe * Delete gromacswrapper and numkit dependency --- conda/meta.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda/meta.yaml b/conda/meta.yaml index e5480ba7a..b7541087d 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -28,6 +28,7 @@ requirements: - ase - GromacsWrapper - custodian + - netCDF4 run: - python >=3.6 @@ -40,6 +41,7 @@ requirements: - ase - GromacsWrapper - custodian + - netCDF4 test: imports: