diff --git a/.github/workflows/mirror_gitee.yml b/.github/workflows/mirror_gitee.yml index 35d6d0c52..b298d071b 100644 --- a/.github/workflows/mirror_gitee.yml +++ b/.github/workflows/mirror_gitee.yml @@ -15,5 +15,5 @@ jobs: ORGANIZATION: deepmodeling SSH_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }} with: - source-repo: "git@github.com:deepmodeling/dpgen.git" + source-repo: "https://github.com/deepmodeling/dpgen.git" destination-repo: "git@gitee.com:deepmodeling/dpgen.git" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 000000000..939a76699 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +name: release_conda_and_github + +on: + push: + tags: + - 'v*' + +jobs: + publish-conda: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: publish-to-conda + uses: felix5572/conda-publish-action@v1.9 + with: + subdir: 'conda' + anacondatoken: ${{ secrets.ANACONDA_TOKEN }} + platforms: 'noarch' + construct-and-publish: + runs-on: ubuntu-latest + needs: conda-publish + steps: + - uses: actions/checkout@master + - uses: s-weigand/setup-conda@v1 + - run: conda install constructor jinja2 -y + - run: constructor ./conda + - name: Upload to release + uses: softprops/action-gh-release@master + if: startsWith(github.ref, 'refs/tags/') + with: + files: dpgen-*.sh + repository: ${{ env.GITHUB_REPOSITORY }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..5aed6c23a --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,25 @@ +preferred-citation: + type: article + authors: + - family-names: "Zhang" + given-names: "Yuzhi" + - family-names: "Wang" + given-names: "Haidi" + - family-names: "Chen" + given-names: "Weijie" + - family-names: "Zeng" + given-names: "Jinzhe" + - family-names: "Zhang" + given-names: "Linfeng" + - family-names: "Wang" + given-names: "Han" + - family-names: "E" + given-names: "Weinan" + doi: "10.1016/j.cpc.2020.107206" + journal: "Computer Physics Communications" + month: 8 + start: 107206 # First page number + end: 107206 # Last page number + title: "DP-GEN: A concurrent learning platform for the generation of reliable deep learning based potential energy models" + volume: 253 + year: 2020 diff --git a/README.md b/README.md index 8462ccaf2..52d608313 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ [![conda install](https://img.shields.io/conda/dn/conda-forge/dpgen?label=conda%20install)](https://anaconda.org/conda-forge/dpgen) [![pip install](https://img.shields.io/pypi/dm/dpgen?label=pip%20install)](https://pypi.org/project/dpgen) -DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is depedent on DeepMD-kit (https://github.com/deepmodeling/deepmd-kit/blob/master/README.md). With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results. +DP-GEN (Deep Generator) is a software written in Python, delicately designed to generate a deep learning based model of interatomic potential energy and force field. DP-GEN is dependent on [DeepMD-kit](https://github.com/deepmodeling/deepmd-kit/blob/master/README.md). 
With highly scalable interface with common softwares for molecular simulation, DP-GEN is capable to automatically prepare scripts and maintain job queues on HPC machines (High Performance Cluster) and analyze results. If you use this software in any publication, please cite: @@ -34,7 +34,7 @@ Yuzhi Zhang, Haidi Wang, Weijie Chen, Jinzhe Zeng, Linfeng Zhang, Han Wang, and ### Highlighted features + **Accurate and efficient**: DP-GEN is capable to sample more than tens of million structures and select only a few for first principles calculation. DP-GEN will finally obtain a uniformly accurate model. + **User-friendly and automatic**: Users may install and run DP-GEN easily. Once succusefully running, DP-GEN can dispatch and handle all jobs on HPCs, and thus there's no need for any personal effort. -+ **Highly scalable**: With modularized code structures, users and developers can easily extend DP-GEN for their most relevant needs. DP-GEN currently supports for HPC systems (Slurm, PBS, LSF and cloud machines ), Deep Potential interface with DeePMD-kit, MD interface with LAMMPS and *ab-initio* calculation interface with VASP, PWSCF,SIESTA and Gaussian. We're sincerely welcome and embraced to users' contributions, with more possibilities and cases to use DP-GEN. ++ **Highly scalable**: With modularized code structures, users and developers can easily extend DP-GEN for their most relevant needs. DP-GEN currently supports for HPC systems (Slurm, PBS, LSF and cloud machines ), Deep Potential interface with DeePMD-kit, MD interface with [LAMMPS](https://www.lammps.org/), [Gromacs](http://www.gromacs.org/) and *ab-initio* calculation interface with VASP, PWSCF, CP2K, SIESTA and Gaussian, Abacus, PWMAT, etc . We're sincerely welcome and embraced to users' contributions, with more possibilities and cases to use DP-GEN. ### Code structure and interface + dpgen: @@ -43,7 +43,8 @@ Yuzhi Zhang, Haidi Wang, Weijie Chen, Jinzhe Zeng, Linfeng Zhang, Han Wang, and * generator: source codes for main process of deep generator. * auto_test : source code for undertaking materials property analysis. - * remote : source code for automatically submiting scripts,maintaining job queues and collecting results. + * remote and dispatcher : source code for automatically submiting scripts,maintaining job queues and collecting results. + Notice this part hase been integrated into [dpdispatcher](https://github.com/deepmodeling/dpdispatcher) * database : source code for collecting data generated by DP-GEN and interface with database. + examples : providing example JSON files. @@ -63,6 +64,15 @@ Options for TASK: * `test`: Auto-test for Deep Potential. * `db`: Collecting data from DP-GEN. + +[Here](examples) are examples you can refer to. You should make sure that provide a correct [JSON](https://docs.python.org/3/library/json.html) file. +You can use following command to check your JSON file. +```python +import json +#Specify machine parameters in machine.json +json.load(open("machine.json")) +``` + ## Download and Install One can download the source code of dpgen by ```bash @@ -539,8 +549,14 @@ The bold notation of key (such aas **type_map**) means that it's a necessary key | **model_devi_skip** | Integer | 0 | Number of structures skipped for fp in each MD | **model_devi_f_trust_lo** | Float | 0.05 | Lower bound of forces for the selection. | **model_devi_f_trust_hi** | Float | 0.15 | Upper bound of forces for the selection -| **model_devi_e_trust_lo** | Float | 1e10 | Lower bound of energies for the selection. 
Recommend to set them a high number, since forces provide more precise information. Special cases such as energy minimization may need this. | -| **model_devi_e_trust_hi** | Float | 1e10 | Upper bound of energies for the selection. | +| **model_devi_v_trust_lo** | Float | 1e10 | Lower bound of virial for the selection. Should be used with DeePMD-kit v2.x | +| **model_devi_v_trust_hi** | Float | 1e10 | Upper bound of virial for the selection. Should be used with DeePMD-kit v2.x | +| model_devi_adapt_trust_lo | Boolean | False | Adaptively determines the lower trust levels of force and virial. This option should be used together with `model_devi_numb_candi_f`, `model_devi_numb_candi_v` and optionally with `model_devi_perc_candi_f` and `model_devi_perc_candi_v`. `dpgen` will make two sets: 1. From the frames with force model deviation lower than `model_devi_f_trust_hi`, select `max(model_devi_numb_candi_f, model_devi_perc_candi_f*n_frames)` frames with largest force model deviation. 2. From the frames with virial model deviation lower than `model_devi_v_trust_hi`, select `max(model_devi_numb_candi_v, model_devi_perc_candi_v*n_frames)` frames with largest virial model deviation. The union of the two sets is made as candidate dataset| +| model_devi_numb_candi_f | Int | 10 | See `model_devi_adapt_trust_lo`.| +| model_devi_numb_candi_v | Int | 0 | See `model_devi_adapt_trust_lo`.| +| model_devi_perc_candi_f | Float | 0.0 | See `model_devi_adapt_trust_lo`.| +| model_devi_perc_candi_v | Float | 0.0 | See `model_devi_adapt_trust_lo`.| +| model_devi_f_avg_relative | Boolean | False | Normalized the force model deviations by the RMS force magnitude along the trajectory. This key should not be used with `use_relative`. | | **model_devi_clean_traj** | Boolean | true | Deciding whether to clean traj folders in MD since they are too large. | | **model_devi_nopbc** | Boolean | False | Assume open boundary condition in MD simulations. | | model_devi_activation_func | List of list of string | [["tanh","tanh"],["tanh","gelu"],["gelu","tanh"],["gelu","gelu"]] | Set activation functions for models, length of the List should be the same as `numb_models`, and two elements in the list of string respectively assign activation functions to the embedding and fitting nets within each model. *Backward compatibility*: the orginal "List of String" format is still supported, where embedding and fitting nets of one model use the same activation function, and the length of the List should be the same as `numb_models`| @@ -1322,7 +1338,9 @@ mem_limit | Interger | 16 | Maximal memory permitted to apply for the job. | # End of resources | command | String | "lmp_serial" | Executable path of software, such as `lmp_serial`, `lmp_mpi` and `vasp_gpu`, `vasp_std`, etc. | group_size | Integer | 5 | DP-GEN will put these jobs together in one submitting script. - +| user_forward_files | List of str | ["/path_to/vdw_kernel.bindat"] | These files will be uploaded in each calculation task. You should make sure provide the path exists. +| user_backward_files | List of str | ["HILLS"] | Besides DP-GEN's normal output, these files will be downloaded after each calculation. You should make sure these files can be generated. + ## Troubleshooting 1. The most common problem is whether two settings correspond with each other, including: - The order of elements in `type_map` and `mass_map` and **`fp_pp_files`**. 
diff --git a/conda/conda_build_config.yaml b/conda/conda_build_config.yaml new file mode 100644 index 000000000..2cb7c1896 --- /dev/null +++ b/conda/conda_build_config.yaml @@ -0,0 +1,5 @@ +channel_sources: + - defaults + - conda-forge +channel_targets: + - deepmodeling diff --git a/conda/construct.yaml b/conda/construct.yaml new file mode 100644 index 000000000..c3c6005de --- /dev/null +++ b/conda/construct.yaml @@ -0,0 +1,18 @@ +{% set version = os.popen('git describe --tags --abbrev=0').read().strip('\n').lstrip('v').replace('-', '_') %} + +name: dpgen +version: {{ version }} + +channels: + - defaults + - conda-forge + - deepmodeling + +specs: + - python 3.8 + - pip + - dpgen {{ version }} + +ignore_duplicate_files: True + +license_file: ../LICENSE diff --git a/conda/meta.yaml b/conda/meta.yaml new file mode 100644 index 000000000..c599c642f --- /dev/null +++ b/conda/meta.yaml @@ -0,0 +1,52 @@ +{% set name = "dpgen" %} +{% set version = os.popen('git describe --tags --abbrev=0').read().strip('\n').lstrip('v').replace('-', '_') %} + +package: + name: {{ name|lower }} + version: {{ version }} + +source: + path: .. + +build: + number: 0 + noarch: python + script: python -m pip install --no-deps --ignore-installed . + +requirements: + build: + - git + host: + - python >=3.6 + - pip + - setuptools_scm + - dargs + - paramiko + - requests + - dpdata + - dpdispatcher + + run: + - python >=3.6 + - dargs + - paramiko + - requests + - dpdata + - dpdispatcher + - pymatgen + +test: + imports: + - dpgen + +about: + home: https://github.com/deepmodeling/dpgen + license: LGPL-3.0 + license_family: LGPL + license_file: LICENSE + doc_url: https://github.com/deepmodeling/dpgen + dev_url: https://github.com/deepmodeling/dpgen + +extra: + recipe-maintainers: + - felix5572 diff --git a/doc/CONTRIBUTING.md b/doc/CONTRIBUTING.md new file mode 100644 index 000000000..31a8996a1 --- /dev/null +++ b/doc/CONTRIBUTING.md @@ -0,0 +1,10 @@ +# DP-GEN Contributing Guide +Welcome to [DP-GEN](https://github.com/deepmodeling/dpgen/tree/master/dpgen) ! + + +## How to contribute +DP-GEN adopts the same convention as other softwares in DeepModeling Community. +You can first refer to DeePMD-kit's +[Contributing guide](https://github.com/deepmodeling/deepmd-kit/edit/devel/CONTRIBUTING.md) +and [Developer guide](https://github.com/deepmodeling/deepmd-kit/edit/devel/doc/development/index.md). 
+ diff --git a/dpgen/auto_test/common_equi.py b/dpgen/auto_test/common_equi.py index 9dcb83e03..103e16dcc 100644 --- a/dpgen/auto_test/common_equi.py +++ b/dpgen/auto_test/common_equi.py @@ -9,10 +9,9 @@ from dpgen.auto_test.calculator import make_calculator from dpgen.auto_test.mpdb import get_structure from dpgen.dispatcher.Dispatcher import make_dispatcher -from dpgen.remote.decide_machine import decide_fp_machine, decide_model_devi_machine from distutils.version import LooseVersion from dpgen.dispatcher.Dispatcher import make_submission - +from dpgen.remote.decide_machine import convert_mdata lammps_task_type = ['deepmd', 'meam', 'eam_fs', 'eam_alloy'] @@ -133,9 +132,9 @@ def run_equi(confs, inter_type = inter_param['type'] # vasp if inter_type == "vasp": - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) elif inter_type in lammps_task_type: - mdata = decide_model_devi_machine(mdata) + mdata = convert_mdata(mdata, ["model_devi"]) else: raise RuntimeError("unknown task %s, something wrong" % inter_type) diff --git a/dpgen/auto_test/common_prop.py b/dpgen/auto_test/common_prop.py index bbd7203e2..00f439d37 100644 --- a/dpgen/auto_test/common_prop.py +++ b/dpgen/auto_test/common_prop.py @@ -13,9 +13,8 @@ from dpgen.auto_test.Vacancy import Vacancy from dpgen.auto_test.calculator import make_calculator from dpgen.dispatcher.Dispatcher import make_dispatcher -from dpgen.remote.decide_machine import decide_fp_machine, decide_model_devi_machine from dpgen.dispatcher.Dispatcher import make_submission - +from dpgen.remote.decide_machine import convert_mdata lammps_task_type = ['deepmd', 'meam', 'eam_fs', 'eam_alloy'] @@ -150,9 +149,9 @@ def run_property(confs, inter_type = inter_param_prop['type'] # vasp if inter_type == "vasp": - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) elif inter_type in lammps_task_type: - mdata = decide_model_devi_machine(mdata) + mdata = convert_mdata(mdata, ["model_devi"]) else: raise RuntimeError("unknown task %s, something wrong" % inter_type) diff --git a/dpgen/auto_test/lib/util.py b/dpgen/auto_test/lib/util.py index 0a86287fd..32709da28 100644 --- a/dpgen/auto_test/lib/util.py +++ b/dpgen/auto_test/lib/util.py @@ -77,11 +77,11 @@ def get_machine_info(mdata,task_type): command = vasp_exec command = cmd_append_log(command, "log") elif task_type in lammps_task_type: - lmp_exec = mdata['lmp_command'] + model_devi_exec = mdata['model_devi_command'] group_size = mdata['model_devi_group_size'] resources = mdata['model_devi_resources'] machine=mdata['model_devi_machine'] - command = lmp_exec + " -i in.lammps" + command = model_devi_exec + " -i in.lammps" command = cmd_append_log(command, "model_devi.log") return machine, resources, command, group_size diff --git a/dpgen/data/gen.py b/dpgen/data/gen.py index 10d220d61..25c610c61 100644 --- a/dpgen/data/gen.py +++ b/dpgen/data/gen.py @@ -22,14 +22,16 @@ import dpgen.data.tools.sc as sc from distutils.version import LooseVersion from dpgen.generator.lib.vasp import incar_upper +from dpgen.generator.lib.utils import symlink_user_forward_files from pymatgen.core import Structure from pymatgen.io.vasp import Incar -from dpgen.remote.decide_machine import decide_fp_machine +from dpgen.remote.decide_machine import convert_mdata from dpgen import ROOT_PATH from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher, make_submission + def create_path (path,back=False) : if path[-1] != "/": path += '/' @@ -311,12 +313,7 @@ def make_vasp_relax (jdata, mdata) : 
os.remove(os.path.join(work_dir, 'POTCAR')) shutil.copy2( jdata['relax_incar'], os.path.join(work_dir, 'INCAR')) - is_cvasp = False - if 'cvasp' in mdata['fp_resources'].keys(): - is_cvasp = mdata['fp_resources']['cvasp'] - if is_cvasp: - cvasp_file=os.path.join(ROOT_PATH,'generator/lib/cvasp.py') - shutil.copyfile(cvasp_file, os.path.join(work_dir, 'cvasp.py')) + out_potcar = os.path.join(work_dir, 'POTCAR') with open(out_potcar, 'w') as outfile: for fname in potcars: @@ -338,8 +335,17 @@ def make_vasp_relax (jdata, mdata) : os.symlink(ln_src, 'POTCAR') except FileExistsError: pass + is_cvasp = False + if 'cvasp' in mdata['fp_resources'].keys(): + is_cvasp = mdata['fp_resources']['cvasp'] + if is_cvasp: + cvasp_file = os.path.join(ROOT_PATH, 'generator/lib/cvasp.py') + shutil.copyfile(cvasp_file, 'cvasp.py') os.chdir(work_dir) os.chdir(cwd) + symlink_user_forward_files(mdata=mdata, task_type="fp", + work_path=os.path.join(os.path.basename(out_dir),global_dirname_02), + task_format= {"fp" : "sys-*"}) def make_scale(jdata): out_dir = jdata['out_dir'] @@ -373,6 +379,7 @@ def make_scale(jdata): os.chdir(scale_path) poscar_scale(pos_src, 'POSCAR', jj) os.chdir(cwd) + def pert_scaled(jdata) : out_dir = jdata['out_dir'] @@ -425,7 +432,7 @@ def pert_scaled(jdata) : shutil.copy2(pos_in, pos_out) os.chdir(cwd) -def make_vasp_md(jdata) : +def make_vasp_md(jdata, mdata) : out_dir = jdata['out_dir'] potcars = jdata['potcars'] scale = jdata['scale'] @@ -451,7 +458,9 @@ def make_vasp_md(jdata) : with open(fname) as infile: outfile.write(infile.read()) os.chdir(path_md) - os.chdir(cwd) + os.chdir(cwd) + + for ii in sys_ps : for jj in scale : @@ -478,8 +487,20 @@ def make_vasp_md(jdata) : os.symlink(os.path.relpath(file_potcar), 'POTCAR') except FileExistsError: pass + + is_cvasp = False + if 'cvasp' in mdata['fp_resources'].keys(): + is_cvasp = mdata['fp_resources']['cvasp'] + if is_cvasp: + cvasp_file = os.path.join(ROOT_PATH, 'generator/lib/cvasp.py') + shutil.copyfile(cvasp_file, 'cvasp.py') - os.chdir(cwd) + os.chdir(cwd) + + symlink_user_forward_files(mdata=mdata, task_type="fp", + work_path=os.path.join(os.path.basename(out_dir),global_dirname_04), + task_format= {"fp" :"sys-*/scale*/00*"}) + def coll_vasp_md(jdata) : out_dir = jdata['out_dir'] @@ -565,11 +586,14 @@ def run_vasp_relax(jdata, mdata): work_dir = os.path.join(jdata['out_dir'], global_dirname_02) forward_files = ["POSCAR", "INCAR", "POTCAR"] + user_forward_files = mdata.get("fp" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] backward_files = ["OUTCAR","CONTCAR"] + backward_files += mdata.get("fp" + "_user_backward_files", []) forward_common_files = [] if 'cvasp' in mdata['fp_resources']: if mdata['fp_resources']['cvasp']: - forward_common_files=['cvasp.py'] + forward_files +=['cvasp.py'] relax_tasks = glob.glob(os.path.join(work_dir, "sys-*")) relax_tasks.sort() #dlog.info("work_dir",work_dir) @@ -624,11 +648,14 @@ def run_vasp_md(jdata, mdata): md_nstep = jdata['md_nstep'] forward_files = ["POSCAR", "INCAR", "POTCAR"] + user_forward_files = mdata.get("fp" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] backward_files = ["OUTCAR"] + backward_files += mdata.get("fp" + "_user_backward_files", []) forward_common_files = [] if 'cvasp' in mdata['fp_resources']: if mdata['fp_resources']['cvasp']: - forward_common_files=['cvasp.py'] + forward_files +=['cvasp.py'] path_md = work_dir path_md = os.path.abspath(path_md) @@ -694,7 
+721,7 @@ def gen_init_bulk(args) : if args.MACHINE is not None: # Selecting a proper machine - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) #disp = make_dispatcher(mdata["fp_machine"]) # Decide work path @@ -757,9 +784,12 @@ def gen_init_bulk(args) : pert_scaled(jdata) elif stage == 3 : dlog.info("Current stage is 3, run a short md") - make_vasp_md(jdata) if args.MACHINE is not None: + make_vasp_md(jdata, mdata) run_vasp_md(jdata, mdata) + else: + make_vasp_md(jdata, {"fp_resources":{}}) + elif stage == 4 : dlog.info("Current stage is 4, collect data") coll_vasp_md(jdata) diff --git a/dpgen/data/surf.py b/dpgen/data/surf.py index 322d26ad4..13420e118 100644 --- a/dpgen/data/surf.py +++ b/dpgen/data/surf.py @@ -11,7 +11,7 @@ import dpgen.data.tools.bcc as bcc from dpgen import dlog from dpgen import ROOT_PATH -from dpgen.remote.decide_machine import decide_fp_machine +from dpgen.remote.decide_machine import convert_mdata from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher #-----PMG--------- from pymatgen.io.vasp import Poscar @@ -596,7 +596,7 @@ def gen_init_surf(args): if args.MACHINE is not None: # Decide a proper machine - mdata = decide_fp_machine(mdata) + mdata = convert_mdata(mdata, ["fp"]) # disp = make_dispatcher(mdata["fp_machine"]) #stage = args.STAGE diff --git a/dpgen/dispatcher/ALI.py b/dpgen/dispatcher/ALI.py index ef68322be..ef4f850b2 100644 --- a/dpgen/dispatcher/ALI.py +++ b/dpgen/dispatcher/ALI.py @@ -84,6 +84,21 @@ def manual_delete(stage): os.remove("apg_id.json") print("delete successfully!") +def delete_apg(stage): + fp = open("machine-ali.json") + data = json.load(fp) + mdata_machine = data[stage][0]["machine"] + mdata_resources = data[stage][0]["resources"] + cloud_resources = mdata_machine["cloud_resources"] + ali = ALI(mdata_machine, mdata_resources, "work_path", [1], 1, cloud_resources) + fp = open("apg_id.json") + data = json.load(fp) + ali.cloud_resources["apg_id"] = data["apg_id"] + ali.delete_apg() + os.remove("apg_id.json") + print("delete successfully!") + + class ALI(DispatcherList): def __init__(self, mdata_machine, mdata_resources, work_path, run_tasks, group_size, cloud_resources=None): super().__init__(mdata_machine, mdata_resources, work_path, run_tasks, group_size, cloud_resources) diff --git a/dpgen/generator/ch4/machine.json b/dpgen/generator/ch4/machine.json index bff646bcd..653f613d6 100644 --- a/dpgen/generator/ch4/machine.json +++ b/dpgen/generator/ch4/machine.json @@ -21,7 +21,7 @@ "_comment": "that's all" }, - "lmp_command": "/sharedext4/softwares/lammps/bin/lmp_serial", + "model_devi_command": "/sharedext4/softwares/lammps/bin/lmp_serial", "model_devi_group_size": 1, "_comment": "model_devi on localhost", "model_devi_machine": { diff --git a/dpgen/generator/lib/gaussian.py b/dpgen/generator/lib/gaussian.py index 71f2b5405..ee7e2828f 100644 --- a/dpgen/generator/lib/gaussian.py +++ b/dpgen/generator/lib/gaussian.py @@ -114,8 +114,13 @@ def make_gaussian_input(sys_data, fp_params): keywords = [keywords] else: keywords = keywords.copy() + # assume default charge is zero and default spin multiplicity is 1 - charge = fp_params.get('charge', 0) + if 'charge' in sys_data.keys(): + charge = sys_data['charge'] + else: + charge = fp_params.get('charge', 0) + use_fragment_guesses = False multiplicity = fp_params.get('multiplicity', 'auto') if type(multiplicity) == int: diff --git a/dpgen/generator/lib/lammps.py b/dpgen/generator/lib/lammps.py index 03dca2b4e..3190d9f01 100644 --- 
a/dpgen/generator/lib/lammps.py +++ b/dpgen/generator/lib/lammps.py @@ -89,7 +89,7 @@ def make_lammps_input(ensemble, ret+= "\n" ret+= "thermo_style custom step temp pe ke etotal press vol lx ly lz xy xz yz\n" ret+= "thermo ${THERMO_FREQ}\n" - ret+= "dump 1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z\n" + ret+= "dump 1 all custom ${DUMP_FREQ} traj/*.lammpstrj id type x y z fx fy fz\n" ret+= "restart 10000 dpgen.restart\n" ret+= "\n" if pka_e is None : @@ -137,5 +137,37 @@ def make_lammps_input(ensemble, # cvt_lammps_conf('POSCAR', 'tmp.lmp') - - +def get_dumped_forces( + file_name): + with open(file_name) as fp: + lines = fp.read().split('\n') + natoms = None + for idx,ii in enumerate(lines): + if 'ITEM: NUMBER OF ATOMS' in ii: + natoms = int(lines[idx+1]) + break + if natoms is None: + raise RuntimeError('wrong dump file format, cannot find number of atoms', file_name) + idfx = None + for idx,ii in enumerate(lines): + if 'ITEM: ATOMS' in ii: + keys = ii + keys = keys.replace('ITEM: ATOMS', '') + keys = keys.split() + idfx = keys.index('fx') + idfy = keys.index('fy') + idfz = keys.index('fz') + break + if idfx is None: + raise RuntimeError('wrong dump file format, cannot find dump keys', file_name) + ret = [] + for ii in range(idx+1, idx+natoms+1): + words = lines[ii].split() + ret.append([ float(words[ii]) for ii in [idfx, idfy, idfz] ]) + ret = np.array(ret) + return ret + + +if __name__ == '__main__': + ret = get_dumped_forces('40.lammpstrj') + print(ret) diff --git a/dpgen/generator/lib/utils.py b/dpgen/generator/lib/utils.py index af7a71bf6..772d379ce 100644 --- a/dpgen/generator/lib/utils.py +++ b/dpgen/generator/lib/utils.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os, re, shutil, logging +import glob iter_format = "%06d" task_format = "%02d" @@ -60,4 +61,37 @@ def log_task (message) : def record_iter (record, ii, jj) : with open (record, "a") as frec : - frec.write ("%d %d\n" % (ii, jj)) + frec.write ("%d %d\n" % (ii, jj)) + +def symlink_user_forward_files(mdata, task_type, work_path, task_format = None): + ''' + Symlink user-defined forward_common_files + Current path should be work_path, such as 00.train + + Parameters + --------- + mdata : dict + machine parameters + task_type: str + task_type, such as "train" + work_path : str + work_path, such as "iter.000001/00.train" + Returns + ------- + None + ''' + user_forward_files = mdata.get(task_type + "_" + "user_forward_files", []) + #Angus: In the future, we may unify the task format. + if task_format is None: + task_format = {"train" : "0*", "model_devi" : "task.*", "fp": "task.*"} + #"init_relax" : "sys-*", "init_md" : "sys-*/scale*/00*" + for file in user_forward_files: + assert os.path.isfile(file) ,\ + "user_forward_file %s of %s stage doesn't exist. 
" % (file, task_type) + tasks = glob.glob(os.path.join(work_path, task_format[task_type])) + for task in tasks: + if os.path.isfile(os.path.join(task, os.path.basename(file))): + os.remove(os.path.join(task, os.path.basename(file))) + os.symlink(file, os.path.join(task, os.path.basename(file))) + return + \ No newline at end of file diff --git a/dpgen/generator/run.py b/dpgen/generator/run.py index ceaaed9b2..ab77da8f9 100644 --- a/dpgen/generator/run.py +++ b/dpgen/generator/run.py @@ -28,6 +28,7 @@ import scipy.constants as pc from collections import Counter from distutils.version import LooseVersion +from typing import List from numpy.linalg import norm from dpgen import dlog from dpgen import SHORT_CMD @@ -38,7 +39,8 @@ from dpgen.generator.lib.utils import log_iter from dpgen.generator.lib.utils import record_iter from dpgen.generator.lib.utils import log_task -from dpgen.generator.lib.lammps import make_lammps_input +from dpgen.generator.lib.utils import symlink_user_forward_files +from dpgen.generator.lib.lammps import make_lammps_input, get_dumped_forces from dpgen.generator.lib.vasp import write_incar_dict from dpgen.generator.lib.vasp import make_vasp_incar_user_dict from dpgen.generator.lib.vasp import incar_upper @@ -53,11 +55,7 @@ from dpgen.generator.lib.gaussian import make_gaussian_input, take_cluster from dpgen.generator.lib.cp2k import make_cp2k_input, make_cp2k_input_from_external, make_cp2k_xyz from dpgen.generator.lib.ele_temp import NBandsEsti -from dpgen.remote.RemoteJob import SSHSession, JobStatus, SlurmJob, PBSJob, LSFJob, CloudMachineJob, awsMachineJob -from dpgen.remote.group_jobs import ucloud_submit_jobs, aws_submit_jobs -from dpgen.remote.group_jobs import group_slurm_jobs -from dpgen.remote.group_jobs import group_local_jobs -from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine +from dpgen.remote.decide_machine import convert_mdata from dpgen.dispatcher.Dispatcher import Dispatcher, _split_tasks, make_dispatcher, make_submission from dpgen.util import sepline from dpgen import ROOT_PATH @@ -186,9 +184,12 @@ def dump_to_poscar(dump, poscar, type_map, fmt = "lammps/dump") : sys = dpdata.System(dump, fmt = fmt, type_map = type_map) sys.to_vasp_poscar(poscar) -def dump_to_deepmd_raw(dump, deepmd_raw, type_map, fmt='gromacs/gro'): +def dump_to_deepmd_raw(dump, deepmd_raw, type_map, fmt='gromacs/gro', charge=None): system = dpdata.System(dump, fmt = fmt, type_map = type_map) system.to_deepmd_raw(deepmd_raw) + if charge is not None: + with open(os.path.join(deepmd_raw, "charge"), 'w') as f: + f.write(str(charge)) def make_train (iter_index, @@ -208,7 +209,14 @@ def make_train (iter_index, training_init_model = jdata.get('training_init_model', False) training_reuse_iter = jdata.get('training_reuse_iter') training_reuse_old_ratio = jdata.get('training_reuse_old_ratio', None) - training_reuse_stop_batch = jdata.get('training_reuse_stop_batch', 400000) + + if 'training_reuse_stop_batch' in jdata.keys(): + training_reuse_stop_batch = jdata['training_reuse_stop_batch'] + elif 'training_reuse_numb_steps' in jdata.keys(): + training_reuse_stop_batch = jdata['training_reuse_numb_steps'] + else: + training_reuse_stop_batch = 400000 + training_reuse_start_lr = jdata.get('training_reuse_start_lr', 1e-4) training_reuse_start_pref_e = jdata.get('training_reuse_start_pref_e', 0.1) training_reuse_start_pref_f = jdata.get('training_reuse_start_pref_f', 100) @@ -345,14 +353,16 @@ def make_train (iter_index, else: raise 
RuntimeError('invalid setting for use_ele_temp ' + str(use_ele_temp)) else: - raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x version!" ) + raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x or 2.x version!" ) # set training reuse model if training_reuse_iter is not None and iter_index >= training_reuse_iter: if LooseVersion('1') <= LooseVersion(mdata["deepmd_version"]) < LooseVersion('2'): + jinput['training']['stop_batch'] = training_reuse_stop_batch jinput['training']['auto_prob_style'] \ ="prob_sys_size; 0:%d:%f; %d:%d:%f" \ %(old_range, training_reuse_old_ratio, old_range, len(init_data_sys), 1.-training_reuse_old_ratio) elif LooseVersion('2') <= LooseVersion(mdata["deepmd_version"]) < LooseVersion('3'): + jinput['training']['numb_steps'] = training_reuse_stop_batch jinput['training']['training_data']['auto_prob'] \ ="prob_sys_size; 0:%d:%f; %d:%d:%f" \ %(old_range, training_reuse_old_ratio, old_range, len(init_data_sys), 1.-training_reuse_old_ratio) @@ -363,7 +373,7 @@ def make_train (iter_index, if jinput['loss'].get('start_pref_f') is not None: jinput['loss']['start_pref_f'] = training_reuse_start_pref_f jinput['learning_rate']['start_lr'] = training_reuse_start_lr - jinput['training']['stop_batch'] = training_reuse_stop_batch + for ii in range(numb_models) : task_path = os.path.join(work_path, train_task_fmt % ii) @@ -384,7 +394,7 @@ def make_train (iter_index, jinput['model']['fitting_net']['seed'] = random.randrange(sys.maxsize) % (2**32) jinput['training']['seed'] = random.randrange(sys.maxsize) % (2**32) else: - raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x version!" ) + raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x or 2.x version!" ) # set model activation function if model_devi_activation_func is not None: if LooseVersion(mdata["deepmd_version"]) < LooseVersion('1'): @@ -395,7 +405,7 @@ def make_train (iter_index, jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii][1] if len(np.array(model_devi_activation_func).shape) == 1 : # for backward compatibility, 1-dim list, not net-resolved jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii] - jinput['model']['descriptor']['activation_function'] = model_devi_activation_func[ii] + jinput['model']['fitting_net']['activation_function'] = model_devi_activation_func[ii] # dump the input.json with open(os.path.join(task_path, train_input_file), 'w') as outfile: json.dump(jinput, outfile, indent = 4) @@ -422,6 +432,9 @@ def make_train (iter_index, for ii in range(len(iter0_models)): old_model_files = glob.glob(os.path.join(iter0_models[ii], 'model.ckpt*')) _link_old_models(work_path, old_model_files, ii) + # Copy user defined forward files + symlink_user_forward_files(mdata=mdata, task_type="train", work_path=work_path) + def _link_old_models(work_path, old_model_files, ii): @@ -502,7 +515,7 @@ def run_train (iter_index, command = '%s freeze' % train_command commands.append(command) else: - raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x version!" ) + raise RuntimeError("DP-GEN currently only supports for DeePMD-kit 1.x or 2.x version!" 
) #_tasks = [os.path.basename(ii) for ii in all_task] # run_tasks = [] @@ -559,8 +572,10 @@ def run_train (iter_index, train_group_size = 1 api_version = mdata.get('api_version', '0.9') - # print('debug:commands', commands) - + + user_forward_files = mdata.get("train" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] + backward_files += mdata.get("train" + "_user_backward_files", []) if LooseVersion(api_version) < LooseVersion('1.0'): warnings.warn(f"the dpdispatcher will be updated to new version." f"And the interface may be changed. Please check the documents for more details") @@ -836,7 +851,8 @@ def make_model_devi (iter_index, _make_model_devi_revmat(iter_index, jdata, mdata, conf_systems) else: raise RuntimeError('unknown model_devi input mode', input_mode) - + #Copy user defined forward_files + symlink_user_forward_files(mdata=mdata, task_type="model_devi", work_path=work_path) return True @@ -1073,6 +1089,9 @@ def _make_model_devi_native(iter_index, jdata, mdata, conf_systems): sys_counter += 1 def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): + # only support for deepmd v2.0 + if LooseVersion(mdata['deepmd_version']) < LooseVersion('2.0'): + raise RuntimeError("Only support deepmd-kit 2.x for model_devi_engine='gromacs'") model_devi_jobs = jdata['model_devi_jobs'] if (iter_index >= len(model_devi_jobs)) : return False @@ -1083,6 +1102,12 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): else: model_devi_dt = jdata['model_devi_dt'] nsteps = cur_job.get("nsteps", None) + lambdas = cur_job.get("lambdas", [1.0]) + temps = cur_job.get("temps", [298.0]) + + for ll in lambdas: + assert (ll >= 0.0 and ll <= 1.0), "Lambda should be in [0,1]" + if nsteps is None: raise RuntimeError("nsteps is None, you should set nsteps in model_devi_jobs!") # Currently Gromacs engine is not supported for different temperatures! 
@@ -1108,48 +1133,50 @@ def _make_model_devi_native_gromacs(iter_index, jdata, mdata, conf_systems): conf_counter = 0 task_counter = 0 for cc in ss : - task_name = make_model_devi_task_name(sys_idx[sys_counter], task_counter) - #conf_name = make_model_devi_conf_name(sys_idx[sys_counter], conf_counter) + '.lmp' - task_path = os.path.join(work_path, task_name) - # dlog.info(task_path) - create_path(task_path) - #create_path(os.path.join(task_path, 'traj')) - #loc_conf_name = 'conf.lmp' - gromacs_settings = jdata.get("gromacs_settings" , "") - for key,file in gromacs_settings.items(): - if key != "traj_filename" and key != "mdp_filename": - os.symlink(os.path.join(cc,file), os.path.join(task_path, file)) - - # input.json for DP-Gromacs - with open(os.path.join(cc, "input.json")) as f: - input_json = json.load(f) - input_json["graph_file"] = models[0] - with open(os.path.join(task_path,'input.json'), 'w') as _outfile: - json.dump(input_json, _outfile, indent = 4) - - # trj_freq - trj_freq = cur_job.get("trj_freq", 10) - mdp = MDP() - mdp.read(os.path.join(cc, gromacs_settings['mdp_filename'])) - mdp['nstcomm'] = trj_freq - mdp['nstxout'] = trj_freq - mdp['nstlog'] = trj_freq - mdp['nstenergy'] = trj_freq - # dt - mdp['dt'] = dt - mdp.write(os.path.join(task_path, gromacs_settings['mdp_filename'])) - - cwd_ = os.getcwd() - os.chdir(task_path) - job = {} - - job["model_devi_dt"] = model_devi_dt - job["nsteps"] = nsteps - with open('job.json', 'w') as _outfile: - json.dump(job, _outfile, indent = 4) - os.chdir(cwd_) - - task_counter += 1 + for ll in lambdas: + for tt in temps: + task_name = make_model_devi_task_name(sys_idx[sys_counter], task_counter) + task_path = os.path.join(work_path, task_name) + create_path(task_path) + gromacs_settings = jdata.get("gromacs_settings" , "") + for key,file in gromacs_settings.items(): + if key != "traj_filename" and key != "mdp_filename" and key != "group_name": + os.symlink(os.path.join(cc,file), os.path.join(task_path, file)) + # input.json for DP-Gromacs + with open(os.path.join(cc, "input.json")) as f: + input_json = json.load(f) + input_json["graph_file"] = models[0] + input_json["lambda"] = ll + with open(os.path.join(task_path,'input.json'), 'w') as _outfile: + json.dump(input_json, _outfile, indent = 4) + + # trj_freq + trj_freq = cur_job.get("trj_freq", 10) + mdp = MDP() + mdp.read(os.path.join(cc, gromacs_settings['mdp_filename'])) + mdp['nstcomm'] = trj_freq + mdp['nstxout'] = trj_freq + mdp['nstlog'] = trj_freq + mdp['nstenergy'] = trj_freq + # dt + mdp['dt'] = model_devi_dt + # temps + if "ref_t" in list(mdp.keys()): + mdp["ref_t"] = tt + else: + mdp["ref-t"] = tt + mdp.write(os.path.join(task_path, gromacs_settings['mdp_filename'])) + + cwd_ = os.getcwd() + os.chdir(task_path) + job = {} + job["trj_freq"] = cur_job["trj_freq"] + job["model_devi_dt"] = model_devi_dt + job["nsteps"] = nsteps + with open('job.json', 'w') as _outfile: + json.dump(job, _outfile, indent = 4) + os.chdir(cwd_) + task_counter += 1 conf_counter += 1 sys_counter += 1 @@ -1159,10 +1186,7 @@ def run_model_devi (iter_index, jdata, mdata) : #rmdlog.info("This module has been run !") - lmp_exec = mdata['lmp_command'] - # Angus: lmp_exec name should be changed to model_devi_exec. 
- # We should also change make_dispatcher - # For now, I will use this name for gromacs command + model_devi_exec = mdata['model_devi_command'] model_devi_group_size = mdata['model_devi_group_size'] model_devi_resources = mdata['model_devi_resources'] @@ -1196,7 +1220,7 @@ def run_model_devi (iter_index, model_devi_engine = jdata.get("model_devi_engine", "lammps") if model_devi_engine == "lammps": - command = "{ if [ ! -f dpgen.restart.10000 ]; then %s -i input.lammps -v restart 0; else %s -i input.lammps -v restart 1; fi }" % (lmp_exec, lmp_exec) + command = "{ if [ ! -f dpgen.restart.10000 ]; then %s -i input.lammps -v restart 0; else %s -i input.lammps -v restart 1; fi }" % (model_devi_exec, model_devi_exec) command = "/bin/sh -c '%s'" % command commands = [command] forward_files = ['conf.lmp', 'input.lammps', 'traj'] @@ -1204,29 +1228,43 @@ def run_model_devi (iter_index, if use_plm: forward_files += ['input.plumed'] # backward_files += ['output.plumed'] - backward_files += ['output.plumed','COLVAR','dump.0.xyz'] + backward_files += ['output.plumed','COLVAR'] if use_plm_path: forward_files += ['plmpath.pdb'] elif model_devi_engine == "gromacs": + gromacs_settings = jdata.get("gromacs_settings", {}) mdp_filename = gromacs_settings.get("mdp_filename", "md.mdp") topol_filename = gromacs_settings.get("topol_filename", "processed.top") conf_filename = gromacs_settings.get("conf_filename", "conf.gro") index_filename = gromacs_settings.get("index_filename", "index.raw") + # Initial reference to process pbc condition. + # Default is em.tpr + ref_filename = gromacs_settings.get("ref_filename", "em.tpr") deffnm = gromacs_settings.get("deffnm", "deepmd") maxwarn = gromacs_settings.get("maxwarn", 1) + traj_filename = gromacs_settings.get("traj_filename", "deepmd_traj.gro") + grp_name = gromacs_settings.get("group_name", "Other") nsteps = cur_job["nsteps"] - - command = "%s grompp -f %s -p %s -c %s -o %s -maxwarn %d" % (lmp_exec, mdp_filename, topol_filename, conf_filename, deffnm, maxwarn) - command += "&& %s mdrun -deffnm %s -nsteps %d" %(lmp_exec, deffnm, nsteps) + trj_freq = cur_job.get("trj_freq", 10) + + command = "%s grompp -f %s -p %s -c %s -o %s -maxwarn %d" % (model_devi_exec, mdp_filename, topol_filename, conf_filename, deffnm, maxwarn) + command += "&& %s mdrun -deffnm %s -nsteps %d" %(model_devi_exec, deffnm, nsteps) + command += "&& echo -e \"%s\n%s\n\" | %s trjconv -s %s -f %s.trr -o %s -pbc mol -ur compact -center" % (grp_name, grp_name, model_devi_exec, ref_filename, deffnm, traj_filename) + command += "&& if [ ! 
-d traj ]; then \n mkdir traj; fi\n" + command += f"python -c \"import dpdata;system = dpdata.System('{traj_filename}', fmt='gromacs/gro'); [system.to_gromacs_gro('traj/%d.gromacstrj' % (i * {trj_freq}), frame_idx=i) for i in range(system.get_nframes())]; system.to_deepmd_npy('traj_deepmd')\"" + command += f"&& dp model-devi -m ../graph.000.pb ../graph.001.pb ../graph.002.pb ../graph.003.pb -s traj_deepmd -o model_devi.out -f {trj_freq}" commands = [command] - - forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, "input.json" ] - backward_files = ["%s.tpr" % deffnm, "%s.log" %deffnm , 'model_devi.out', 'model_devi.log'] + + forward_files = [mdp_filename, topol_filename, conf_filename, index_filename, ref_filename, "input.json", "job.json" ] + backward_files = ["%s.tpr" % deffnm, "%s.log" %deffnm , traj_filename, 'model_devi.out', "traj", "traj_deepmd" ] cwd = os.getcwd() + user_forward_files = mdata.get("model_devi" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] + backward_files += mdata.get("model_devi" + "_user_backward_files", []) api_version = mdata.get('api_version', '0.9') if LooseVersion(api_version) < LooseVersion('1.0'): warnings.warn(f"the dpdispatcher will be updated to new version." @@ -1318,11 +1356,190 @@ def check_bad_box(conf_name, raise RuntimeError('unknow key', key) return is_bad + +def _read_model_devi_file( + task_path : str, + model_devi_f_avg_relative : bool = False +): + model_devi = np.loadtxt(os.path.join(task_path, 'model_devi.out')) + if model_devi_f_avg_relative : + trajs = glob.glob(os.path.join(task_path, 'traj', '*.lammpstrj')) + all_f = [] + for ii in trajs: + all_f.append(get_dumped_forces(ii)) + all_f = np.array(all_f) + all_f = all_f.reshape([-1,3]) + avg_f = np.sqrt(np.average(np.sum(np.square(all_f), axis = 1))) + model_devi[:,4:7] = model_devi[:,4:7] / avg_f + np.savetxt(os.path.join(task_path, 'model_devi_avgf.out'), model_devi, fmt='%16.6e') + return model_devi + + +def _select_by_model_devi_standard( + modd_system_task: List[str], + f_trust_lo : float, + f_trust_hi : float, + v_trust_lo : float, + v_trust_hi : float, + cluster_cutoff : float, + model_devi_skip : int = 0, + model_devi_f_avg_relative : bool = False, + detailed_report_make_fp : bool = True, +): + fp_candidate = [] + if detailed_report_make_fp: + fp_rest_accurate = [] + fp_rest_failed = [] + cc = 0 + counter = Counter() + counter['candidate'] = 0 + counter['failed'] = 0 + counter['accurate'] = 0 + for tt in modd_system_task : + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + all_conf = _read_model_devi_file(tt, model_devi_f_avg_relative) + for ii in range(all_conf.shape[0]) : + if all_conf[ii][0] < model_devi_skip : + continue + cc = int(all_conf[ii][0]) + if cluster_cutoff is None: + if (all_conf[ii][1] < v_trust_hi and all_conf[ii][1] >= v_trust_lo) or \ + (all_conf[ii][4] < f_trust_hi and all_conf[ii][4] >= f_trust_lo) : + fp_candidate.append([tt, cc]) + counter['candidate'] += 1 + elif (all_conf[ii][1] >= v_trust_hi ) or (all_conf[ii][4] >= f_trust_hi ): + if detailed_report_make_fp: + fp_rest_failed.append([tt, cc]) + counter['failed'] += 1 + elif (all_conf[ii][1] < v_trust_lo and all_conf[ii][4] < f_trust_lo ): + if detailed_report_make_fp: + fp_rest_accurate.append([tt, cc]) + counter['accurate'] += 1 + else : + raise RuntimeError('md traj %s frame %d with f devi %f does not belong to either accurate, candidiate and failed, it should not happen' % (tt, ii, all_conf[ii][4])) + 
else: + idx_candidate = np.where(np.logical_and(all_conf[ii][7:] < f_trust_hi, all_conf[ii][7:] >= f_trust_lo))[0] + for jj in idx_candidate: + fp_candidate.append([tt, cc, jj]) + counter['candidate'] += len(idx_candidate) + idx_rest_accurate = np.where(all_conf[ii][7:] < f_trust_lo)[0] + if detailed_report_make_fp: + for jj in idx_rest_accurate: + fp_rest_accurate.append([tt, cc, jj]) + counter['accurate'] += len(idx_rest_accurate) + idx_rest_failed = np.where(all_conf[ii][7:] >= f_trust_hi)[0] + if detailed_report_make_fp: + for jj in idx_rest_failed: + fp_rest_failed.append([tt, cc, jj]) + counter['failed'] += len(idx_rest_failed) + + return fp_rest_accurate, fp_candidate, fp_rest_failed, counter + + + +def _select_by_model_devi_adaptive_trust_low( + modd_system_task: List[str], + f_trust_hi : float, + numb_candi_f : int, + perc_candi_f : float, + v_trust_hi : float, + numb_candi_v : int, + perc_candi_v : float, + model_devi_skip : int = 0, + model_devi_f_avg_relative : bool = False, +): + """ + modd_system_task model deviation tasks belonging to one system + f_trust_hi + numb_candi_f number of candidate due to the f model deviation + perc_candi_f percentage of candidate due to the f model deviation + v_trust_hi + numb_candi_v number of candidate due to the v model deviation + perc_candi_v percentage of candidate due to the v model deviation + model_devi_skip + + returns + accur the accurate set + candi the candidate set + failed the failed set + counter counters, number of elements in the sets + f_trust_lo adapted trust level of f + v_trust_lo adapted trust level of v + """ + idx_v = 1 + idx_f = 4 + accur = set() + candi = set() + failed = [] + coll_v = [] + coll_f = [] + for tt in modd_system_task: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + model_devi = np.loadtxt(os.path.join(tt, 'model_devi.out')) + model_devi = _read_model_devi_file(tt, model_devi_f_avg_relative) + for ii in range(model_devi.shape[0]) : + if model_devi[ii][0] < model_devi_skip : + continue + cc = int(model_devi[ii][0]) + # tt: name of task folder + # cc: time step of the frame + md_v = model_devi[ii][idx_v] + md_f = model_devi[ii][idx_f] + if md_f > f_trust_hi or md_v > v_trust_hi: + failed.append([tt, cc]) + else: + coll_v.append([model_devi[ii][idx_v], tt, cc]) + coll_f.append([model_devi[ii][idx_f], tt, cc]) + # now accur takes all non-failed frames, + # will be substracted by candidate lat er + accur.add((tt, cc)) + # sort + coll_v.sort() + coll_f.sort() + assert(len(coll_v) == len(coll_f)) + # calcuate numbers + numb_candi_v = max(numb_candi_v, int(perc_candi_v * 0.01 * len(coll_v))) + numb_candi_f = max(numb_candi_f, int(perc_candi_f * 0.01 * len(coll_f))) + # adjust number of candidate + if len(coll_v) < numb_candi_v: + numb_candi_v = len(coll_v) + if len(coll_f) < numb_candi_f: + numb_candi_f = len(coll_f) + # compute trust lo + if numb_candi_v == 0: + v_trust_lo = v_trust_hi + else: + v_trust_lo = coll_v[-numb_candi_v][0] + if numb_candi_f == 0: + f_trust_lo = f_trust_hi + else: + f_trust_lo = coll_f[-numb_candi_f][0] + # add to candidate set + for ii in range(len(coll_v) - numb_candi_v, len(coll_v)): + candi.add(tuple(coll_v[ii][1:])) + for ii in range(len(coll_f) - numb_candi_f, len(coll_f)): + candi.add(tuple(coll_f[ii][1:])) + # accurate set is substracted by the candidate set + accur = accur - candi + # convert to list + candi = [list(ii) for ii in candi] + accur = [list(ii) for ii in accur] + # counters + counter = Counter() + counter['candidate'] = len(candi) + 
counter['failed'] = len(failed) + counter['accurate'] = len(accur) + + return accur, candi, failed, counter, f_trust_lo, v_trust_lo + + def _make_fp_vasp_inner (modd_path, work_path, model_devi_skip, - e_trust_lo, - e_trust_hi, + v_trust_lo, + v_trust_hi, f_trust_lo, f_trust_hi, fp_task_min, @@ -1348,7 +1565,13 @@ def _make_fp_vasp_inner (modd_path, system_index.sort() fp_tasks = [] + + charges_recorder = [] # record charges for each fp_task + charges_map = jdata.get("sys_charges", []) + cluster_cutoff = jdata['cluster_cutoff'] if jdata.get('use_clusters', False) else None + model_devi_adapt_trust_lo = jdata.get('model_devi_adapt_trust_lo', False) + model_devi_f_avg_relative = jdata.get('model_devi_f_avg_relative', False) # skip save *.out if detailed_report_make_fp is False, default is True detailed_report_make_fp = jdata.get("detailed_report_make_fp", True) # skip bad box criteria @@ -1356,56 +1579,37 @@ def _make_fp_vasp_inner (modd_path, # skip discrete structure in cluster fp_cluster_vacuum = jdata.get('fp_cluster_vacuum',None) for ss in system_index : - fp_candidate = [] - if detailed_report_make_fp: - fp_rest_accurate = [] - fp_rest_failed = [] modd_system_glob = os.path.join(modd_path, 'task.' + ss + '.*') modd_system_task = glob.glob(modd_system_glob) modd_system_task.sort() - cc = 0 - counter = Counter() - counter['candidate'] = 0 - counter['failed'] = 0 - counter['accurate'] = 0 - for tt in modd_system_task : - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - all_conf = np.loadtxt(os.path.join(tt, 'model_devi.out')) - for ii in range(all_conf.shape[0]) : - if all_conf[ii][0] < model_devi_skip : - continue - cc = int(all_conf[ii][0]) - if cluster_cutoff is None: - if (all_conf[ii][1] < e_trust_hi and all_conf[ii][1] >= e_trust_lo) or \ - (all_conf[ii][4] < f_trust_hi and all_conf[ii][4] >= f_trust_lo) : - fp_candidate.append([tt, cc]) - counter['candidate'] += 1 - elif (all_conf[ii][1] >= e_trust_hi ) or (all_conf[ii][4] >= f_trust_hi ): - if detailed_report_make_fp: - fp_rest_failed.append([tt, cc]) - counter['failed'] += 1 - elif (all_conf[ii][1] < e_trust_lo and all_conf[ii][4] < f_trust_lo ): - if detailed_report_make_fp: - fp_rest_accurate.append([tt, cc]) - counter['accurate'] += 1 - else : - raise RuntimeError('md traj %s frame %d with f devi %f does not belong to either accurate, candidiate and failed, it should not happen' % (tt, ii, all_conf[ii][4])) - else: - idx_candidate = np.where(np.logical_and(all_conf[ii][7:] < f_trust_hi, all_conf[ii][7:] >= f_trust_lo))[0] - for jj in idx_candidate: - fp_candidate.append([tt, cc, jj]) - counter['candidate'] += len(idx_candidate) - idx_rest_accurate = np.where(all_conf[ii][7:] < f_trust_lo)[0] - if detailed_report_make_fp: - for jj in idx_rest_accurate: - fp_rest_accurate.append([tt, cc, jj]) - counter['accurate'] += len(idx_rest_accurate) - idx_rest_failed = np.where(all_conf[ii][7:] >= f_trust_hi)[0] - if detailed_report_make_fp: - for jj in idx_rest_failed: - fp_rest_failed.append([tt, cc, jj]) - counter['failed'] += len(idx_rest_failed) + + # assumed e -> v + if not model_devi_adapt_trust_lo: + fp_rest_accurate, fp_candidate, fp_rest_failed, counter \ + = _select_by_model_devi_standard( + modd_system_task, + f_trust_lo, f_trust_hi, + v_trust_lo, v_trust_hi, + cluster_cutoff, + model_devi_skip, + model_devi_f_avg_relative = model_devi_f_avg_relative, + detailed_report_make_fp = detailed_report_make_fp, + ) + else: + numb_candi_f = jdata.get('model_devi_numb_candi_f', 10) + numb_candi_v = 
jdata.get('model_devi_numb_candi_v', 0) + perc_candi_f = jdata.get('model_devi_perc_candi_f', 0.) + perc_candi_v = jdata.get('model_devi_perc_candi_v', 0.) + fp_rest_accurate, fp_candidate, fp_rest_failed, counter, f_trust_lo_ad, v_trust_lo_ad \ + = _select_by_model_devi_adaptive_trust_low( + modd_system_task, + f_trust_hi, numb_candi_f, perc_candi_f, + v_trust_hi, numb_candi_v, perc_candi_v, + model_devi_skip = model_devi_skip, + model_devi_f_avg_relative = model_devi_f_avg_relative, + ) + dlog.info("system {0:s} {1:9s} : f_trust_lo {2:6.3f} v_trust_lo {3:6.3f}".format(ss, 'adapted', f_trust_lo_ad, v_trust_lo_ad)) + # print a report fp_sum = sum(counter.values()) for cc_key, cc_value in counter.items(): @@ -1461,11 +1665,11 @@ def _make_fp_vasp_inner (modd_path, continue if fp_cluster_vacuum is not None: - assert fp_cluster_vacuum >0 - skip_cluster = check_cluster(conf_name, fp_cluster_vacuum) - if skip_cluster: - count_bad_cluster +=1 - continue + assert fp_cluster_vacuum >0 + skip_cluster = check_cluster(conf_name, fp_cluster_vacuum) + if skip_cluster: + count_bad_cluster +=1 + continue # link job.json job_name = os.path.join(tt, "job.json") @@ -1481,6 +1685,8 @@ def _make_fp_vasp_inner (modd_path, fp_task_path = os.path.join(work_path, fp_task_name) create_path(fp_task_path) fp_tasks.append(fp_task_path) + if charges_map: + charges_recorder.append(charges_map[int(ss)]) cwd = os.getcwd() os.chdir(fp_task_path) if cluster_cutoff is None: @@ -1498,13 +1704,18 @@ def _make_fp_vasp_inner (modd_path, dlog.info("system {0:s} skipped {1:6d} confs with bad cluster, {2:6d} remains".format(ss, count_bad_cluster, numb_task - count_bad_cluster)) if cluster_cutoff is None: cwd = os.getcwd() - for ii in fp_tasks: - os.chdir(ii) + for idx, task in enumerate(fp_tasks): + os.chdir(task) if model_devi_engine == "lammps": dump_to_poscar('conf.dump', 'POSCAR', type_map, fmt = "lammps/dump") + if charges_map: + warnings.warn('"sys_charges" keyword only support for gromacs engine now.') elif model_devi_engine == "gromacs": # dump_to_poscar('conf.dump', 'POSCAR', type_map, fmt = "gromacs/gro") - dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt = 'gromacs/gro') + if charges_map: + dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt='gromacs/gro', charge=charges_recorder[idx]) + else: + dump_to_deepmd_raw('conf.dump', 'deepmd.raw', type_map, fmt='gromacs/gro', charge=None) else: raise RuntimeError("unknown model_devi engine", model_devi_engine) os.chdir(cwd) @@ -1765,8 +1976,8 @@ def _make_fp_vasp_configs(iter_index, jdata): fp_task_max = jdata['fp_task_max'] model_devi_skip = jdata['model_devi_skip'] - e_trust_lo = 1e+10 - e_trust_hi = 1e+10 + v_trust_lo = jdata.get('model_devi_v_trust_lo', 1e10) + v_trust_hi = jdata.get('model_devi_v_trust_hi', 1e10) f_trust_lo = jdata['model_devi_f_trust_lo'] f_trust_hi = jdata['model_devi_f_trust_hi'] type_map = jdata['type_map'] @@ -1784,7 +1995,7 @@ def _make_fp_vasp_configs(iter_index, # make configs fp_tasks = _make_fp_vasp_inner(modd_path, work_path, model_devi_skip, - e_trust_lo, e_trust_hi, + v_trust_lo, v_trust_hi, f_trust_lo, f_trust_hi, task_min, fp_task_max, [], @@ -1930,6 +2141,8 @@ def make_fp_gaussian(iter_index, sys_data = dpdata.System('POSCAR').data elif model_devi_engine == "gromacs": sys_data = dpdata.System("deepmd.raw", fmt='deepmd/raw').data + if os.path.isfile('deepmd.raw/charge'): + sys_data['charge'] = int(np.loadtxt('deepmd.raw/charge', dtype=int)) ret = make_gaussian_input(sys_data, fp_params) with open('input', 'w') as fp: 
fp.write(ret) @@ -2015,6 +2228,10 @@ def make_fp (iter_index, make_fp_pwmat(iter_index, jdata) else : raise RuntimeError ("unsupported fp style") + # Copy user defined forward_files + iter_name = make_iter_name(iter_index) + work_path = os.path.join(iter_name, fp_name) + symlink_user_forward_files(mdata=mdata, task_type="fp", work_path=work_path) def _vasp_check_fin (ii) : if os.path.isfile(os.path.join(ii, 'OUTCAR')) : @@ -2120,6 +2337,10 @@ def run_fp_inner (iter_index, # fp_run_tasks.append(ii) run_tasks = [os.path.basename(ii) for ii in fp_run_tasks] + user_forward_files = mdata.get("fp" + "_user_forward_files", []) + forward_files += [os.path.basename(file) for file in user_forward_files] + backward_files += mdata.get("fp" + "_user_backward_files", []) + api_version = mdata.get('api_version', '0.9') if LooseVersion(api_version) < LooseVersion('1.0'): warnings.warn(f"the dpdispatcher will be updated to new version." @@ -2158,10 +2379,9 @@ def run_fp (iter_index, mdata) : fp_style = jdata['fp_style'] fp_pp_files = jdata['fp_pp_files'] - if fp_style == "vasp" : forward_files = ['POSCAR', 'INCAR', 'POTCAR','KPOINTS'] - backward_files = ['OUTCAR','vasprun.xml'] + backward_files = ['fp.log','OUTCAR','vasprun.xml'] # Move cvasp interface to jdata if ('cvasp' in jdata) and (jdata['cvasp'] == True): mdata['fp_resources']['cvasp'] = True @@ -2473,6 +2693,7 @@ def post_fp_gaussian (iter_index, sys_output = glob.glob(os.path.join(work_path, "task.%s.*/output"%ss)) sys_output.sort() for idx,oo in enumerate(sys_output) : + # TODO : UnboundLocalError sometimes occurs when parsing gaussian log sys = dpdata.LabeledSystem(oo, fmt = 'gaussian/log') if len(sys) > 0: sys.check_type_map(type_map = jdata['type_map']) @@ -2511,20 +2732,31 @@ def post_fp_cp2k (iter_index, system_index.sort() cwd = os.getcwd() + # tcount: num of all fp tasks + tcount = 0 + # icount: num of converged fp tasks + icount = 0 for ss in system_index : sys_output = glob.glob(os.path.join(work_path, "task.%s.*/output"%ss)) sys_output.sort() - for idx,oo in enumerate(sys_output) : - sys = dpdata.LabeledSystem(oo, fmt = 'cp2k/output') - if len(sys) > 0: - sys.check_type_map(type_map = jdata['type_map']) - if idx == 0: - all_sys = sys + tcount += len(sys_output) + all_sys = None + for oo in sys_output : + _sys = dpdata.LabeledSystem(oo, fmt = 'cp2k/output') + _sys.check_type_map(type_map = jdata['type_map']) + if all_sys is None: + all_sys = _sys else: - all_sys.append(sys) - sys_data_path = os.path.join(work_path, 'data.%s'%ss) - all_sys.to_deepmd_raw(sys_data_path) - all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) + all_sys.append(_sys) + + + icount += len(all_sys) + if all_sys is not None: + sys_data_path = os.path.join(work_path, 'data.%s'%ss) + all_sys.to_deepmd_raw(sys_data_path) + all_sys.to_deepmd_npy(sys_data_path, set_size = len(sys_output)) + dlog.info("failed frame number: %s "%(tcount-icount)) + dlog.info("total frame number: %s "%tcount) def post_fp_pwmat (iter_index, @@ -2646,7 +2878,8 @@ def run_iter (param_file, machine_file) : listener = logging.handlers.QueueListener(que, smtp_handler) dlog.addHandler(queue_handler) listener.start() - + # Convert mdata + mdata = convert_mdata(mdata) max_tasks = 10000 numb_task = 9 record = "record.dpgen" @@ -2673,7 +2906,6 @@ def run_iter (param_file, machine_file) : make_train (ii, jdata, mdata) elif jj == 1 : log_iter ("run_train", ii, jj) - mdata = decide_train_machine(mdata) run_train (ii, jdata, mdata) elif jj == 2 : log_iter ("post_train", ii, jj) @@ -2685,7 
+2917,6 @@ def run_iter (param_file, machine_file) : break elif jj == 4 : log_iter ("run_model_devi", ii, jj) - mdata = decide_model_devi_machine(mdata) run_model_devi (ii, jdata, mdata) elif jj == 5 : @@ -2696,7 +2927,6 @@ def run_iter (param_file, machine_file) : make_fp (ii, jdata, mdata) elif jj == 7 : log_iter ("run_fp", ii, jj) - mdata = decide_fp_machine(mdata) run_fp (ii, jdata, mdata) elif jj == 8 : log_iter ("post_fp", ii, jj) diff --git a/dpgen/remote/decide_machine.py b/dpgen/remote/decide_machine.py index cda17853e..2688d3ca3 100644 --- a/dpgen/remote/decide_machine.py +++ b/dpgen/remote/decide_machine.py @@ -11,278 +11,313 @@ import numpy as np from distutils.version import LooseVersion -def decide_train_machine(mdata): - if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): - mdata['train_group_size'] = mdata['train'][0]['resources']['group_size'] - if 'train' in mdata: - continue_flag = False - if 'record.machine' in os.listdir(): - try: - with open('record.machine', 'r') as _infile: - profile = json.load(_infile) - if profile['purpose'] == 'train': - mdata['train_machine'] = profile['machine'] - mdata['train_resources'] = profile['resources'] - - if 'python_path' in profile: - mdata['python_path'] = profile['python_path'] - if "group_size" in profile: - mdata["train_group_size"] = profile["group_size"] - if 'deepmd_version' in profile: - mdata["deepmd_version"] = profile['deepmd_version'] - if 'command' in profile: - mdata['train_command'] = profile["command"] - continue_flag = True - except: - pass - if ("hostname" not in mdata["train"][0]["machine"]) or (len(mdata["train"]) == 1): - mdata["train_machine"] = mdata["train"][0]["machine"] - mdata["train_resources"] = mdata["train"][0]["resources"] - - if 'python_path' in mdata["train"][0]: - mdata["python_path"] = mdata["train"][0]["python_path"] - if "group_size" in mdata["train"][0]: - mdata["train_group_size"] = mdata["train"][0]["group_size"] - if 'deepmd_version' in mdata["train"][0]: - mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"] - if 'command' in mdata["train"][0]: - mdata["train_command"] = mdata["train"][0]["command"] - continue_flag = True - - pd_flag = False - pd_count_list =[] - # pd for pending job in slurm - # if we need to launch new machine_idxines - if not continue_flag: - #assert isinstance(mdata['train']['machine'], list) - #assert isinstance(mdata['train']['resources'], list) - #assert len(mdata['train']['machine']) == len(mdata['train']['resources']) - # mdata['train'] is a list - for machine_idx in range(len(mdata['train'])): - temp_machine = mdata['train'][machine_idx]['machine'] - temp_resources = mdata['train'][machine_idx]['resources'] - temp_ssh_sess = SSHSession(temp_machine) - cwd = os.getcwd() - temp_context = SSHContext(cwd, temp_ssh_sess) - if temp_machine['machine_type'] == 'lsf': - temp_batch = LSF(temp_context) - else: - temp_batch = Slurm(temp_context) - # For other type of machines, please add them using 'elif'. - # Here slurm is selected as the final choice in convinience. 
- command = temp_batch._make_squeue(temp_machine, temp_resources) - ret, stdin, stdout, stderr = temp_batch.context.block_call(command) - pd_response = stdout.read().decode('utf-8').split("\n") - pd_count = len(pd_response) - temp_context.clean() - ## If there is no need to waiting for allocation - if pd_count ==1: - mdata['train_machine'] = temp_machine - mdata['train_resources'] = temp_resources +def convert_mdata(mdata, task_types=["train", "model_devi", "fp"]): + ''' + Convert mdata for the DP-GEN main process. + The new convention is mdata["fp"]["machine"], + while DP-GEN needs mdata["fp_machine"] - if 'python_path' in mdata['train'][machine_idx]: - mdata['python_path'] = mdata['train'][machine_idx]['python_path'] - if 'group_size' in mdata['train'][machine_idx]: - mdata['train_group_size'] = mdata['train'][machine_idx]['group_size'] - if 'deepmd_version' in mdata['train'][machine_idx]: - mdata['deepmd_version'] = mdata['train'][machine_idx]['deepmd_version'] - if 'command' in mdata['train'][machine_idx]: - mdata['train_command'] = mdata['train'][machine_idx]['command'] + Notice that we deprecate the function that automatically selected the most available machine, + since this function was only used by Angus and only supported Slurm. + It may be reimplemented in the future. - ## No need to wait - pd_flag = True - break - else: - pd_count_list.append(pd_count) - if not pd_flag: - ## All machines need waiting, then compare waiting jobs - ## Select a machine which has fewest waiting jobs - min_machine_idx = np.argsort(pd_count_list)[0] - mdata['train_machine'] = mdata['train'][min_machine_idx]['machine'] - mdata['train_resources'] = mdata['train'][min_machine_idx]['resources'] - - if 'python_path' in mdata['train'][min_machine_idx]: - mdata['python_path'] = mdata['train'][min_machine_idx]['python_path'] - if "group_size" in mdata['train'][min_machine_idx]: - mdata["train_group_size"] = mdata['train'][min_machine_idx]["group_size"] - if 'deepmd_version' in mdata['train'][min_machine_idx]: - mdata['deepmd_version'] = mdata['train'][min_machine_idx]["deepmd_version"] - if 'command' in mdata['train'][min_machine_idx]: - mdata['train_command'] = mdata['train'][min_machine_idx]['command'] + Parameters + ---------- + mdata : dict + Machine parameters to be converted. 
+ task_types : list of string + Type of tasks, default is ["train", "model_devi", "fp"] - ## Record which machine is selected - with open("record.machine","w") as _outfile: - profile = {} - profile['purpose'] = 'train' - profile['machine'] = mdata['train_machine'] - profile['resources'] = mdata['train_resources'] - - if 'python_path' in mdata: - profile['python_path'] = mdata['python_path'] - if "train_group_size" in mdata: - profile["group_size"] = mdata["train_group_size"] - if 'deepmd_version' in mdata: - profile['deepmd_version'] = mdata['deepmd_version'] - if 'train_command' in mdata: - profile['command'] = mdata['train_command'] + Returns + ------- + dict + mdata converted + ''' + for task_type in task_types: + if task_type in mdata: + for key, item in mdata[task_type][0].items(): + if "comments" not in key: + mdata[task_type + "_" + key] = item + group_size = mdata[task_type][0]["resources"].get("group_size", 1) + if group_size == 1: group_size = mdata[task_type][0].get("group_size", 1) + mdata[task_type + "_" + "group_size"] = group_size + return mdata - json.dump(profile, _outfile, indent = 4) - return mdata -def decide_model_devi_machine(mdata): - if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): - mdata['model_devi_group_size'] = mdata['model_devi'][0]['resources']['group_size'] - if 'model_devi' in mdata: - continue_flag = False - if 'record.machine' in os.listdir(): - try: - with open('record.machine', 'r') as _infile: - profile = json.load(_infile) - if profile['purpose'] == 'model_devi': - mdata['model_devi_machine'] = profile['machine'] - mdata['model_devi_resources'] = profile['resources'] - mdata['lmp_command'] = profile['command'] - mdata['model_devi_group_size'] = profile['group_size'] - continue_flag = True - except: - pass - if ("hostname" not in mdata["model_devi"][0]["machine"]) or (len(mdata["model_devi"]) == 1): - mdata["model_devi_machine"] = mdata["model_devi"][0]["machine"] - mdata["model_devi_resources"] = mdata["model_devi"][0]["resources"] - mdata["lmp_command"] = mdata["model_devi"][0]["command"] - #if "group_size" in mdata["train"][0]: - mdata["model_devi_group_size"] = mdata["model_devi"][0].get("group_size", 1) - continue_flag = True - pd_count_list =[] - pd_flag = False - if not continue_flag: - - #assert isinstance(mdata['model_devi']['machine'], list) - #ssert isinstance(mdata['model_devi']['resources'], list) - #assert len(mdata['model_devi']['machine']) == len(mdata['model_devi']['resources']) - - for machine_idx in range(len(mdata['model_devi'])): - temp_machine = mdata['model_devi'][machine_idx]['machine'] - temp_resources = mdata['model_devi'][machine_idx]['resources'] - #assert isinstance(temp_machine, dict), "unsupported type of model_devi machine [%d]!" %machine_idx - #assert isinstance(temp_resources, dict), "unsupported type of model_devi resources [%d]!"%machine_idx - #assert temp_machine['machine_type'] == 'slurm', "Currently only support for Slurm!" - temp_ssh_sess = SSHSession(temp_machine) - cwd = os.getcwd() - temp_context = SSHContext(cwd, temp_ssh_sess) - if temp_machine['machine_type'] == 'lsf': - temp_batch = LSF(temp_context) - else: - temp_batch = Slurm(temp_context) - # For other type of machines, please add them using 'elif'. - # Here slurm is selected as the final choice in convinience. 
- command = temp_batch._make_squeue(temp_machine, temp_resources) - ret, stdin, stdout, stderr = temp_batch.context.block_call(command) - pd_response = stdout.read().decode('utf-8').split("\n") - pd_count = len(pd_response) - temp_context.clean() - if pd_count ==0: - mdata['model_devi_machine'] = temp_machine - mdata['model_devi_resources'] = temp_resources - mdata['lmp_command'] = mdata['model_devi'][machine_idx]['command'] - mdata['model_devi_group_size'] = mdata['model_devi'][machine_idx].get('group_size', 1) - pd_flag = True - break - else: - pd_count_list.append(pd_count) - if not pd_flag: - min_machine_idx = np.argsort(pd_count_list)[0] - mdata['model_devi_machine'] = mdata['model_devi'][min_machine_idx]['machine'] - mdata['model_devi_resources'] = mdata['model_devi'][min_machine_idx]['resources'] - mdata['lmp_command'] = mdata['model_devi'][min_machine_idx]['command'] - mdata['model_devi_group_size'] = mdata['model_devi'][min_machine_idx].get('group_size', 1) - with open("record.machine","w") as _outfile: - profile = {} - profile['purpose'] = 'model_devi' - profile['machine'] = mdata['model_devi_machine'] - profile['resources'] = mdata['model_devi_resources'] - profile['group_size'] = mdata['model_devi_group_size'] - profile['command'] = mdata['lmp_command'] - - json.dump(profile, _outfile, indent = 4) - return mdata -def decide_fp_machine(mdata): - if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): - mdata['fp_group_size'] = mdata['fp'][0]['resources']['group_size'] - if 'fp' in mdata: - #ssert isinstance(mdata['fp']['machine'], list) - #assert isinstance(mdata['fp']['resources'], list) - #assert len(mdata['fp']['machine']) == len(mdata['fp']['resources']) - continue_flag = False - ## decide whether to use an existing machine - if 'record.machine' in os.listdir(): - try: - with open('record.machine', 'r') as _infile: - profile = json.load(_infile) - if profile['purpose'] == 'fp': - mdata['fp_machine'] = profile['machine'] - mdata['fp_resources'] = profile['resources'] - mdata['fp_command'] = profile['command'] - mdata['fp_group_size'] = profile['group_size'] - - continue_flag = True - except: - pass - if ("hostname" not in mdata["fp"][0]["machine"]) or (len(mdata["fp"]) == 1): - mdata["fp_machine"] = mdata["fp"][0]["machine"] - mdata["fp_resources"] = mdata["fp"][0]["resources"] - mdata["fp_command"] = mdata["fp"][0]["command"] - #if "group_size" in mdata["train"][0]: - mdata["fp_group_size"] = mdata["fp"][0].get("group_size", 1) - continue_flag = True - - - pd_count_list =[] - pd_flag = False - if not continue_flag: - for machine_idx in range(len(mdata['fp'])): - temp_machine = mdata['fp'][machine_idx]['machine'] - temp_resources = mdata['fp'][machine_idx]['resources'] - temp_ssh_sess = SSHSession(temp_machine) - cwd = os.getcwd() - temp_context = SSHContext(cwd, temp_ssh_sess) - if temp_machine['machine_type'] == 'lsf': - temp_batch = LSF(temp_context) - else: - temp_batch = Slurm(temp_context) - # For other type of machines, please add them using 'elif'. - # Here slurm is selected as the final choice in convinience. 
- command = temp_batch._make_squeue(temp_machine, temp_resources) - ret, stdin, stdout, stderr = temp_batch.context.block_call(command) - pd_response = stdout.read().decode('utf-8').split("\n") - pd_count = len(pd_response) - temp_context.clean() - #dlog.info(temp_machine["username"] + " " + temp_machine["hostname"] + " " + str(pd_count)) - if pd_count ==0: - mdata['fp_machine'] = temp_machine - mdata['fp_resources'] = temp_resources - mdata['fp_command'] = mdata['fp'][machine_idx]['command'] - mdata['fp_group_size'] = mdata['fp'][machine_idx].get('group_size', 1) - pd_flag = True - break - else: - pd_count_list.append(pd_count) - if not pd_flag: - min_machine_idx = np.argsort(pd_count_list)[0] - mdata['fp_machine'] = mdata['fp'][min_machine_idx]['machine'] - mdata['fp_resources'] = mdata['fp'][min_machine_idx]['resources'] - mdata['fp_command'] = mdata['fp'][min_machine_idx]['command'] - mdata['fp_group_size'] = mdata['fp'][min_machine_idx].get('group_size',1) - - with open("record.machine","w") as _outfile: - profile = {} - profile['purpose'] = 'fp' - profile['machine'] = mdata['fp_machine'] - profile['resources'] = mdata['fp_resources'] - profile['group_size'] = mdata['fp_group_size'] - profile['command'] = mdata['fp_command'] - json.dump(profile, _outfile, indent = 4) - return mdata +# def decide_train_machine(mdata): +# if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): +# mdata['train_group_size'] = mdata['train'][0]['resources']['group_size'] +# if 'train' in mdata: +# continue_flag = False +# if 'record.machine' in os.listdir(): +# try: +# with open('record.machine', 'r') as _infile: +# profile = json.load(_infile) +# if profile['purpose'] == 'train': +# mdata['train_machine'] = profile['machine'] +# mdata['train_resources'] = profile['resources'] +# +# if 'python_path' in profile: +# mdata['python_path'] = profile['python_path'] +# if "group_size" in profile: +# mdata["train_group_size"] = profile["group_size"] +# if 'deepmd_version' in profile: +# mdata["deepmd_version"] = profile['deepmd_version'] +# if 'command' in profile: +# mdata['train_command'] = profile["command"] +# continue_flag = True +# except: +# pass +# if ("hostname" not in mdata["train"][0]["machine"]) or (len(mdata["train"]) == 1): +# mdata["train_machine"] = mdata["train"][0]["machine"] +# mdata["train_resources"] = mdata["train"][0]["resources"] +# +# if 'python_path' in mdata["train"][0]: +# mdata["python_path"] = mdata["train"][0]["python_path"] +# if "group_size" in mdata["train"][0]: +# mdata["train_group_size"] = mdata["train"][0]["group_size"] +# if 'deepmd_version' in mdata["train"][0]: +# mdata["deepmd_version"] = mdata["train"][0]["deepmd_version"] +# if 'command' in mdata["train"][0]: +# mdata["train_command"] = mdata["train"][0]["command"] +# continue_flag = True +# +# pd_flag = False +# pd_count_list =[] +# # pd for pending job in slurm +# # if we need to launch new machine_idxines +# if not continue_flag: +# +# #assert isinstance(mdata['train']['machine'], list) +# #assert isinstance(mdata['train']['resources'], list) +# #assert len(mdata['train']['machine']) == len(mdata['train']['resources']) +# # mdata['train'] is a list +# for machine_idx in range(len(mdata['train'])): +# temp_machine = mdata['train'][machine_idx]['machine'] +# temp_resources = mdata['train'][machine_idx]['resources'] +# temp_ssh_sess = SSHSession(temp_machine) +# cwd = os.getcwd() +# temp_context = SSHContext(cwd, temp_ssh_sess) +# if temp_machine['machine_type'] == 'lsf': +# temp_batch = 
LSF(temp_context) +# else: +# temp_batch = Slurm(temp_context) +# # For other type of machines, please add them using 'elif'. +# # Here slurm is selected as the final choice in convinience. +# command = temp_batch._make_squeue(temp_machine, temp_resources) +# ret, stdin, stdout, stderr = temp_batch.context.block_call(command) +# pd_response = stdout.read().decode('utf-8').split("\n") +# pd_count = len(pd_response) +# temp_context.clean() +# ## If there is no need to waiting for allocation +# if pd_count ==1: +# mdata['train_machine'] = temp_machine +# mdata['train_resources'] = temp_resources +# +# if 'python_path' in mdata['train'][machine_idx]: +# mdata['python_path'] = mdata['train'][machine_idx]['python_path'] +# if 'group_size' in mdata['train'][machine_idx]: +# mdata['train_group_size'] = mdata['train'][machine_idx]['group_size'] +# if 'deepmd_version' in mdata['train'][machine_idx]: +# mdata['deepmd_version'] = mdata['train'][machine_idx]['deepmd_version'] +# if 'command' in mdata['train'][machine_idx]: +# mdata['train_command'] = mdata['train'][machine_idx]['command'] +# +# ## No need to wait +# pd_flag = True +# break +# else: +# pd_count_list.append(pd_count) +# if not pd_flag: +# ## All machines need waiting, then compare waiting jobs +# ## Select a machine which has fewest waiting jobs +# min_machine_idx = np.argsort(pd_count_list)[0] +# mdata['train_machine'] = mdata['train'][min_machine_idx]['machine'] +# mdata['train_resources'] = mdata['train'][min_machine_idx]['resources'] +# +# if 'python_path' in mdata['train'][min_machine_idx]: +# mdata['python_path'] = mdata['train'][min_machine_idx]['python_path'] +# if "group_size" in mdata['train'][min_machine_idx]: +# mdata["train_group_size"] = mdata['train'][min_machine_idx]["group_size"] +# if 'deepmd_version' in mdata['train'][min_machine_idx]: +# mdata['deepmd_version'] = mdata['train'][min_machine_idx]["deepmd_version"] +# if 'command' in mdata['train'][min_machine_idx]: +# mdata['train_command'] = mdata['train'][min_machine_idx]['command'] +# +# ## Record which machine is selected +# with open("record.machine","w") as _outfile: +# profile = {} +# profile['purpose'] = 'train' +# profile['machine'] = mdata['train_machine'] +# profile['resources'] = mdata['train_resources'] +# +# if 'python_path' in mdata: +# profile['python_path'] = mdata['python_path'] +# if "train_group_size" in mdata: +# profile["group_size"] = mdata["train_group_size"] +# if 'deepmd_version' in mdata: +# profile['deepmd_version'] = mdata['deepmd_version'] +# if 'train_command' in mdata: +# profile['command'] = mdata['train_command'] +# +# json.dump(profile, _outfile, indent = 4) +# return mdata +# +# def decide_model_devi_machine(mdata): +# if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): +# mdata['model_devi_group_size'] = mdata['model_devi'][0]['resources']['group_size'] +# if 'model_devi' in mdata: +# continue_flag = False +# if 'record.machine' in os.listdir(): +# try: +# with open('record.machine', 'r') as _infile: +# profile = json.load(_infile) +# if profile['purpose'] == 'model_devi': +# mdata['model_devi_machine'] = profile['machine'] +# mdata['model_devi_resources'] = profile['resources'] +# mdata['model_devi_command'] = profile['command'] +# mdata['model_devi_group_size'] = profile['group_size'] +# continue_flag = True +# except: +# pass +# if ("hostname" not in mdata["model_devi"][0]["machine"]) or (len(mdata["model_devi"]) == 1): +# mdata["model_devi_machine"] = mdata["model_devi"][0]["machine"] +# 
mdata["model_devi_resources"] = mdata["model_devi"][0]["resources"] +# mdata["model_devi_command"] = mdata["model_devi"][0]["command"] +# #if "group_size" in mdata["train"][0]: +# mdata["model_devi_group_size"] = mdata["model_devi"][0].get("group_size", 1) +# continue_flag = True +# +# pd_count_list =[] +# pd_flag = False +# if not continue_flag: +# +# #assert isinstance(mdata['model_devi']['machine'], list) +# #ssert isinstance(mdata['model_devi']['resources'], list) +# #assert len(mdata['model_devi']['machine']) == len(mdata['model_devi']['resources']) +# +# for machine_idx in range(len(mdata['model_devi'])): +# temp_machine = mdata['model_devi'][machine_idx]['machine'] +# temp_resources = mdata['model_devi'][machine_idx]['resources'] +# #assert isinstance(temp_machine, dict), "unsupported type of model_devi machine [%d]!" %machine_idx +# #assert isinstance(temp_resources, dict), "unsupported type of model_devi resources [%d]!"%machine_idx +# #assert temp_machine['machine_type'] == 'slurm', "Currently only support for Slurm!" +# temp_ssh_sess = SSHSession(temp_machine) +# cwd = os.getcwd() +# temp_context = SSHContext(cwd, temp_ssh_sess) +# if temp_machine['machine_type'] == 'lsf': +# temp_batch = LSF(temp_context) +# else: +# temp_batch = Slurm(temp_context) +# # For other type of machines, please add them using 'elif'. +# # Here slurm is selected as the final choice in convinience. +# command = temp_batch._make_squeue(temp_machine, temp_resources) +# ret, stdin, stdout, stderr = temp_batch.context.block_call(command) +# pd_response = stdout.read().decode('utf-8').split("\n") +# pd_count = len(pd_response) +# temp_context.clean() +# if pd_count ==0: +# mdata['model_devi_machine'] = temp_machine +# mdata['model_devi_resources'] = temp_resources +# mdata['model_devi_command'] = mdata['model_devi'][machine_idx]['command'] +# mdata['model_devi_group_size'] = mdata['model_devi'][machine_idx].get('group_size', 1) +# pd_flag = True +# break +# else: +# pd_count_list.append(pd_count) +# if not pd_flag: +# min_machine_idx = np.argsort(pd_count_list)[0] +# mdata['model_devi_machine'] = mdata['model_devi'][min_machine_idx]['machine'] +# mdata['model_devi_resources'] = mdata['model_devi'][min_machine_idx]['resources'] +# mdata['model_devi_command'] = mdata['model_devi'][min_machine_idx]['command'] +# mdata['model_devi_group_size'] = mdata['model_devi'][min_machine_idx].get('group_size', 1) +# with open("record.machine","w") as _outfile: +# profile = {} +# profile['purpose'] = 'model_devi' +# profile['machine'] = mdata['model_devi_machine'] +# profile['resources'] = mdata['model_devi_resources'] +# profile['group_size'] = mdata['model_devi_group_size'] +# profile['command'] = mdata['model_devi_command'] +# +# json.dump(profile, _outfile, indent = 4) +# return mdata +# def decide_fp_machine(mdata): +# if LooseVersion(mdata.get('api_version', '0.9')) >= LooseVersion('1.0'): +# mdata['fp_group_size'] = mdata['fp'][0]['resources']['group_size'] +# if 'fp' in mdata: +# #ssert isinstance(mdata['fp']['machine'], list) +# #assert isinstance(mdata['fp']['resources'], list) +# #assert len(mdata['fp']['machine']) == len(mdata['fp']['resources']) +# continue_flag = False +# ## decide whether to use an existing machine +# if 'record.machine' in os.listdir(): +# try: +# with open('record.machine', 'r') as _infile: +# profile = json.load(_infile) +# if profile['purpose'] == 'fp': +# mdata['fp_machine'] = profile['machine'] +# mdata['fp_resources'] = profile['resources'] +# mdata['fp_command'] = profile['command'] 
+# mdata['fp_group_size'] = profile['group_size'] +# +# continue_flag = True +# except: +# pass +# if ("hostname" not in mdata["fp"][0]["machine"]) or (len(mdata["fp"]) == 1): +# mdata["fp_machine"] = mdata["fp"][0]["machine"] +# mdata["fp_resources"] = mdata["fp"][0]["resources"] +# mdata["fp_command"] = mdata["fp"][0]["command"] +# #if "group_size" in mdata["train"][0]: +# mdata["fp_group_size"] = mdata["fp"][0].get("group_size", 1) +# continue_flag = True +# +# +# pd_count_list =[] +# pd_flag = False +# if not continue_flag: +# for machine_idx in range(len(mdata['fp'])): +# temp_machine = mdata['fp'][machine_idx]['machine'] +# temp_resources = mdata['fp'][machine_idx]['resources'] +# temp_ssh_sess = SSHSession(temp_machine) +# cwd = os.getcwd() +# temp_context = SSHContext(cwd, temp_ssh_sess) +# if temp_machine['machine_type'] == 'lsf': +# temp_batch = LSF(temp_context) +# else: +# temp_batch = Slurm(temp_context) +# # For other type of machines, please add them using 'elif'. +# # Here slurm is selected as the final choice in convinience. +# command = temp_batch._make_squeue(temp_machine, temp_resources) +# ret, stdin, stdout, stderr = temp_batch.context.block_call(command) +# pd_response = stdout.read().decode('utf-8').split("\n") +# pd_count = len(pd_response) +# temp_context.clean() +# #dlog.info(temp_machine["username"] + " " + temp_machine["hostname"] + " " + str(pd_count)) +# if pd_count ==0: +# mdata['fp_machine'] = temp_machine +# mdata['fp_resources'] = temp_resources +# mdata['fp_command'] = mdata['fp'][machine_idx]['command'] +# mdata['fp_group_size'] = mdata['fp'][machine_idx].get('group_size', 1) +# pd_flag = True +# break +# else: +# pd_count_list.append(pd_count) +# if not pd_flag: +# min_machine_idx = np.argsort(pd_count_list)[0] +# mdata['fp_machine'] = mdata['fp'][min_machine_idx]['machine'] +# mdata['fp_resources'] = mdata['fp'][min_machine_idx]['resources'] +# mdata['fp_command'] = mdata['fp'][min_machine_idx]['command'] +# mdata['fp_group_size'] = mdata['fp'][min_machine_idx].get('group_size',1) +# +# with open("record.machine","w") as _outfile: +# profile = {} +# profile['purpose'] = 'fp' +# profile['machine'] = mdata['fp_machine'] +# profile['resources'] = mdata['fp_resources'] +# profile['group_size'] = mdata['fp_group_size'] +# profile['command'] = mdata['fp_command'] +# json.dump(profile, _outfile, indent = 4) +# return mdata diff --git a/dpgen/simplify/simplify.py b/dpgen/simplify/simplify.py index 9856dc58a..768d64835 100644 --- a/dpgen/simplify/simplify.py +++ b/dpgen/simplify/simplify.py @@ -22,12 +22,10 @@ from dpgen import dlog from dpgen import SHORT_CMD from dpgen.util import sepline -from dpgen.remote.decide_machine import decide_train_machine from dpgen.dispatcher.Dispatcher import Dispatcher, make_dispatcher from dpgen.generator.run import make_train, run_train, post_train, run_fp, post_fp, fp_name, model_devi_name, train_name, train_task_fmt, sys_link_fp_vasp_pp, make_fp_vasp_incar, make_fp_vasp_kp, make_fp_vasp_cp_cvasp, data_system_fmt, model_devi_task_fmt, fp_task_fmt # TODO: maybe the following functions can be moved to dpgen.util from dpgen.generator.lib.utils import log_iter, make_iter_name, create_path, record_iter -from dpgen.remote.decide_machine import decide_train_machine, decide_fp_machine, decide_model_devi_machine from dpgen.generator.lib.gaussian import make_gaussian_input @@ -603,7 +601,8 @@ def run_iter(param_file, machine_file): listener = logging.handlers.QueueListener(que, smtp_handler) dlog.addHandler(queue_handler) 
listener.start() - + + mdata = convert_mdata(mdata) max_tasks = 10000 numb_task = 9 record = "record.dpgen" @@ -638,7 +637,6 @@ def run_iter(param_file, machine_file): make_train(ii, jdata, mdata) elif jj == 1: log_iter("run_train", ii, jj) - mdata = decide_train_machine(mdata) #disp = make_dispatcher(mdata['train_machine']) run_train(ii, jdata, mdata) elif jj == 2: @@ -651,7 +649,6 @@ def run_iter(param_file, machine_file): break elif jj == 4: log_iter("run_model_devi", ii, jj) - mdata = decide_model_devi_machine(mdata) #disp = make_dispatcher(mdata['model_devi_machine']) run_model_devi(ii, jdata, mdata) elif jj == 5: @@ -665,7 +662,6 @@ def run_iter(param_file, machine_file): if jdata.get("labeled", False): dlog.info("already have labeled data, skip run_fp") else: - mdata = decide_fp_machine(mdata) #disp = make_dispatcher(mdata['fp_machine']) run_fp(ii, jdata, mdata) elif jj == 8: diff --git a/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json b/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json index 442ddb201..a90b04f35 100644 --- a/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json +++ b/examples/CH4-refact-dpdispatcher/machine-ali-ehpc.json @@ -46,7 +46,11 @@ "queue_name": "T4_4_15", "group_size": 5, "source_list": ["/home/fengbo/deepmd.1.2.4.env"] - } + }, + "_comments" : "In user_forward_files, define input files to be uploaded.", + "user_forward_files" : [], + "_comments" : "In user_backward_files, define output files to be collected.", + "user_backward_files" : ["HILLS"] } ], "fp":[ @@ -69,7 +73,11 @@ "queue_name": "G_32_128", "group_size": 1, "source_list": ["~/vasp.env"] - } + }, + "_comments" : "In user_forward_files, define input files to be uploaded.", + "user_forward_files" : ["vdw_kernel.bindat"], + "_comments" : "In user_backward_files, define output files to be collected.", + "user_backward_files" : [] } ] } diff --git a/examples/init/INCAR_methane.md b/examples/init/INCAR_methane.md index a0e3ca29b..9831387aa 100644 --- a/examples/init/INCAR_methane.md +++ b/examples/init/INCAR_methane.md @@ -1,21 +1,33 @@ PREC=A -ENCUT=400 +ENCUT=400.000000 ISYM=0 -ALGO=Fast -EDIFF=1.000000e-06 -LREAL=False +ALGO=fast +EDIFF=1E-6 +LREAL=F NPAR=4 KPAR=1 -NELM=120 -NELMIN=4 + +NELM=200 +ISTART=0 +ICHARG=2 ISIF=2 ISMEAR=0 -SIGMA=0.20000 +SIGMA=0.200000 IBRION=0 -POTIM=0.5 +MAXMIX=50 +NBLOCK=1 +KBLOCK=100 + +SMASS=0 +POTIM=2 +TEBEG=50 +TEEND=50 + NSW=10 + LWAVE=F LCHARG=F PSTRESS=0 + KSPACING=0.500000 -KGAMMA=.FALSE. 
+KGAMMA=F diff --git a/examples/machine/DeePMD-kit-1.x/machine-local.json b/examples/machine/DeePMD-kit-1.x/machine-local.json index 5c356baef..a266f712b 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-local.json +++ b/examples/machine/DeePMD-kit-1.x/machine-local.json @@ -13,7 +13,7 @@ "_comment": "model_devi on localhost ", - "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010", + "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010", "model_devi_group_size": 5, "model_devi_machine": { "batch": "shell", diff --git a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json index 25cb48349..6893471c5 100644 --- a/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json +++ b/examples/machine/DeePMD-kit-1.x/machine-pbs-gaussian.json @@ -27,7 +27,7 @@ "_comment": "model_devi on localhost ", - "lmp_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2", + "model_devi_command": "/gpfs/home/tzhu/lammps-stable_5Jun2019/src/lmp_intel_cpu_intelmpi -pk intel 0 omp 2", "model_devi_group_size": 1, "model_devi_machine": { "machine_type": "lsf", diff --git a/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json b/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json index f4015b612..7d050b548 100644 --- a/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json +++ b/examples/machine/deprecated/DeePMD-kit-0.12/machine-aws.json @@ -96,7 +96,7 @@ "with_mpi":true }, "deepmd_path": "/deepmd_root/", - "lmp_command":"/usr/bin/lmp_mpi", + "model_devi_command":"/usr/bin/lmp_mpi", "fp_command":"/usr/bin/vasp_std", "train_resources": {}, diff --git a/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json b/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json index 057db2722..b8e15a625 100644 --- a/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json +++ b/examples/machine/deprecated/DeePMD-kit-0.12/machine-local.json @@ -14,7 +14,7 @@ "_comment": "model_devi on localhost ", - "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010", + "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010", "model_devi_group_size": 5, "model_devi_machine": { "batch": "shell", diff --git a/examples/machine/deprecated/machine-hnu.json b/examples/machine/deprecated/machine-hnu.json index 8b9ee8003..eb9cb91f2 100644 --- a/examples/machine/deprecated/machine-hnu.json +++ b/examples/machine/deprecated/machine-hnu.json @@ -21,7 +21,7 @@ "_comment": "that's all" }, - "lmp_command": "/home/llang/dp_v2/local/bin/lmp_mpi_0_12_0", + "model_devi_command": "/home/llang/dp_v2/local/bin/lmp_mpi_0_12_0", "model_devi_group_size": 10, "_comment": "model_devi on localhost ", "model_devi_machine": { diff --git a/examples/machine/deprecated/machine-tiger-pwscf-della.json b/examples/machine/deprecated/machine-tiger-pwscf-della.json index 7201947b1..44911f487 100644 --- a/examples/machine/deprecated/machine-tiger-pwscf-della.json +++ b/examples/machine/deprecated/machine-tiger-pwscf-della.json @@ -19,7 +19,7 @@ "_comment": "that's all" }, - "lmp_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu", + "model_devi_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu", "model_devi_group_size": 20, "_comment": "model_devi on localhost ", "model_devi_machine": { diff --git a/examples/machine/deprecated/machine-tiger-vasp-della.json b/examples/machine/deprecated/machine-tiger-vasp-della.json index 822788b8f..fa1fdf6e9 100644 --- 
a/examples/machine/deprecated/machine-tiger-vasp-della.json +++ b/examples/machine/deprecated/machine-tiger-vasp-della.json @@ -19,7 +19,7 @@ "_comment": "that's all" }, - "lmp_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu", + "model_devi_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu", "model_devi_group_size": 10, "_comment": "model_devi on localhost ", "model_devi_machine": { diff --git a/examples/machine/deprecated/machine-tiger.json b/examples/machine/deprecated/machine-tiger.json index b1400d76f..ccc1b573f 100644 --- a/examples/machine/deprecated/machine-tiger.json +++ b/examples/machine/deprecated/machine-tiger.json @@ -19,7 +19,7 @@ "_comment": "that's all" }, - "lmp_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu", + "model_devi_command": "/home/linfengz/SCR/wanghan/local/bin/lmp_serial_0110_gpu", "model_devi_group_size": 20, "_comment": "model_devi on localhost ", "model_devi_machine": { diff --git a/examples/machine/deprecated/machine-ucloud.json b/examples/machine/deprecated/machine-ucloud.json index 963c250e9..52e9040c1 100644 --- a/examples/machine/deprecated/machine-ucloud.json +++ b/examples/machine/deprecated/machine-ucloud.json @@ -30,7 +30,7 @@ }, - "lmp_command": "/usr/bin/lmp_mpi", + "model_devi_command": "/usr/bin/lmp_mpi", "model_devi_group_size": 20, "model_devi_machine": { "machine_type": "ucloud", diff --git a/examples/run/dp1.x-gromacs-gaussian/param.json b/examples/run/deprecated/dp1.x-gromacs-gaussian/param.json similarity index 100% rename from examples/run/dp1.x-gromacs-gaussian/param.json rename to examples/run/deprecated/dp1.x-gromacs-gaussian/param.json diff --git a/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json b/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json index 273854d93..fec7fcf92 100644 --- a/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json +++ b/examples/run/dp1.x-lammps-cp2k/methane/param-ch4.json @@ -48,7 +48,7 @@ "seed": 1 }, "fitting_net": { - "n_neuron": [ + "neuron": [ 120, 120, 120 diff --git a/examples/run/dp2.x-gromacs-gaussian/machine.json b/examples/run/dp2.x-gromacs-gaussian/machine.json new file mode 100644 index 000000000..0f73b2277 --- /dev/null +++ b/examples/run/dp2.x-gromacs-gaussian/machine.json @@ -0,0 +1,69 @@ +{ + "deepmd_version" : "2.0", + "train": [ + { + "machine": { + "batch": "slurm", + "work_path": "/work/path" + }, + "resources": { + "numb_node": 1, + "numb_gpu": 1, + "partition": "all", + "time_limit": "120:0:0", + "task_per_node": 8, + "exclude_list": [], + "module_list": [], + "source_list": ["/path/to/dp-2.0.env"] + }, + "command": "dp" + } + ], + "model_devi": [ + { + "machine": { + "batch": "slurm", + "work_path": "/work/path" + }, + "resources": { + "numb_node": 1, + "numb_gpu": 1, + "partition": "all", + "time_limit": "120:0:0", + "task_per_node": 8, + "source_list": [ + "/path/to/gromacs-dp/env" + ], + "module_list": [], + "exclude_list": [], + "envs": { + "GMX_DEEPMD_INPUT_JSON": "input.json" + } + }, + "command": "gmx_mpi", + "group_size": 1 + } + ], + "fp": [ + { + "machine": { + "batch": "slurm", + "work_path": "/work/path" + }, + "resources": { + "numb_node": 1, + "numb_gpu": 0, + "time_limit": "120:0:0", + "task_per_node": 28, + "partition": "cpu", + "exclude_list": [], + "source_list": [ + "/path/to/gaussian/bashrc" + ], + "module_list": [] + }, + "command": "g16 < input", + "group_size": 20 + } + ] +} diff --git a/examples/run/dp2.x-gromacs-gaussian/param.json b/examples/run/dp2.x-gromacs-gaussian/param.json new file mode 
100644 index 000000000..30557d307 --- /dev/null +++ b/examples/run/dp2.x-gromacs-gaussian/param.json @@ -0,0 +1,160 @@ +{ + "type_map": ["H", "C", "N", "O", "F", "S", "Cl"], + "mass_map": [2, 12, 14, 16, 17, 32, 35], + "init_data_prefix": "/path/to/init/data", + "init_data_sys": ["data.init"], + "init_multi_systems": true, + "sys_configs_prefix": "/path/to/model/devi/data", + "sys_configs": [ + ["model_devi/CHEMBL3402749_500"], + ["model_devi/CHEMBL3402741_400"], + ["model_devi/CHEMBL3402748_5300"], + ["model_devi/CHEMBL3402743_42"], + ["model_devi/CHEMBL3402761_1"], + ["model_devi/CHEMBL3402756_2.7"], + ["model_devi/CHEMBL3402750_400"], + ["model_devi/CHEMBL3402764_90"], + ["model_devi/CHEMBL3402758_10"], + ["model_devi/CHEMBL3402754_40"], + ["model_devi/CHEMBL3402747_3400"], + ["model_devi/CHEMBL3402762_1"], + ["model_devi/CHEMBL3402744_300"], + ["model_devi/CHEMBL3402752_30000"], + ["model_devi/CHEMBL3402742_23"], + ["model_devi/CHEMBL3402759_5.7"], + ["model_devi/CHEMBL3402745_200"], + ["model_devi/CHEMBL3402757_6.5"], + ["model_devi/CHEMBL3402755_4200"], + ["model_devi/CHEMBL3402751_2100"], + ["model_devi/CHEMBL3402753_200"], + ["model_devi/CHEMBL3402763_90"], + ["model_devi/CHEMBL3402765_11-charged-pKa-8.1"], + ["model_devi/CHEMBL3402760_1"] + ], + "sys_charges": [ + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 0, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 0, + 1, + 1, + 1 + ], + "sys_format": "gromacs/gro", + "numb_models": 4, + "training_reuse_iter": 1, + "training_reuse_old_ratio": 0.3, + "training_reuse_start_lr": 0.0001, + "training_reuse_decay_steps": 500, + "training_reuse_numb_steps": 200000, + "training_reuse_start_pref_e": 1, + "training_reuse_start_pref_f": 100, + "train_param": "input.json", + "default_training_param": { + "model": { + "type_map": ["H", "C", "N", "O", "F", "S", "Cl"], + "descriptor": { + "type": "se_e2_a", + "sel": [40, 48, 48, 48, 48, 48, 48], + "rcut_smth": 0.50, + "rcut": 6.00, + "neuron": [25, 50, 100], + "resnet_dt": false, + "axis_neuron": 16, + "type_one_side": true + }, + "fitting_net": { + "neuron": [240, 240, 240], + "resnet_dt": true, + "type": "ener" + } + }, + "learning_rate": { + "type": "exp", + "start_lr": 0.001, + "decay_steps": 5000 + }, + "loss": { + "start_pref_e": 0.02, + "limit_pref_e": 8, + "start_pref_f": 1000, + "limit_pref_f": 1, + "start_pref_v": 0, + "limit_pref_v": 0 + }, + "training": { + "numb_steps": 1000000, + "disp_file": "lcurve.out", + "disp_freq": 1000, + "numb_test": 1, + "save_freq": 1000, + "save_ckpt": "model.ckpt" + } + }, + "model_devi_engine": "gromacs", + "gromacs_settings": { + "mdp_filename": "md.mdp", + "topol_filename": "processed.top", + "conf_filename": "npt.gro", + "index_filename": "index.raw", + "ref_filename": "em.tpr", + "model_devi_script": "model_devi.py", + "traj_filename": "deepmd_traj.gro", + "group_name": "Other" + }, + "model_devi_dt": 0.001, + "model_devi_f_trust_lo": 0.20, + "model_devi_f_trust_hi": 0.60, + "model_devi_e_trust_lo": 1e10, + "model_devi_e_trust_hi": 1e10, + "model_devi_clean_traj": false, + "model_devi_skip": 0, + "model_devi_nopbc": true, + "model_devi_activation_func": [ + ["tanh", "tanh"], + ["tanh", "tanh"], + ["tanh", "tanh"], + ["tanh", "tanh"] + ], + "model_devi_jobs": [ + { + "_idx": 0, + "ensemble": "nvt", + "nsteps": 100, + "press": [], + "sys_idx": [ 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23], + "temps": [], + "trj_freq": 1 + } + ], + "fp_style": "gaussian", + "shuffle_poscar": false, + "fp_task_max": 50, + 
"fp_task_min": 1, + "fp_pp_path": "./", + "fp_pp_files": [], + "fp_params": { + "keywords": "force m062x/6-31** nosymm", + "nproc": 28, + "multiplicity": 1 + } +} diff --git a/tests/generator/context.py b/tests/generator/context.py index c79920396..a943bc895 100644 --- a/tests/generator/context.py +++ b/tests/generator/context.py @@ -4,6 +4,7 @@ from dpgen.generator.run import * from dpgen.generator.lib.gaussian import detect_multiplicity from dpgen.generator.lib.ele_temp import NBandsEsti +from dpgen.generator.lib.lammps import get_dumped_forces param_file = 'param-mg-vasp.json' param_file_v1 = 'param-mg-vasp-v1.json' diff --git a/tests/generator/machine-local-v1.json b/tests/generator/machine-local-v1.json index 7079678e8..2218884f2 100644 --- a/tests/generator/machine-local-v1.json +++ b/tests/generator/machine-local-v1.json @@ -28,7 +28,7 @@ "source_list": [], "_comment": "that's All" }, - "lmp_command": "/home/wanghan/local/bin/lmp_mpi_1_1_0", + "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_1_1_0", "model_devi_group_size": 10, "fp_machine": { diff --git a/tests/generator/machine-local.json b/tests/generator/machine-local.json index 05a0f2811..a4743c964 100644 --- a/tests/generator/machine-local.json +++ b/tests/generator/machine-local.json @@ -18,7 +18,7 @@ "_comment": "model_devi on localhost ", - "lmp_command": "/home/wanghan/local/bin/lmp_mpi_010", + "model_devi_command": "/home/wanghan/local/bin/lmp_mpi_010", "model_devi_group_size": 5, "model_devi_machine": { "machine_type": "local", @@ -49,6 +49,6 @@ "with_mpi": true, "_comment": "that's all" }, - + "fp_user_forward_files" : ["vdw_kernel.bindat"], "_comment": " that's all " } diff --git a/tests/generator/test_gromacs_engine.py b/tests/generator/test_gromacs_engine.py index cfbeaa997..fe7da360e 100644 --- a/tests/generator/test_gromacs_engine.py +++ b/tests/generator/test_gromacs_engine.py @@ -91,7 +91,7 @@ def _copy_outputs(self, path_1, path_2): def test_make_model_devi_gromacs(self): flag = make_model_devi(iter_index=0, jdata=self.jdata, - mdata={}) + mdata={"deepmd_version": "2.0"}) self.assertTrue(flag) self.assertTrue(os.path.exists(self.model_devi_path)) self.assertTrue(os.path.exists(self.model_devi_task_path)) @@ -108,7 +108,7 @@ def test_make_fp_gaussian(self): def tearDown(self): - #pass + # pass shutil.rmtree(self.iter_path) if __name__ == '__main__': unittest.main() diff --git a/tests/generator/test_lammps.py b/tests/generator/test_lammps.py new file mode 100644 index 000000000..b1dcc55a4 --- /dev/null +++ b/tests/generator/test_lammps.py @@ -0,0 +1,38 @@ +import os,sys,json,glob,shutil,textwrap +import dpdata +import numpy as np +import unittest + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +__package__ = 'generator' +from .context import get_dumped_forces + +class TestGetDumpForce(unittest.TestCase): + def setUp(self): + file_content = textwrap.dedent("""\ +ITEM: TIMESTEP +40 +ITEM: NUMBER OF ATOMS +2 +ITEM: BOX BOUNDS xy xz yz pp pp pp +-2.9180686220264818e-04 8.0855380329747089e+00 1.4011011277606830e-07 +-2.9198257591541018e-04 8.0855378881632269e+00 3.3202396460852749e-08 +-2.9180686326490957e-04 8.0855378891632768e+00 -1.7571268247505500e-07 +ITEM: ATOMS id type x y z fx fy fz +1 1 2.09532 8.19528 2.00538 -0.00569269 -0.0200373 -0.0342394 +2 1 -0.0727384 4.01773 4.05582 -0.0297083 0.0817184 0.0722508 +""") + with open('tmp.dump', 'w') as fp: + fp.write(file_content) + self.expected_f = [ -0.00569269, -0.0200373, -0.0342394, -0.0297083, 0.0817184, 0.0722508] + + 
def tearDown(self): + if os.path.isfile('tmp.dump'): + os.remove('tmp.dump') + + def test_read_dump(self): + ff = get_dumped_forces('tmp.dump') + self.assertEqual(ff.shape, (2, 3)) + ff = ff.reshape([-1]) + for ii in range(6): + self.assertAlmostEqual(ff[ii], self.expected_f[ii]) diff --git a/tests/generator/test_make_fp.py b/tests/generator/test_make_fp.py index 09ac5aede..914c9b149 100644 --- a/tests/generator/test_make_fp.py +++ b/tests/generator/test_make_fp.py @@ -481,6 +481,15 @@ def _check_pwmat_input(testCase, idx): testCase.assertEqual(lines.strip(), pwmat_input_ref.strip()) os.chdir(cwd) +def _check_symlink_user_forward_files(testCase, idx, file): + fp_path = os.path.join('iter.%06d' % idx, '02.fp') + tasks = glob.glob(os.path.join(fp_path, 'task.*')) + cwd = os.getcwd() + for ii in tasks: + os.chdir(ii) + testCase.assertEqual(os.path.isfile("vdw_kernel.bindat"), True) + os.chdir(cwd) + class TestMakeFPPwscf(unittest.TestCase): def test_make_fp_pwscf(self): setUpModule() @@ -614,7 +623,7 @@ def test_make_fp_vasp(self): atom_types = [0, 1, 0, 1] type_map = jdata['type_map'] _make_fake_md(0, md_descript, atom_types, type_map) - make_fp(0, jdata, {}) + make_fp(0, jdata, {"fp_user_forward_files" : ["vdw_kernel.bindat"] }) _check_sel(self, 0, jdata['fp_task_max'], jdata['model_devi_f_trust_lo'], jdata['model_devi_f_trust_hi']) _check_poscars(self, 0, jdata['fp_task_max'], jdata['type_map']) # _check_incar_exists(self, 0) @@ -755,7 +764,7 @@ def test_make_fp_vasp_ele_temp(self): # checked elsewhere # _check_potcar(self, 0, jdata['fp_pp_path'], jdata['fp_pp_files']) shutil.rmtree('iter.000000') - + class TestMakeFPGaussian(unittest.TestCase): def make_fp_gaussian(self, multiplicity="auto"): diff --git a/tests/generator/vdw_kernel.bindat b/tests/generator/vdw_kernel.bindat new file mode 100644 index 000000000..e69de29bb diff --git a/tests/tools/context.py b/tests/tools/context.py index d4e70a8c5..1d3510786 100644 --- a/tests/tools/context.py +++ b/tests/tools/context.py @@ -8,3 +8,5 @@ def my_file_cmp(test, f0, f1): with open(f1) as fp1: test.assertTrue(fp0.read() == fp1.read()) +def setUpModule(): + os.chdir(os.path.abspath(os.path.dirname(__file__))) diff --git a/tests/tools/machine_fp_single.json b/tests/tools/machine_fp_single.json new file mode 100644 index 000000000..f998388eb --- /dev/null +++ b/tests/tools/machine_fp_single.json @@ -0,0 +1,15 @@ +{ + "fp":[ + { + "command": "vasp_std", + "machine":{ + "batch_type": "PBS" + }, + "resources": { + "group_size" : 8 + }, + "_comments" : "In user_forward_files, define input files to be uploaded.", + "user_forward_files" : ["vdw_kernel.bindat"] + } + ] +} \ No newline at end of file diff --git a/tests/tools/test_convert_mdata.py b/tests/tools/test_convert_mdata.py new file mode 100644 index 000000000..5458b0faa --- /dev/null +++ b/tests/tools/test_convert_mdata.py @@ -0,0 +1,17 @@ +import os,sys,json +import unittest + +test_dir = os.path.abspath(os.path.join(os.path.dirname(__file__))) +sys.path.insert(0, os.path.join(test_dir, '..')) +__package__ = 'tools' +from dpgen.remote.decide_machine import convert_mdata +from .context import setUpModule +machine_file = 'machine_fp_single.json' +class TestConvertMdata(unittest.TestCase): + def test_convert_mdata (self): + mdata = json.load(open(machine_file)) + mdata = convert_mdata(mdata, ["fp"]) + self.assertEqual(mdata["fp_command"], "vasp_std") + self.assertEqual(mdata["fp_group_size"], 8) + self.assertEqual(mdata["fp_machine"]["batch_type"], "PBS") + 
self.assertEqual(mdata["fp_user_forward_files"], ["vdw_kernel.bindat"])
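For reference, here is a minimal sketch of how the new `convert_mdata` interface introduced by this patch can be exercised outside the test suite. It assumes a dpgen installation that includes this change and reuses the `machine_fp_single.json` fixture added in `tests/tools` above; the printed values mirror the assertions in `test_convert_mdata.py`.
```python
import json

# convert_mdata replaces the decide_*_machine helpers removed in this patch
from dpgen.remote.decide_machine import convert_mdata

# Same content as the tests/tools/machine_fp_single.json fixture above
with open("machine_fp_single.json") as fp:
    mdata = json.load(fp)

# Flatten mdata["fp"][0][...] into the mdata["fp_*"] keys that the DP-GEN
# main process expects; keys containing "comments" are skipped, and
# group_size is taken from the resources block when present.
mdata = convert_mdata(mdata, task_types=["fp"])

print(mdata["fp_command"])                # vasp_std
print(mdata["fp_group_size"])             # 8
print(mdata["fp_machine"]["batch_type"])  # PBS
print(mdata["fp_user_forward_files"])     # ['vdw_kernel.bindat']
```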