Skip to content

Commit

Permalink
support no pbc calculation (#104)
Browse files Browse the repository at this point in the history
This PR introduces the changes:
- support the `remove_pbc` option in the `FileConfGenerator` (input
confs for exploration)
- support removing the pbc from the selected configurations. (output
confs from exploration for fp calculations)
- supply training input template rather than writing it in the dpgen
input script.
- fix bugs in gaussian support (the declared arguments did not match the interface)

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
  • Loading branch information
wanghan-iapcm and Han Wang committed Dec 25, 2022
1 parent c4acd0b commit 67c19d6
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 99 deletions.
9 changes: 8 additions & 1 deletion dpgen2/conf/file_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(
files : Union[str,List[str]],
fmt : str = 'auto',
prefix : Optional[str] = None,
remove_pbc : Optional[bool] = False,
):
if not isinstance(files, list):
assert(isinstance(files, str))
Expand All @@ -32,6 +33,7 @@ def __init__(
ff.sort()
self.files += ff
self.fmt = fmt
self.remove_pbc = remove_pbc


def generate(
Expand All @@ -40,7 +42,10 @@ def generate(
) -> dpdata.MultiSystems:
ms = dpdata.MultiSystems(type_map=type_map)
for ff in self.files:
ms.append(dpdata.System(ff, fmt=self.fmt))
ss = dpdata.System(ff, fmt=self.fmt, type_map=type_map)
if self.remove_pbc:
ss.remove_pbc()
ms.append(ss)
return ms


Expand All @@ -49,10 +54,12 @@ def args() -> List[Argument]:
doc_files = "The paths to the configuration files. Wildcards are supported."
doc_prefix = "The prefix of file paths."
doc_fmt = "The format (dpdata accepted formats) of the files."
doc_remove_pbc = "Whether to remove the pbc of the data. Shifts the coords to the center of the box so the data can be used with LAMMPS."

return [
Argument("files", [str, list], optional=False, doc=doc_files),
Argument("prefix", str, optional=True, default=None, doc=doc_prefix),
Argument("fmt", str, optional=True, default='auto', doc=doc_fmt),
Argument("remove_pbc", bool, optional=True, default=False, doc=doc_remove_pbc),
]

6 changes: 4 additions & 2 deletions dpgen2/entrypoint/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
def dp_train_args():
doc_numb_models = "Number of models trained for evaluating the model deviation"
doc_config = "Configuration of training"
doc_template_script = "Template training script. It can be a `List[Dict]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `Dict`, the models share the same template training script. "
doc_template_script = "File names of the template training script. It can be a `List[Dict]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `Dict`, the models share the same template training script. "

return [
Argument("config", dict, RunDPTrain.training_args(), optional=True, default=RunDPTrain.normalize_config({}), doc=doc_numb_models),
Argument("numb_models", int, optional=True, default=4, doc=doc_numb_models),
Argument("template_script", [list,dict], optional=False, doc=doc_template_script),
Argument("template_script", [list,str], optional=False, doc=doc_template_script),
]

def variant_train():
Expand All @@ -41,6 +41,7 @@ def lmp_args():
doc_f_trust_hi = "Higher trust level of force model deviation"
doc_v_trust_lo = "Lower trust level of virial model deviation"
doc_v_trust_hi = "Higher trust level of virial model deviation"
doc_output_nopbc = "Remove pbc of the output configurations"
doc_configuration_prefix = "The path prefix of lmp initial configurations"
doc_configuration = "A list of initial configurations."
doc_stages = "A list of exploration stages."
Expand All @@ -54,6 +55,7 @@ def lmp_args():
Argument("f_trust_hi", float, optional=False, doc=doc_f_trust_hi),
Argument("v_trust_lo", float, optional=True, default=None, doc=doc_v_trust_lo),
Argument("v_trust_hi", float, optional=True, default=None, doc=doc_v_trust_hi),
Argument("output_nopbc", bool, optional=True, default=False, doc=doc_output_nopbc),
Argument("configuration_prefix", str, optional=True, default=None, doc=doc_configuration_prefix),
Argument("configurations", list,
[], [variant_conf()],
Expand Down
8 changes: 7 additions & 1 deletion dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def make_naive_exploration_scheduler(
fp_task_max = config['fp_task_max'] if old_style else config['fp']['task_max']
conv_accuracy = config['conv_accuracy'] if old_style else config['explore']['conv_accuracy']
max_numb_iter = config['max_numb_iter'] if old_style else config['explore']['max_numb_iter']
output_nopbc = False if old_style else config['explore']['output_nopbc']
fatal_at_max = config.get('fatal_at_max', True) if old_style else config['explore']['fatal_at_max']
scheduler = ExplorationScheduler()

Expand Down Expand Up @@ -237,6 +238,7 @@ def make_naive_exploration_scheduler(
selector = ConfSelectorLammpsFrames(
trust_level,
fp_task_max,
nopbc=output_nopbc,
)
# stage_scheduler
stage_scheduler = ConvergenceCheckStageScheduler(
Expand Down Expand Up @@ -318,7 +320,11 @@ def workflow_concurrent_learning(

type_map = config['type_map'] if old_style else config['inputs']['type_map']
numb_models = config['numb_models'] if old_style else config['train']['numb_models']
template_script = config['default_training_param'] if old_style else config['train']['template_script']
template_script_ = config['default_training_param'] if old_style else config['train']['template_script']
if isinstance(template_script_, list):
template_script = [Path(ii).read_text() for ii in template_script_]
else:
template_script = Path(template_script_).read_text()
train_config = {} if old_style else config['train']['config']
lmp_config = config.get('lmp_config', {}) if old_style else config['explore']['config']
fp_config = config.get('fp_config', {}) if old_style else {}
Expand Down
3 changes: 3 additions & 0 deletions dpgen2/exploration/selector/conf_selector_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ def __init__(
trust_level,
max_numb_sel : Optional[int] = None,
conf_filters : Optional[ConfFilters] = None,
nopbc: Optional[bool] = False,
):
self.trust_level = trust_level
self.max_numb_sel = max_numb_sel
self.conf_filters = conf_filters
self.report = TrajsExplorationReport()
self.nopbc = nopbc

def select (
self,
Expand Down Expand Up @@ -83,6 +85,7 @@ def select (
for ii in range(ntraj):
if len(id_cand_list[ii]) > 0:
ss = dpdata.System(trajs[ii], fmt=traj_fmt, type_map=type_map)
ss.nopbc = self.nopbc
ss = ss.sub_system(id_cand_list[ii])
ms.append(ss)

Expand Down
5 changes: 3 additions & 2 deletions dpgen2/fp/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@ def optional_input_files(self) -> List[str]:
def run_task(
self,
command : str,
out_name: str,
out: str,
) -> Tuple[str, str]:
r"""Defines how one FP task runs
Parameters
----------
command: str
The command of running gaussian task
out_name: str
out: str
The name of the output data file.
Returns
Expand All @@ -126,6 +126,7 @@ def run_task(
The file name of the log.
"""
# run gaussian
out_name = out
command = ' '.join([command, gaussian_input_name])
ret, out, err = run_command(command, shell=True)
if ret != 0:
Expand Down
45 changes: 45 additions & 0 deletions examples/almg/dp_template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"model" : {
"type_map": ["Al", "Mg"],
"descriptor": {
"type": "se_a",
"sel": [90, 90],
"rcut_smth": 1.80,
"rcut": 6.00,
"neuron": [25, 50, 100],
"resnet_dt": false,
"axis_neuron": 4,
"seed": 1
},
"fitting_net" : {
"neuron": [128, 128, 128],
"resnet_dt": true,
"seed": 1
}
},
"loss" : {
"start_pref_e": 0.02,
"limit_pref_e": 1,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"learning_rate" : {
"start_lr": 0.001,
"stop_lr": 1e-8,
"decay_steps": 100
},
"training" : {
"training_data": {
"systems": [],
"batch_size":"auto"
},
"numb_steps":1000,
"seed":10,
"disp_file":"lcurve.out",
"disp_freq":100,
"save_freq":1000
},
"_comment" : "all"
}
44 changes: 1 addition & 43 deletions examples/almg/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,49 +105,6 @@
"numb_models" : 4,
"config" : {},
"template_script" : {
"model" : {
"type_map": ["Al", "Mg"],
"descriptor": {
"type": "se_a",
"sel": [90, 90],
"rcut_smth": 1.80,
"rcut": 6.00,
"neuron": [25, 50, 100],
"resnet_dt": false,
"axis_neuron": 4,
"seed": 1
},
"fitting_net" : {
"neuron": [128, 128, 128],
"resnet_dt": true,
"seed": 1
}
},
"loss" : {
"start_pref_e": 0.02,
"limit_pref_e": 1,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"learning_rate" : {
"start_lr": 0.001,
"stop_lr": 1e-8,
"decay_steps": 100
},
"training" : {
"training_data": {
"systems": [],
"batch_size":"auto"
},
"numb_steps":1000,
"seed":10,
"disp_file":"lcurve.out",
"disp_freq":100,
"save_freq":1000
},
"_comment" : "all"
},
"_comment" : "all"
},
Expand All @@ -162,6 +119,7 @@
"fatal_at_max" : false,
"f_trust_lo": 0.05,
"f_trust_hi": 0.50,
"output_nopbc": false,
"configuration_prefix": null,
"configurations": [
{
Expand Down
2 changes: 2 additions & 0 deletions tests/conf/test_file_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def test_normalize(self):
"files" : "foo",
"fmt" : 'auto',
"prefix" : None,
"remove_pbc" : False,
}
out_data = FileConfGenerator.normalize_config(
in_data,
Expand All @@ -141,6 +142,7 @@ def test_normalize_1(self):
"files" : ["foo"],
"fmt" : "bar",
"prefix" : None,
"remove_pbc" : False,
}
out_data = FileConfGenerator.normalize_config(
in_data,
Expand Down
54 changes: 4 additions & 50 deletions tests/entrypoint/test_submit_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def test(self):
self.assertEqual(old_data.get('upload_python_packages', None), new_data['upload_python_packages'])
self.assertEqual(old_data['type_map'], new_data['inputs']['type_map'])
self.assertEqual(old_data['numb_models'], new_data['train']['numb_models'])
self.assertEqual(old_data['default_training_param'], new_data['train']['template_script'])
# self.assertEqual(old_data['default_training_param'], new_data['train']['template_script'])
self.assertEqual(new_data['train']['template_script'], "dp_input_template")
self.assertEqual(RunDPTrain.normalize_config({}), new_data['train']['config'])
self.assertEqual(old_data.get('lmp_config', {}), new_data['explore']['config'])
self.assertEqual(old_data.get('fp_config', {}), new_data['fp']['run_config'])
Expand Down Expand Up @@ -394,54 +395,8 @@ def test_bohrium(self):
"type" : "dp",
"numb_models" : 4,
"config" : {},
"template_script" : {
"model" : {
"type_map": ["Al", "Mg"],
"descriptor": {
"type": "se_a",
"sel": [90, 90],
"rcut_smth": 1.80,
"rcut": 6.00,
"neuron": [25, 50, 100],
"resnet_dt": false,
"axis_neuron": 4,
"seed": 1
},
"fitting_net" : {
"neuron": [128, 128, 128],
"resnet_dt": true,
"seed": 1
}
},
"loss" : {
"start_pref_e": 0.02,
"limit_pref_e": 1,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"learning_rate" : {
"start_lr": 0.001,
"stop_lr": 1e-8,
"decay_steps": 100
},
"training" : {
"training_data": {
"systems": [],
"batch_size":"auto"
},
"numb_steps":1000,
"seed":10,
"disp_file":"lcurve.out",
"disp_freq":100,
"save_freq":1000
},
"_comment" : "all"
},
"_comment" : "all"
"template_script" : "dp_input_template"
},
"explore" : {
"type" : "lmp",
"config" : {
Expand Down Expand Up @@ -522,8 +477,7 @@ def test_bohrium(self):
"type" : "dp",
"numb_models" : 4,
"config" : {},
"template_script" : {
},
"template_script" : "dp_input_template",
"_comment" : "all"
},
Expand Down

0 comments on commit 67c19d6

Please sign in to comment.