Skip to content

Commit

Permalink
support no pbc calculation (#104)
Browse files Browse the repository at this point in the history
This PR introduces the changes:
- support the `remove_pbc` option in the `FileConfGenerator` (input
confs for exploration)
- support removing the pbc from the selected configurations. (output
confs from exploration for fp calculations)
- supply training input template rather than writing it in the dpgen
input script.
- fix bugs in gaussian support (the declared arguments did not match the interface)

Co-authored-by: Han Wang <wang_han@iapcm.ac.cn>
  • Loading branch information
wanghan-iapcm and Han Wang committed Dec 25, 2022
1 parent c4acd0b commit 67c19d6
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 99 deletions.
9 changes: 8 additions & 1 deletion dpgen2/conf/file_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def __init__(
files : Union[str,List[str]],
fmt : str = 'auto',
prefix : Optional[str] = None,
remove_pbc : Optional[bool] = False,
):
if not isinstance(files, list):
assert(isinstance(files, str))
Expand All @@ -32,6 +33,7 @@ def __init__(
ff.sort()
self.files += ff
self.fmt = fmt
self.remove_pbc = remove_pbc


def generate(
Expand All @@ -40,7 +42,10 @@ def generate(
) -> dpdata.MultiSystems:
ms = dpdata.MultiSystems(type_map=type_map)
for ff in self.files:
ms.append(dpdata.System(ff, fmt=self.fmt))
ss = dpdata.System(ff, fmt=self.fmt, type_map=type_map)
if self.remove_pbc:
ss.remove_pbc()
ms.append(ss)
return ms


Expand All @@ -49,10 +54,12 @@ def args() -> List[Argument]:
doc_files = "The paths to the configuration files. Wildcards are supported."
doc_prefix = "The prefix of file paths."
doc_fmt = "The format (dpdata accepted formats) of the files."
doc_remove_pbc = "Whether to remove the pbc of the data. Shifts the coords to the center of the box so the data can be used with LAMMPS."

return [
Argument("files", [str, list], optional=False, doc=doc_files),
Argument("prefix", str, optional=True, default=None, doc=doc_prefix),
Argument("fmt", str, optional=True, default='auto', doc=doc_fmt),
Argument("remove_pbc", bool, optional=True, default=False, doc=doc_remove_pbc),
]

6 changes: 4 additions & 2 deletions dpgen2/entrypoint/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
def dp_train_args():
doc_numb_models = "Number of models trained for evaluating the model deviation"
doc_config = "Configuration of training"
doc_template_script = "Template training script. It can be a `List[Dict]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `Dict`, the models share the same template training script. "
doc_template_script = "File names of the template training script. It can be a `List[Dict]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `Dict`, the models share the same template training script. "

return [
Argument("config", dict, RunDPTrain.training_args(), optional=True, default=RunDPTrain.normalize_config({}), doc=doc_numb_models),
Argument("numb_models", int, optional=True, default=4, doc=doc_numb_models),
Argument("template_script", [list,dict], optional=False, doc=doc_template_script),
Argument("template_script", [list,str], optional=False, doc=doc_template_script),
]

def variant_train():
Expand All @@ -41,6 +41,7 @@ def lmp_args():
doc_f_trust_hi = "Higher trust level of force model deviation"
doc_v_trust_lo = "Lower trust level of virial model deviation"
doc_v_trust_hi = "Higher trust level of virial model deviation"
doc_output_nopbc = "Remove pbc of the output configurations"
doc_configuration_prefix = "The path prefix of lmp initial configurations"
doc_configuration = "A list of initial configurations."
doc_stages = "A list of exploration stages."
Expand All @@ -54,6 +55,7 @@ def lmp_args():
Argument("f_trust_hi", float, optional=False, doc=doc_f_trust_hi),
Argument("v_trust_lo", float, optional=True, default=None, doc=doc_v_trust_lo),
Argument("v_trust_hi", float, optional=True, default=None, doc=doc_v_trust_hi),
Argument("output_nopbc", bool, optional=True, default=False, doc=doc_output_nopbc),
Argument("configuration_prefix", str, optional=True, default=None, doc=doc_configuration_prefix),
Argument("configurations", list,
[], [variant_conf()],
Expand Down
8 changes: 7 additions & 1 deletion dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ def make_naive_exploration_scheduler(
fp_task_max = config['fp_task_max'] if old_style else config['fp']['task_max']
conv_accuracy = config['conv_accuracy'] if old_style else config['explore']['conv_accuracy']
max_numb_iter = config['max_numb_iter'] if old_style else config['explore']['max_numb_iter']
output_nopbc = False if old_style else config['explore']['output_nopbc']
fatal_at_max = config.get('fatal_at_max', True) if old_style else config['explore']['fatal_at_max']
scheduler = ExplorationScheduler()

Expand Down Expand Up @@ -237,6 +238,7 @@ def make_naive_exploration_scheduler(
selector = ConfSelectorLammpsFrames(
trust_level,
fp_task_max,
nopbc=output_nopbc,
)
# stage_scheduler
stage_scheduler = ConvergenceCheckStageScheduler(
Expand Down Expand Up @@ -318,7 +320,11 @@ def workflow_concurrent_learning(

type_map = config['type_map'] if old_style else config['inputs']['type_map']
numb_models = config['numb_models'] if old_style else config['train']['numb_models']
template_script = config['default_training_param'] if old_style else config['train']['template_script']
template_script_ = config['default_training_param'] if old_style else config['train']['template_script']
if isinstance(template_script_, list):
template_script = [Path(ii).read_text() for ii in template_script_]
else:
template_script = Path(template_script_).read_text()
train_config = {} if old_style else config['train']['config']
lmp_config = config.get('lmp_config', {}) if old_style else config['explore']['config']
fp_config = config.get('fp_config', {}) if old_style else {}
Expand Down
3 changes: 3 additions & 0 deletions dpgen2/exploration/selector/conf_selector_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ def __init__(
trust_level,
max_numb_sel : Optional[int] = None,
conf_filters : Optional[ConfFilters] = None,
nopbc: Optional[bool] = False,
):
self.trust_level = trust_level
self.max_numb_sel = max_numb_sel
self.conf_filters = conf_filters
self.report = TrajsExplorationReport()
self.nopbc = nopbc

def select (
self,
Expand Down Expand Up @@ -83,6 +85,7 @@ def select (
for ii in range(ntraj):
if len(id_cand_list[ii]) > 0:
ss = dpdata.System(trajs[ii], fmt=traj_fmt, type_map=type_map)
ss.nopbc = self.nopbc
ss = ss.sub_system(id_cand_list[ii])
ms.append(ss)

Expand Down
5 changes: 3 additions & 2 deletions dpgen2/fp/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@ def optional_input_files(self) -> List[str]:
def run_task(
self,
command : str,
out_name: str,
out: str,
) -> Tuple[str, str]:
r"""Defines how one FP task runs
Parameters
----------
command: str
The command of running gaussian task
out_name: str
out: str
The name of the output data file.
Returns
Expand All @@ -126,6 +126,7 @@ def run_task(
The file name of the log.
"""
# run gaussian
out_name = out
command = ' '.join([command, gaussian_input_name])
ret, out, err = run_command(command, shell=True)
if ret != 0:
Expand Down
45 changes: 45 additions & 0 deletions examples/almg/dp_template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"model" : {
"type_map": ["Al", "Mg"],
"descriptor": {
"type": "se_a",
"sel": [90, 90],
"rcut_smth": 1.80,
"rcut": 6.00,
"neuron": [25, 50, 100],
"resnet_dt": false,
"axis_neuron": 4,
"seed": 1
},
"fitting_net" : {
"neuron": [128, 128, 128],
"resnet_dt": true,
"seed": 1
}
},
"loss" : {
"start_pref_e": 0.02,
"limit_pref_e": 1,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"learning_rate" : {
"start_lr": 0.001,
"stop_lr": 1e-8,
"decay_steps": 100
},
"training" : {
"training_data": {
"systems": [],
"batch_size":"auto"
},
"numb_steps":1000,
"seed":10,
"disp_file":"lcurve.out",
"disp_freq":100,
"save_freq":1000
},
"_comment" : "all"
}
44 changes: 1 addition & 43 deletions examples/almg/input.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,49 +105,6 @@
"numb_models" : 4,
"config" : {},
"template_script" : {
"model" : {
"type_map": ["Al", "Mg"],
"descriptor": {
"type": "se_a",
"sel": [90, 90],
"rcut_smth": 1.80,
"rcut": 6.00,
"neuron": [25, 50, 100],
"resnet_dt": false,
"axis_neuron": 4,
"seed": 1
},
"fitting_net" : {
"neuron": [128, 128, 128],
"resnet_dt": true,
"seed": 1
}
},
"loss" : {
"start_pref_e": 0.02,
"limit_pref_e": 1,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"learning_rate" : {
"start_lr": 0.001,
"stop_lr": 1e-8,
"decay_steps": 100
},
"training" : {
"training_data": {
"systems": [],
"batch_size":"auto"
},
"numb_steps":1000,
"seed":10,
"disp_file":"lcurve.out",
"disp_freq":100,
"save_freq":1000
},
"_comment" : "all"
},
"_comment" : "all"
},
Expand All @@ -162,6 +119,7 @@
"fatal_at_max" : false,
"f_trust_lo": 0.05,
"f_trust_hi": 0.50,
"output_nopbc": false,
"configuration_prefix": null,
"configurations": [
{
Expand Down
2 changes: 2 additions & 0 deletions tests/conf/test_file_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ def test_normalize(self):
"files" : "foo",
"fmt" : 'auto',
"prefix" : None,
"remove_pbc" : False,
}
out_data = FileConfGenerator.normalize_config(
in_data,
Expand All @@ -141,6 +142,7 @@ def test_normalize_1(self):
"files" : ["foo"],
"fmt" : "bar",
"prefix" : None,
"remove_pbc" : False,
}
out_data = FileConfGenerator.normalize_config(
in_data,
Expand Down
54 changes: 4 additions & 50 deletions tests/entrypoint/test_submit_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def test(self):
self.assertEqual(old_data.get('upload_python_packages', None), new_data['upload_python_packages'])
self.assertEqual(old_data['type_map'], new_data['inputs']['type_map'])
self.assertEqual(old_data['numb_models'], new_data['train']['numb_models'])
self.assertEqual(old_data['default_training_param'], new_data['train']['template_script'])
# self.assertEqual(old_data['default_training_param'], new_data['train']['template_script'])
self.assertEqual(new_data['train']['template_script'], "dp_input_template")
self.assertEqual(RunDPTrain.normalize_config({}), new_data['train']['config'])
self.assertEqual(old_data.get('lmp_config', {}), new_data['explore']['config'])
self.assertEqual(old_data.get('fp_config', {}), new_data['fp']['run_config'])
Expand Down Expand Up @@ -394,54 +395,8 @@ def test_bohrium(self):
"type" : "dp",
"numb_models" : 4,
"config" : {},
"template_script" : {
"model" : {
"type_map": ["Al", "Mg"],
"descriptor": {
"type": "se_a",
"sel": [90, 90],
"rcut_smth": 1.80,
"rcut": 6.00,
"neuron": [25, 50, 100],
"resnet_dt": false,
"axis_neuron": 4,
"seed": 1
},
"fitting_net" : {
"neuron": [128, 128, 128],
"resnet_dt": true,
"seed": 1
}
},
"loss" : {
"start_pref_e": 0.02,
"limit_pref_e": 1,
"start_pref_f": 1000,
"limit_pref_f": 1,
"start_pref_v": 0,
"limit_pref_v": 0
},
"learning_rate" : {
"start_lr": 0.001,
"stop_lr": 1e-8,
"decay_steps": 100
},
"training" : {
"training_data": {
"systems": [],
"batch_size":"auto"
},
"numb_steps":1000,
"seed":10,
"disp_file":"lcurve.out",
"disp_freq":100,
"save_freq":1000
},
"_comment" : "all"
},
"_comment" : "all"
"template_script" : "dp_input_template"
},
"explore" : {
"type" : "lmp",
"config" : {
Expand Down Expand Up @@ -522,8 +477,7 @@ def test_bohrium(self):
"type" : "dp",
"numb_models" : 4,
"config" : {},
"template_script" : {
},
"template_script" : "dp_input_template",
"_comment" : "all"
},
Expand Down

0 comments on commit 67c19d6

Please sign in to comment.