Skip to content

Commit

Permalink
Fix run caly model devi (#197)
Browse files Browse the repository at this point in the history
CALYPSO proposes structures with different chemical formulas when
running in `VSC` mode. However, dpdata cannot parse a dump file containing
frames with different chemical formulas. To resolve this issue, frames
are written to separate dump files, grouped by the number of atoms in
each frame.
  • Loading branch information
wangzyphysics committed Mar 12, 2024
1 parent 00d3f5e commit b81a195
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 50 deletions.
40 changes: 28 additions & 12 deletions dpgen2/op/collect_run_caly.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def get_input_sign(cls):
"opt_results_dir": Artifact(
type=Path, optional=True
), # dir contains POSCAR* CONTCAR* OUTCAR*
"qhull_input": Artifact(type=Path, optional=True), # for vsc
}
)

Expand All @@ -82,6 +83,7 @@ def get_output_sign(cls):
"input_file": Artifact(Path), # input.dat
"results": Artifact(Path), # calypso generated results
"step": Artifact(Path), # step
"qhull_input": Artifact(Path),
}
)

Expand All @@ -104,6 +106,7 @@ def execute(
- `step`: (`Path`) The step file from last calypso run
- `results`: (`Path`) The results dir from last calypso run
- `opt_results_dir`: (`Path`) The results dir contains POSCAR* CONTCAR* OUTCAR* from last calypso run
- `qhull_input`: (`Path`) qhull input file `test_qconvex.in`
Returns
-------
Expand All @@ -115,6 +118,7 @@ def execute(
- `input_file`: (`Path`) The input file of the task (input.dat).
- `step`: (`Path`) The step file.
- `results`: (`Path`) The results dir.
- `qhull_input`: (`Path`) qhull input file.
Raises
------
Expand All @@ -129,7 +133,7 @@ def execute(
# input.dat
_input_file = ip["input_file"]
input_file = _input_file.resolve()
max_step = get_max_step(input_file)
max_step, vsc = get_value_from_inputdat(input_file)
# work_dir name: calypso_task.idx
work_dir = Path(ip["task_name"])

Expand All @@ -142,10 +146,15 @@ def execute(
if ip["opt_results_dir"] is not None
else ip["opt_results_dir"]
)
qhull_input = (
ip["qhull_input"].resolve()
if ip["qhull_input"] is not None
else ip["qhull_input"]
)

with set_directory(work_dir):
# prep files/dirs from last calypso run
prep_last_calypso_file(step, results, opt_results_dir)
prep_last_calypso_file(step, results, opt_results_dir, qhull_input, vsc)
# copy input.dat
Path(input_file.name).symlink_to(input_file)
# run calypso
Expand Down Expand Up @@ -177,21 +186,19 @@ def execute(

step = Path("step").read_text().strip()
finished = "true" if int(cnt_num) == int(max_step) else "false"
# poscar_dir = "poscar_dir_none" if not finished else poscar_dir
# fake_traj = Path("traj_results_dir")
# fake_traj.mkdir(parents=True, exist_ok=True)

if not Path("test_qconvex.in").exists():
Path("test_qconvex.in").write_text("")

ret_dict = {
"task_name": str(work_dir),
"finished": finished,
"poscar_dir": work_dir.joinpath(poscar_dir),
# "input_file": ip["input_file"],
"input_file": _input_file,
"step": work_dir.joinpath("step"),
"results": work_dir.joinpath("results"),
# "fake_traj_results_dir": work_dir.joinpath(fake_traj),
"qhull_input": work_dir.joinpath("test_qconvex.in"),
}

return OPIO(ret_dict)

@staticmethod
Expand Down Expand Up @@ -219,19 +226,28 @@ def normalize_config(data={}):
config_args = CollRunCaly.calypso_args


def prep_last_calypso_file(step, results, opt_results_dir, qhull_input, vsc):
    """Symlink the files left by the previous CALYPSO run into the current directory.

    Every link is created in the current working directory and is named after
    the last path component of its target.

    Parameters
    ----------
    step : Path or None
        The step file from the last run.
    results : Path or None
        The results directory from the last run.
    opt_results_dir : Path or None
        Directory whose entries are each linked individually.
    qhull_input : Path or None
        The qhull input file; linked only when `vsc` is true.
    vsc : bool
        Whether the `VSC` flag was enabled in the input file.

    Raises
    ------
    FileExistsError
        If a link name already exists in the current directory
        (raised by `Path.symlink_to`).
    """
    # Each input is checked on its own.  The previous combined guard
    # (`step and results or opt_results_dir`) dereferenced a None path
    # whenever only part of the inputs were supplied.
    if step is not None:
        Path(step.name).symlink_to(step)
    if results is not None:
        Path(results.name).symlink_to(results)
    if opt_results_dir is not None:
        for file_name in opt_results_dir.iterdir():
            Path(file_name.name).symlink_to(file_name)

    if vsc and qhull_input is not None:
        Path(qhull_input.name).symlink_to(qhull_input)


def get_value_from_inputdat(filename):
    """Read the `MaxStep` and `VSC` settings from an input file.

    Lines are expected in ``key = value`` form; anything after a ``#`` is
    treated as a comment and ignored.

    Parameters
    ----------
    filename : str or Path
        Path of the input file (input.dat).

    Returns
    -------
    max_step : int
        Value of the `MaxStep` key, or 0 when the key is absent.
    vsc : bool
        True when a `VSC` key has a value starting with "t" or "T",
        otherwise False.

    Raises
    ------
    ValueError
        If the `MaxStep` value cannot be converted to an integer.
    """
    max_step = 0
    vsc = False
    with open(filename, "r") as f:
        for line in f:
            # Strip the comment before looking at the key, so that a key name
            # mentioned only inside a comment is never parsed as a setting.
            fields = line.split("#", 1)[0].split("=")
            if len(fields) < 2:
                continue
            key = fields[0].strip()
            value = fields[1].strip()
            # Compare the whole key: a substring test would also pick up
            # longer keys that merely contain "VSC" or "MaxStep".
            if key == "MaxStep":
                max_step = int(value)
            elif key == "VSC" and value.lower().startswith("t"):
                vsc = True
    return max_step, vsc
73 changes: 52 additions & 21 deletions dpgen2/op/run_caly_model_devi.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from collections import (
defaultdict,
)
from pathlib import (
Path,
)
Expand Down Expand Up @@ -45,8 +48,8 @@ def get_output_sign(cls):
return OPIOSign(
{
"task_name": Parameter(str),
"traj": Artifact(Path),
"model_devi": Artifact(Path),
"traj": Artifact(List[Path]),
"model_devi": Artifact(List[Path]),
}
)

Expand All @@ -71,8 +74,8 @@ def execute(
Any
Output dict with components:
- `task_name`: (`str`) The name of task.
- `traj`: (`Artifact(Path)`) The output trajectory.
- `model_devi`: (`Artifact(Path)`) The model deviation. The order of recorded model deviations should be consistent with the order of frames in `traj`.
- `traj`: (`Artifact(List[Path])`) The output trajectory.
- `model_devi`: (`Artifact(List[Path])`) The model deviation. The order of recorded model deviations should be consistent with the order of frames in `traj`.
"""

Expand All @@ -92,45 +95,70 @@ def execute(
traj_dirs = ip["traj_dirs"]
traj_dirs = [traj_dir.resolve() for traj_dir in traj_dirs]

dump_file_name = "traj.dump"
model_devi_file_name = "model_devi.out"
dump_file_name = "traj.%d.dump"
model_devi_file_name = "model_devi.%d.out"

Devis = []
tcount = 0
with set_directory(work_dir):
dump_file = Path().joinpath(dump_file_name)
model_devi_file = Path().joinpath(model_devi_file_name)
f = open(dump_file, "a")
dump_str_dict = defaultdict(list) # key: natoms, value: dump_strs
devis_dict = defaultdict(list) # key: natoms, value: Devis-s
for traj_dir in traj_dirs:
for traj_name in traj_dir.rglob("*.traj"):
atoms_list = parse_traj(traj_name)
if atoms_list is None:
continue
for atoms in atoms_list:
dump_str = atoms2lmpdump(atoms, tcount, type_map)
f.write(dump_str)
natoms = len(atoms)
dump_str = atoms2lmpdump(atoms, tcount, type_map, ignore=True)
dump_str_dict[natoms].append(dump_str)

pbc = np.all(atoms.get_pbc())
coord = atoms.get_positions().reshape(1, -1)
cell = atoms.get_cell().array.reshape(1, -1) if pbc else None
atype = [type_map.index(atom.symbol) for atom in atoms] # type: ignore
devi = calc_model_devi(coord, cell, atype, graphs)[0]
devi[0] = tcount
Devis.append(devi)
devis_dict[natoms].append(devi)
tcount += 1
f.close()
Devis = np.vstack(Devis)
write_model_devi_out(Devis, model_devi_file)

traj_file_list = []
model_devi_file_list = []
keys = dump_str_dict.keys()
for key in keys:
dump_file = Path().joinpath(dump_file_name % key)
model_devi_file = Path().joinpath(model_devi_file_name % key)

traj_str = dump_str_dict[key]
model_devis = devis_dict[key]
assert len(traj_str) == len(
model_devis
), "The length of traj_str and model_devis should be same."
for idx in range(len(model_devis)):
traj_str[idx] = traj_str[idx] % idx
model_devis[idx][0] = idx

traj_str = "".join(traj_str)
dump_file.write_text(traj_str)

model_devis = np.vstack(model_devis)
write_model_devi_out(model_devis, model_devi_file)

traj_file_list.append(dump_file)
model_devi_file_list.append(model_devi_file)

for idx in range(len(traj_file_list)):
traj_file_list[idx] = work_dir / traj_file_list[idx]
model_devi_file_list[idx] = work_dir / model_devi_file_list[idx]

ret_dict = {
"task_name": str(work_dir),
"traj": work_dir / dump_file,
"model_devi": work_dir / model_devi_file,
"traj": traj_file_list,
"model_devi": model_devi_file_list,
}

return OPIO(ret_dict)


def atoms2lmpdump(atoms, struc_idx, type_map):
def atoms2lmpdump(atoms, struc_idx, type_map, ignore=False):
"""down triangle cell can be obtained from
cell params: a, b, c, alpha, beta, gamma.
cell = cellpar_to_cell([a, b, c, alpha, beta, gamma])
Expand All @@ -154,7 +182,10 @@ def atoms2lmpdump(atoms, struc_idx, type_map):
)

dump_str = "ITEM: TIMESTEP\n"
dump_str += f"{struc_idx}\n"
if not ignore:
dump_str += f"{struc_idx}\n"
else:
dump_str += "%d\n"
dump_str += "ITEM: NUMBER OF ATOMS\n"
dump_str += f"{atoms.get_global_number_of_atoms()}\n"

Expand Down
7 changes: 5 additions & 2 deletions dpgen2/superop/caly_evo_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def __init__(
"results": InputArtifact(optional=True),
"step": InputArtifact(optional=True),
"opt_results_dir": InputArtifact(optional=True),
"qhull_input": InputArtifact(optional=True),
}
self._output_parameters = {
# "task_name": OutputParameter(),
Expand Down Expand Up @@ -177,6 +178,7 @@ def _caly_evo_step(
"step": caly_evo_step_steps.inputs.artifacts["step"],
"results": caly_evo_step_steps.inputs.artifacts["results"],
"opt_results_dir": caly_evo_step_steps.inputs.artifacts["opt_results_dir"],
"qhull_input": caly_evo_step_steps.inputs.artifacts["qhull_input"],
},
key="%s--collect-run-calypso-%s-%s"
% (
Expand Down Expand Up @@ -245,13 +247,14 @@ def _caly_evo_step(
], # input.dat
"results": collect_run_calypso.outputs.artifacts["results"],
"step": collect_run_calypso.outputs.artifacts["step"],
"qhull_input": collect_run_calypso.outputs.artifacts["qhull_input"],
"opt_results_dir": prep_run_dp_optim.outputs.artifacts["optim_results_dir"],
"caly_run_opt_file": prep_run_dp_optim.outputs.artifacts[
"caly_run_opt_file"
], # input.dat
],
"caly_check_opt_file": prep_run_dp_optim.outputs.artifacts[
"caly_check_opt_file"
], # input.dat
],
},
when="%s == false" % (collect_run_calypso.outputs.parameters["finished"]),
)
Expand Down
1 change: 1 addition & 0 deletions dpgen2/superop/prep_run_calypso.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def _prep_run_caly(
"results": temp_value,
"step": temp_value,
"opt_results_dir": temp_value,
"qhull_input": temp_value,
},
key=step_keys["caly-evo-step-{{item}}"],
with_sequence=argo_sequence(
Expand Down
13 changes: 11 additions & 2 deletions tests/mocked_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,11 @@ def execute(
work_dir = Path(ip["task_name"])
work_dir.mkdir(exist_ok=True, parents=True)

qhull_input = (
ip["qhull_input"].resolve()
if ip["qhull_input"] is not None
else ip["qhull_input"]
)
step = ip["step"].resolve() if ip["step"] is not None else ip["step"]
results = (
ip["results"].resolve() if ip["results"] is not None else ip["results"]
Expand Down Expand Up @@ -999,6 +1004,9 @@ def execute(
step_num = Path("step").read_text().strip()
Path("step").write_text(f"{int(step_num)+1}")

if qhull_input is None:
Path("test_qconvex.in").write_text("")

step_num = int(Path("step").read_text().strip())

if results is None:
Expand Down Expand Up @@ -1031,6 +1039,7 @@ def execute(
"input_file": work_dir.joinpath(input_file.name),
"results": work_dir.joinpath("results"),
"step": work_dir.joinpath("step"),
"qhull_input": work_dir.joinpath("test_qconvex.in"),
}
return OPIO(ret_dict)

Expand Down Expand Up @@ -1157,7 +1166,7 @@ def execute(
return OPIO(
{
"task_name": str(work_dir),
"traj": work_dir / dump_file_name,
"model_devi": work_dir / model_devi_file_name,
"traj": [work_dir / dump_file_name],
"model_devi": [work_dir / model_devi_file_name],
}
)
12 changes: 8 additions & 4 deletions tests/op/test_collect_run_caly.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
calypso_input_file,
calypso_log_name,
)
from dpgen2.op.collect_run_caly import CollRunCaly, get_max_step
from dpgen2.op.collect_run_caly import CollRunCaly, get_value_from_inputdat
from dpgen2.utils import (
BinaryFileInput,
)
Expand All @@ -44,7 +44,7 @@ def setUp(self):
self.input_file_path = Path("input_file")
self.input_file_path.mkdir(parents=True, exist_ok=True)
self.input_file = self.input_file_path.joinpath(calypso_input_file)
self.input_file.write_text("input.dat\nMaxStep=3\n")
self.input_file.write_text("input.dat\nMaxStep=3\nVSC= T\n")

self.step_file = self.input_file_path.joinpath("step")
self.step_file.write_text("3")
Expand All @@ -69,12 +69,15 @@ def tearDown(self):
shutil.rmtree(Path(self.task_name), ignore_errors=True)

def test_get_max_step(self):
max_step = get_max_step(self.input_file)
max_step, vsc = get_value_from_inputdat(self.input_file)
self.assertTrue(max_step == 3)
self.assertTrue(vsc == True)

temp_input_file = self.input_file_path.joinpath("temp_input_dat")
temp_input_file.write_text("input.dat\n")
self.assertRaises(ValueError, get_max_step, temp_input_file)
max_step, vsc = get_value_from_inputdat(temp_input_file)
self.assertTrue(max_step == 0)
self.assertTrue(vsc == False)

@patch("dpgen2.op.collect_run_caly.run_command")
def test_step_st_maxstep_01(self, mocked_run):
Expand Down Expand Up @@ -109,6 +112,7 @@ def side_effect(*args, **kwargs):
self.assertEqual(out["input_file"], self.input_file)
self.assertEqual(out["step"], Path(self.task_name) / "step")
self.assertEqual(out["results"], Path(self.task_name) / "results")
self.assertEqual(out["qhull_input"], Path(self.task_name) / "test_qconvex.in")
self.assertEqual(out["finished"], "false")

@patch("dpgen2.op.collect_run_caly.run_command")
Expand Down

0 comments on commit b81a195

Please sign in to comment.