Skip to content

Commit

Permalink
Add wf name to config; Better error format (#166)
Browse files: browse the repository at this point in the history
Signed-off-by: zjgemi <liuxin_zijian@163.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
  • Loading branch information
3 people committed Sep 29, 2023
1 parent ee4b2a9 commit 66a5acc
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 26 deletions.
2 changes: 2 additions & 0 deletions dpgen2/entrypoint/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@ def submit_args(default_step_config=normalize_step_dict({})):
doc_train = "The configuration for training"
doc_explore = "The configuration for exploration"
doc_fp = "The configuration for FP"
doc_name = "The workflow name, 'dpgen' for default"

return (
dflow_conf_args()
Expand Down Expand Up @@ -479,6 +480,7 @@ def submit_args(default_step_config=normalize_step_dict({})):
doc=doc_explore,
),
Argument("fp", dict, [], [variant_fp()], optional=False, doc=doc_fp),
Argument("name", str, optional=True, default="dpgen", doc=doc_name),
]
)

Expand Down
2 changes: 1 addition & 1 deletion dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ def submit_concurrent_learning(
# wf_config["inputs"]["do_finetune"] = False
# finetune will not be done again if the old process is reused.

wf = Workflow(name="dpgen")
wf = Workflow(name=wf_config["name"])

if wf_config["inputs"].get("do_finetune", False):
assert finetune_step is not None
Expand Down
21 changes: 14 additions & 7 deletions dpgen2/exploration/task/customized_lmp_template_task_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,21 @@ def _make_customized_task_group(
# run shell command with dpgen2.utils.run_command
ret, out, err = run_command(ss, shell=True)
if ret != 0:
logging.error(
"".join(
(
f"customized shell command {ss} failed with return code {ret}\n",
"out msg",
out,
"\n",
"err msg: ",
err,
"\n",
)
)
)
raise FatalError(
f"customized shell command {ss} failed with return code {ret}\n",
"out msg",
out,
"\n",
"err msg",
err,
"\n",
f"customized shell command {ss} failed with return code {ret}"
)
# loop over all pattern matched result dirs
for ff in [
Expand Down
16 changes: 14 additions & 2 deletions dpgen2/fp/gaussian.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Prep and Run Gaussian tasks."""
import logging
from typing import (
Any,
List,
Expand Down Expand Up @@ -155,9 +156,20 @@ def run_task(
command = " ".join([command, gaussian_input_name])
ret, out, err = run_command(command, shell=True)
if ret != 0:
raise TransientError(
"gaussian failed\n", "out msg", out, "\n", "err msg", err, "\n"
logging.error(
"".join(
(
"gaussian failed\n",
"out msg: ",
out,
"\n",
"err msg: ",
err,
"\n",
)
)
)
raise TransientError("gaussian failed")
# convert the output to deepmd/npy format
sys = dpdata.LabeledSystem(gaussian_output_name, fmt="gaussian/log")
sys.to("deepmd/npy", out_name)
Expand Down
8 changes: 6 additions & 2 deletions dpgen2/fp/vasp.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from pathlib import (
Path,
)
Expand Down Expand Up @@ -132,9 +133,12 @@ def run_task(
command = " ".join([command, ">", log_name])
ret, out, err = run_command(command, shell=True)
if ret != 0:
raise TransientError(
"vasp failed\n", "out msg", out, "\n", "err msg", err, "\n"
logging.error(
"".join(
("vasp failed\n", "out msg: ", out, "\n", "err msg: ", err, "\n")
)
)
raise TransientError("vasp failed")
# convert the output to deepmd/npy format
sys = dpdata.LabeledSystem("OUTCAR")
sys.to("deepmd/npy", out_name)
Expand Down
31 changes: 27 additions & 4 deletions dpgen2/op/run_dp_train.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import glob
import json
import logging
import os
import shutil
from pathlib import (
Expand Down Expand Up @@ -207,9 +208,20 @@ def clean_before_quit():
ret, out, err = run_command(command)
if ret != 0:
clean_before_quit()
raise FatalError(
"dp train failed\n", "out msg", out, "\n", "err msg", err, "\n"
logging.error(
"".join(
(
"dp train failed\n",
"out msg: ",
out,
"\n",
"err msg: ",
err,
"\n",
)
)
)
raise FatalError("dp train failed")
fplog.write("#=================== train std out ===================\n")
fplog.write(out)
fplog.write("#=================== train std err ===================\n")
Expand All @@ -222,9 +234,20 @@ def clean_before_quit():
ret, out, err = run_command(["dp", "freeze", "-o", "frozen_model.pb"])
if ret != 0:
clean_before_quit()
raise FatalError(
"dp freeze failed\n", "out msg", out, "\n", "err msg", err, "\n"
logging.error(
"".join(
(
"dp freeze failed\n",
"out msg: ",
out,
"\n",
"err msg: ",
err,
"\n",
)
)
)
raise FatalError("dp freeze failed")
fplog.write("#=================== freeze std out ===================\n")
fplog.write(out)
fplog.write("#=================== freeze std err ===================\n")
Expand Down
26 changes: 16 additions & 10 deletions dpgen2/op/run_lmp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
import os
import random
import re
Expand Down Expand Up @@ -151,17 +152,22 @@ def execute(
command = " ".join([command, "-i", lmp_input_name, "-log", lmp_log_name])
ret, out, err = run_command(command, shell=True)
if ret != 0:
raise TransientError(
"lmp failed\n",
"command was",
command,
"out msg",
out,
"\n",
"err msg",
err,
"\n",
logging.error(
"".join(
(
"lmp failed\n",
"command was: ",
command,
"out msg: ",
out,
"\n",
"err msg: ",
err,
"\n",
)
)
)
raise TransientError("lmp failed")

ret_dict = {
"log": work_dir / lmp_log_name,
Expand Down

0 comments on commit 66a5acc

Please sign in to comment.