support finetune on pretrained models (#152)
WIP, building unittests.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
HuangJiameng and pre-commit-ci[bot] committed May 22, 2023
1 parent b816df3 commit 64363df
Showing 23 changed files with 707 additions and 27 deletions.
15 changes: 15 additions & 0 deletions dpgen2/entrypoint/args.py
@@ -1,3 +1,5 @@
import textwrap

import dargs
from dargs import (
Argument,
@@ -232,6 +234,16 @@ def input_args():
doc_type_map = 'The type map. e.g. ["Al", "Mg"]. Al and Mg will have type 0 and 1, respectively.'
doc_mass_map = "The mass map. e.g. [27., 24.]. Al and Mg will be set with mass 27. and 24. amu, respectively."
doc_mixed_type = "Use `deepmd/npy/mixed` format for storing training data."
doc_do_finetune = """Finetune the pretrained model before the first iteration. If set to True, an additional
finetune-step, based on a branch of "PrepRunDPTrain", is inserted before the dpgen_step. In the
finetune-step the internal flag finetune_mode is set to "finetune", so the SuperOP "PrepRunDPTrain"
acts as a "Finetune" operator: the pretrained model is finetuned in the train step and the training
template is updated afterwards. In the subsequent normal dpgen-step, the internal flag finetune_mode
is set to "train-init", which means --init-frz-model is used to train from the models of the
previous iteration. By default "do_finetune" is False and finetune_mode is "no", so nothing
related to finetuning is done.
"""
doc_do_finetune = textwrap.dedent(doc_do_finetune)
doc_init_data_prefix = "The prefix of initial data systems"
doc_init_sys = "The initial data systems"

@@ -246,6 +258,9 @@ def input_args():
doc=doc_init_data_prefix,
),
Argument("mixed_type", bool, optional=True, default=False, doc=doc_mixed_type),
Argument(
"do_finetune", bool, optional=True, default=False, doc=doc_do_finetune
),
Argument(
"init_data_sys", [list, str], optional=False, default=None, doc=doc_init_sys
),
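For reference, a minimal sketch of how this new option might appear in the "inputs" section of a dpgen2 input, written here as a Python dict; only "do_finetune" is introduced by this commit, and the data paths and "init_data_prefix" value are illustrative assumptions:

# hypothetical "inputs" excerpt; values are placeholders
inputs = {
    "type_map": ["Al", "Mg"],
    "mass_map": [27.0, 24.0],
    "mixed_type": False,
    "do_finetune": True,  # adds the finetune-step before the dpgen-step
    "init_data_prefix": "",  # assumed value
    "init_data_sys": ["path/to/init/system"],  # illustrative path
}

With "do_finetune" left at its default of False, the workflow is unchanged and finetune_mode stays "no".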
89 changes: 85 additions & 4 deletions dpgen2/entrypoint/submit.py
@@ -11,6 +11,8 @@
Dict,
List,
Optional,
Tuple,
Type,
Union,
)

@@ -305,14 +307,60 @@ def get_kspacing_kgamma_from_incar(

def make_optional_parameter(
mixed_type=False,
finetune_mode="no",
):
return {"data_mixed_type": mixed_type}
return {"data_mixed_type": mixed_type, "finetune_mode": finetune_mode}


def make_finetune_step(
config,
prep_train_config,
run_train_config,
upload_python_packages,
numb_models,
template_script,
train_config,
init_models,
init_data,
iter_data,
):
finetune_optional_parameter = {
"mixed_type": config["inputs"]["mixed_type"],
"finetune_mode": "finetune",
}

finetune_op = PrepRunDPTrain(
"finetune",
PrepDPTrain,
RunDPTrain,
prep_config=prep_train_config,
run_config=run_train_config,
upload_python_packages=upload_python_packages,
finetune=True,
)
finetune_step = Step(
"finetune-step",
template=finetune_op,
parameters={
"block_id": "finetune",
"numb_models": numb_models,
"template_script": template_script,
"train_config": train_config,
"run_optional_parameter": finetune_optional_parameter,
},
artifacts={
"init_models": init_models,
"init_data": init_data,
"iter_data": iter_data,
},
)
return finetune_step


def workflow_concurrent_learning(
config: Dict,
old_style: bool = False,
):
) -> Tuple[Step, Optional[Step]]:
default_config = (
normalize_step_dict(config.get("default_config", {}))
if old_style
@@ -493,10 +541,34 @@ def workflow_concurrent_learning(
else:
init_models = None

finetune_step = None
optional_parameter = make_optional_parameter(
config["inputs"]["mixed_type"],
)

if config["inputs"].get("do_finetune", False):

finetune_step = make_finetune_step(
config,
prep_train_config,
run_train_config,
upload_python_packages,
numb_models,
template_script,
train_config,
init_models,
init_data,
iter_data,
)

init_models = finetune_step.outputs.artifacts["models"]
template_script = finetune_step.outputs.parameters["template_script"]

optional_parameter = make_optional_parameter(
config["inputs"]["mixed_type"],
finetune_mode="train-init",
)

# here the scheduler is passed as input parameter to the concurrent_learning_op
dpgen_step = Step(
"dpgen-step",
@@ -517,7 +589,7 @@ def workflow_concurrent_learning(
"iter_data": iter_data,
},
)
return dpgen_step
return dpgen_step, finetune_step


def get_scheduler_ids(
@@ -603,7 +675,9 @@ def submit_concurrent_learning(

global_config_workflow(wf_config)

dpgen_step = workflow_concurrent_learning(wf_config, old_style=old_style)
dpgen_step, finetune_step = workflow_concurrent_learning(
wf_config, old_style=old_style
)

if reuse_step is not None and replace_scheduler:
scheduler_new = copy.deepcopy(
@@ -639,8 +713,15 @@ def submit_concurrent_learning(
"conf_selector",
selector,
)
wf_config["inputs"]["do_finetune"] = False
# finetune will not be done again if the old process is reused.

wf = Workflow(name="dpgen")

if wf_config["inputs"].get("do_finetune", False):
assert finetune_step is not None
wf.add(finetune_step)

wf.add(dpgen_step)

# for debug purpose, we may not really submit the wf
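To summarize the branching above, here is a sketch (not part of the diff) of the optional parameter each step ends up with:

# no finetuning (default): the dpgen-step receives
make_optional_parameter(config["inputs"]["mixed_type"])
#   -> {"data_mixed_type": ..., "finetune_mode": "no"}
# with do_finetune=True: the finetune-step gets the run-level parameter built in
# make_finetune_step, {"mixed_type": ..., "finetune_mode": "finetune"},
# and the dpgen-step that follows receives
make_optional_parameter(config["inputs"]["mixed_type"], finetune_mode="train-init")
#   -> {"data_mixed_type": ..., "finetune_mode": "train-init"}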
2 changes: 1 addition & 1 deletion dpgen2/exploration/task/conf_sampling_task_group.py
@@ -56,7 +56,7 @@ def set_conf(

def _sample_confs(
self,
):
) -> list:
confs = []
for ii in range(self.n_sample):
if len(self.conf_queue) == 0:
4 changes: 2 additions & 2 deletions dpgen2/exploration/task/lmp_template_task_group.py
@@ -81,7 +81,7 @@ def make_task(
templates.append(self.plm_template)
conts = self.make_cont(templates, self.revisions)
nconts = len(conts[0])
for cc, ii in itertools.product(confs, range(nconts)):
for cc, ii in itertools.product(confs, range(nconts)): # type: ignore
if not self.plm_set:
self.add_task(self._make_lmp_task(cc, conts[0][ii]))
else:
@@ -165,7 +165,7 @@ def revise_lmp_input_plm(lmp_lines, in_plm, out_plm="output.plumed"):


def revise_by_keys(lmp_lines, keys, values):
for kk, vv in zip(keys, values):
for kk, vv in zip(keys, values): # type: ignore
for ii in range(len(lmp_lines)):
lmp_lines[ii] = lmp_lines[ii].replace(kk, str(vv))
return lmp_lines
2 changes: 1 addition & 1 deletion dpgen2/exploration/task/npt_task_group.py
@@ -95,7 +95,7 @@ def make_task(
# clear all existing tasks
self.clear()
confs = self._sample_confs()
for cc, tt, pp in itertools.product(confs, self.temps, self.press):
for cc, tt, pp in itertools.product(confs, self.temps, self.press): # type: ignore
self.add_task(self._make_lmp_task(cc, tt, pp))
return self

2 changes: 2 additions & 0 deletions dpgen2/flow/dpgen_loop.py
@@ -66,12 +66,14 @@

cl_default_optional_parameter = {
"data_mixed_type": False,
"finetune_mode": "no",
}


def make_block_optional_parameter(cl_optional_parameter):
return {
"data_mixed_type": cl_optional_parameter["data_mixed_type"],
"finetune_mode": cl_optional_parameter["finetune_mode"],
}


2 changes: 1 addition & 1 deletion dpgen2/fp/vasp_input.py
@@ -117,7 +117,7 @@ def make_kspacing_kpoints(box, kspacing, kgamma):
rbox = _reciprocal_box(box)
kpoints = [
max(1, (np.ceil(2 * np.pi * np.linalg.norm(ii) / ks).astype(int)))
for ii, ks in zip(rbox, kspacing)
for ii, ks in zip(rbox, kspacing) # type: ignore
]
ret = _make_vasp_kpoints(kpoints, kgamma)
return ret
25 changes: 22 additions & 3 deletions dpgen2/op/run_dp_train.py
@@ -51,6 +51,7 @@ class RunDPTrain(OP):

default_optional_parameter = {
"mixed_type": False,
"finetune_mode": "no",
}

@classmethod
@@ -115,6 +116,7 @@ def execute(
On the failure of training or freezing. Human intervention needed.
"""
mixed_type = ip["optional_parameter"]["mixed_type"]
finetune_mode = ip["optional_parameter"]["finetune_mode"]
config = ip["config"] if ip["config"] is not None else {}
config = RunDPTrain.normalize_config(config)
task_name = ip["task_name"]
@@ -159,7 +161,9 @@ def execute(
train_dict, config, do_init_model, major_version
)

if RunDPTrain.skip_training(work_dir, train_dict, init_model, iter_data):
if RunDPTrain.skip_training(
work_dir, train_dict, init_model, iter_data, finetune_mode
):
return OPIO(
{
"script": work_dir / train_script_name,
@@ -181,14 +185,22 @@ def clean_before_quit():
json.dump(train_dict, fp, indent=4)

# train model
if do_init_model:
if do_init_model or finetune_mode == "train-init":
command = [
"dp",
"train",
"--init-frz-model",
str(init_model),
train_script_name,
]
elif finetune_mode == "finetune":
command = [
"dp",
"train",
train_script_name,
"--finetune",
str(init_model),
]
else:
command = ["dp", "train", train_script_name]
ret, out, err = run_command(command)
@@ -202,6 +214,9 @@ def clean_before_quit():
fplog.write("#=================== train std err ===================\n")
fplog.write(err)

if finetune_mode == "finetune" and os.path.exists("input_v2_compat.json"):
shutil.copy2("input_v2_compat.json", train_script_name)

# freeze model
ret, out, err = run_command(["dp", "freeze", "-o", "frozen_model.pb"])
if ret != 0:
@@ -280,8 +295,13 @@ def skip_training(
train_dict,
init_model,
iter_data,
finetune_mode,
):
# we have init model and no iter data, skip training
if finetune_mode is not None and (
finetune_mode == "train-init" or finetune_mode == "finetune"
):
return False
if (init_model is not None) and (iter_data is None or len(iter_data) == 0):
with set_directory(work_dir):
with open(train_script_name, "w") as fp:
@@ -345,7 +365,6 @@ def training_args():
doc_init_model_start_pref_v = (
"The start virial prefactor in loss when init-model"
)

return [
Argument(
"init_model_policy",
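For clarity, a sketch (not in the diff) of the dp train command that execute builds for each value of finetune_mode, assuming init_model points to the pretrained or previously frozen model and train_script_name is the generated training script:

# finetune_mode == "finetune": finetune from the pretrained model
command = ["dp", "train", train_script_name, "--finetune", str(init_model)]
# finetune_mode == "train-init" (or do_init_model is True): continue from a frozen model
command = ["dp", "train", "--init-frz-model", str(init_model), train_script_name]
# otherwise: train from scratch
command = ["dp", "train", train_script_name]

Note also that skip_training now always returns False when finetune_mode is "finetune" or "train-init", so these branches are never skipped.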
2 changes: 1 addition & 1 deletion dpgen2/op/select_confs.py
@@ -107,7 +107,7 @@ def validate_trajs(
)
rett = []
retm = []
for tt, mm in zip(trajs, model_devis):
for tt, mm in zip(trajs, model_devis): # type: ignore
if (tt is None and mm is not None) or (tt is not None and mm is None):
raise FatalError("trajs frame is {tt} while model_devis frame is {mm}")
elif tt is not None and mm is not None:
2 changes: 2 additions & 0 deletions dpgen2/superop/block.py
@@ -60,6 +60,7 @@

block_default_optional_parameter = {
"data_mixed_type": False,
"finetune_mode": "no",
}


@@ -72,6 +73,7 @@ def make_collect_data_optional_parameter(block_optional_parameter):
def make_run_dp_train_optional_parameter(block_optional_parameter):
return {
"mixed_type": block_optional_parameter["data_mixed_type"],
"finetune_mode": block_optional_parameter["finetune_mode"],
}


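Putting the plumbing together, a sketch (not part of the commit) of how finetune_mode is threaded from the concurrent-learning level down to RunDPTrain:

# concurrent-learning level (dpgen_loop.py)
cl_optional_parameter = {"data_mixed_type": False, "finetune_mode": "train-init"}
# block level (dpgen_loop.py)
block_optional_parameter = make_block_optional_parameter(cl_optional_parameter)
#   -> {"data_mixed_type": False, "finetune_mode": "train-init"}
# run level, consumed by RunDPTrain.execute (block.py)
run_optional_parameter = make_run_dp_train_optional_parameter(block_optional_parameter)
#   -> {"mixed_type": False, "finetune_mode": "train-init"}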
