Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support automatic training_reuse_old_ratio #1209

Merged: 3 commits were merged on May 16, 2023.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
14 changes: 12 additions & 2 deletions dpgen/generator/arginfo.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import textwrap
from typing import Dict, List

from dargs import Argument, Variant
Expand Down Expand Up @@ -78,7 +79,15 @@ def training_args() -> List[Argument]:
doc_training_reuse_iter = "The minimal index of iteration that continues training models from old models of last iteration."
doc_reusing = " This option is only adopted when continuing training models from old models. This option will override default parameters."
doc_training_reuse_old_ratio = (
"The probability proportion of old data during training." + doc_reusing
textwrap.dedent(
"""\
The probability proportion of old data during training. It can be:\n
- float: directly assign the probability of old data;
- `auto:f`: automatic probability, where f is the new-to-old ratio;
- `auto`: equivalent to `auto:10`.
"""
)
+ doc_reusing
)
doc_training_reuse_numb_steps = "Number of training batch." + doc_reusing
doc_training_reuse_start_lr = (
Expand Down Expand Up @@ -129,7 +138,8 @@ def training_args() -> List[Argument]:
),
Argument(
"training_reuse_old_ratio",
[None, float],
[str, float],
default="auto",
optional=True,
doc=doc_training_reuse_old_ratio,
),
Expand Down
43 changes: 34 additions & 9 deletions dpgen/generator/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@
training_iter0_model = jdata.get("training_iter0_model_path", [])
training_init_model = jdata.get("training_init_model", False)
training_reuse_iter = jdata.get("training_reuse_iter")
training_reuse_old_ratio = jdata.get("training_reuse_old_ratio", None)
training_reuse_old_ratio = jdata.get("training_reuse_old_ratio", "auto")

# if you want to use DP-ZBL potential , you have to give the path of your energy potential file
if "srtab_file_path" in jdata.keys():
Expand All @@ -282,15 +282,29 @@
training_reuse_start_pref_f = jdata.get("training_reuse_start_pref_f", 100)
model_devi_activation_func = jdata.get("model_devi_activation_func", None)

if training_reuse_iter is not None and training_reuse_old_ratio is None:
raise RuntimeError(
"training_reuse_old_ratio not found but is mandatory when using init-model (training_reuse_iter is detected in param).\n"
"It defines the ratio of the old-data picking probability to the all-data(old-data plus new-data) picking probability in training after training_reuse_iter.\n"
"Denoting the index of the current iter as N (N >= training_reuse_iter ), old-data refers to those existed before the N-1 iter, and new-data refers to that obtained by the N-1 iter.\n"
"A recommended strategy is making the new-to-old ratio close to 10 times of the default value, to reasonably increase the sensitivity of the model to the new-data.\n"
"By default, the picking probability of data from one system or one iter is proportional to the number of batches (the number of frames divided by batch_size) of that systems or iter.\n"
"Detailed discussion about init-model (in Chinese) please see https://mp.weixin.qq.com/s/qsKMZ0j270YhQKvwXUiFvQ"
auto_ratio = False
if (
training_reuse_iter is not None
and isinstance(training_reuse_old_ratio, str)
and training_reuse_old_ratio.startswith("auto")
):
s = training_reuse_old_ratio.split(":")
if len(s) == 1:
new_to_old_ratio = 10.0
elif len(s) == 2:
new_to_old_ratio = float(s[1])

Check warning on line 295 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L291-L295

Added lines #L291 - L295 were not covered by tests
else:
raise ValueError(

Check warning on line 297 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L297

Added line #L297 was not covered by tests
"training_reuse_old_ratio is not correct, got %s"
% training_reuse_old_ratio
)
dlog.info(

Check warning on line 301 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L301

Added line #L301 was not covered by tests
"Use automatic training_reuse_old_ratio to make new-to-old ratio close to %d times of the default value.",
training_reuse_iter,
)
auto_ratio = True
number_old_frames = 0
number_new_frames = 0

Check warning on line 307 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L305-L307

Added lines #L305 - L307 were not covered by tests

model_devi_engine = jdata.get("model_devi_engine", "lammps")
if iter_index > 0 and _check_empty_iter(iter_index - 1, fp_task_min):
Expand Down Expand Up @@ -363,6 +377,8 @@
)
)
init_batch_size.append(detect_batch_size(ss, single_sys))
if auto_ratio:
number_old_frames += get_nframes(single_sys)

Check warning on line 381 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L381

Added line #L381 was not covered by tests
old_range = None
if iter_index > 0:
for ii in range(iter_index):
Expand All @@ -384,6 +400,11 @@
nframes += dpdata.LabeledSystem(
sys_single, fmt="deepmd/npy"
).get_nframes()
if auto_ratio:
if ii == iter_index - 1:
number_new_frames += nframes

Check warning on line 405 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L404-L405

Added lines #L404 - L405 were not covered by tests
else:
number_old_frames += nframes

Check warning on line 407 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L407

Added line #L407 was not covered by tests
if nframes < fp_task_min:
log_task(
"nframes (%d) in data sys %s is too small, skip" % (nframes, jj)
Expand Down Expand Up @@ -452,6 +473,10 @@
"DP-GEN currently only supports for DeePMD-kit 1.x or 2.x version!"
)
# set training reuse model
if auto_ratio:
training_reuse_old_ratio = number_old_frames / (

Check warning on line 477 in dpgen/generator/run.py

View check run for this annotation

Codecov / codecov/patch

dpgen/generator/run.py#L477

Added line #L477 was not covered by tests
number_old_frames + number_new_frames * new_to_old_ratio
)
if training_reuse_iter is not None and iter_index >= training_reuse_iter:
if "numb_steps" in jinput["training"] and training_reuse_stop_batch is not None:
jinput["training"]["numb_steps"] = training_reuse_stop_batch
Expand Down