Skip to content

Commit

Permalink
Merge devel into master (#2772)
Browse files Browse the repository at this point in the history
  • Loading branch information
wanghan-iapcm committed Aug 31, 2023
2 parents 53a1078 + 835d6e5 commit 6cf7544
Show file tree
Hide file tree
Showing 102 changed files with 1,277 additions and 916 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
name: Setup QEMU
if: matrix.platform_id == 'manylinux_aarch64'
- name: Build wheels
uses: pypa/cibuildwheel@v2.14
uses: pypa/cibuildwheel@v2.15
env:
CIBW_BUILD_VERBOSITY: 1
CIBW_ARCHS: all
Expand Down
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repos:
- id: end-of-file-fixer
exclude: "^.+\\.pbtxt$"
- id: check-yaml
#- id: check-json
- id: check-json
- id: check-added-large-files
args: ['--maxkb=1024', '--enforce-all']
# TODO: remove the following after resolved
Expand All @@ -33,7 +33,7 @@ repos:
files: \.py$
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.0.280
rev: v0.0.286
hooks:
- id: ruff
args: ["--fix"]
Expand All @@ -45,7 +45,7 @@ repos:
args: ["--write"]
# Python inside docs
- repo: https://github.com/asottile/blacken-docs
rev: 1.15.0
rev: 1.16.0
hooks:
- id: blacken-docs
# C++
Expand All @@ -72,7 +72,7 @@ repos:
#- id: cmake-lint
# license header
- repo: https://github.com/Lucas-C/pre-commit-hooks
rev: v1.5.1
rev: v1.5.4
hooks:
# C++, js
- id: insert-license
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ A full [document](doc/train/train-input-auto.rst) on options in the training inp
- [Descriptor `"se_e2_r"`](doc/model/train-se-e2-r.md)
- [Descriptor `"se_e3"`](doc/model/train-se-e3.md)
- [Descriptor `"se_atten"`](doc/model/train-se-atten.md)
- [Descriptor `"se_atten_v2"`](doc/model/train-se-atten.md#descriptor-se_atten_v2)
- [Descriptor `"hybrid"`](doc/model/train-hybrid.md)
- [Descriptor `sel`](doc/model/sel.md)
- [Fit energy](doc/model/train-energy.md)
Expand Down
4 changes: 4 additions & 0 deletions deepmd/descriptor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from .se_atten import (
DescrptSeAtten,
)
from .se_atten_v2 import (
DescrptSeAttenV2,
)
from .se_r import (
DescrptSeR,
)
Expand All @@ -41,6 +44,7 @@
"DescrptSeAEfLower",
"DescrptSeAMask",
"DescrptSeAtten",
"DescrptSeAttenV2",
"DescrptSeR",
"DescrptSeT",
]
52 changes: 49 additions & 3 deletions deepmd/descriptor/se_atten.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ class DescrptSeAtten(DescrptSeA):
Whether to mask the diagonal in the attention weights.
multi_task
If the model has multi fitting nets to train.
stripped_type_embedding
Whether to strip the type embedding into a separated embedding network.
Default value will be True in `se_atten_v2` descriptor.
smooth_type_embdding
When using stripped type embedding, whether to multiply the network output of the type embedding by the smooth (switch) factor
to keep the network smooth, instead of setting `set_davg_zero` to be True.
Default value will be True in `se_atten_v2` descriptor.
"""

def __init__(
Expand All @@ -133,9 +140,10 @@ def __init__(
attn_mask: bool = False,
multi_task: bool = False,
stripped_type_embedding: bool = False,
smooth_type_embdding: bool = False,
**kwargs,
) -> None:
if not set_davg_zero:
if not set_davg_zero and not (stripped_type_embedding and smooth_type_embdding):
warnings.warn(
"Set 'set_davg_zero' False in descriptor 'se_atten' "
"may cause unexpected incontinuity during model inference!"
Expand Down Expand Up @@ -166,6 +174,7 @@ def __init__(
"2"
), "se_atten only support tensorflow version 2.0 or higher."
self.stripped_type_embedding = stripped_type_embedding
self.smooth = smooth_type_embdding
self.ntypes = ntypes
self.att_n = attn
self.attn_layer = attn_layer
Expand Down Expand Up @@ -607,6 +616,7 @@ def build(
sel_a=self.sel_all_a,
sel_r=self.sel_all_r,
)

self.nei_type_vec = tf.reshape(self.nei_type_vec, [-1])
self.nmask = tf.cast(
tf.reshape(self.nmask, [-1, 1, self.sel_all_a[0]]),
Expand All @@ -625,6 +635,41 @@ def build(
tf.slice(atype, [0, 0], [-1, natoms[0]]), [-1]
) ## lammps will have error without this
self._identity_tensors(suffix=suffix)
if self.smooth:
self.sliced_avg = tf.reshape(
tf.slice(
tf.reshape(self.t_avg, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1]
),
[self.ntypes, 1],
)
self.sliced_std = tf.reshape(
tf.slice(
tf.reshape(self.t_std, [self.ntypes, -1, 4]), [0, 0, 0], [-1, 1, 1]
),
[self.ntypes, 1],
)
self.avg_looked_up = tf.reshape(
tf.nn.embedding_lookup(self.sliced_avg, self.atype_nloc),
[-1, natoms[0], 1],
)
self.std_looked_up = tf.reshape(
tf.nn.embedding_lookup(self.sliced_std, self.atype_nloc),
[-1, natoms[0], 1],
)
self.recovered_r = (
tf.reshape(
tf.slice(tf.reshape(self.descrpt, [-1, 4]), [0, 0], [-1, 1]),
[-1, natoms[0], self.sel_all_a[0]],
)
* self.std_looked_up
+ self.avg_looked_up
)
uu = 1 - self.rcut_r_smth * self.recovered_r
self.recovered_switch = -uu * uu * uu + 1
self.recovered_switch = tf.clip_by_value(self.recovered_switch, 0.0, 1.0)
self.recovered_switch = tf.cast(
self.recovered_switch, self.filter_precision
)

self.dout, self.qmat = self._pass_filter(
self.descrpt_reshape,
Expand Down Expand Up @@ -1146,9 +1191,10 @@ def _filter_lower(
two_embd = tf.nn.embedding_lookup(
embedding_of_two_side_type_embedding, index_of_two_side
)

if self.smooth:
two_embd = two_embd * tf.reshape(self.recovered_switch, [-1, 1])
if not self.compress:
xyz_scatter = xyz_scatter * two_embd + two_embd
xyz_scatter = xyz_scatter * two_embd + xyz_scatter
else:
return op_module.tabulate_fusion_se_atten(
tf.cast(self.table.data[net], self.filter_precision),
Expand Down
115 changes: 115 additions & 0 deletions deepmd/descriptor/se_atten_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import logging
from typing import (
List,
Optional,
)

from .descriptor import (
Descriptor,
)
from .se_atten import (
DescrptSeAtten,
)

log = logging.getLogger(__name__)


@Descriptor.register("se_atten_v2")
class DescrptSeAttenV2(DescrptSeAtten):
    r"""Smooth version 2.0 descriptor with attention.

    This class is :class:`DescrptSeAtten` with ``stripped_type_embedding`` and
    ``smooth_type_embdding`` both fixed to ``True``; every other argument is
    forwarded to the parent constructor.

    Parameters
    ----------
    rcut
        The cut-off radius :math:`r_c`
    rcut_smth
        From where the environment matrix should be smoothed :math:`r_s`
    sel : int
        The maximum number of neighbor atoms within the cut-off radius,
        counted over all atom types together.
    ntypes : int
        Number of atom types.
    neuron : list[int]
        Number of neurons in each hidden layers of the embedding net :math:`\mathcal{N}`
    axis_neuron
        Number of the axis neuron :math:`M_2` (number of columns of the sub-matrix of the embedding matrix)
    resnet_dt
        Time-step `dt` in the resnet construction:
        y = x + dt * \phi (Wx + b)
    trainable
        If the weights of embedding net are trainable.
    seed
        Random seed for initializing the network parameters.
    type_one_side
        Try to build N_types embedding nets. Otherwise, building N_types^2 embedding nets
    exclude_types : List[List[int]]
        The excluded pairs of types which have no interaction with each other.
        For example, `[[0, 1]]` means no interaction between type 0 and type 1.
    set_davg_zero
        Set the shift of embedding net input to zero.
    activation_function
        The activation function in the embedding net. Supported options are |ACTIVATION_FN|
    precision
        The precision of the embedding net parameters. Supported options are |PRECISION|
    uniform_seed
        Only for the purpose of backward compatibility, retrieves the old behavior of using the random seed
    attn
        The length of hidden vector during scale-dot attention computation.
    attn_layer
        The number of layers in attention mechanism.
    attn_dotr
        Whether to dot the relative coordinates on the attention weights as a gated scheme.
    attn_mask
        Whether to mask the diagonal in the attention weights.
    multi_task
        If the model has multi fitting nets to train.
    **kwargs
        Extra keyword arguments forwarded unchanged to the parent constructor.
        They must not contain ``stripped_type_embedding`` or
        ``smooth_type_embdding``: both are already passed explicitly here, so
        a duplicate would raise ``TypeError``.
    """

    def __init__(
        self,
        rcut: float,
        rcut_smth: float,
        sel: int,
        ntypes: int,
        neuron: List[int] = [24, 48, 96],
        axis_neuron: int = 8,
        resnet_dt: bool = False,
        trainable: bool = True,
        seed: Optional[int] = None,
        type_one_side: bool = True,
        set_davg_zero: bool = False,
        exclude_types: List[List[int]] = [],
        activation_function: str = "tanh",
        precision: str = "default",
        uniform_seed: bool = False,
        attn: int = 128,
        attn_layer: int = 2,
        attn_dotr: bool = True,
        attn_mask: bool = False,
        multi_task: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(
            rcut,
            rcut_smth,
            sel,
            ntypes,
            # The list defaults in the signature are evaluated once and shared
            # by every call; hand the parent its own copy so nothing it stores
            # or mutates can alias the shared default (or the caller's list).
            neuron=list(neuron),
            axis_neuron=axis_neuron,
            resnet_dt=resnet_dt,
            trainable=trainable,
            seed=seed,
            type_one_side=type_one_side,
            set_davg_zero=set_davg_zero,
            exclude_types=list(exclude_types),
            activation_function=activation_function,
            precision=precision,
            uniform_seed=uniform_seed,
            attn=attn,
            attn_layer=attn_layer,
            attn_dotr=attn_dotr,
            attn_mask=attn_mask,
            multi_task=multi_task,
            # The two switches below are what distinguish "se_atten_v2" from
            # "se_atten"; they are intentionally not user-configurable here.
            stripped_type_embedding=True,
            smooth_type_embdding=True,
            **kwargs,
        )
43 changes: 34 additions & 9 deletions deepmd/entrypoints/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test(
set_prefix : str
string prefix of set
numb_test : int
munber of tests to do
number of tests to do. 0 means all data.
rand_seed : Optional[int]
seed for random generator
shuffle_test : bool
Expand All @@ -88,6 +88,9 @@ def test(
RuntimeError
if no valid system was found
"""
if numb_test == 0:
# only float has inf, but should work for min
numb_test = float("inf")
if datafile is not None:
datalist = open(datafile)
all_sys = datalist.read().splitlines()
Expand Down Expand Up @@ -934,18 +937,40 @@ def test_dipole(

if detail_file is not None:
detail_path = Path(detail_file)
if not atomic:
pe = np.concatenate(
(
np.reshape(test_data["dipole"][:numb_test], [-1, 3]),
np.reshape(dipole, [-1, 3]),
),
axis=1,
)
header_text = "data_x data_y data_z pred_x pred_y pred_z"
else:
pe = np.concatenate(
(
np.reshape(
test_data["atomic_dipole"][:numb_test], [-1, 3 * sel_natoms]
),
np.reshape(dipole, [-1, 3 * sel_natoms]),
),
axis=1,
)
header_text = [
f"{letter}{number}"
for number in range(1, sel_natoms + 1)
for letter in ["data_x", "data_y", "data_z"]
] + [
f"{letter}{number}"
for number in range(1, sel_natoms + 1)
for letter in ["pred_x", "pred_y", "pred_z"]
]
header_text = " ".join(header_text)

pe = np.concatenate(
(
np.reshape(test_data["dipole"][:numb_test], [-1, 3]),
np.reshape(dipole, [-1, 3]),
),
axis=1,
)
np.savetxt(
detail_path.with_suffix(".out"),
pe,
header="data_x data_y data_z pred_x pred_y pred_z",
header=header_text,
)
return {"rmse": (rmse_f, dipole.size)}

Expand Down
17 changes: 14 additions & 3 deletions deepmd/entrypoints/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def get_min_nbor_dist(jdata, rcut):


def parse_auto_sel(sel):
if type(sel) is not str:
if not isinstance(sel, str):
return False
words = sel.split(":")
if words[0] == "auto":
Expand Down Expand Up @@ -476,7 +476,15 @@ def update_one_sel(jdata, descriptor):
if descriptor["type"] == "loc_frame":
return descriptor
rcut = descriptor["rcut"]
tmp_sel = get_sel(jdata, rcut, one_type=descriptor["type"] in ("se_atten",))
tmp_sel = get_sel(
jdata,
rcut,
one_type=descriptor["type"]
in (
"se_atten",
"se_atten_v2",
),
)
sel = descriptor["sel"]
if isinstance(sel, int):
# convert to list and finally convert back to int
Expand All @@ -495,7 +503,10 @@ def update_one_sel(jdata, descriptor):
"not less than %d, but you set it to %d. The accuracy"
" of your model may get worse." % (ii, tt, dd)
)
if descriptor["type"] in ("se_atten",):
if descriptor["type"] in (
"se_atten",
"se_atten_v2",
):
descriptor["sel"] = sel = sum(sel)
return descriptor

Expand Down

0 comments on commit 6cf7544

Please sign in to comment.