Skip to content

Commit

Permalink
feat(pt/tf/dp): support econf type embedding (#3781)
Browse files Browse the repository at this point in the history
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Introduced `use_econf_tebd` and `type_map` parameters for advanced
electronic configuration type embedding.
  - Enhanced serialization to support new parameters.
  
- **Bug Fixes**
- Improved handling of type embedding configurations to ensure
consistency.

- **Documentation**
  - Updated documentation to include new parameters and their usage.

These updates provide more flexibility and control over electronic
configuration type embedding, enhancing the model's configurability and
performance.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: Duo <50307526+iProzd@users.noreply.github.com>
Co-authored-by: Han Wang <92130845+wanghan-iapcm@users.noreply.github.com>
  • Loading branch information
iProzd and wanghan-iapcm committed May 16, 2024
1 parent a676bf2 commit f6489c0
Show file tree
Hide file tree
Showing 19 changed files with 447 additions and 132 deletions.
14 changes: 14 additions & 0 deletions deepmd/dpmodel/descriptor/dpa1.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ class DescrptDPA1(NativeOP, BaseDescriptor):
Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'.
Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'.
The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
Only used if `use_econf_tebd` is `True` in type embedding net.
spin
(Only support None to keep consistent with other backend references.)
(Not used in this version. Not-none option is not implemented.)
Expand Down Expand Up @@ -242,6 +248,8 @@ def __init__(
concat_output_tebd: bool = True,
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[int] = None,
) -> None:
Expand Down Expand Up @@ -287,12 +295,16 @@ def __init__(
trainable_ln=trainable_ln,
ln_eps=ln_eps,
)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
neuron=[tebd_dim],
padding=True,
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
type_map=type_map,
)
self.tebd_dim = tebd_dim
self.concat_output_tebd = concat_output_tebd
Expand Down Expand Up @@ -457,6 +469,8 @@ def serialize(self) -> dict:
"smooth_type_embedding": obj.smooth,
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
"embeddings": obj.embeddings.serialize(),
Expand Down
13 changes: 13 additions & 0 deletions deepmd/dpmodel/descriptor/dpa2.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ def __init__(
trainable: bool = True,
seed: Optional[int] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
):
r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.
Expand Down Expand Up @@ -350,6 +352,11 @@ def __init__(
(Unused yet) Random seed for parameter initialization.
add_tebd_to_repinit_out : bool, optional
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, Optional
Whether to use electronic configuration type embedding.
type_map : List[str], Optional
A list of strings. Give the name to each type of atoms.
Only used if `use_econf_tebd` is `True` in type embedding net.
Returns
-------
Expand Down Expand Up @@ -433,12 +440,16 @@ def init_subclass_params(sub_data, sub_class):
trainable_ln=self.repformer_args.trainable_ln,
ln_eps=self.repformer_args.ln_eps,
)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
neuron=[self.repinit_args.tebd_dim],
padding=True,
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
type_map=type_map,
)
self.concat_output_tebd = concat_output_tebd
self.precision = precision
Expand Down Expand Up @@ -641,6 +652,8 @@ def serialize(self) -> dict:
"env_protection": self.env_protection,
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
"type_embedding": self.type_embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
}
Expand Down
45 changes: 41 additions & 4 deletions deepmd/dpmodel/utils/type_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ class TypeEmbedNet(NativeOP):
Random seed for initializing the network parameters.
padding
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
Only used if `use_econf_tebd` is `True` in type embedding net.
"""

def __init__(
Expand All @@ -52,6 +57,8 @@ def __init__(
trainable: bool = True,
seed: Optional[int] = None,
padding: bool = False,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
) -> None:
self.ntypes = ntypes
self.neuron = neuron
Expand All @@ -61,8 +68,33 @@ def __init__(
self.activation_function = str(activation_function)
self.trainable = trainable
self.padding = padding
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
embed_input_dim = ntypes
if self.use_econf_tebd:
from deepmd.utils.econf_embd import (
ECONF_DIM,
electronic_configuration_embedding,
)
from deepmd.utils.econf_embd import type_map as periodic_table

assert (
self.type_map is not None
), "When using electronic configuration type embedding, type_map must be provided!"

missing_types = [t for t in self.type_map if t not in periodic_table]
assert not missing_types, (
"When using electronic configuration type embedding, "
"all element in type_map should be in periodic table! "
f"Found these invalid elements: {missing_types}"
)
self.econf_tebd = np.array(
[electronic_configuration_embedding[kk] for kk in self.type_map],
dtype=PRECISION_DICT[self.precision],
)
embed_input_dim = ECONF_DIM
self.embedding_net = EmbeddingNet(
ntypes,
embed_input_dim,
self.neuron,
self.activation_function,
self.resnet_dt,
Expand All @@ -71,9 +103,12 @@ def __init__(

def call(self) -> np.ndarray:
"""Compute the type embedding network."""
embed = self.embedding_net(
np.eye(self.ntypes, dtype=PRECISION_DICT[self.precision])
)
if not self.use_econf_tebd:
embed = self.embedding_net(
np.eye(self.ntypes, dtype=PRECISION_DICT[self.precision])
)
else:
embed = self.embedding_net(self.econf_tebd)
if self.padding:
embed = np.pad(embed, ((0, 1), (0, 0)), mode="constant")
return embed
Expand Down Expand Up @@ -120,5 +155,7 @@ def serialize(self) -> dict:
"activation_function": self.activation_function,
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
"embedding": self.embedding_net.serialize(),
}
19 changes: 18 additions & 1 deletion deepmd/pt/model/descriptor/dpa1.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ class DescrptDPA1(BaseDescriptor, torch.nn.Module):
Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'.
Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'.
The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
use_econf_tebd: bool, Optional
Whether to use electronic configuration type embedding.
type_map: List[str], Optional
A list of strings. Give the name to each type of atoms.
Only used if `use_econf_tebd` is `True` in type embedding net.
spin
(Only support None to keep consistent with other backend references.)
(Not used in this version. Not-none option is not implemented.)
Expand Down Expand Up @@ -220,6 +225,8 @@ def __init__(
smooth_type_embedding: bool = True,
type_one_side: bool = False,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
# not implemented
spin=None,
type: Optional[str] = None,
Expand Down Expand Up @@ -270,7 +277,15 @@ def __init__(
ln_eps=ln_eps,
old_impl=old_impl,
)
self.type_embedding = TypeEmbedNet(ntypes, tebd_dim, precision=precision)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes,
tebd_dim,
precision=precision,
use_econf_tebd=use_econf_tebd,
type_map=type_map,
)
self.tebd_dim = tebd_dim
self.concat_output_tebd = concat_output_tebd
self.trainable = trainable
Expand Down Expand Up @@ -415,6 +430,8 @@ def serialize(self) -> dict:
"smooth_type_embedding": obj.smooth,
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
# make deterministic
"precision": RESERVED_PRECISON_DICT[obj.prec],
"embeddings": obj.filter_layers.serialize(),
Expand Down
17 changes: 16 additions & 1 deletion deepmd/pt/model/descriptor/dpa2.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ def __init__(
trainable: bool = True,
seed: Optional[int] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
old_impl: bool = False,
):
r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.
Expand Down Expand Up @@ -104,6 +106,11 @@ def __init__(
(Unused yet) Random seed for parameter initialization.
add_tebd_to_repinit_out : bool, optional
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, Optional
Whether to use electronic configuration type embedding.
type_map : List[str], Optional
A list of strings. Give the name to each type of atoms.
Only used if `use_econf_tebd` is `True` in type embedding net.
Returns
-------
Expand Down Expand Up @@ -189,8 +196,14 @@ def init_subclass_params(sub_data, sub_class):
ln_eps=self.repformer_args.ln_eps,
old_impl=old_impl,
)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes, self.repinit_args.tebd_dim, precision=precision
ntypes,
self.repinit_args.tebd_dim,
precision=precision,
use_econf_tebd=self.use_econf_tebd,
type_map=type_map,
)
self.concat_output_tebd = concat_output_tebd
self.precision = precision
Expand Down Expand Up @@ -368,6 +381,8 @@ def serialize(self) -> dict:
"env_protection": self.env_protection,
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
"type_embedding": self.type_embedding.embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
}
Expand Down
4 changes: 4 additions & 0 deletions deepmd/pt/model/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ def get_zbl_model(model_params):
ntypes = len(model_params["type_map"])
# descriptor
model_params["descriptor"]["ntypes"] = ntypes
if model_params["descriptor"].get("use_econf_tebd", False):
model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"])
descriptor = BaseDescriptor(**model_params["descriptor"])
# fitting
fitting_net = model_params.get("fitting_net", None)
Expand Down Expand Up @@ -152,6 +154,8 @@ def get_standard_model(model_params):
ntypes = len(model_params["type_map"])
# descriptor
model_params["descriptor"]["ntypes"] = ntypes
if model_params["descriptor"].get("use_econf_tebd", False):
model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"])
descriptor = BaseDescriptor(**model_params["descriptor"])
# fitting
fitting_net = model_params.get("fitting_net", None)
Expand Down

0 comments on commit f6489c0

Please sign in to comment.