Clean TODOs and convert them into issues (#3519)
About 50+ issues will be created after this PR is merged.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
njzjz and pre-commit-ci[bot] committed Mar 19, 2024
1 parent 9b6042b commit 76371ce
Showing 37 changed files with 68 additions and 64 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/todo.yml
@@ -0,0 +1,20 @@
+name: TODO workflow
+on:
+  push:
+    branches:
+      - devel
+jobs:
+  build:
+    if: github.repository_owner == 'deepmodeling'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run tdg-github-action
+        uses: ribtoks/tdg-github-action@master
+        with:
+          TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          REPO: ${{ github.repository }}
+          SHA: ${{ github.sha }}
+          REF: ${{ github.ref }}
+          EXCLUDE_PATTERN: "(source/3rdparty|.git)/.*"
+          COMMENT_ON_ISSUES: 1
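
For context on how the issues get created: ribtoks/tdg scans source comments for markers such as TODO, FIXME, and BUG, and the action above opens one issue per marker found on pushes to devel. A hypothetical Python comment of the rough shape such scanners pick up (the exact parsing rules belong to the action, not to this diff):

# TODO: migrate this helper to the new API
# Lines that follow the marker typically end up in the issue body,
# which is how the rewritten comments below become trackable issues.
def example() -> None:
    ...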
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
@@ -12,7 +12,6 @@ repos:
   - id: check-json
   - id: check-added-large-files
     args: ['--maxkb=1024', '--enforce-all']
-    # TODO: remove the following after resolved
     exclude: |
       (?x)^(
           source/tests/infer/dipolecharge_e.pbtxt|
14 changes: 8 additions & 6 deletions deepmd/common.py
@@ -71,8 +71,9 @@
)


-# TODO this is not a good way to do things. This is some global variable to which
-# TODO anyone can write and there is no good way to keep track of the changes
+# TODO: refactor data_requirement to make it not a global variable
+# this is not a good way to do things. This is some global variable to which
+# anyone can write and there is no good way to keep track of the changes
data_requirement = {}
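
The new TODO asks for data_requirement to stop being a bare module-level dict. A minimal sketch of one possible shape for that refactor, assuming a small registry class with controlled writes (all names here are illustrative, not part of the codebase):

class DataRequirementRegistry:
    """Illustrative replacement for the module-level dict."""

    def __init__(self) -> None:
        self._items: dict = {}

    def add(self, key: str, **spec) -> None:
        # refuse silent overwrites, so changes stay traceable
        if key in self._items:
            raise KeyError(f"data requirement {key!r} already registered")
        self._items[key] = spec

    def get(self, key: str) -> dict:
        return self._items[key]

Routing every write through add() addresses the "anyone can write and there is no good way to keep track" complaint in the comment above.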


@@ -180,9 +181,10 @@ def make_default_mesh(pbc: bool, mixed_type: bool) -> np.ndarray:
return default_mesh


-# TODO maybe rename this to j_deprecated and only warn about deprecated keys,
-# TODO if the deprecated_key argument is left empty function puppose is only custom
-# TODO error since dict[key] already raises KeyError when the key is missing
+# TODO: rename j_must_have to j_deprecated and only warn about deprecated keys
+# maybe rename this to j_deprecated and only warn about deprecated keys;
+# if the deprecated_key argument is left empty, the function's purpose is only a
+# custom error, since dict[key] already raises KeyError when the key is missing
def j_must_have(
jdata: Dict[str, "_DICT_VAL"], key: str, deprecated_key: List[str] = []
) -> "_DICT_VAL":
@@ -238,7 +240,7 @@ def j_loader(filename: Union[str, Path]) -> Dict[str, Any]:
raise TypeError("config file must be json, or yaml/yml")


-# TODO port completely to pathlib when all callers are ported
+# TODO: port expand_sys_str completely to pathlib when all callers are ported
def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
"""Recursively iterate over directories taking those that contain `type.raw` file.
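
A pathlib-only sketch of the port the renamed TODO describes, keeping the documented behavior of collecting directories that contain a type.raw file (assumed from the docstring; the real function may differ in ordering or filtering):

from pathlib import Path
from typing import List, Union

def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
    """Recursively collect directories that contain a `type.raw` file."""
    return sorted(str(f.parent) for f in Path(root_dir).rglob("type.raw"))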
3 changes: 2 additions & 1 deletion deepmd/dpmodel/fitting/general_fitting.py
@@ -313,7 +313,8 @@ def _call_common(
)
xx = descriptor
if self.remove_vaccum_contribution is not None:
-# TODO: Idealy, the input for vaccum should be computed;
+# TODO: compute the input for vacuum when setting remove_vaccum_contribution
+# Ideally, the input for vacuum should be computed;
# we consider it as always zero for convenience.
# Needs a compute_input_stats for vaccum passed from the
# descriptor.
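
As a reading aid for the comment block above: the vacuum input is not computed; it is approximated by zeros, and the fitting output on that zero input serves as the baseline to remove. A self-contained sketch of that convention (the helper name and fitting_net are illustrative, not the library's code):

import numpy as np

def subtract_vacuum_baseline(fitting_net, xx: np.ndarray) -> np.ndarray:
    """Subtract the network output on an assumed-zero vacuum input."""
    xx_zeros = np.zeros_like(xx)  # stand-in for a computed vacuum descriptor
    return fitting_net(xx) - fitting_net(xx_zeros)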
3 changes: 2 additions & 1 deletion deepmd/dpmodel/utils/network.py
@@ -90,7 +90,8 @@ def __call__(self):
return self.count


-# TODO: should be moved to otherwhere...
+# TODO: move save_dp_model and load_dp_model to a separate module
+# should be moved elsewhere...
def save_dp_model(filename: str, model_dict: dict) -> None:
"""Save a DP model to a file in the native format.
2 changes: 1 addition & 1 deletion deepmd/dpmodel/utils/update_sel.py
@@ -17,5 +17,5 @@ def neighbor_stat(self) -> Type[NeighborStat]:
return NeighborStat

def hook(self, min_nbor_dist, max_nbor_size):
-# TODO: save to the model
+# TODO: save to the model in UpdateSel.hook
pass
4 changes: 1 addition & 3 deletions deepmd/pt/entrypoints/main.py
@@ -285,9 +285,7 @@ def freeze(FLAGS):
torch.jit.save(
model,
FLAGS.output,
-{
-    # TODO: _extra_files
-},
+{},
)


2 changes: 1 addition & 1 deletion deepmd/pt/loss/ener.py
@@ -77,7 +77,7 @@ def __init__(
self.has_f = (start_pref_f != 0.0 and limit_pref_f != 0.0) or inference
self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference

-# TODO need support for atomic energy and atomic pref
+# TODO: EnergyStdLoss needs support for atomic energy and atomic pref
self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference
self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference

2 changes: 1 addition & 1 deletion deepmd/pt/loss/ener_spin.py
@@ -47,7 +47,7 @@ def __init__(
self.has_fr = (start_pref_fr != 0.0 and limit_pref_fr != 0.0) or inference
self.has_fm = (start_pref_fm != 0.0 and limit_pref_fm != 0.0) or inference

-# TODO need support for virial, atomic energy and atomic pref
+# TODO: EnergySpinLoss needs support for virial, atomic energy and atomic pref
self.has_v = (start_pref_v != 0.0 and limit_pref_v != 0.0) or inference
self.has_ae = (start_pref_ae != 0.0 and limit_pref_ae != 0.0) or inference
self.has_pf = (start_pref_pf != 0.0 and limit_pref_pf != 0.0) or inference
3 changes: 2 additions & 1 deletion deepmd/pt/model/task/fitting.py
@@ -457,7 +457,8 @@ def _forward_common(
):
xx = descriptor
if self.remove_vaccum_contribution is not None:
-# TODO: Idealy, the input for vaccum should be computed;
+# TODO: compute the input for vacuum when remove_vaccum_contribution is set
+# Ideally, the input for vacuum should be computed;
# we consider it as always zero for convenience.
# Needs a compute_input_stats for vaccum passed from the
# descriptor.
6 changes: 4 additions & 2 deletions deepmd/pt/train/training.py
@@ -558,14 +558,16 @@ def get_loss(loss_params, start_lr, _ntypes, _model):
output_device=LOCAL_RANK,
)

-# TODO ZD add lr warmups for multitask
+# TODO: add lr warmups for multitask
+# author: iProzd
def warm_up_linear(step, warmup_steps):
if step < warmup_steps:
return step / warmup_steps
else:
return self.lr_exp.value(step - warmup_steps) / self.lr_exp.start_lr

-# TODO ZD add optimizers for multitask
+# TODO: add optimizers for multitask
+# author: iProzd
if self.opt_type == "Adam":
self.optimizer = torch.optim.Adam(
self.wrapper.parameters(), lr=self.lr_exp.start_lr
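
For reference, a warm-up factor of this shape is typically wired into PyTorch through LambdaLR, which multiplies the optimizer's base learning rate by the returned value each step. A runnable toy sketch (the model, optimizer, and step counts are placeholders, not the trainer's real objects):

import torch

model = torch.nn.Linear(4, 1)  # toy stand-in for the wrapped model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
warmup_steps = 100

def warm_up_linear(step: int, warmup: int) -> float:
    # same shape as the closure above; the decay branch is simplified
    return step / warmup if step < warmup else 1.0

scheduler = torch.optim.lr_scheduler.LambdaLR(
    optimizer, lr_lambda=lambda step: warm_up_linear(step, warmup_steps)
)
for _ in range(5):  # one scheduler.step() per training step
    optimizer.step()
    scheduler.step()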
2 changes: 1 addition & 1 deletion deepmd/pt/utils/update_sel.py
@@ -17,5 +17,5 @@ def neighbor_stat(self) -> Type[NeighborStat]:
return NeighborStat

def hook(self, min_nbor_dist, max_nbor_size):
-# TODO: save to the model
+# TODO: save to the model in UpdateSel.hook
pass
5 changes: 0 additions & 5 deletions deepmd/tf/descriptor/descriptor.py
@@ -102,9 +102,6 @@ def get_dim_rot_mat_1(self) -> int:
int
the first dimension of the rotation matrix
"""
-# TODO: I think this method should be implemented as it's called by dipole and
-# polar fitting network. However, currently not all descriptors have this
-# method.
raise NotImplementedError

def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]:
@@ -121,8 +118,6 @@ def get_nlist(self) -> Tuple[tf.Tensor, tf.Tensor, List[int], List[int]]:
sel_r : list[int]
The number of neighbors with only radial information
"""
-# TODO: I think this method should be implemented as it's called by energy
-# model. However, se_ar and hybrid doesn't have this method.
raise NotImplementedError

@abstractmethod
3 changes: 2 additions & 1 deletion deepmd/tf/descriptor/se_a.py
@@ -1426,7 +1426,8 @@ def serialize(self, suffix: str = "") -> dict:
raise NotImplementedError("spin is unsupported")
assert self.davg is not None
assert self.dstd is not None
-# TODO: not sure how to handle type embedding - type embedding is not a model parameter,
+# TODO: tf: handle type embedding in DescrptSeA.serialize
+# not sure how to handle type embedding - type embedding is not a model parameter,
# but instead a part of the input data. Maybe the interface should be refactored...

return {
7 changes: 3 additions & 4 deletions deepmd/tf/descriptor/se_a_mask.py
@@ -249,10 +249,9 @@ def compute_input_stats(
**kwargs
Additional keyword arguments.
"""
"""
TODO: Since not all input atoms are real in se_a_mask,
statistics should be reimplemented for se_a_mask descriptor.
"""
# TODO: implement compute_input_stats for DescrptSeAMask
# Since not all input atoms are real in se_a_mask,
# statistics should be reimplemented for se_a_mask descriptor.

self.davg = None
self.dstd = None
3 changes: 2 additions & 1 deletion deepmd/tf/descriptor/se_r.py
@@ -766,7 +766,8 @@ def serialize(self, suffix: str = "") -> dict:
raise NotImplementedError("spin is unsupported")
assert self.davg is not None
assert self.dstd is not None
-# TODO: not sure how to handle type embedding - type embedding is not a model parameter,
+# TODO: tf: handle type embedding in DescrptSeR.serialize
+# not sure how to handle type embedding - type embedding is not a model parameter,
# but instead a part of the input data. Maybe the interface should be refactored...
return {
"@class": "Descriptor",
3 changes: 2 additions & 1 deletion deepmd/tf/env.py
@@ -157,7 +157,8 @@ def dlopen_library(module: str, filename: str):
r"(final)_layer_type_(\d+)/(matrix)|"
r"(final)_layer/(bias)|"
r"(final)_layer_type_(\d+)/(bias)|"
-# TODO: not sure how to parse for shared layers...
+# TODO: support extracting parameters for shared layers
+# not sure how to parse for shared layers...
# layer_name
r"share_.+_type_\d/matrix|"
r"share_.+_type_\d/bias|"
4 changes: 2 additions & 2 deletions deepmd/tf/fit/dipole.py
@@ -355,7 +355,7 @@ def serialize(self, suffix: str) -> dict:
"dim_descrpt": self.dim_descrpt,
"embedding_width": self.dim_rot_mat_1,
# very bad design: type embedding is not passed to the class
-# TODO: refactor the class
+# TODO: refactor the class for type embedding and dipole fitting
"mixed_types": False,
"dim_out": 3,
"neuron": self.n_neuron,
@@ -365,7 +365,7 @@
"exclude_types": [],
"nets": self.serialize_network(
ntypes=self.ntypes,
-# TODO: consider type embeddings
+# TODO: consider type embeddings in dipole fitting
ndim=1,
in_dim=self.dim_descrpt,
out_dim=self.dim_rot_mat_1,
4 changes: 2 additions & 2 deletions deepmd/tf/fit/dos.py
@@ -701,7 +701,7 @@ def serialize(self, suffix: str = "") -> dict:
"ntypes": self.ntypes,
"dim_descrpt": self.dim_descrpt,
# very bad design: type embedding is not passed to the class
-# TODO: refactor the class
+# TODO: refactor the class for DOSFitting and type embedding
"mixed_types": False,
"dim_out": self.numb_dos,
"neuron": self.n_neuron,
@@ -715,7 +715,7 @@
"exclude_types": [],
"nets": self.serialize_network(
ntypes=self.ntypes,
-# TODO: consider type embeddings
+# TODO: consider type embeddings for DOSFitting
ndim=1,
in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam,
out_dim=self.numb_dos,
4 changes: 2 additions & 2 deletions deepmd/tf/fit/ener.py
@@ -893,7 +893,7 @@ def serialize(self, suffix: str = "") -> dict:
"ntypes": self.ntypes,
"dim_descrpt": self.dim_descrpt,
# very bad design: type embedding is not passed to the class
-# TODO: refactor the class
+# TODO: refactor the class for energy fitting and type embedding
"mixed_types": False,
"dim_out": 1,
"neuron": self.n_neuron,
@@ -912,7 +912,7 @@
"exclude_types": [],
"nets": self.serialize_network(
ntypes=self.ntypes,
-# TODO: consider type embeddings
+# TODO: consider type embeddings for energy fitting
ndim=1,
in_dim=self.dim_descrpt + self.numb_fparam + self.numb_aparam,
neuron=self.n_neuron,
4 changes: 2 additions & 2 deletions deepmd/tf/fit/polar.py
@@ -545,7 +545,7 @@ def serialize(self, suffix: str) -> dict:
"dim_descrpt": self.dim_descrpt,
"embedding_width": self.dim_rot_mat_1,
# very bad design: type embedding is not passed to the class
-# TODO: refactor the class
+# TODO: refactor the class for polar fitting and type embedding
"mixed_types": False,
"dim_out": 3,
"neuron": self.n_neuron,
@@ -558,7 +558,7 @@
"shift_diag": self.shift_diag,
"nets": self.serialize_network(
ntypes=self.ntypes,
-# TODO: consider type embeddings
+# TODO: consider type embeddings for polar fitting
ndim=1,
in_dim=self.dim_descrpt,
out_dim=self.dim_rot_mat_1,
1 change: 0 additions & 1 deletion deepmd/tf/infer/deep_tensor.py
@@ -412,7 +412,6 @@ def eval_full(
if ghost_map is not None:
# add the value of ghost atoms to real atoms
force = np.reshape(force, [nframes * nout, -1, 3])
-# TODO: is there some way not to use for loop?
for ii in range(nframes * nout):
np.add.at(force[ii], ghost_map, force[ii, nloc:])
if atomic:
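
The deleted TODO asked whether the per-frame loop could be avoided; one possible vectorization keeps a single unbuffered np.add.at call by broadcasting a frame index against the ghost map. A toy sketch with made-up shapes (not the library's code; ghost_map entries point at the owning real atoms, as in the loop above):

import numpy as np

nframes, nloc, nghost = 2, 3, 2
force = np.random.rand(nframes, nloc + nghost, 3)
ghost_map = np.array([0, 2])  # ghost atom -> index of its real atom

frame_idx = np.arange(nframes)[:, None]  # broadcasts against ghost_map
np.add.at(force, (frame_idx, ghost_map[None, :]), force[:, nloc:])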
1 change: 0 additions & 1 deletion deepmd/tf/loss/ener.py
@@ -120,7 +120,6 @@ def __init__(
"atom_pref", 1, atomic=True, must=False, high_prec=False, repeat=3
)
# drdq: the partial derivative of atomic coordinates w.r.t. generalized coordinates
-# TODO: could numb_generalized_coord decided from the training data?
if self.has_gf > 0:
add_data_requirement(
"drdq",
1 change: 0 additions & 1 deletion deepmd/tf/model/linear.py
@@ -54,7 +54,6 @@ def __init__(self, models: List[dict], weights: List[float], **kwargs):
self.weights = [1 / len(models) for _ in range(len(models))]
elif weights == "sum":
self.weights = [1 for _ in range(len(models))]
-# TODO: add more weights, for example, so-called committee models
else:
raise ValueError(f"Invalid weights {weights}")

2 changes: 0 additions & 2 deletions deepmd/tf/train/trainer.py
@@ -296,8 +296,6 @@ def build(self, data=None, stop_batch=0, origin_type_map=None, suffix=""):
)

# neighbor_stat is moved to train.py as duplicated
-# TODO: this is a simple fix but we should have a clear
-# architecture to call neighbor stat
else:
self.model.enable_compression()

2 changes: 0 additions & 2 deletions deepmd/tf/utils/batch_size.py
@@ -35,6 +35,4 @@ def is_oom_error(self, e: Exception) -> bool:
e : Exception
Exception
"""
-# TODO: it's very slow to catch OOM error; I don't know what TF is doing here
-# but luckily we only need to catch once
return isinstance(e, (tf.errors.ResourceExhaustedError, OutOfMemoryError))
2 changes: 1 addition & 1 deletion deepmd/tf/utils/finetune.py
@@ -100,7 +100,7 @@ def replace_model_params_with_pretrained_model(
):
target_para = pretrained_jdata["model"][config_key]
cur_para = jdata["model"][config_key]
-# keep some params that are irrelevant to model structures (need to discuss) TODO
+# TODO: keep some params that are irrelevant to model structures (need to discuss)
if "trainable" in cur_para.keys():
target_para["trainable"] = cur_para["trainable"]
log.info(f"Change the '{config_key}' from {cur_para!s} to {target_para!s}.")
1 change: 0 additions & 1 deletion deepmd/tf/utils/graph.py
@@ -22,7 +22,6 @@
)


-# TODO (JZ): I think in this file we can merge some duplicated lines into one method...
def load_graph_def(model_file: str) -> Tuple[tf.Graph, tf.GraphDef]:
"""Load graph as well as the graph_def from the frozen model(model_file).
2 changes: 1 addition & 1 deletion deepmd/tf/utils/multi_init.py
@@ -164,7 +164,7 @@ def replace_model_params_with_frz_multi_model(
def _change_sub_config(jdata: Dict[str, Any], src_jdata: Dict[str, Any], sub_key: str):
target_para = src_jdata[sub_key]
cur_para = jdata[sub_key]
-# keep some params that are irrelevant to model structures (need to discuss) TODO
+# TODO: keep some params that are irrelevant to model structures (need to discuss)
if "trainable" in cur_para.keys():
target_para["trainable"] = cur_para["trainable"]
log.info(f"Change the '{sub_key}' from {cur_para!s} to {target_para!s}.")
2 changes: 0 additions & 2 deletions deepmd/tf/utils/update_sel.py
@@ -24,8 +24,6 @@ def neighbor_stat(self) -> Type[NeighborStat]:

def hook(self, min_nbor_dist, max_nbor_size):
# moved from trainer.py as duplicated
-# TODO: this is a simple fix but we should have a clear
-# architecture to call neighbor stat
tf.constant(
min_nbor_dist,
name="train_attr/min_nbor_dist",
1 change: 0 additions & 1 deletion deepmd/utils/batch_size.py
@@ -51,7 +51,6 @@ class AutoBatchSize(ABC):

def __init__(self, initial_batch_size: int = 1024, factor: float = 2.0) -> None:
# See also PyTorchLightning/pytorch-lightning#1638
-# TODO: discuss a proper initial batch size
self.current_batch_size = initial_batch_size
DP_INFER_BATCH_SIZE = int(os.environ.get("DP_INFER_BATCH_SIZE", 0))
if DP_INFER_BATCH_SIZE > 0:
1 change: 0 additions & 1 deletion deepmd/utils/data_system.py
@@ -670,7 +670,6 @@ def print_summary(
% (
_format_name_length(system_dirs[ii], sys_width),
natoms[ii],
-# TODO batch size * nbatches = number of structures
batch_size[ii],
nbatches[ii],
sys_probs[ii],
2 changes: 0 additions & 2 deletions deepmd/utils/path.py
@@ -39,7 +39,6 @@ def __new__(cls, path: str, mode: str = "r"):
return super().__new__(DPOSPath)
elif os.path.isfile(path.split("#")[0]):
# assume h5 if it is not dir
-# TODO: check if it is a real h5? or just check suffix?
return super().__new__(DPH5Path)
raise FileNotFoundError("%s not found" % path)
return super().__new__(cls)
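
The removed TODO asked whether to verify real HDF5 content instead of assuming it from "not a directory"; h5py ships a direct check, so one possible variant looks like the sketch below (a standalone helper, abbreviating the surrounding class logic):

import os

import h5py

def looks_like_h5(path: str) -> bool:
    """Hypothetical check replacing the bare isfile() assumption."""
    fname = path.split("#")[0]  # same '#' convention as in __new__ above
    return os.path.isfile(fname) and h5py.is_hdf5(fname)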
@@ -217,7 +216,6 @@ def glob(self, pattern: str) -> List["DPPath"]:
list of paths
"""
# currently DPOSPath will only derivative DPOSPath
-# TODO: discuss if we want to mix DPOSPath and DPH5Path?
return [type(self)(p, mode=self.mode) for p in self.path.glob(pattern)]

def rglob(self, pattern: str) -> List["DPPath"]:
