Finish model compression for se_r descriptor! #1361

Merged: 12 commits merged on Jan 11, 2022
6 changes: 4 additions & 2 deletions deepmd/descriptor/se_a.py
@@ -338,7 +338,7 @@ def enable_compression(self,

for ii in range(len(self.filter_neuron) - 1):
if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]:
raise RecursionError(
raise NotImplementedError(
"Model Compression error: descriptor neuron [%s] is not supported by model compression! "
"The size of the next layer of the neural network must be twice the size of the previous layer."
% ','.join([str(item) for item in self.filter_neuron])
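For reference, a minimal stand-alone sketch of the doubling rule this check enforces; the helper name and the example layer sizes below are illustrative only, not part of the PR:

```python
def check_filter_neuron(filter_neuron):
    """Raise if the embedding net cannot be tabulated: each layer must be twice the previous one."""
    for ii in range(len(filter_neuron) - 1):
        if filter_neuron[ii] * 2 != filter_neuron[ii + 1]:
            raise NotImplementedError(
                "Model Compression error: descriptor neuron [%s] is not supported by model compression!"
                % ",".join(str(item) for item in filter_neuron)
            )

check_filter_neuron([25, 50, 100])    # OK: 25 -> 50 -> 100 doubles at every step
# check_filter_neuron([20, 40, 60])   # would raise NotImplementedError (60 != 2 * 40)
```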
@@ -709,7 +709,7 @@ def _filter_lower(
xyz_scatter, nframes, natoms, type_embedding)
if self.compress:
raise RuntimeError('compression of type embedded descriptor is not supported at the moment')
# with (natom x nei_type_i) x out_size
# natom x 4 x outputs_size
if self.compress and (not is_exclude):
info = [self.lower, self.upper, self.upper * self.table_config[0], self.table_config[1], self.table_config[2], self.table_config[3]]
if self.type_one_side:
@@ -719,6 +719,7 @@ def _filter_lower(
return op_module.tabulate_fusion_se_a(tf.cast(self.table.data[net], self.filter_precision), info, xyz_scatter, tf.reshape(inputs_i, [natom, shape_i[1]//4, 4]), last_layer_size = outputs_size[-1])
else:
if (not is_exclude):
# with (natom x nei_type_i) x out_size
xyz_scatter = embedding_net(
xyz_scatter,
self.filter_neuron,
@@ -744,6 +745,7 @@ def _filter_lower(
# but if sel is zero
# [588 0] -> [147 0 4] incorrect; the correct one is [588 0 4]
# So we need to explicitly assign the shape to tf.shape(inputs_i)[0] instead of -1
# natom x 4 x outputs_size
return tf.matmul(tf.reshape(inputs_i, [natom, shape_i[1]//4, 4]), xyz_scatter, transpose_a = True)


63 changes: 62 additions & 1 deletion deepmd/descriptor/se_r.py
@@ -8,6 +8,8 @@
from deepmd.env import GLOBAL_NP_FLOAT_PRECISION
from deepmd.env import op_module
from deepmd.env import default_tf_session_config
from deepmd.utils.tabulate import DPTabulate
from deepmd.utils.graph import load_graph_def, get_tensor_by_name_from_graph
from deepmd.utils.network import embedding_net, embedding_net_rand_seed_shift
from deepmd.utils.sess import run_sess
from .descriptor import Descriptor
@@ -118,6 +120,7 @@ def __init__ (self,
self.useBN = False
self.davg = None
self.dstd = None
self.compress=False
self.embedding_net_variables = None

self.place_holders = {}
@@ -229,6 +232,60 @@ def compute_input_stats (self,
self.davg = np.array(all_davg)
self.dstd = np.array(all_dstd)

def enable_compression(self,
min_nbor_dist : float,
model_file : str = 'frozon_model.pb',
table_extrapolate : float = 5,
table_stride_1 : float = 0.01,
table_stride_2 : float = 0.1,
check_frequency : int = -1,
suffix : str = "",
) -> None:
"""
Receive the statistics (distance, max_nbor_size and env_mat_range) of the training data.

Parameters
----------
min_nbor_dist
The nearest distance between atoms
model_file
The original frozen model, which will be compressed by the program
table_extrapolate
The scale of model extrapolation
table_stride_1
The uniform stride of the first table
table_stride_2
The uniform stride of the second table
check_frequency
The overflow check frequency
suffix : str, optional
The suffix of the scope
"""
assert (
not self.filter_resnet_dt
), "Model compression error: descriptor resnet_dt must be false!"

for ii in range(len(self.filter_neuron) - 1):
if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]:
raise NotImplementedError(
"Model Compression error: descriptor neuron [%s] is not supported by model compression! "
"The size of the next layer of the neural network must be twice the size of the previous layer."
% ','.join([str(item) for item in self.filter_neuron])
)

self.compress = True
self.table = DPTabulate(
self, self.filter_neuron, model_file, activation_fn = self.filter_activation_fn, suffix=suffix)
self.table_config = [table_extrapolate, table_stride_1, table_stride_2, check_frequency]
self.lower, self.upper \
= self.table.build(min_nbor_dist,
table_extrapolate,
table_stride_1,
table_stride_2)

graph, _ = load_graph_def(model_file)
self.davg = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_avg' % suffix)
self.dstd = get_tensor_by_name_from_graph(graph, 'descrpt_attr%s/t_std' % suffix)

def build (self,
coord_ : tf.Tensor,
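As context for this hunk, a hedged sketch of how the new `enable_compression` method might be driven directly; the constructor arguments and the frozen-model path below are assumptions for illustration (in practice the `dp compress` entry point wires this up):

```python
import os
from deepmd.descriptor.se_r import DescrptSeR

# Hypothetical se_r descriptor with a compressible (doubling) embedding net.
descrpt = DescrptSeR(rcut=6.0, rcut_smth=0.5, sel=[46, 92], neuron=[25, 50, 100])

model_file = "frozen_model.pb"  # assumed path to an already-frozen model
if os.path.isfile(model_file):
    # Builds the spline tables via DPTabulate and reloads davg/dstd from the graph.
    descrpt.enable_compression(
        min_nbor_dist=0.9,        # nearest neighbor distance observed in the training data
        model_file=model_file,
        table_extrapolate=5,
        table_stride_1=0.01,
        table_stride_2=0.1,
    )
```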
@@ -476,7 +533,11 @@ def _filter_r(self,
shape_i = inputs_i.get_shape().as_list()
# with (natom x nei_type_i) x 1
xyz_scatter = tf.reshape(inputs_i, [-1, 1])
if (type_input, type_i) not in self.exclude_types:
if self.compress and ((type_input, type_i) not in self.exclude_types):
info = [self.lower, self.upper, self.upper * self.table_config[0], self.table_config[1], self.table_config[2], self.table_config[3]]
net = 'filter_' + str(type_input) + '_net_' + str(type_i)
xyz_scatter = op_module.tabulate_fusion_se_r(tf.cast(self.table.data[net], self.filter_precision), info, inputs_i, last_layer_size = outputs_size[-1])
elif (type_input, type_i) not in self.exclude_types:
xyz_scatter = embedding_net(xyz_scatter,
self.filter_neuron,
self.filter_precision,
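Conceptually, `tabulate_fusion_se_r` replaces the per-neighbor embedding-net evaluation with a lookup in the table built by `DPTabulate`. The following is only a rough numpy illustration of that idea (the real custom op interpolates piecewise-polynomial spline coefficients in its kernel; everything named here is illustrative):

```python
import numpy as np

def tabulated_embedding(xx, table, lower, stride):
    """Rough sketch: map scaled radial inputs xx to precomputed embedding outputs.

    `table` holds embedding-net outputs evaluated on a uniform grid that starts at
    `lower` with spacing `stride`; the actual op interpolates between grid points,
    while this sketch only does nearest-grid-point lookup for illustration.
    """
    idx = np.clip(((xx - lower) / stride).astype(int), 0, table.shape[0] - 1)
    return table[idx]

grid_points, last_layer_size = 1221, 100
table = np.random.rand(grid_points, last_layer_size)      # stand-in for the tabulated net
out = tabulated_embedding(np.array([0.3, 0.7]), table, lower=-1.0, stride=0.01)
print(out.shape)                                           # (2, 100): one row per neighbor
```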
2 changes: 1 addition & 1 deletion deepmd/descriptor/se_t.py
@@ -264,7 +264,7 @@ def enable_compression(self,

for ii in range(len(self.filter_neuron) - 1):
if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]:
raise RecursionError(
raise NotImplementedError(
"Model Compression error: descriptor neuron [%s] is not supported by model compression! "
"The size of the next layer of the neural network must be twice the size of the previous layer."
% ','.join([str(item) for item in self.filter_neuron])
91 changes: 82 additions & 9 deletions deepmd/utils/tabulate.py
@@ -82,20 +82,40 @@ def __init__(self,
self.sub_graph, self.sub_graph_def = self._load_sub_graph()
self.sub_sess = tf.Session(graph = self.sub_graph)

try:
self.sel_a = self.graph.get_operation_by_name('ProdEnvMatA').get_attr('sel_a')
self.prod_env_mat_op = self.graph.get_operation_by_name ('ProdEnvMatA')
except Exception:
self.sel_a = self.graph.get_operation_by_name('DescrptSeA').get_attr('sel_a')
self.prod_env_mat_op = self.graph.get_operation_by_name ('DescrptSeA')
if isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
try:
self.sel_a = self.graph.get_operation_by_name('ProdEnvMatR').get_attr('sel')
self.prod_env_mat_op = self.graph.get_operation_by_name ('ProdEnvMatR')
except KeyError:
self.sel_a = self.graph.get_operation_by_name('DescrptSeR').get_attr('sel')
self.prod_env_mat_op = self.graph.get_operation_by_name ('DescrptSeR')
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeA):
try:
self.sel_a = self.graph.get_operation_by_name('ProdEnvMatA').get_attr('sel_a')
self.prod_env_mat_op = self.graph.get_operation_by_name ('ProdEnvMatA')
except KeyError:
self.sel_a = self.graph.get_operation_by_name('DescrptSeA').get_attr('sel_a')
self.prod_env_mat_op = self.graph.get_operation_by_name ('DescrptSeA')
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
Review comment (Member):
I think DescrptSeA and DescrptSeT are the same here?

Reply (Contributor, author):
Yes, they are the same. But considering the possible changes later, I have explicitly distinguished them.

try:
self.sel_a = self.graph.get_operation_by_name('ProdEnvMatA').get_attr('sel_a')
self.prod_env_mat_op = self.graph.get_operation_by_name ('ProdEnvMatA')
except KeyError:
self.sel_a = self.graph.get_operation_by_name('DescrptSeA').get_attr('sel_a')
self.prod_env_mat_op = self.graph.get_operation_by_name ('DescrptSeA')
else:
raise RuntimeError("Unsupported descriptor")

self.davg = get_tensor_by_name_from_graph(self.graph, f'descrpt_attr{self.suffix}/t_avg')
self.dstd = get_tensor_by_name_from_graph(self.graph, f'descrpt_attr{self.suffix}/t_std')
self.ntypes = get_tensor_by_name_from_graph(self.graph, 'descrpt_attr/ntypes')


self.rcut = self.prod_env_mat_op.get_attr('rcut_r')
self.rcut_smth = self.prod_env_mat_op.get_attr('rcut_r_smth')
if isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
self.rcut = self.prod_env_mat_op.get_attr('rcut')
self.rcut_smth = self.prod_env_mat_op.get_attr('rcut_smth')
else:
self.rcut = self.prod_env_mat_op.get_attr('rcut_r')
self.rcut_smth = self.prod_env_mat_op.get_attr('rcut_r_smth')

self.embedding_net_nodes = get_embedding_net_nodes_from_graph_def(self.graph_def, suffix=self.suffix)

@@ -172,6 +192,21 @@ def build(self,
net = "filter_" + str(ii) + "_net_" + str(jj)
self._build_lower(net, xx, idx, upper, lower, stride0, stride1, extrapolate)
idx += 1
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
xx = np.arange(lower, upper, stride0, dtype = self.data_type)
xx = np.append(xx, np.arange(upper, extrapolate * upper, stride1, dtype = self.data_type))
xx = np.append(xx, np.array([extrapolate * upper], dtype = self.data_type))
self.nspline = int((upper - lower) / stride0 + (extrapolate * upper - upper) / stride1)
for ii in range(self.table_size):
if self.type_one_side or (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
if self.type_one_side:
net = "filter_-1_net_" + str(ii)
else:
net = "filter_" + str(ii // self.ntypes) + "_net_" + str(ii % self.ntypes)
self._build_lower(net, xx, ii, upper, lower, stride0, stride1, extrapolate)
else:
raise RuntimeError("Unsupported descriptor")

return lower, upper

def _build_lower(self, net, xx, idx, upper, lower, stride0, stride1, extrapolate):
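The grid assembled above is dense inside the training range and coarse in the extrapolation region; a small self-contained sketch of the same construction with example numbers (all values are illustrative):

```python
import numpy as np

lower, upper = -1.0, 8.0        # example env-mat range (illustrative)
stride0, stride1 = 0.01, 0.1    # fine stride inside [lower, upper], coarse stride beyond
extrapolate = 5

xx = np.arange(lower, upper, stride0)                               # dense segment
xx = np.append(xx, np.arange(upper, extrapolate * upper, stride1))  # extrapolation segment
xx = np.append(xx, np.array([extrapolate * upper]))                 # closing grid point

nspline = int((upper - lower) / stride0 + (extrapolate * upper - upper) / stride1)
print(len(xx), nspline)  # e.g. 1221 grid points and 1220 spline intervals
```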
@@ -185,6 +220,9 @@ def _build_lower(self, net, xx, idx, upper, lower, stride0, stride1, extrapolate
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
tt = np.full((self.nspline, self.last_layer_size), stride1)
tt[int((lower - extrapolate * lower) / stride1) + 1:(int((lower - extrapolate * lower) / stride1) + int((upper - lower) / stride0)), :] = stride0
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
tt = np.full((self.nspline, self.last_layer_size), stride1)
tt[:int((upper - lower) / stride0), :] = stride0
else:
raise RuntimeError("Unsupported descriptor")
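The se_r branch of `_build_lower` only has to mark which spline intervals use the fine stride; a toy illustration of that slicing (values reused from the sketch above, all illustrative):

```python
import numpy as np

lower, upper, stride0, stride1 = -1.0, 8.0, 0.01, 0.1
extrapolate, last_layer_size = 5, 100
nspline = int((upper - lower) / stride0 + (extrapolate * upper - upper) / stride1)

tt = np.full((nspline, last_layer_size), stride1)   # coarse stride everywhere by default
tt[: int((upper - lower) / stride0), :] = stride0   # fine stride inside [lower, upper]
print(tt.shape, tt[0, 0], tt[-1, 0])                # (1220, 100) 0.01 0.1
```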

@@ -225,6 +263,18 @@ def _get_bias(self):
for jj in range(ii, self.ntypes):
node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}_{jj}"]
bias["layer_" + str(layer)].append(tf.make_ndarray(node))
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
if self.type_one_side:
for ii in range(0, self.ntypes):
node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/bias_{layer}_{ii}"]
bias["layer_" + str(layer)].append(tf.make_ndarray(node))
else:
for ii in range(0, self.ntypes * self.ntypes):
if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
node = self.embedding_net_nodes[f"filter_type_{ii // self.ntypes}{self.suffix}/bias_{layer}_{ii % self.ntypes}"]
bias["layer_" + str(layer)].append(tf.make_ndarray(node))
else:
bias["layer_" + str(layer)].append(np.array([]))
Review comment (Member): Please add an else branch to raise an error for unsupported descriptors.

return bias

def _get_matrix(self):
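The node names iterated over in the se_r branch follow a fixed pattern; the sketch below lists the keys expected in `embedding_net_nodes` for the one-side and two-side cases (the ntypes, layer count and empty suffix are chosen for illustration):

```python
ntypes, n_layers, suffix = 2, 3, ""

# type_one_side: one embedding net per neighbor type.
one_side_keys = [
    f"filter_type_all{suffix}/bias_{layer}_{ii}"
    for layer in range(1, n_layers + 1)
    for ii in range(ntypes)
]

# two-side: one embedding net per (center type, neighbor type) pair.
two_side_keys = [
    f"filter_type_{ii // ntypes}{suffix}/bias_{layer}_{ii % ntypes}"
    for layer in range(1, n_layers + 1)
    for ii in range(ntypes * ntypes)
]

print(one_side_keys[0], two_side_keys[0])  # filter_type_all/bias_1_0 filter_type_0/bias_1_0
```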
@@ -248,6 +298,18 @@ def _get_matrix(self):
for jj in range(ii, self.ntypes):
node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}_{jj}"]
matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
if self.type_one_side:
for ii in range(0, self.ntypes):
node = self.embedding_net_nodes[f"filter_type_all{self.suffix}/matrix_{layer}_{ii}"]
matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
else:
for ii in range(0, self.ntypes * self.ntypes):
if (ii // self.ntypes, ii % self.ntypes) not in self.exclude_types:
node = self.embedding_net_nodes[f"filter_type_{ii // self.ntypes}{self.suffix}/matrix_{layer}_{ii % self.ntypes}"]
matrix["layer_" + str(layer)].append(tf.make_ndarray(node))
else:
matrix["layer_" + str(layer)].append(np.array([]))
Review comment (Member): Please add an else branch to raise an error for unsupported descriptors.

return matrix

# one-by-one executions
@@ -317,6 +379,9 @@ def _get_env_mat_range(self,
var = np.square(sw / (min_nbor_dist * self.dstd[:, 1:4]))
lower = np.min(-var)
upper = np.max(var)
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
lower = np.min(-self.davg[:, 0] / self.dstd[:, 0])
upper = np.max(((1 / min_nbor_dist) * sw - self.davg[:, 0]) / self.dstd[:, 0])
Review comment (Member): Please add an else branch to raise an error for unsupported descriptors.

log.info('training data with lower boundary: ' + str(lower))
log.info('training data with upper boundary: ' + str(upper))
return math.floor(lower), math.ceil(upper)
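For the radial descriptor the table bounds come from the scaled inverse distance; a small sketch of the same arithmetic on toy statistics (`sw` stands for the value of the smooth switching function at `min_nbor_dist`, computed elsewhere in this class; all numbers are illustrative):

```python
import math
import numpy as np

# Toy per-type statistics: column 0 of davg/dstd is the radial channel used by se_r.
davg = np.array([[0.05], [0.02]])
dstd = np.array([[0.12], [0.10]])
min_nbor_dist = 0.9
sw = 1.0  # switching-function value at min_nbor_dist (assumed to be 1 here)

lower = np.min(-davg[:, 0] / dstd[:, 0])
upper = np.max(((1.0 / min_nbor_dist) * sw - davg[:, 0]) / dstd[:, 0])
print(math.floor(lower), math.ceil(upper))  # integer bounds handed to the table builder
```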
@@ -342,6 +407,10 @@ def _get_layer_size(self):
layer_size = len(self.embedding_net_nodes) // (self.ntypes * 2)
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
layer_size = len(self.embedding_net_nodes) // int(comb(self.ntypes + 1, 2) * 2)
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
layer_size = len(self.embedding_net_nodes) // ((self.ntypes * self.ntypes - len(self.exclude_types)) * 2)
if self.type_one_side :
layer_size = len(self.embedding_net_nodes) // (self.ntypes * 2)
return layer_size
Review comment (Member): Please add an else branch to raise an error for unsupported descriptors.


def _get_table_size(self):
@@ -352,6 +421,10 @@ def _get_table_size(self):
table_size = self.ntypes
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeT):
table_size = int(comb(self.ntypes + 1, 2))
elif isinstance(self.descrpt, deepmd.descriptor.DescrptSeR):
table_size = self.ntypes * self.ntypes
if self.type_one_side :
table_size = self.ntypes
Review comment (Member): Please add an else branch to raise an error for unsupported descriptors.

Reply (Contributor, author): OK~

return table_size

def _get_data_type(self):
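Putting the two helpers above together, the se_r bookkeeping works out as in this small example (the node count is assumed; everything here is illustrative):

```python
ntypes = 3
exclude_types = set()   # no excluded type pairs in this example
type_one_side = False
n_nodes = 36            # matrix + bias nodes found in the frozen graph (assumed)

table_size = ntypes if type_one_side else ntypes * ntypes                 # 9 tables
if type_one_side:
    layer_size = n_nodes // (ntypes * 2)
else:
    layer_size = n_nodes // ((ntypes * ntypes - len(exclude_types)) * 2)  # 36 // 18 = 2
print(table_size, layer_size)  # 9 2
```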
2 changes: 1 addition & 1 deletion doc/freeze/compress.md
@@ -82,7 +82,7 @@ The model compression interface requires the version of deepmd-kit used in origi

**Acceptable descriptor type**

Note only descriptors with `se_e2_a` or `se_e3` type are supported by the model compression feature. Hybrid mixed with above descriptors is also supported.
Descriptors with `se_e2_a`, `se_e3`, or `se_e2_r` types are supported by the model compression feature. A hybrid descriptor mixing the above descriptors is also supported.


**Available activation functions for descriptor:**