refactor(tailor): use different trim logic (#100)
* refactor(tailor): use different trim logic

* refactor(tailor): use different trim logic

* refactor(tailor): use different trim logic for paddle, pytorch

* refactor(tailor): use different trim logic for paddle, pytorch

* refactor(tailor): use different trim logic for keras

* fix(tailor): fix keras tailor

* fix(tailor): fix keras tailor

* fix(tailor): fix keras tailor

* fix(tailor): fix keras tailor

* fix(tailor): fix paddle & torch tailor

* fix(tailor): fix paddle & torch tailor
hanxiao committed Oct 7, 2021
1 parent 82c2cc8 commit c06292c
Showing 8 changed files with 330 additions and 562 deletions.
6 changes: 3 additions & 3 deletions finetuner/tailor/__init__.py
@@ -19,10 +19,10 @@ def convert(
 def convert(
     model: AnyDNN,
     input_size: Tuple[int, ...],
-    freeze: bool = False,
+    input_dtype: str = 'float32',
     embedding_layer_name: Optional[str] = None,
     output_dim: Optional[int] = None,
-    input_dtype: str = 'float32',
+    freeze: bool = False,
 ) -> AnyDNN:
     ...
@@ -43,4 +43,4 @@ def convert(model: AnyDNN, **kwargs) -> AnyDNN:
 
         ft = PaddleTailor
 
-    return ft(model, **kwargs)().model
+    return ft(model, **kwargs).convert(**kwargs)
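
For orientation, the hunks above change the public entry point from "construct the tailor, call it, read back .model" to "construct once, then ask convert() for the embedding model". A rough sketch of the intended call after this change follows; the model and parameter values are illustrative, and how the shared **kwargs are split between the backend tailor's constructor and its convert() is not visible in this diff:

    from finetuner.tailor import convert

    model = ...  # an existing DNN (e.g. a tf.keras.Model), defined elsewhere

    embed_model = convert(
        model,                      # any supported Keras / PyTorch / Paddle model
        input_size=(784,),          # forwarded to the backend tailor
        input_dtype='float32',
        embedding_layer_name=None,  # None: fall back to the last candidate embedding layer
        output_dim=32,              # attach a trainable dense head of this size
        freeze=True,                # freeze the original weights
    )
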
72 changes: 13 additions & 59 deletions finetuner/tailor/base.py
@@ -10,34 +10,32 @@ class BaseTailor(abc.ABC):
     def __init__(
         self,
         model: AnyDNN,
-        freeze: bool = False,
-        embedding_layer_name: Optional[str] = None,
-        output_dim: Optional[int] = None,
         *args,
         **kwargs,
     ):
         """Tailor converts a general DNN model into an embedding model.
 
         :param model: a general DNN model
-        :param freeze: if set, then freeze the weights in :py:attr:`.model`
-        :param embedding_layer_name: the name of the layer that is used for output embeddings. All layers after that layer
-            will be removed. When not given, then the last layer listed in :py:attr:`.embedding_layers` will be used.
         :param args:
         :param kwargs:
         """
         self._model = model
-        self._freeze = freeze
-        self._embedding_layer_name = embedding_layer_name
-        self._output_dim = output_dim
 
     @abc.abstractmethod
-    def _freeze_weights(self) -> 'BaseTailor':
-        """Freeze the weights of :py:attr:`.model`."""
-        ...
-
-    @abc.abstractmethod
-    def _trim(self) -> 'BaseTailor':
-        """Trim :py:attr:`.model` to an embedding model."""
+    def convert(
+        self,
+        embedding_layer_name: Optional[str] = None,
+        output_dim: Optional[int] = None,
+        freeze: bool = False,
+    ) -> AnyDNN:
+        """Convert a general model from :py:attr:`.model` to an embedding model.
+
+        :param embedding_layer_name: the name of the layer that is used for output embeddings. All layers *after* that layer
+            will be removed. When set to ``None``, then the last layer listed in :py:attr:`.embedding_layers` will be used.
+        :param output_dim: the dimensionality of the embedding output.
+        :param freeze: if set, then freeze the weights in :py:attr:`.model`.
+        """
         ...
 
     @property
@@ -48,47 +46,3 @@ def embedding_layers(self) -> EmbeddingLayerInfoType:
         :return: layers info as :class:`list` of :class:`dict`.
         """
         ...
-
-    @property
-    def model(self) -> AnyDNN:
-        """Get the DNN model of this object.
-        :return: The DNN model.
-        """
-        return self._model
-
-    @property
-    @abc.abstractmethod
-    def output_dim(self) -> int:
-        """Get the user-defined output dimensionality.
-        :return: Output dimension of the attached linear layer
-        """
-        ...
-
-    @output_dim.setter
-    def output_dim(self, dim: int):
-        """Set a new output dimension for the model.
-        if set, the :py:attr:`self.model`'s attached dense layer will have this dim.
-        :param dim: Dimensionality of the attached linear layer.
-        """
-        self._output_dim = dim
-
-    @abc.abstractmethod
-    def _attach_dense_layer(self):
-        """Attach a dense layer to the end of the parsed model.
-        .. note::
-            The attached dense layer have the same shape as the last layer
-            in the parsed model.
-            The attached dense layer will ignore the :py:attr:`freeze`, this
-            layer always trainable.
-        """
-        ...
-
-    def __call__(self, *args, **kwargs):
-        if self._freeze:
-            self._trim()._freeze_weights()._attach_dense_layer()
-        else:
-            self._trim()._attach_dense_layer()
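
With the abstract base reduced as above, a backend-specific tailor now only needs to expose a read-only embedding_layers property and a convert() method. The skeleton below is an illustration of that contract only; it is not code from this diff:

    class MyTailor(BaseTailor):

        @property
        def embedding_layers(self) -> EmbeddingLayerInfoType:
            # inspect self._model and describe every layer that could serve as
            # the embedding output (name, output features, layer index, ...)
            ...

        def convert(
            self,
            embedding_layer_name: Optional[str] = None,
            output_dim: Optional[int] = None,
            freeze: bool = False,
        ) -> AnyDNN:
            # cut self._model at the chosen layer, optionally attach a dense head
            # of size output_dim, optionally freeze the original weights, and
            # return the resulting embedding model
            ...
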
95 changes: 48 additions & 47 deletions finetuner/tailor/keras/__init__.py
@@ -1,9 +1,12 @@
 import copy
+from typing import Optional
 
-from jina.helper import cached_property
 from tensorflow.keras import Model
 from tensorflow.keras.layers import Dense
+from jina.helper import cached_property
 
 from ..base import BaseTailor
-from ...helper import EmbeddingLayerInfoType
+from ...helper import EmbeddingLayerInfoType, AnyDNN
 
 
 class KerasTailor(BaseTailor):
@@ -13,16 +16,21 @@ def embedding_layers(self) -> EmbeddingLayerInfoType:
         :return: layers info as :class:`list` of :class:`dict`.
         """
-        results = []
-        for idx, layer in enumerate(self._model.layers):
+
+        def _get_shape(layer):
             try:
-                output_shape = layer.output_shape
-            except AttributeError:
-                output_shape = 'multiple'
-            except RuntimeError:  # output_shape unknown in Eager mode.
-                output_shape = '?'
+                return layer.output_shape
+            except:
+                pass  #: return none when
 
-            if len(output_shape) != 2:
+        results = []
+        for idx, layer in enumerate(self._model.layers):
+            output_shape = _get_shape(layer)
+            if (
+                not output_shape
+                or len(output_shape) != 2
+                or not isinstance(output_shape[-1], int)
+            ):
                 continue
             else:
                 if not layer.built and not getattr(layer, '_is_graph_network', False):
@@ -36,54 +44,47 @@ def embedding_layers(self) -> EmbeddingLayerInfoType:
                 {
                     'name': layer.name,
                     'cls_name': layer.__class__.__name__,
-                    'output_shape': output_shape,
-                    'params': params,
+                    'output_features': output_shape[-1],
+                    'nb_params': params,
                     'layer_idx': idx,
+                    'module_name': layer.name,  # duplicate as `name` to make different backends consistent
                 }
             )
         return results
 
-    @property
-    def output_dim(self) -> int:
-        """Get the user-defined output dimensionality.
-
-        :return: Output dimension of the attached linear layer
-
-        .. note::
-            if user didn't specify :py:attr:`output_dim`, return model's last layer output dim.
-        """
-        return self._output_dim or self._model.output_shape[1]
-
-    def _trim(self) -> 'KerasTailor':
-        if not self._embedding_layer_name:
-            index = self.embedding_layers[-1]['layer_idx']
-        else:
-            _embed_layers = {l['name']: l for l in self.embedding_layers}
-            try:
-                index = _embed_layers[self._embedding_layer_name]['layer_idx']
-            except KeyError as e:
-                raise e
-        self._model = Model(self._model.input, self._model.layers[index - 1].output)
-        return self
-
-    def _freeze_weights(self) -> 'KerasTailor':
-        """Freeze an arbitrary model to make layers not trainable."""
-        for layer in self._model.layers:
-            layer.trainable = False
-        return self
-
-    def _attach_dense_layer(self):
-        """Attach a dense layer to the end of the parsed model.
-
-        .. note::
-            The attached dense layer have the same shape as the last layer
-            in the parsed model.
-            The attached dense layer will ignore the :py:attr:`freeze`, this
-            layer always trainable.
-        """
-        if self._output_dim:
-            out = Dense(self._output_dim, activation=None, use_bias=True)(
-                self._model.layers[-1].output
-            )
-            self._model = Model(self._model.input, out)
+    def convert(
+        self,
+        embedding_layer_name: Optional[str] = None,
+        output_dim: Optional[int] = None,
+        freeze: bool = False,
+    ) -> AnyDNN:
+
+        if embedding_layer_name:
+            _all_embed_layers = {l['name']: l for l in self.embedding_layers}
+            try:
+                _embed_layer = _all_embed_layers[embedding_layer_name]
+            except KeyError as e:
+                raise KeyError(
+                    f'`embedding_layer_name` must be one of {_all_embed_layers.keys()}, given {embedding_layer_name}'
+                ) from e
+        else:
+            # when not given, using the last layer
+            _embed_layer = self.embedding_layers[-1]
+
+        index = _embed_layer['layer_idx']
+
+        if output_dim:
+            out = Dense(output_dim)(self._model.layers[index].output)
+        else:
+            out = self._model.layers[index].output
+
+        model = Model(self._model.input, out)
+
+        if freeze:
+            for layer in model.layers:
+                layer.trainable = False
+
+            # the last layer must be trainable
+            model.layers[-1].trainable = True
+        return model
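
Putting the Keras changes together, a small end-to-end sketch of the new behaviour; the toy model, the layer name, and the dimensions are made up for illustration (default Keras layer naming is assumed):

    import tensorflow as tf

    from finetuner.tailor.keras import KerasTailor

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    tailor = KerasTailor(model)
    print(tailor.embedding_layers)       # candidate layers: name, output_features, nb_params, layer_idx, ...

    embed_model = tailor.convert(
        embedding_layer_name='dense_1',  # cut the network after the 64-dim layer
        output_dim=32,                   # attach a trainable Dense(32) head on top
        freeze=True,                     # the original layers become non-trainable
    )
    embed_model.summary()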
