This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

docs(tuner): add docstrings #148

Merged
merged 4 commits on Oct 19, 2021
36 changes: 36 additions & 0 deletions finetuner/tuner/__init__.py
@@ -38,6 +38,34 @@ def fit(
device: str = 'cpu',
**kwargs,
) -> TunerReturnType:
"""Finetune the model on the training data.

:param train_data: Data on which to train the model
:param eval_data: Data on which to evaluate the model at the end of each epoch
:param epoch: Number of epochs to train the model
:param batch_size: The batch size to use for training and evaluation
:param learning_rate: Learning rate to use in training
:param optimizer: Which optimizer to use in training. Supported
values/optimizers are:
- ``"adam"`` for the Adam optimizer
- ``"rmsprop"`` for the RMSProp optimizer
- ``"sgd"`` for the SGD optimizer with momentum
:param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
supported arguments, together with their default values, are:
- ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
- ``"rmsprop"``::

{
'rho': 0.99,
'momentum': 0.0,
'epsilon': 1e-08,
'centered': False,
}

- ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
:param device: The device to which to move the model. Supported options are
``"cpu"`` and ``"cuda"`` (for GPU)
"""
ft = get_tuner_class(embed_model)
if catalog is None:
train_data = DocumentArray(train_data() if callable(train_data) else train_data)
@@ -60,6 +88,14 @@ def fit(


def save(embed_model: AnyDNN, model_path: str, *args, **kwargs) -> None:
"""Save the embedding model.

:param embed_model: The embedding model to save
:param model_path: Path to the file or folder where the model will be saved
:param args: Arguments to pass to the framework-specific tuner's ``save`` method
:param kwargs: Keyword arguments to pass to the framework-specific tuner's
``save`` method
"""
ft = get_tuner_class(embed_model)

ft(embed_model).save(model_path, *args, **kwargs)
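A minimal usage sketch of the ``fit`` and ``save`` helpers documented above, using placeholder model and data objects; the keyword names (e.g. ``epochs``) and defaults shown here should be checked against the actual signature rather than taken from this illustration::

    import finetuner.tuner as tuner

    # embed_model: any supported DNN (Keras / PyTorch / Paddle) producing embeddings.
    # train_docs / eval_docs: labeled DocumentArray-like data (placeholders here).
    stats = tuner.fit(
        embed_model,
        train_data=train_docs,
        eval_data=eval_docs,
        epochs=10,                # number of passes over the training data
        batch_size=256,
        learning_rate=1e-3,
        optimizer='rmsprop',      # one of 'adam', 'rmsprop', 'sgd'
        optimizer_kwargs={'rho': 0.99, 'momentum': 0.0, 'centered': False},
        device='cuda',            # or 'cpu'
    )

    # Delegates to the framework-specific tuner's ``save`` method.
    tuner.save(embed_model, 'tuned_model')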
7 changes: 7 additions & 0 deletions finetuner/tuner/base.py
@@ -27,6 +27,13 @@ def __init__(
loss: Union[AnyDNN, str] = 'CosineSiameseLoss',
**kwargs,
):
"""Create the tuner instance.

:param embed_model: Model that produces embeddings from inputs
:param loss: Either the loss object instance, or the name of the loss function.
Currently available losses are ``CosineSiameseLoss``,
``EuclideanSiameseLoss``, ``EuclideanTripletLoss`` and ``CosineTripletLoss``
"""
self._embed_model = embed_model
self._loss = self._get_loss(loss)
self._train_data_len = 0
46 changes: 46 additions & 0 deletions finetuner/tuner/keras/__init__.py
@@ -16,12 +16,15 @@

class KerasTuner(BaseTuner):
def _get_loss(self, loss: Union[BaseLoss, str]):
"""Get the loss layer."""

if isinstance(loss, str):
return getattr(losses, loss)()
elif isinstance(loss, BaseLoss):
return loss

def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
"""Get tensorflow ``Dataset`` from the input data. """

ds = get_dataset(datasets, self.arity)
input_shape = self.embed_model.input_shape[1:]
@@ -45,6 +48,8 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
def _get_optimizer(
self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
) -> Optimizer:
"""Get the optimizer for training."""

optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

if optimizer == 'adam':
@@ -59,6 +64,8 @@ def _get_optimizer(
return keras.optimizers.SGD(learning_rate=learning_rate, **optimizer_kwargs)

def _train(self, data, optimizer, description: str):
"""Train the model on given labeled data"""

losses = []

log_generator = LogGenerator('T', losses)
@@ -88,6 +95,7 @@ def _train(self, data, optimizer, description: str):
return losses

def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
"""Evaluate the model on given labeled data"""

losses = []

@@ -120,6 +128,34 @@ def fit(
device: str = 'cpu',
**kwargs,
) -> TunerStats:
"""Finetune the model on the training data.

:param train_data: Data on which to train the model
:param eval_data: Data on which to evaluate the model at the end of each epoch
:param epoch: Number of epochs to train the model
:param batch_size: The batch size to use for training and evaluation
:param learning_rate: Learning rate to use in training
:param optimizer: Which optimizer to use in training. Supported
values/optimizers are:
- ``"adam"`` for the Adam optimizer
- ``"rmsprop"`` for the RMSProp optimizer
- ``"sgd"`` for the SGD optimizer with momentum
:param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
supported arguments, together with their default values, are:
- ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
- ``"rmsprop"``::

{
'rho': 0.99,
'momentum': 0.0,
'epsilon': 1e-08,
'centered': False,
}

- ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
:param device: The device to which to move the model. Supported options are
``"cpu"`` and ``"cuda"`` (for GPU)
"""

_train_data = self._get_data_loader(
inputs=train_data, batch_size=batch_size, shuffle=False
@@ -167,4 +203,14 @@ def get_embeddings(self, data: DocumentArrayLike):
doc.embedding = np.array(embed)

def save(self, *args, **kwargs):
"""Save the embedding model.

You need to pass the path at which to save the model either positionally in
``args`` or in ``kwargs`` under the ``filepath`` key.

:param args: Arguments to pass to the ``save`` method of the embedding model
:param kwargs: Keyword arguments to pass to the ``save`` method of the
embedding model
"""

self.embed_model.save(*args, **kwargs)
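The ``loss`` argument documented in ``base.py`` above is resolved by ``_get_loss``: a string is simply looked up on the framework's ``losses`` module and instantiated. A rough, stand-alone equivalent of that lookup, for illustration only, using the loss names listed in the base tuner docstring::

    from finetuner.tuner.keras import losses

    def resolve_loss(loss):
        # Accept either an already constructed loss layer or its class name.
        if isinstance(loss, str):
            return getattr(losses, loss)()  # e.g. 'CosineSiameseLoss' -> instance
        return loss

    loss_layer = resolve_loss('EuclideanTripletLoss')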
85 changes: 80 additions & 5 deletions finetuner/tuner/keras/losses.py
@@ -5,9 +5,27 @@


class CosineSiameseLoss(BaseLoss, Layer):
"""Computes the loss for a siamese network using cosine distance.

The loss for a pair of objects equals ``(target - cos_sim)^2``, where ``target``
should equal 1 when both objects belong to the same class and -1 when they
belong to different classes. ``cos_sim`` is the cosine similarity between the
embeddings of the two objects.

The final loss is the average over losses for all pairs of objects in the batch.
"""

arity = 2

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the first objects of the pair
- ``[N, D]`` tensor of embeddings of the second objects of the pair
- ``[N, ]`` tensor of target values
"""

l_emb, r_emb, target = inputs
normalize_a = tf.nn.l2_normalize(l_emb, axis=-1)
normalize_b = tf.nn.l2_normalize(r_emb, axis=-1)
@@ -16,13 +34,36 @@ def call(self, inputs, **kwargs):


class EuclideanSiameseLoss(BaseLoss, Layer):
"""Computes the loss for a siamese network using cosine distance.

This loss is also known as contrastive loss.

The loss being optimized equals::

[is_sim * dist + (1 - is_sim) * max(margin - dist, 0)]^2

where ``is_sim`` should equal 1 when both objects belong to the same class,
and 0 otherwise. ``dist`` is the euclidean distance between the embeddings of
the objects, and ``margin`` is a constant that determines how far apart
dissimilar objects should be pushed, which helps stabilize training.

The final loss is the average over losses for all pairs of objects in the batch.
"""

arity = 2

def __init__(self, margin: float = 1.0):
super().__init__()
self.margin = margin

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the first objects of the pair
- ``[N, D]`` tensor of embeddings of the second objects of the pair
- ``[N, ]`` tensor of target values
"""
l_emb, r_emb, target = inputs
eucl_dist = tf.reduce_sum(tf.math.squared_difference(l_emb, r_emb), axis=-1)
is_similar = tf.cast(target > 0, tf.float32)
@@ -35,36 +76,70 @@ def call(self, inputs, **kwargs):


class EuclideanTripletLoss(BaseLoss, Layer):
"""Compute the loss for a triplet network using euclidean distance.

The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
is the euclidean distance between the anchor embedding and positive embedding,
``dist_neg`` is the euclidean distance between the anchor and negative embedding,
and ``margin`` is the desired minimum gap between the anchor-negative and
anchor-positive distances.

The final loss is the average over losses for all triplets in the batch.
"""

arity = 3

def __init__(self, margin: float = 1.0, **kwargs):
super().__init__(**kwargs)
self._margin = margin

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the anchor objects
- ``[N, D]`` tensor of embeddings of the positive objects
- ``[N, D]`` tensor of embeddings of the negative objects
"""
anchor, positive, negative, _ = inputs

# Seems that tf.norm suffers from numeric instability as explained here
# https://github.com/tensorflow/tensorflow/issues/12071
dist_pos = tf.reduce_sum(tf.math.squared_difference(anchor, positive), axis=-1)
dist_neg = tf.reduce_sum(tf.math.squared_difference(anchor, negative), axis=-1)

dist_pos = tf.maximum(dist_pos, 1e-9)
dist_neg = tf.maximum(dist_neg, 1e-9)
dist_pos = tf.sqrt(tf.maximum(dist_pos, 1e-9))
dist_neg = tf.sqrt(tf.maximum(dist_neg, 1e-9))

return tf.reduce_mean(
tf.nn.relu(tf.sqrt(dist_pos) - tf.sqrt(dist_neg) + self._margin)
)
return tf.reduce_mean(tf.nn.relu(dist_pos - dist_neg + self._margin))


class CosineTripletLoss(BaseLoss, Layer):
"""Compute the loss for a triplet network using cosine distance.

The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
is the cosine distance between the anchor embedding and positive embedding,
``dist_neg`` is the cosine distance between the anchor and negative embedding, and
``margin`` is the desired minimum gap between the anchor-negative and
anchor-positive distances.

The final loss is the average over losses for all triplets in the batch.
"""

arity = 3

def __init__(self, margin: float = 1.0):
super().__init__()
self._margin = margin

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the anchor objects
- ``[N, D]`` tensor of embeddings of the positive objects
- ``[N, D]`` tensor of embeddings of the negative objects
"""
anchor, positive, negative, _ = inputs

# Seems that tf.norm suffers from numeric instability as explained here
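As a sanity check on the formulas documented above, the following snippet recomputes the ``CosineSiameseLoss`` and ``EuclideanTripletLoss`` values on toy embeddings with plain TensorFlow ops. It mirrors the math in the docstrings rather than calling the loss layers themselves (the real implementation avoids ``tf.norm`` for numerical stability, as noted in the diff)::

    import tensorflow as tf

    # Toy batch of two pairs of 3-dimensional embeddings.
    l_emb = tf.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
    r_emb = tf.constant([[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
    target = tf.constant([1.0, -1.0])  # 1 = same class, -1 = different class

    # CosineSiameseLoss: mean over (target - cos_sim)^2
    cos_sim = tf.reduce_sum(
        tf.nn.l2_normalize(l_emb, axis=-1) * tf.nn.l2_normalize(r_emb, axis=-1),
        axis=-1,
    )
    siamese_loss = tf.reduce_mean(tf.square(target - cos_sim))  # -> 0.5 here

    # EuclideanTripletLoss: mean over max(dist_pos - dist_neg + margin, 0)
    anchor = tf.constant([[0.0, 0.0, 0.0], [1.0, 1.0, 0.0]])
    positive = tf.constant([[0.1, 0.0, 0.0], [1.0, 0.9, 0.0]])
    negative = tf.constant([[1.0, 1.0, 0.0], [0.0, 0.0, 0.0]])
    margin = 1.0
    dist_pos = tf.norm(anchor - positive, axis=-1)
    dist_neg = tf.norm(anchor - negative, axis=-1)
    triplet_loss = tf.reduce_mean(tf.nn.relu(dist_pos - dist_neg + margin))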
45 changes: 45 additions & 0 deletions finetuner/tuner/paddle/__init__.py
@@ -16,12 +16,16 @@

class PaddleTuner(BaseTuner):
def _get_loss(self, loss: Union[BaseLoss, str]):
"""Get the loss layer."""

if isinstance(loss, str):
return getattr(losses, loss)()
elif isinstance(loss, BaseLoss):
return loss

def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
"""Get the paddle ``DataLoader`` from the input data. """

ds = get_dataset(datasets, self.arity)
return DataLoader(
dataset=ds(inputs=inputs, catalog=self._catalog),
@@ -32,6 +36,8 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
def _get_optimizer(
self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
) -> Optimizer:
"""Get the optimizer for training."""

params = self._embed_model.parameters()
optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

@@ -56,6 +62,8 @@ def _get_optimizer(
)

def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
"""Evaluate the model on given labeled data"""

self._embed_model.eval()

losses = []
@@ -78,6 +86,7 @@ def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
return losses

def _train(self, data, optimizer: Optimizer, description: str):
"""Train the model on given labeled data"""

self._embed_model.train()

@@ -119,6 +128,34 @@ def fit(
device: str = 'cpu',
**kwargs,
) -> TunerStats:
"""Finetune the model on the training data.

:param train_data: Data on which to train the model
:param eval_data: Data on which to evaluate the model at the end of each epoch
:param epoch: Number of epochs to train the model
:param batch_size: The batch size to use for training and evaluation
:param learning_rate: Learning rate to use in training
:param optimizer: Which optimizer to use in training. Supported
values/optimizers are:
- ``"adam"`` for the Adam optimizer
- ``"rmsprop"`` for the RMSProp optimizer
- ``"sgd"`` for the SGD optimizer with momentum
:param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
supported arguments, together with their default values, are:
- ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
- ``"rmsprop"``::

{
'rho': 0.99,
'momentum': 0.0,
'epsilon': 1e-08,
'centered': False,
}

- ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
:param device: The device to which to move the model. Supported options are
``"cpu"`` and ``"cuda"`` (for GPU)
"""

if device == 'cuda':
paddle.set_device('gpu:0')
@@ -161,4 +198,12 @@ def get_embeddings(self, data: DocumentArrayLike):
doc.embedding = np.array(embed)

def save(self, *args, **kwargs):
"""Save the embedding model.

You need to pass the path at which to save the model either positionally in
``args`` or in ``kwargs`` under the ``path`` key.

:param args: Arguments to pass to ``paddle.save`` function
:param kwargs: Keyword arguments to pass to ``paddle.save`` function
"""
paddle.save(self.embed_model.state_dict(), *args, **kwargs)
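Since the Paddle ``save`` above forwards the model's ``state_dict`` straight to ``paddle.save``, the target path can be given positionally. A hypothetical round trip, assuming a ``PaddleTuner`` instance named ``tuner`` and its ``embed_model``::

    import paddle

    # Persist the finetuned weights; Paddle conventionally uses a .pdparams suffix.
    tuner.save('tuned_embed_model.pdparams')

    # Later, restore the weights into a model with the same architecture.
    state_dict = paddle.load('tuned_embed_model.pdparams')
    embed_model.set_state_dict(state_dict)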