From 6fd3e1ea63ced61c32d1a12e4cb2d4ba76a8d870 Mon Sep 17 00:00:00 2001
From: Tadej Svetina
Date: Tue, 19 Oct 2021 16:20:22 +0200
Subject: [PATCH] docs(tuner): add docstrings (#148)

* docs(tuner): add docstrings

* fix: typo

* fix: apply suggestion

Co-authored-by: Wang Bo

Co-authored-by: Wang Bo
---
 finetuner/tuner/__init__.py         | 36 ++++++++++++
 finetuner/tuner/base.py             |  7 +++
 finetuner/tuner/keras/__init__.py   | 46 ++++++++++++++++
 finetuner/tuner/keras/losses.py     | 85 +++++++++++++++++++++++++++--
 finetuner/tuner/paddle/__init__.py  | 45 +++++++++++++++
 finetuner/tuner/paddle/losses.py    | 76 ++++++++++++++++++++++++++
 finetuner/tuner/pytorch/__init__.py | 45 +++++++++++++++
 finetuner/tuner/pytorch/losses.py   | 76 ++++++++++++++++++++++++++
 8 files changed, 411 insertions(+), 5 deletions(-)

diff --git a/finetuner/tuner/__init__.py b/finetuner/tuner/__init__.py
index d5ef27dc9..af1bb7684 100644
--- a/finetuner/tuner/__init__.py
+++ b/finetuner/tuner/__init__.py
@@ -38,6 +38,34 @@ def fit(
     device: str = 'cpu',
     **kwargs,
 ) -> TunerReturnType:
+    """Finetune the model on the training data.
+
+    :param train_data: Data on which to train the model
+    :param eval_data: Data on which to evaluate the model at the end of each epoch
+    :param epoch: Number of epochs to train the model
+    :param batch_size: The batch size to use for training and evaluation
+    :param learning_rate: Learning rate to use in training
+    :param optimizer: Which optimizer to use in training. Supported
+        values/optimizers are:
+        - ``"adam"`` for the Adam optimizer
+        - ``"rmsprop"`` for the RMSProp optimizer
+        - ``"sgd"`` for the SGD optimizer with momentum
+    :param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
+        supported arguments, together with their default values, are:
+        - ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
+        - ``"rmsprop"``::
+
+            {
+                'rho': 0.99,
+                'momentum': 0.0,
+                'epsilon': 1e-08,
+                'centered': False,
+            }
+
+        - ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
+    :param device: The device to which to move the model. Supported options are
+        ``"cpu"`` and ``"cuda"`` (for GPU)
+    """
     ft = get_tuner_class(embed_model)
     if catalog is None:
         train_data = DocumentArray(train_data() if callable(train_data) else train_data)
@@ -60,6 +88,14 @@ def fit(


 def save(embed_model: AnyDNN, model_path: str, *args, **kwargs) -> None:
+    """Save the embedding model.
+
+    :param embed_model: The embedding model to save
+    :param model_path: Path to file/folder where to save the model
+    :param args: Arguments to pass to framework-specific tuner's ``save`` method
+    :param kwargs: Keyword arguments to pass to framework-specific tuner's ``save``
+        method
+    """
     ft = get_tuner_class(embed_model)

     ft(embed_model).save(model_path, *args, **kwargs)
diff --git a/finetuner/tuner/base.py b/finetuner/tuner/base.py
index 5e4058cbb..d141b1915 100644
--- a/finetuner/tuner/base.py
+++ b/finetuner/tuner/base.py
@@ -27,6 +27,13 @@ def __init__(
         loss: Union[AnyDNN, str] = 'CosineSiameseLoss',
         **kwargs,
     ):
+        """Create the tuner instance.
+
+        :param embed_model: Model that produces embeddings from inputs
+        :param loss: Either the loss object instance, or the name of the loss function.
+            Currently available losses are ``CosineSiameseLoss``,
+            ``EuclideanSiameseLoss``, ``EuclideanTripletLoss`` and ``CosineTripletLoss``
+        """
         self._embed_model = embed_model
         self._loss = self._get_loss(loss)
         self._train_data_len = 0
diff --git a/finetuner/tuner/keras/__init__.py b/finetuner/tuner/keras/__init__.py
index 06d326a03..8394731e8 100644
--- a/finetuner/tuner/keras/__init__.py
+++ b/finetuner/tuner/keras/__init__.py
@@ -16,12 +16,15 @@ class KerasTuner(BaseTuner):
     def _get_loss(self, loss: Union[BaseLoss, str]):
+        """Get the loss layer."""
+
         if isinstance(loss, str):
             return getattr(losses, loss)()
         elif isinstance(loss, BaseLoss):
             return loss

     def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
+        """Get a tensorflow ``Dataset`` from the input data."""
+
         ds = get_dataset(datasets, self.arity)
         input_shape = self.embed_model.input_shape[1:]

@@ -45,6 +48,8 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
     def _get_optimizer(
         self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
     ) -> Optimizer:
+        """Get the optimizer for training."""
+
         optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

         if optimizer == 'adam':
@@ -59,6 +64,8 @@ def _get_optimizer(
         return keras.optimizers.SGD(learning_rate=learning_rate, **optimizer_kwargs)

     def _train(self, data, optimizer, description: str):
+        """Train the model on given labeled data"""
+
         losses = []

         log_generator = LogGenerator('T', losses)
@@ -88,6 +95,7 @@ def _train(self, data, optimizer, description: str):
         return losses

     def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
+        """Evaluate the model on given labeled data"""

         losses = []

@@ -120,6 +128,34 @@ def fit(
         device: str = 'cpu',
         **kwargs,
     ) -> TunerStats:
+        """Finetune the model on the training data.
+
+        :param train_data: Data on which to train the model
+        :param eval_data: Data on which to evaluate the model at the end of each epoch
+        :param epoch: Number of epochs to train the model
+        :param batch_size: The batch size to use for training and evaluation
+        :param learning_rate: Learning rate to use in training
+        :param optimizer: Which optimizer to use in training. Supported
+            values/optimizers are:
+            - ``"adam"`` for the Adam optimizer
+            - ``"rmsprop"`` for the RMSProp optimizer
+            - ``"sgd"`` for the SGD optimizer with momentum
+        :param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
+            supported arguments, together with their default values, are:
+            - ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
+            - ``"rmsprop"``::
+
+                {
+                    'rho': 0.99,
+                    'momentum': 0.0,
+                    'epsilon': 1e-08,
+                    'centered': False,
+                }
+
+            - ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
+        :param device: The device to which to move the model. Supported options are
+            ``"cpu"`` and ``"cuda"`` (for GPU)
+        """
         _train_data = self._get_data_loader(
             inputs=train_data, batch_size=batch_size, shuffle=False
         )
@@ -167,4 +203,14 @@ def get_embeddings(self, data: DocumentArrayLike):
             doc.embedding = np.array(embed)

     def save(self, *args, **kwargs):
+        """Save the embedding model.
+
+        You need to pass the path where to save the model in either ``args`` or
+        ``kwargs`` (for ``filepath`` key).
+
+        :param args: Arguments to pass to ``save`` method of the embedding model
+        :param kwargs: Keyword arguments to pass to ``save`` method of the embedding
+            model
+        """
+
         self.embed_model.save(*args, **kwargs)
diff --git a/finetuner/tuner/keras/losses.py b/finetuner/tuner/keras/losses.py
index fa04a2534..f37e2d5de 100644
--- a/finetuner/tuner/keras/losses.py
+++ b/finetuner/tuner/keras/losses.py
@@ -5,9 +5,27 @@
 class CosineSiameseLoss(BaseLoss, Layer):
+    """Computes the loss for a siamese network using cosine distance.
+
+    The loss for a pair of objects equals ``(target - cos_sim)^2``, where ``target``
+    should equal 1 when both objects belong to the same class, and -1 when they
+    belong to different classes. The ``cos_sim`` represents the cosine similarity
+    between both objects.
+
+    The final loss is the average over losses for all pairs of objects in the batch.
+    """
+
     arity = 2

     def call(self, inputs, **kwargs):
+        """Compute the loss.
+
+        :param inputs: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the first objects of the pair
+            - ``[N, D]`` tensor of embeddings of the second objects of the pair
+            - ``[N, ]`` tensor of target values
+        """
+
         l_emb, r_emb, target = inputs
         normalize_a = tf.nn.l2_normalize(l_emb, axis=-1)
         normalize_b = tf.nn.l2_normalize(r_emb, axis=-1)
@@ -16,6 +34,22 @@
 class EuclideanSiameseLoss(BaseLoss, Layer):
+    """Computes the loss for a siamese network using euclidean distance.
+
+    This loss is also known as contrastive loss.
+
+    The loss being optimized equals::
+
+        [is_sim * dist + (1 - is_sim) * max(margin - dist, 0)]^2
+
+    where ``target`` should equal 1 when both objects belong to the same class,
+    and 0 otherwise. The ``dist`` is the euclidean distance between the embeddings of
+    the objects, and ``margin`` is the distance beyond which dissimilar pairs stop
+    contributing to the loss.
+
+    The final loss is the average over losses for all pairs of objects in the batch.
+    """
+
     arity = 2

     def __init__(self, margin: float = 1.0):
@@ -23,6 +57,13 @@ def __init__(self, margin: float = 1.0):
         self.margin = margin

     def call(self, inputs, **kwargs):
+        """Compute the loss.
+
+        :param inputs: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the first objects of the pair
+            - ``[N, D]`` tensor of embeddings of the second objects of the pair
+            - ``[N, ]`` tensor of target values
+        """
         l_emb, r_emb, target = inputs
         eucl_dist = tf.reduce_sum(tf.math.squared_difference(l_emb, r_emb), axis=-1)
         is_similar = tf.cast(target > 0, tf.float32)
@@ -35,6 +76,17 @@ def call(self, inputs, **kwargs):
 class EuclideanTripletLoss(BaseLoss, Layer):
+    """Compute the loss for a triplet network using euclidean distance.
+
+    The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
+    is the euclidean distance between the anchor embedding and positive embedding,
+    ``dist_neg`` is the euclidean distance between the anchor and negative embedding,
+    and ``margin`` is the desired wedge between the anchor-negative and
+    anchor-positive distances.
+
+    The final loss is the average over losses for all triplets in the batch.
+    """
+
     arity = 3

     def __init__(self, margin: float = 1.0, **kwargs):
@@ -42,6 +94,13 @@ def __init__(self, margin: float = 1.0, **kwargs):
         self._margin = margin

     def call(self, inputs, **kwargs):
+        """Compute the loss.
+
+        :param inputs: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the anchor objects
+            - ``[N, D]`` tensor of embeddings of the positive objects
+            - ``[N, D]`` tensor of embeddings of the negative objects
+        """
         anchor, positive, negative, _ = inputs

         # Seems that tf.norm suffers from numeric instability as explained here
@@ -49,15 +108,24 @@ def call(self, inputs, **kwargs):
         dist_pos = tf.reduce_sum(tf.math.squared_difference(anchor, positive), axis=-1)
         dist_neg = tf.reduce_sum(tf.math.squared_difference(anchor, negative), axis=-1)

-        dist_pos = tf.maximum(dist_pos, 1e-9)
-        dist_neg = tf.maximum(dist_neg, 1e-9)
+        dist_pos = tf.sqrt(tf.maximum(dist_pos, 1e-9))
+        dist_neg = tf.sqrt(tf.maximum(dist_neg, 1e-9))

-        return tf.reduce_mean(
-            tf.nn.relu(tf.sqrt(dist_pos) - tf.sqrt(dist_neg) + self._margin)
-        )
+        return tf.reduce_mean(tf.nn.relu(dist_pos - dist_neg + self._margin))


 class CosineTripletLoss(BaseLoss, Layer):
+    """Compute the loss for a triplet network using cosine distance.
+
+    The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
+    is the cosine distance between the anchor embedding and positive embedding,
+    ``dist_neg`` is the cosine distance between the anchor and negative embedding, and
+    ``margin`` is the desired wedge between the anchor-negative and anchor-positive
+    distances.
+
+    The final loss is the average over losses for all triplets in the batch.
+    """
+
     arity = 3

     def __init__(self, margin: float = 1.0):
@@ -65,6 +133,13 @@ def __init__(self, margin: float = 1.0):
         self._margin = margin

     def call(self, inputs, **kwargs):
+        """Compute the loss.
+
+        :param inputs: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the anchor objects
+            - ``[N, D]`` tensor of embeddings of the positive objects
+            - ``[N, D]`` tensor of embeddings of the negative objects
+        """
         anchor, positive, negative, _ = inputs

         # Seems that tf.norm suffers from numeric instability as explained here
diff --git a/finetuner/tuner/paddle/__init__.py b/finetuner/tuner/paddle/__init__.py
index 2a7500c11..0632a75e9 100644
--- a/finetuner/tuner/paddle/__init__.py
+++ b/finetuner/tuner/paddle/__init__.py
@@ -16,12 +16,16 @@ class PaddleTuner(BaseTuner):
     def _get_loss(self, loss: Union[BaseLoss, str]):
+        """Get the loss layer."""
+
         if isinstance(loss, str):
             return getattr(losses, loss)()
         elif isinstance(loss, BaseLoss):
             return loss

     def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
+        """Get the paddle ``DataLoader`` from the input data."""
""" + ds = get_dataset(datasets, self.arity) return DataLoader( dataset=ds(inputs=inputs, catalog=self._catalog), @@ -32,6 +36,8 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool): def _get_optimizer( self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float ) -> Optimizer: + """Get the optimizer for training.""" + params = self._embed_model.parameters() optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs) @@ -56,6 +62,8 @@ def _get_optimizer( ) def _eval(self, data, description: str = 'Evaluating', train_log: str = ''): + """Evaluate the model on given labeled data""" + self._embed_model.eval() losses = [] @@ -78,6 +86,7 @@ def _eval(self, data, description: str = 'Evaluating', train_log: str = ''): return losses def _train(self, data, optimizer: Optimizer, description: str): + """Train the model on given labeled data""" self._embed_model.train() @@ -119,6 +128,34 @@ def fit( device: str = 'cpu', **kwargs, ) -> TunerStats: + """Finetune the model on the training data. + + :param train_data: Data on which to train the model + :param eval_data: Data on which to evaluate the model at the end of each epoch + :param epoch: Number of epochs to train the model + :param batch_size: The batch size to use for training and evaluation + :param learning_rate: Learning rate to use in training + :param optimizer: Which optimizer to use in training. Supported + values/optimizers are: + - ``"adam"`` for the Adam optimizer + - ``"rmsprop"`` for the RMSProp optimizer + - ``"sgd"`` for the SGD optimizer with momentum + :param optimizer_kwargs: Keyword arguments to pass to the optimizer. The + supported arguments, togethere with their defailt values, are: + - ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}`` + - ``"rmsprop"``:: + + { + 'rho': 0.99, + 'momentum': 0.0, + 'epsilon': 1e-08, + 'centered': False, + } + + - ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}`` + :param device: The device to which to move the model. Supported options are + ``"cpu"`` and ``"cuda"`` (for GPU) + """ if device == 'cuda': paddle.set_device('gpu:0') @@ -161,4 +198,12 @@ def get_embeddings(self, data: DocumentArrayLike): doc.embedding = np.array(embed) def save(self, *args, **kwargs): + """Save the embedding model. + + You need to pass the path where to save the model in either ``args`` or + ``kwargs`` (for ``path`` key). + + :param args: Arguments to pass to ``paddle.save`` function + :param kwargs: Keyword arguments to pass to ``paddle.save`` function + """ paddle.save(self.embed_model.state_dict(), *args, **kwargs) diff --git a/finetuner/tuner/paddle/losses.py b/finetuner/tuner/paddle/losses.py index 8c48a4569..30cab642c 100644 --- a/finetuner/tuner/paddle/losses.py +++ b/finetuner/tuner/paddle/losses.py @@ -8,11 +8,28 @@ class CosineSiameseLoss(BaseLoss, nn.Layer): + """Computes the loss for a siamese network using cosine distance. + + The loss for a pair of objects equals ``(target - cos_sim)^2``, where ``target`` + should equal 1 when both objects belong to the same class, and to -1 when they + belong to different classes. The ``cos_sim`` represents the cosime similarity + between both objects. + + The final loss is the average over losses for all pairs of objects in the batch. + """ + arity = 2 def forward( self, embeddings: List[paddle.Tensor], target: paddle.Tensor ) -> paddle.Tensor: + """Compute the loss. 
+
+        :param embeddings: Should be a list or a tuple containing two tensors:
+            - ``[N, D]`` tensor of embeddings of the first objects of the pair
+            - ``[N, D]`` tensor of embeddings of the second objects of the pair
+        :param target: A ``[N, ]`` tensor of target values
+        """
         l_emb, r_emb = embeddings
         cos_sim = F.cosine_similarity(l_emb, r_emb)
         loss = F.mse_loss(cos_sim, target)
@@ -20,6 +37,22 @@ def forward(
 class EuclideanSiameseLoss(BaseLoss, nn.Layer):
+    """Computes the loss for a siamese network using euclidean distance.
+
+    This loss is also known as contrastive loss.
+
+    The loss being optimized equals::
+
+        [is_sim * dist + (1 - is_sim) * max(margin - dist, 0)]^2
+
+    where ``target`` should equal 1 when both objects belong to the same class,
+    and 0 otherwise. The ``dist`` is the euclidean distance between the embeddings of
+    the objects, and ``margin`` is the distance beyond which dissimilar pairs stop
+    contributing to the loss.
+
+    The final loss is the average over losses for all pairs of objects in the batch.
+    """
+
     arity = 2

     def __init__(self, margin: float = 1.0):
@@ -30,6 +63,13 @@ def __init__(self, margin: float = 1.0):
     def forward(
         self, embeddings: List[paddle.Tensor], target: paddle.Tensor
     ) -> paddle.Tensor:
+        """Compute the loss.
+
+        :param embeddings: Should be a list or a tuple containing two tensors:
+            - ``[N, D]`` tensor of embeddings of the first objects of the pair
+            - ``[N, D]`` tensor of embeddings of the second objects of the pair
+        :param target: A ``[N, ]`` tensor of target values
+        """
         l_emb, r_emb = embeddings
         eucl_dist = self._dist(l_emb, r_emb)
         is_similar = paddle.cast(target > 0, paddle.float32)
@@ -41,6 +81,17 @@ def forward(
 class EuclideanTripletLoss(BaseLoss, nn.Layer):
+    """Compute the loss for a triplet network using euclidean distance.
+
+    The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
+    is the euclidean distance between the anchor embedding and positive embedding,
+    ``dist_neg`` is the euclidean distance between the anchor and negative embedding,
+    and ``margin`` is the desired wedge between the anchor-negative and
+    anchor-positive distances.
+
+    The final loss is the average over losses for all triplets in the batch.
+    """
+
     arity = 3

     def __init__(self, margin: float = 1.0):
@@ -51,6 +102,13 @@ def __init__(self, margin: float = 1.0):
     def forward(
         self, embeddings: List[paddle.Tensor], target: paddle.Tensor
     ) -> paddle.Tensor:
+        """Compute the loss.
+
+        :param embeddings: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the anchor objects
+            - ``[N, D]`` tensor of embeddings of the positive objects
+            - ``[N, D]`` tensor of embeddings of the negative objects
+        """
         anchor, positive, negative = embeddings
         dist_pos = self._dist(anchor, positive)
         dist_neg = self._dist(anchor, negative)
@@ -59,6 +117,17 @@ def forward(
 class CosineTripletLoss(BaseLoss, nn.Layer):
+    """Compute the loss for a triplet network using cosine distance.
+
+    The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
+    is the cosine distance between the anchor embedding and positive embedding,
+    ``dist_neg`` is the cosine distance between the anchor and negative embedding, and
+    ``margin`` is the desired wedge between the anchor-negative and anchor-positive
+    distances.
+
+    The final loss is the average over losses for all triplets in the batch.
+    """
+
     arity = 3

     def __init__(self, margin: float = 1.0):
@@ -68,6 +137,13 @@ def __init__(self, margin: float = 1.0):
     def forward(
         self, embeddings: List[paddle.Tensor], target: paddle.Tensor
     ) -> paddle.Tensor:
+        """Compute the loss.
+
+        :param embeddings: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the anchor objects
+            - ``[N, D]`` tensor of embeddings of the positive objects
+            - ``[N, D]`` tensor of embeddings of the negative objects
+        """
         anchor, positive, negative = embeddings
         dist_pos = 1 - F.cosine_similarity(anchor, positive)
         dist_neg = 1 - F.cosine_similarity(anchor, negative)
diff --git a/finetuner/tuner/pytorch/__init__.py b/finetuner/tuner/pytorch/__init__.py
index a514b2092..c85011c66 100644
--- a/finetuner/tuner/pytorch/__init__.py
+++ b/finetuner/tuner/pytorch/__init__.py
@@ -15,12 +15,16 @@ class PytorchTuner(BaseTuner):
     def _get_loss(self, loss: Union[BaseLoss, str]):
+        """Get the loss layer."""
+
         if isinstance(loss, str):
             return getattr(losses, loss)()
         elif isinstance(loss, BaseLoss):
             return loss

     def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
+        """Get the pytorch ``DataLoader`` from the input data."""
+
         ds = get_dataset(datasets, self.arity)
         return DataLoader(
             dataset=ds(inputs=inputs, catalog=self._catalog),
@@ -31,6 +35,8 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
     def _get_optimizer(
         self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
     ) -> Optimizer:
+        """Get the optimizer for training."""
+
         params = self._embed_model.parameters()
         optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

@@ -59,6 +65,8 @@ def _get_optimizer(
         )

     def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
+        """Evaluate the model on given labeled data"""
+
         self._embed_model.eval()

         losses = []
@@ -85,6 +93,7 @@ def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
         return losses

     def _train(self, data, optimizer: Optimizer, description: str):
+        """Train the model on given labeled data"""

         self._embed_model.train()

@@ -129,6 +138,34 @@ def fit(
         device: str = 'cpu',
         **kwargs,
     ) -> TunerStats:
+        """Finetune the model on the training data.
+
+        :param train_data: Data on which to train the model
+        :param eval_data: Data on which to evaluate the model at the end of each epoch
+        :param epoch: Number of epochs to train the model
+        :param batch_size: The batch size to use for training and evaluation
+        :param learning_rate: Learning rate to use in training
+        :param optimizer: Which optimizer to use in training. Supported
+            values/optimizers are:
+            - ``"adam"`` for the Adam optimizer
+            - ``"rmsprop"`` for the RMSProp optimizer
+            - ``"sgd"`` for the SGD optimizer with momentum
+        :param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
+            supported arguments, together with their default values, are:
+            - ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
+            - ``"rmsprop"``::
+
+                {
+                    'rho': 0.99,
+                    'momentum': 0.0,
+                    'epsilon': 1e-08,
+                    'centered': False,
+                }
+
+            - ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
+        :param device: The device to which to move the model. Supported options are
+            ``"cpu"`` and ``"cuda"`` (for GPU)
+        """
         if device == 'cpu':
             self.device = torch.device('cpu')
         elif device == 'cuda':
@@ -177,4 +214,12 @@ def get_embeddings(self, data: DocumentArrayLike):
             doc.embedding = embed.cpu().numpy()

     def save(self, *args, **kwargs):
+        """Save the embedding model.
+
+        You need to pass the path where to save the model in either ``args`` or
+        ``kwargs`` (for ``f`` key).
+
+        :param args: Arguments to pass to ``torch.save`` function
+        :param kwargs: Keyword arguments to pass to ``torch.save`` function
+        """
         torch.save(self.embed_model.state_dict(), *args, **kwargs)
diff --git a/finetuner/tuner/pytorch/losses.py b/finetuner/tuner/pytorch/losses.py
index 2eb5469b5..c8ee3e2a4 100644
--- a/finetuner/tuner/pytorch/losses.py
+++ b/finetuner/tuner/pytorch/losses.py
@@ -8,11 +8,28 @@
 class CosineSiameseLoss(BaseLoss, nn.Module):
+    """Computes the loss for a siamese network using cosine distance.
+
+    The loss for a pair of objects equals ``(target - cos_sim)^2``, where ``target``
+    should equal 1 when both objects belong to the same class, and -1 when they
+    belong to different classes. The ``cos_sim`` represents the cosine similarity
+    between both objects.
+
+    The final loss is the average over losses for all pairs of objects in the batch.
+    """
+
     arity = 2

     def forward(
         self, embeddings: List[torch.Tensor], target: torch.Tensor
     ) -> torch.Tensor:
+        """Compute the loss.
+
+        :param embeddings: Should be a list or a tuple containing two tensors:
+            - ``[N, D]`` tensor of embeddings of the first objects of the pair
+            - ``[N, D]`` tensor of embeddings of the second objects of the pair
+        :param target: A ``[N, ]`` tensor of target values
+        """
         l_emb, r_emb = embeddings
         cos_sim = F.cosine_similarity(l_emb, r_emb)
         loss = F.mse_loss(cos_sim, target)
@@ -20,6 +37,22 @@ def forward(
 class EuclideanSiameseLoss(BaseLoss, nn.Module):
+    """Computes the loss for a siamese network using euclidean distance.
+
+    This loss is also known as contrastive loss.
+
+    The loss being optimized equals::
+
+        [is_sim * dist + (1 - is_sim) * max(margin - dist, 0)]^2
+
+    where ``target`` should equal 1 when both objects belong to the same class,
+    and 0 otherwise. The ``dist`` is the euclidean distance between the embeddings of
+    the objects, and ``margin`` is the distance beyond which dissimilar pairs stop
+    contributing to the loss.
+
+    The final loss is the average over losses for all pairs of objects in the batch.
+    """
+
     arity = 2

     def __init__(self, margin: float = 1.0):
@@ -29,6 +62,13 @@ def __init__(self, margin: float = 1.0):
     def forward(
         self, embeddings: List[torch.Tensor], target: torch.Tensor
     ) -> torch.Tensor:
+        """Compute the loss.
+
+        :param embeddings: Should be a list or a tuple containing two tensors:
+            - ``[N, D]`` tensor of embeddings of the first objects of the pair
+            - ``[N, D]`` tensor of embeddings of the second objects of the pair
+        :param target: A ``[N, ]`` tensor of target values
+        """
         l_emb, r_emb = embeddings
         eucl_dist = F.pairwise_distance(l_emb, r_emb, p=2)
         is_similar = (target > 0).to(torch.float32)
@@ -40,6 +80,17 @@ def forward(
 class EuclideanTripletLoss(BaseLoss, nn.Module):
+    """Compute the loss for a triplet network using euclidean distance.
+
+    The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
+    is the euclidean distance between the anchor embedding and positive embedding,
+    ``dist_neg`` is the euclidean distance between the anchor and negative embedding,
+    and ``margin`` is the desired wedge between the anchor-negative and
+    anchor-positive distances.
+
+    The final loss is the average over losses for all triplets in the batch.
+    """
+
     arity = 3

     def __init__(self, margin: float = 1.0):
@@ -49,6 +100,13 @@ def __init__(self, margin: float = 1.0):
     def forward(
         self, embeddings: List[torch.Tensor], target: torch.Tensor
     ) -> torch.Tensor:
+        """Compute the loss.
+
+        :param embeddings: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the anchor objects
+            - ``[N, D]`` tensor of embeddings of the positive objects
+            - ``[N, D]`` tensor of embeddings of the negative objects
+        """
         anchor, positive, negative = embeddings
         dist_pos = F.pairwise_distance(anchor, positive, p=2)
         dist_neg = F.pairwise_distance(anchor, negative, p=2)
@@ -57,9 +115,27 @@ def forward(
 class CosineTripletLoss(EuclideanTripletLoss):
+    """Compute the loss for a triplet network using cosine distance.
+
+    The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
+    is the cosine distance between the anchor embedding and positive embedding,
+    ``dist_neg`` is the cosine distance between the anchor and negative embedding, and
+    ``margin`` is the desired wedge between the anchor-negative and anchor-positive
+    distances.
+
+    The final loss is the average over losses for all triplets in the batch.
+    """
+
     def forward(
         self, embeddings: List[torch.Tensor], target: torch.Tensor
     ) -> torch.Tensor:
+        """Compute the loss.
+
+        :param embeddings: Should be a list or a tuple containing three tensors:
+            - ``[N, D]`` tensor of embeddings of the anchor objects
+            - ``[N, D]`` tensor of embeddings of the positive objects
+            - ``[N, D]`` tensor of embeddings of the negative objects
+        """
         anchor, positive, negative = embeddings
         dist_pos = 1 - F.cosine_similarity(anchor, positive)
         dist_neg = 1 - F.cosine_similarity(anchor, negative)
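The triplet losses documented above all reduce to the same scalar formula, ``max(dist_pos - dist_neg + margin, 0)`` averaged over the batch. A minimal NumPy sketch of that computation follows; it is illustrative only, the function and variable names are assumptions, and the real implementations operate on framework tensors exactly as shown in the diffs above::

    import numpy as np

    def euclidean_triplet_loss(anchor, positive, negative, margin=1.0):
        """Average of max(d(a, p) - d(a, n) + margin, 0) over a batch of triplets."""
        # anchor, positive, negative: [N, D] arrays of embeddings
        dist_pos = np.linalg.norm(anchor - positive, axis=-1)  # [N] anchor-positive distances
        dist_neg = np.linalg.norm(anchor - negative, axis=-1)  # [N] anchor-negative distances
        return np.mean(np.maximum(dist_pos - dist_neg + margin, 0.0))

    # toy batch: 4 triplets of 8-dimensional embeddings
    rng = np.random.default_rng(0)
    a, p, n = (rng.normal(size=(4, 8)) for _ in range(3))
    print(euclidean_triplet_loss(a, p, n))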