This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

docs(tuner): add docstrings #148

Merged
merged 4 commits on Oct 19, 2021
36 changes: 36 additions & 0 deletions finetuner/tuner/__init__.py
@@ -38,6 +38,34 @@ def fit(
device: str = 'cpu',
**kwargs,
) -> TunerReturnType:
"""Finetune the model on the training data.

:param train_data: Data on which to train the model
:param eval_data: Data on which to evaluate the model at the end of each epoch
:param epoch: Number of epochs to train the model
:param batch_size: The batch size to use for training and evaluation
:param learning_rate: Learning rate to use in training
:param optimizer: Which optimizer to use in training. Supported
values/optimizers are:
- ``"adam"`` for the Adam optimizer
- ``"rmsprop"`` for the RMSProp optimizer
- ``"sgd"`` for the SGD optimizer with momentum
:param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
supported arguments, together with their default values, are:
- ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
- ``"rmsprop"``::

{
'rho': 0.99,
'momentum': 0.0,
'epsilon': 1e-08,
'centered': False,
}

- ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
:param device: The device to which to move the model. Supported options are
``"cpu"`` and ``"cuda"`` (for GPU)
"""
ft = get_tuner_class(embed_model)
if catalog is None:
train_data = DocumentArray(train_data() if callable(train_data) else train_data)
@@ -60,6 +88,14 @@ def fit(


def save(embed_model: AnyDNN, model_path: str, *args, **kwargs) -> None:
"""Save the embedding model.

:param embed_model: The embedding model to save
:param model_path: Path to the file or folder where the model will be saved
:param args: Arguments to pass to the framework-specific tuner's ``save`` method
:param kwargs: Keyword arguments to pass to the framework-specific tuner's
``save`` method
"""
ft = get_tuner_class(embed_model)

ft(embed_model).save(model_path, *args, **kwargs)
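A minimal usage sketch of the ``fit`` and ``save`` helpers documented above, using placeholder model and data objects; the keyword names (e.g. ``epochs``) and defaults shown here should be checked against the actual signature rather than taken from this illustration::

    import finetuner.tuner as tuner

    # embed_model: any supported DNN (Keras / PyTorch / Paddle) producing embeddings.
    # train_docs / eval_docs: labeled DocumentArray-like data (placeholders here).
    stats = tuner.fit(
        embed_model,
        train_data=train_docs,
        eval_data=eval_docs,
        epochs=10,                # number of passes over the training data
        batch_size=256,
        learning_rate=1e-3,
        optimizer='rmsprop',      # one of 'adam', 'rmsprop', 'sgd'
        optimizer_kwargs={'rho': 0.99, 'momentum': 0.0, 'centered': False},
        device='cuda',            # or 'cpu'
    )

    # Delegates to the framework-specific tuner's ``save`` method.
    tuner.save(embed_model, 'tuned_model')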
7 changes: 7 additions & 0 deletions finetuner/tuner/base.py
@@ -27,6 +27,13 @@ def __init__(
loss: Union[AnyDNN, str] = 'CosineSiameseLoss',
**kwargs,
):
"""Create the tuner instance.

:param embed_model: Model that produces embeddings from inputs
:param loss: Either the loss object instance, or the name of the loss function.
Currently available losses are ``CosineSiameseLoss``,
``EuclideanSiameseLoss``, ``EuclideanTripletLoss`` and ``CosineTripletLoss``
"""
self._embed_model = embed_model
self._loss = self._get_loss(loss)
self._train_data_len = 0
46 changes: 46 additions & 0 deletions finetuner/tuner/keras/__init__.py
@@ -16,12 +16,15 @@

class KerasTuner(BaseTuner):
def _get_loss(self, loss: Union[BaseLoss, str]):
"""Get the loss layer."""

if isinstance(loss, str):
return getattr(losses, loss)()
elif isinstance(loss, BaseLoss):
return loss

def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
"""Get tensorflow ``Dataset`` from the input data. """

ds = get_dataset(datasets, self.arity)
input_shape = self.embed_model.input_shape[1:]
@@ -45,6 +48,8 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
def _get_optimizer(
self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
) -> Optimizer:
"""Get the optimizer for training."""

optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

if optimizer == 'adam':
@@ -59,6 +64,8 @@ def _get_optimizer(
return keras.optimizers.SGD(learning_rate=learning_rate, **optimizer_kwargs)

def _train(self, data, optimizer, description: str):
"""Train the model on given labeled data"""

losses = []

log_generator = LogGenerator('T', losses)
@@ -88,6 +95,7 @@ def _train(self, data, optimizer, description: str):
return losses

def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
"""Evaluate the model on given labeled data"""

losses = []

@@ -120,6 +128,34 @@ def fit(
device: str = 'cpu',
**kwargs,
) -> TunerStats:
"""Finetune the model on the training data.

:param train_data: Data on which to train the model
:param eval_data: Data on which to evaluate the model at the end of each epoch
:param epoch: Number of epochs to train the model
:param batch_size: The batch size to use for training and evaluation
:param learning_rate: Learning rate to use in training
:param optimizer: Which optimizer to use in training. Supported
values/optimizers are:
- ``"adam"`` for the Adam optimizer
- ``"rmsprop"`` for the RMSProp optimizer
- ``"sgd"`` for the SGD optimizer with momentum
:param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
supported arguments, together with their default values, are:
- ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
- ``"rmsprop"``::

{
'rho': 0.99,
'momentum': 0.0,
'epsilon': 1e-08,
'centered': False,
}

- ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
:param device: The device to which to move the model. Supported options are
``"cpu"`` and ``"cuda"`` (for GPU)
"""

_train_data = self._get_data_loader(
inputs=train_data, batch_size=batch_size, shuffle=False
@@ -167,4 +203,14 @@ def get_embeddings(self, data: DocumentArrayLike):
doc.embedding = np.array(embed)

def save(self, *args, **kwargs):
"""Save the embedding model.

You need to pass the path at which to save the model either positionally in
``args`` or in ``kwargs`` under the ``filepath`` key.

:param args: Arguments to pass to the ``save`` method of the embedding model
:param kwargs: Keyword arguments to pass to the ``save`` method of the
embedding model
"""

self.embed_model.save(*args, **kwargs)
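The ``loss`` argument documented in ``base.py`` above is resolved by ``_get_loss``: a string is simply looked up on the framework's ``losses`` module and instantiated. A rough, stand-alone equivalent of that lookup, for illustration only, using the loss names listed in the base tuner docstring::

    from finetuner.tuner.keras import losses

    def resolve_loss(loss):
        # Accept either an already constructed loss layer or its class name.
        if isinstance(loss, str):
            return getattr(losses, loss)()  # e.g. 'CosineSiameseLoss' -> instance
        return loss

    loss_layer = resolve_loss('EuclideanTripletLoss')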
85 changes: 80 additions & 5 deletions finetuner/tuner/keras/losses.py
@@ -5,9 +5,27 @@


class CosineSiameseLoss(BaseLoss, Layer):
"""Computes the loss for a siamese network using cosine distance.

The loss for a pair of objects equals ``(target - cos_sim)^2``, where ``target``
should equal 1 when both objects belong to the same class and -1 when they
belong to different classes. ``cos_sim`` is the cosine similarity between the
embeddings of the two objects.

The final loss is the average over losses for all pairs of objects in the batch.
"""

arity = 2

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the first objects of the pair
- ``[N, D]`` tensor of embeddings of the second objects of the pair
- ``[N, ]`` tensor of target values
"""

l_emb, r_emb, target = inputs
normalize_a = tf.nn.l2_normalize(l_emb, axis=-1)
normalize_b = tf.nn.l2_normalize(r_emb, axis=-1)
@@ -16,13 +34,36 @@ def call(self, inputs, **kwargs):


class EuclideanSiameseLoss(BaseLoss, Layer):
"""Computes the loss for a siamese network using cosine distance.

This loss is also known as contrastive loss.

The loss being optimized equals::

[is_sim * dist + (1 - is_sim) * max(margin - dist, 0)]^2

where ``is_sim`` should equal 1 when both objects belong to the same class,
and 0 otherwise. ``dist`` is the euclidean distance between the embeddings of
the objects, and ``margin`` is a constant that determines how far apart
dissimilar objects should be pushed, which helps stabilize training.

The final loss is the average over losses for all pairs of objects in the batch.
"""

arity = 2

def __init__(self, margin: float = 1.0):
super().__init__()
self.margin = margin

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the first objects of the pair
- ``[N, D]`` tensor of embeddings of the second objects of the pair
- ``[N, ]`` tensor of target values
"""
l_emb, r_emb, target = inputs
eucl_dist = tf.reduce_sum(tf.math.squared_difference(l_emb, r_emb), axis=-1)
is_similar = tf.cast(target > 0, tf.float32)
@@ -35,36 +76,70 @@ def call(self, inputs, **kwargs):


class EuclideanTripletLoss(BaseLoss, Layer):
"""Compute the loss for a triplet network using euclidean distance.

The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
is the euclidean distance between the anchor embedding and positive embedding,
``dist_neg`` is the euclidean distance between the anchor and negative embedding,
and ``margin`` is the desired minimum gap between the anchor-negative and
anchor-positive distances.

The final loss is the average over losses for all triplets in the batch.
"""

arity = 3

def __init__(self, margin: float = 1.0, **kwargs):
super().__init__(**kwargs)
self._margin = margin

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the anchor objects
- ``[N, D]`` tensor of embeddings of the positive objects
- ``[N, D]`` tensor of embeddings of the negative objects
"""
anchor, positive, negative, _ = inputs

# Seems that tf.norm suffers from numeric instability as explained here
# https://github.com/tensorflow/tensorflow/issues/12071
dist_pos = tf.reduce_sum(tf.math.squared_difference(anchor, positive), axis=-1)
dist_neg = tf.reduce_sum(tf.math.squared_difference(anchor, negative), axis=-1)

dist_pos = tf.maximum(dist_pos, 1e-9)
dist_neg = tf.maximum(dist_neg, 1e-9)
dist_pos = tf.sqrt(tf.maximum(dist_pos, 1e-9))
dist_neg = tf.sqrt(tf.maximum(dist_neg, 1e-9))

return tf.reduce_mean(
tf.nn.relu(tf.sqrt(dist_pos) - tf.sqrt(dist_neg) + self._margin)
)
return tf.reduce_mean(tf.nn.relu(dist_pos - dist_neg + self._margin))


class CosineTripletLoss(BaseLoss, Layer):
"""Compute the loss for a triplet network using cosine distance.

The loss is computed as ``max(dist_pos - dist_neg + margin, 0)``, where ``dist_pos``
is the cosine distance between the anchor embedding and positive embedding,
``dist_neg`` is the cosine distance between the anchor and negative embedding, and
``margin`` is the desired minimum gap between the anchor-negative and
anchor-positive distances.

The final loss is the average over losses for all triplets in the batch.
"""

arity = 3

def __init__(self, margin: float = 1.0):
super().__init__()
self._margin = margin

def call(self, inputs, **kwargs):
"""Compute the loss.

:param inputs: Should be a list or a tuple containing three tensors:
- ``[N, D]`` tensor of embeddings of the anchor objects
- ``[N, D]`` tensor of embeddings of the positive objects
- ``[N, D]`` tensor of embeddings of the negative objects
"""
anchor, positive, negative, _ = inputs

# Seems that tf.norm suffers from numeric instability as explained here
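As a sanity check on the formulas documented above, the following snippet recomputes the ``CosineSiameseLoss`` and ``EuclideanTripletLoss`` values on toy embeddings with plain TensorFlow ops. It mirrors the math in the docstrings rather than calling the loss layers themselves (the real implementation avoids ``tf.norm`` for numerical stability, as noted in the diff)::

    import tensorflow as tf

    # Toy batch of two pairs of 3-dimensional embeddings.
    l_emb = tf.constant([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
    r_emb = tf.constant([[1.0, 0.0, 0.0], [1.0, 0.0, 0.0]])
    target = tf.constant([1.0, -1.0])  # 1 = same class, -1 = different class

    # CosineSiameseLoss: mean over (target - cos_sim)^2
    cos_sim = tf.reduce_sum(
        tf.nn.l2_normalize(l_emb, axis=-1) * tf.nn.l2_normalize(r_emb, axis=-1),
        axis=-1,
    )
    siamese_loss = tf.reduce_mean(tf.square(target - cos_sim))  # -> 0.5 here

    # EuclideanTripletLoss: mean over max(dist_pos - dist_neg + margin, 0)
    anchor = tf.constant([[0.0, 0.0, 0.0], [1.0, 1.0, 0.0]])
    positive = tf.constant([[0.1, 0.0, 0.0], [1.0, 0.9, 0.0]])
    negative = tf.constant([[1.0, 1.0, 0.0], [0.0, 0.0, 0.0]])
    margin = 1.0
    dist_pos = tf.norm(anchor - positive, axis=-1)
    dist_neg = tf.norm(anchor - negative, axis=-1)
    triplet_loss = tf.reduce_mean(tf.nn.relu(dist_pos - dist_neg + margin))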
45 changes: 45 additions & 0 deletions finetuner/tuner/paddle/__init__.py
@@ -16,12 +16,16 @@

class PaddleTuner(BaseTuner):
def _get_loss(self, loss: Union[BaseLoss, str]):
"""Get the loss layer."""

if isinstance(loss, str):
return getattr(losses, loss)()
elif isinstance(loss, BaseLoss):
return loss

def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
"""Get the paddle ``DataLoader`` from the input data. """

ds = get_dataset(datasets, self.arity)
return DataLoader(
dataset=ds(inputs=inputs, catalog=self._catalog),
@@ -32,6 +36,8 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
def _get_optimizer(
self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
) -> Optimizer:
"""Get the optimizer for training."""

params = self._embed_model.parameters()
optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

@@ -56,6 +62,8 @@ def _get_optimizer(
)

def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
"""Evaluate the model on given labeled data"""

self._embed_model.eval()

losses = []
@@ -78,6 +86,7 @@ def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
return losses

def _train(self, data, optimizer: Optimizer, description: str):
"""Train the model on given labeled data"""

self._embed_model.train()

@@ -119,6 +128,34 @@ def fit(
device: str = 'cpu',
**kwargs,
) -> TunerStats:
"""Finetune the model on the training data.

:param train_data: Data on which to train the model
:param eval_data: Data on which to evaluate the model at the end of each epoch
:param epoch: Number of epochs to train the model
:param batch_size: The batch size to use for training and evaluation
:param learning_rate: Learning rate to use in training
:param optimizer: Which optimizer to use in training. Supported
values/optimizers are:
- ``"adam"`` for the Adam optimizer
- ``"rmsprop"`` for the RMSProp optimizer
- ``"sgd"`` for the SGD optimizer with momentum
:param optimizer_kwargs: Keyword arguments to pass to the optimizer. The
supported arguments, together with their default values, are:
- ``"adam"``: ``{'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08}``
- ``"rmsprop"``::

{
'rho': 0.99,
'momentum': 0.0,
'epsilon': 1e-08,
'centered': False,
}

- ``"sgd"``: ``{'momentum': 0.0, 'nesterov': False}``
:param device: The device to which to move the model. Supported options are
``"cpu"`` and ``"cuda"`` (for GPU)
"""

if device == 'cuda':
paddle.set_device('gpu:0')
@@ -161,4 +198,12 @@ def get_embeddings(self, data: DocumentArrayLike):
doc.embedding = np.array(embed)

def save(self, *args, **kwargs):
"""Save the embedding model.

You need to pass the path at which to save the model either positionally in
``args`` or in ``kwargs`` under the ``path`` key.

:param args: Arguments to pass to ``paddle.save`` function
:param kwargs: Keyword arguments to pass to ``paddle.save`` function
"""
paddle.save(self.embed_model.state_dict(), *args, **kwargs)
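Since the Paddle ``save`` above forwards the model's ``state_dict`` straight to ``paddle.save``, the target path can be given positionally. A hypothetical round trip, assuming a ``PaddleTuner`` instance named ``tuner`` and its ``embed_model``::

    import paddle

    # Persist the finetuned weights; Paddle conventionally uses a .pdparams suffix.
    tuner.save('tuned_embed_model.pdparams')

    # Later, restore the weights into a model with the same architecture.
    state_dict = paddle.load('tuned_embed_model.pdparams')
    embed_model.set_state_dict(state_dict)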