feat(tuner): allow adjustment of optimizer (#128)
* feat: allow adjustment of optimizer

* fix: lint

* fix: initial lr

* refactor: move methods to base class

* fix: expand args

* fix: f string

* fix: typo

* fix: add docstring of the type

* test: add tests for non-existing optimizer

* fix: black
Tadej Svetina committed Oct 15, 2021
1 parent 98c584e commit b624a62
Showing 11 changed files with 286 additions and 23 deletions.
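
For context, a minimal usage sketch of what this commit enables through the high-level `finetuner.fit()` API. Only `learning_rate`, `optimizer`, and `optimizer_kwargs` come from the diff below; `embed_model` and `train_docs` are hypothetical placeholders assumed to be prepared elsewhere.

```python
import finetuner

# `embed_model` (any supported DNN) and `train_docs` (a labelled
# DocumentArrayLike) are assumed to exist already; they are not part of
# this commit and only stand in for real inputs.
result = finetuner.fit(          # returns a TunerReturnType
    embed_model,
    train_data=train_docs,
    epochs=10,
    batch_size=256,
    learning_rate=1e-3,          # new: initial learning rate for the optimizer
    optimizer='sgd',             # new: one of 'adam', 'rmsprop', 'sgd'
    optimizer_kwargs={           # new: overrides the per-optimizer defaults
        'momentum': 0.9,
        'nesterov': True,
    },
    device='cpu',
)
```

Per the diff, an unrecognised `optimizer` name raises a `ValueError`, and unknown keys in `optimizer_kwargs` are dropped with a warning rather than forwarded to the framework optimizer.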
14 changes: 13 additions & 1 deletion finetuner/__init__.py
@@ -6,7 +6,7 @@
__default_tag_key__ = 'finetuner'

# define the high-level API: fit()
from typing import Optional, overload, TYPE_CHECKING, Tuple
from typing import Dict, Optional, overload, TYPE_CHECKING, Tuple

if TYPE_CHECKING:
from .helper import AnyDNN, DocumentArrayLike, TunerReturnType
@@ -21,6 +21,9 @@ def fit(
epochs: int = 10,
batch_size: int = 256,
head_layer: str = 'CosineLayer',
learning_rate: float = 1e-3,
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
device: str = 'cpu',
) -> 'TunerReturnType':
...
@@ -35,6 +38,9 @@ def fit(
epochs: int = 10,
batch_size: int = 256,
head_layer: str = 'CosineLayer',
learning_rate: float = 1e-3,
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
to_embedding_model: bool = True, #: below are tailor args
input_size: Optional[Tuple[int, ...]] = None,
input_dtype: str = 'float32',
@@ -56,6 +62,9 @@ def fit(
port_expose: Optional[int] = None,
runtime_backend: str = 'thread',
head_layer: str = 'CosineLayer',
learning_rate: float = 1e-3,
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
device: str = 'cpu',
) -> None:
...
@@ -71,6 +80,9 @@ def fit(
port_expose: Optional[int] = None,
runtime_backend: str = 'thread',
head_layer: str = 'CosineLayer',
learning_rate: float = 1e-3,
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
to_embedding_model: bool = True, #: below are tailor args
input_size: Optional[Tuple[int, ...]] = None,
input_dtype: str = 'float32',
3 changes: 3 additions & 0 deletions finetuner/helper.py
@@ -8,6 +8,9 @@
AnyDataLoader = TypeVar(
'AnyDataLoader'
) #: The type of any implementation of a data loader
AnyOptimizer = TypeVar(
'AnyOptimizer'
) #: The type of any implementation of an optimizer for training the model
DocumentSequence = TypeVar(
'DocumentSequence',
Sequence[Document],
14 changes: 12 additions & 2 deletions finetuner/tuner/__init__.py
@@ -29,13 +29,23 @@ def fit(
epochs: int = 10,
batch_size: int = 256,
head_layer: str = 'CosineLayer',
learning_rate: float = 1e-3,
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
device: str = 'cpu',
**kwargs
**kwargs,
) -> TunerReturnType:
ft = _get_tuner_class(embed_model)

return ft(embed_model, head_layer=head_layer).fit(
train_data, eval_data, epochs=epochs, batch_size=batch_size, device=device
train_data,
eval_data,
epochs=epochs,
batch_size=batch_size,
device=device,
learning_rate=learning_rate,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
)


44 changes: 43 additions & 1 deletion finetuner/tuner/base.py
@@ -9,7 +9,7 @@

from jina.logging.logger import JinaLogger

from ..helper import AnyDNN, AnyDataLoader, DocumentArrayLike
from ..helper import AnyDNN, AnyDataLoader, AnyOptimizer, DocumentArrayLike


class BaseHead:
@@ -48,6 +48,42 @@ def __init__(
self._head_layer = head_layer
self.logger = JinaLogger(self.__class__.__name__)

def _get_optimizer_kwargs(self, optimizer: str, custom_kwargs: Optional[Dict]):
"""Merges user-provided optimizer kwargs with default ones."""

DEFAULT_OPTIMIZER_KWARGS = {
'adam': {'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08},
'rmsprop': {
'rho': 0.99,
'momentum': 0.0,
'epsilon': 1e-08,
'centered': False,
},
'sgd': {'momentum': 0.0, 'nesterov': False},
}

try:
opt_kwargs = DEFAULT_OPTIMIZER_KWARGS[optimizer]
except KeyError:
raise ValueError(
f'Optimizer "{optimizer}" not supported, the supported'
' optimizers are "adam", "rmsprop" and "sgd"'
)

# Raise warning for non-existing keys passed
custom_kwargs = custom_kwargs or {}
extra_args = set(custom_kwargs.keys()) - set(opt_kwargs.keys())
if extra_args:
self.logger.warning(
f'The following arguments are not valid for the optimizer {optimizer}:'
f' {extra_args}'
)

# Update only existing keys
opt_kwargs.update((k, v) for k, v in custom_kwargs.items() if k in opt_kwargs)

return opt_kwargs

@property
def embed_model(self) -> AnyDNN:
"""Get the base model of this object."""
@@ -79,6 +115,12 @@ def head_layer(self) -> AnyDNN:
"""Get the head layer of this object."""
...

@abc.abstractmethod
def _get_optimizer(
self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
) -> AnyOptimizer:
"""Get the optimizer for training."""

@abc.abstractmethod
def fit(
self,
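
The `_get_optimizer_kwargs` helper that this commit moves into the base class merges user-supplied kwargs into per-optimizer defaults, warns about unknown keys, and rejects unsupported optimizer names. A rough behaviour sketch, inferred from the diff above; the `KerasTuner` import path and the `embed_model` placeholder are assumptions, not part of this commit:

```python
from finetuner.tuner.keras import KerasTuner  # concrete subclass; path assumed

tuner = KerasTuner(embed_model)  # `embed_model` assumed to be defined elsewhere

# Unsupported optimizer names are rejected outright.
try:
    tuner._get_optimizer_kwargs('adagrad', None)
except ValueError as e:
    print(e)  # Optimizer "adagrad" not supported, ...

# Known optimizer: user values override the matching defaults, while
# unknown keys are dropped after a warning is logged.
kwargs = tuner._get_optimizer_kwargs('sgd', {'momentum': 0.9, 'foo': 1})
print(kwargs)  # {'momentum': 0.9, 'nesterov': False}
```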
40 changes: 31 additions & 9 deletions finetuner/tuner/keras/__init__.py
@@ -1,10 +1,10 @@
from typing import Optional
from typing import Dict, Optional

import tensorflow as tf
from jina.helper import cached_property
from jina.logging.profile import ProgressBar
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Optimizer

from . import head_layers, datasets
from .head_layers import HeadLayer
@@ -22,18 +22,21 @@ def head_layer(self) -> HeadLayer:
elif isinstance(self._head_layer, HeadLayer):
return self._head_layer

@cached_property
@property
def wrapped_model(self) -> Model:
if self.embed_model is None:
raise ValueError(f'embed_model is not set')
raise ValueError('embed_model is not set')

if getattr(self, '_wrapped_model', None) is not None:
return self._wrapped_model

input_shape = self.embed_model.input_shape[1:]
input_values = [keras.Input(shape=input_shape) for _ in range(self.arity)]
head_layer = self.head_layer()
head_values = head_layer(*(self.embed_model(v) for v in input_values))
wrapped_model = Model(inputs=input_values, outputs=head_values)
self._wrapped_model = Model(inputs=input_values, outputs=head_values)

return wrapped_model
return self._wrapped_model

def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):

@@ -56,6 +59,22 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):

return tf_data.batch(batch_size)

def _get_optimizer(
self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
) -> Optimizer:
optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

if optimizer == 'adam':
return keras.optimizers.Adam(
learning_rate=learning_rate, **optimizer_kwargs
)
elif optimizer == 'rmsprop':
return keras.optimizers.RMSprop(
learning_rate=learning_rate, **optimizer_kwargs
)
elif optimizer == 'sgd':
return keras.optimizers.SGD(learning_rate=learning_rate, **optimizer_kwargs)

def _train(self, data, optimizer, description: str):
head_layer = self.head_layer()

@@ -113,6 +132,9 @@ def fit(
eval_data: Optional[DocumentArrayLike] = None,
epochs: int = 10,
batch_size: int = 256,
learning_rate: float = 1e-3,
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
device: str = 'cpu',
**kwargs,
):
@@ -126,8 +148,6 @@ def fit(
inputs=eval_data, batch_size=batch_size, shuffle=False
)

optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)

if device == 'cuda':
device = '/GPU:0'
elif device == 'cpu':
@@ -136,6 +156,8 @@
raise ValueError(f'Device {device} not recognized')
device = tf.device(device)

_optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

losses_train = []
metrics_train = []
losses_eval = []
@@ -145,7 +167,7 @@ def fit(
for epoch in range(epochs):
lt, mt = self._train(
_train_data,
optimizer,
_optimizer,
description=f'Epoch {epoch + 1}/{epochs}',
)
losses_train.extend(lt)
47 changes: 40 additions & 7 deletions finetuner/tuner/paddle/__init__.py
@@ -1,10 +1,11 @@
from typing import Optional
from typing import Dict, Optional

import paddle
from jina.logging.profile import ProgressBar
from paddle import nn
from paddle.io import DataLoader
from paddle.optimizer import Optimizer

from . import head_layers, datasets
from ..base import BaseTuner, BaseHead, BaseArityModel
from ...helper import DocumentArrayLike
@@ -27,9 +28,13 @@ def head_layer(self) -> BaseHead:
@property
def wrapped_model(self) -> nn.Layer:
if self.embed_model is None:
raise ValueError(f'embed_model is not set')
raise ValueError('embed_model is not set')

if getattr(self, '_wrapped_model', None) is not None:
return self._wrapped_model

return self.head_layer(_ArityModel(self.embed_model)) # wrap with head layer
self._wrapped_model = self.head_layer(_ArityModel(self.embed_model))
return self._wrapped_model

def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
ds = get_dataset(datasets, self.arity)
@@ -39,6 +44,32 @@ def _get_data_loader(self, inputs, batch_size: int, shuffle: bool):
shuffle=shuffle,
)

def _get_optimizer(
self, optimizer: str, optimizer_kwargs: Optional[dict], learning_rate: float
) -> Optimizer:
params = self.wrapped_model.parameters()
optimizer_kwargs = self._get_optimizer_kwargs(optimizer, optimizer_kwargs)

if optimizer == 'adam':
return paddle.optimizer.Adam(
parameters=params,
learning_rate=learning_rate,
beta1=optimizer_kwargs['beta_1'],
beta2=optimizer_kwargs['beta_2'],
epsilon=optimizer_kwargs['epsilon'],
)
elif optimizer == 'rmsprop':
return paddle.optimizer.RMSProp(
parameters=params, learning_rate=learning_rate, **optimizer_kwargs
)
elif optimizer == 'sgd':
return paddle.optimizer.Momentum(
parameters=params,
learning_rate=learning_rate,
momentum=optimizer_kwargs['momentum'],
use_nesterov=optimizer_kwargs['nesterov'],
)

def _eval(self, data, description: str = 'Evaluating', train_log: str = ''):
self.wrapped_model.eval()

@@ -95,12 +126,12 @@ def fit(
eval_data: Optional[DocumentArrayLike] = None,
epochs: int = 10,
batch_size: int = 256,
learning_rate: float = 1e-3,
optimizer: str = 'adam',
optimizer_kwargs: Optional[Dict] = None,
device: str = 'cpu',
**kwargs,
):
optimizer = paddle.optimizer.RMSProp(
learning_rate=0.01, parameters=self.wrapped_model.parameters()
)

if device == 'cuda':
paddle.set_device('gpu:0')
Expand All @@ -109,6 +140,8 @@ def fit(
else:
raise ValueError(f'Device {device} not recognized')

_optimizer = self._get_optimizer(optimizer, optimizer_kwargs, learning_rate)

losses_train = []
metrics_train = []
losses_eval = []
@@ -120,7 +153,7 @@
)
lt, mt = self._train(
_data,
optimizer,
_optimizer,
description=f'Epoch {epoch + 1}/{epochs}',
)
losses_train.extend(lt)