Skip to content

Commit

Permalink
docs: updated optimizer docstrings (#1876)
Browse files Browse the repository at this point in the history
* docs: updated optimizer docstrings

* fix: correct interface of parent classes

* docs: removed abc again
  • Loading branch information
maximilianwerk committed Feb 6, 2021
1 parent 4df6527 commit c5e275b
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 73 deletions.
60 changes: 44 additions & 16 deletions jina/optimizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,39 @@


class OptimizerCallback(JAMLCompatible):
"""Callback interface for storing and calculating evaluation metric during an optimization.
Should be used, whenever a custom evaluation aggregation during an Flow optimization is needed.
"""
Callback interface for storing and calculating evaluation metric during an optimization.
Should be used whenever a custom evaluation aggregation during a Flow optimization is needed.
"""

def get_empty_copy(self) -> 'OptimizerCallback':
"""
:returns: An empty copy of the :class:`OptimizerCallback`.
:raises NotImplementedError: :class:`OptimizerCallback` is just an interface. Please use any implemented subclass.
"""
raise NotImplementedError

def get_final_evaluation(self) -> float:
"""
:returns: The aggregation of all evaluations collected via :meth:`__call__`
:raises NotImplementedError: :class:`OptimizerCallback` is just an interface. Please use any implemented subclass.
"""
raise NotImplementedError

def __call__(self, response):
"""
Collects the results of evaluators in the response object for aggregation.
:param response: A response object of a Flow.
:raises NotImplementedError: :class:`OptimizerCallback` is just an interface. Please use any implemented subclass.
"""
raise NotImplementedError


class MeanEvaluationCallback(OptimizerCallback):
"""Calculates the mean of all evaluations during a single :py:class:`FlowRunner`
execution from the :py:class:`FlowOptimizer`.
"""
Calculates the mean of all evaluations during a single :py:class:`FlowRunner`
execution from the :py:class:`FlowOptimizer`.
"""

def __init__(self, eval_name: Optional[str] = None):
Expand All @@ -49,7 +65,11 @@ def get_empty_copy(self):
return MeanEvaluationCallback(self._eval_name)

def get_final_evaluation(self):
"""Returns mean evaluation value on the eval_name metric."""
"""
Calculates and returns the mean evaluation value on the metric defined in :meth:`__init__`.
:returns: The aggregation of all evaluations collected via :meth:`__call__`
"""
if self._eval_name is not None:
evaluation_name = self._eval_name
else:
Expand All @@ -61,7 +81,6 @@ def get_final_evaluation(self):
return self._evaluation_values[evaluation_name] / self._n_docs

def __call__(self, response):
"""Will be used as the callback in a :py:class:`Flow` run in the :py:class:`FlowRunner`."""
self._n_docs += len(response.search.docs)
logger.info(f'Num of docs evaluated: {self._n_docs}')
for doc in response.search.docs:
Expand All @@ -84,16 +103,21 @@ def __init__(self, study: 'optuna.study.Study'):

@property
def study(self):
"""Raw optuna study as calculated by the :py:class:`FlowOptimizer`."""
"""
:returns: Raw optuna study as calculated by the :py:class:`FlowOptimizer`.
"""
return self._study

@property
def best_parameters(self):
"""The parameter set, which got the best evaluation result during the optimization."""
"""
:returns: The parameter set, which got the best evaluation result during the optimization.
"""
return self._best_parameters

def save_parameters(self, filepath: str = 'config/best_config.yml'):
"""Stores the best parameters in the given file.
"""
Stores the best parameters in the given file.
:param filepath: path where the best parameter config will be saved
"""
Expand All @@ -103,10 +127,11 @@ def save_parameters(self, filepath: str = 'config/best_config.yml'):


class FlowOptimizer(JAMLCompatible):
"""Optimizer runs the given flows on multiple parameter configurations in order
to find the best performing parameters. Uses `optuna` behind the scenes.
For a detailed information how the parameters are sampled by optuna see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html
"""
Optimizer runs the given flows on multiple parameter configurations in order
to find the best performing parameters. Uses `optuna` behind the scenes.
For detailed information on how the parameters are sampled by optuna, see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html
"""

def __init__(
Expand Down Expand Up @@ -198,9 +223,11 @@ def _objective(self, trial):
return eval_score

def optimize_flow(self, **kwargs) -> 'ResultProcessor':
"""Will run the actual optimization.
"""
Will run the actual optimization.
:param kwargs: extra parameters for optuna sampler
:returns: The aggregated result of the optimization run as a :class:`ResultProcessor`.
"""
with ImportExtensions(required=True):
import optuna
Expand All @@ -215,11 +242,12 @@ def optimize_flow(self, **kwargs) -> 'ResultProcessor':


def run_optimizer_cli(args: 'Namespace'):
"""Used to run the FlowOptimizer from command line interface.
"""
Used to run the FlowOptimizer from command line interface.
:param args: arguments passed via cli
"""
# The following import is needed to initialize the JYML parser
# The following import is needed to initialize the JAML parser
from .flow_runner import SingleFlowRunner, MultiFlowRunner
with open(args.uses) as f:
optimizer = JAML.load(f)
Expand Down
76 changes: 36 additions & 40 deletions jina/optimizers/flow_runner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import shutil
from collections.abc import Iterable
from typing import Iterable, Optional, Union, List
from typing import Optional, Union, List

from ..flow import Flow
from ..helper import colored
Expand All @@ -19,28 +19,20 @@ def run(
callback=None,
**kwargs,
):
"""Runs the defined flow(s).
:param trial_parameters: parameters to be used as context
:param workspace: directory to be used for the flows
:param callback: callback that will be called by the flows. Should store the evaluation results.
"""
Runs the defined Flow(s).
:param trial_parameters: Parameters to be used as context
:param workspace: Directory to be used for the flows
:param callback: Callback that will be called by the Flows. Should store the evaluation results.
:param **kwargs: Further arguments passed to the Flow(s) as `context`
:raises NotImplementedError: :class:`FlowRunner` is just an interface. Please use any implemented subclass.
"""
raise NotImplementedError


class SingleFlowRunner(FlowRunner):
"""Module to define and run a flow.
`documents` maps to a parameter of the `execution_method`, depending on the method.
If you use a generator function/list as documents, the default will work out of the box.
Otherwise, the following settings will work:
indexing + jsonlines file: `execution_methos='index_lines', documents_parameter_name='filepath'`
search + jsonlines file: `execution_methos='search_lines', documents_parameter_name='filepath'`
indexing + file pattern: `execution_methos='index_files', documents_parameter_name='pattern'`
search + file pattern: `execution_methos='search_files', documents_parameter_name='pattern'`
For more reasonable values, have a look at the `Flow`.
"""
""":class:`SingleFlowRunner` enables running a flow repeatedly with different `context`."""

def __init__(
self,
Expand All @@ -52,21 +44,33 @@ def __init__(
overwrite_workspace: bool = False,
):
"""
:param flow_yaml: path to Flow yaml
:param documents: input parameter for `execution_method` for iterating documents.
(e.g. a list of documents for `index` or a .jsonlines file for `index_lines`)
:param request_size: request size used in the flow
:param execution_method: one of the methods of the Jina :py:class:`Flow` (e.g. `index_lines`)
:param documents_parameter_name: to which parameter of the `execution_function` the `documents` will be mapped.
See `jina/flow/__init__.py::Flow` for more details.
:param overwrite_workspace: True, means workspace created by the Flow will be overwriten
`documents` maps to a parameter of the `execution_method`, depending on the method.
If you use a generator function/list as `documents`, the default will work out of the box.
Otherwise, the following settings will work:
indexing + jsonlines file: `execution_method='index_lines', documents_parameter_name='filepath'`
search + jsonlines file: `execution_method='search_lines', documents_parameter_name='filepath'`
indexing + file pattern: `execution_method='index_files', documents_parameter_name='pattern'`
search + file pattern: `execution_method='search_files', documents_parameter_name='pattern'`
For more reasonable values, have a look at the :class:`Flow`.
:param flow_yaml: Path to Flow yaml
:param documents: Input parameter for `execution_method` for iterating documents.
(e.g. a list of documents for `index` or a .jsonlines file for `index_lines`)
:param request_size: Request size used in the flow
:param execution_method: One of the methods of the Jina :py:class:`Flow` (e.g. `index_lines`)
:param documents_parameter_name: The `documents` will be mapped to `documents_parameter_name` in the function `execution_function`.
See `jina/flow/__init__.py::Flow` for more details.
:param overwrite_workspace: True, means workspace created by the Flow will be overwritten with each execution.
:raises TypeError: When the documents are neither a `str` nor an `Iterable`
"""
super().__init__()
self._flow_yaml = flow_yaml

if type(documents) is str:
self._documents = documents

elif isinstance(documents, Iterable):
self._documents = list(documents)
else:
Expand Down Expand Up @@ -103,12 +107,6 @@ def run(
callback=None,
**kwargs,
):
"""Runs a Flow according to the definition of the `FlowRunner`.
:param trial_parameters: context for the Flow
:param workspace: directory to be used for artifacts generated
:param callback: The callback function, which should store results comming from evaluation.
"""

self._setup_workspace(workspace)
additional_arguments = {self._documents_parameter_name: self._documents}
Expand All @@ -122,11 +120,14 @@ def run(


class MultiFlowRunner(FlowRunner):
"""Chain and run multiple Flows. It is an interface for common patterns like IndexFlow -> SearchFlow"""
"""
:class:`MultiFlowRunner` chains and runs multiple Flows.
It is an interface for common patterns like IndexFlow -> SearchFlow.
"""

def __init__(self, flows: List[FlowRunner]):
"""
:param flows: Flows to be executed in sequence
:param flows: Flows to be executed in sequence.
"""
super().__init__()
self.flows = flows
Expand All @@ -138,10 +139,5 @@ def run(
callback=None,
**kwargs,
):
"""
:param trial_parameters: parameters to be used as environment variables
:param workspace: directory to be used for the flows
:param callback: will be forwarded to every single Flow.
"""
for flow in self.flows:
flow.run(trial_parameters, workspace, callback, **kwargs)
45 changes: 28 additions & 17 deletions jina/optimizers/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@ def __init__(


class IntegerParameter(OptimizationParameter):
"""Used for optimizing integer parameters with the FlowOptimizer.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_int
"""
Used for optimizing integer parameters with the FlowOptimizer.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_int
"""

def __init__(
Expand All @@ -38,16 +39,18 @@ def __init__(
super().__init__(*args, **kwargs)
self.low = low
self.high = high
if log and step_size != 1:
raise ValueError('''The step_size != 1 and log arguments cannot be used at the same time. When setting log argument to True, set the step argument to 1.''')

self.step_size = step_size
# The step != 1 and log arguments cannot be used at the same time.
# To set the log argument to True, set the step argument to 1.
self.log = log


class UniformParameter(OptimizationParameter):
"""Used for optimizing float parameters with the FlowOptimizer with uniform sampling.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_discrete_uniform
"""
Used for optimizing float parameters with the FlowOptimizer with uniform sampling.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_discrete_uniform
"""

def __init__(self, low: float, high: float, *args, **kwargs):
Expand All @@ -57,9 +60,10 @@ def __init__(self, low: float, high: float, *args, **kwargs):


class LogUniformParameter(OptimizationParameter):
"""Used for optimizing float parameters with the FlowOptimizer with loguniform sampling.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_loguniform
"""
Used for optimizing float parameters with the FlowOptimizer with loguniform sampling.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_loguniform
"""

def __init__(self, low: float, high: float, *args, **kwargs):
Expand All @@ -69,9 +73,10 @@ def __init__(self, low: float, high: float, *args, **kwargs):


class CategoricalParameter(OptimizationParameter):
"""Used for optimizing categorical parameters with the FlowOptimizer.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_categorical
"""
Used for optimizing categorical parameters with the FlowOptimizer.
For detailed information about sampling and usage see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_categorical
"""

def __init__(
Expand All @@ -82,9 +87,10 @@ def __init__(


class DiscreteUniformParameter(OptimizationParameter):
"""Used for optimizing discrete parameters with the FlowOptimizer with uniform sampling.
For detailed information about sampling and usage it is used by Jina with optuna see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_discrete_uniform
"""
Used for optimizing discrete parameters with the FlowOptimizer with uniform sampling.
For detailed information about sampling and usage it is used by Jina with optuna see
https://optuna.readthedocs.io/en/stable/reference/generated/optuna.trial.Trial.html#optuna.trial.Trial.suggest_discrete_uniform
"""

def __init__(self, low: float, high: float, q: float, *args, **kwargs):
Expand All @@ -95,6 +101,11 @@ def __init__(self, low: float, high: float, q: float, *args, **kwargs):


def load_optimization_parameters(filepath: str):
    """
    Read a `.yml` file of optimization parameters and parse it with the JAML parser.

    :param filepath: Path to the file that holds the optimization parameters.
    :returns: The object produced by `JAML.load` for the contents of the file.
    """
    with open(filepath, encoding='utf8') as parameter_file:
        loaded_parameters = JAML.load(parameter_file)
    return loaded_parameters

0 comments on commit c5e275b

Please sign in to comment.