refactor: prepare changes to have batching for every executor #2110

Merged · 23 commits · Mar 8, 2021

Changes from 18 commits

Commits (23)
939408b  test: test _extract content from docset (JoanFM, Mar 3, 2021)
f4aad55  feat: allow extracting multiple contents (JoanFM, Mar 3, 2021)
d1297eb  test: add tests for batching executors (JoanFM, Mar 3, 2021)
d9ce11b  docs: add docstrings in document (JoanFM, Mar 3, 2021)
15b2257  docs: add docstrings in documentset (JoanFM, Mar 3, 2021)
532085e  fix: fix getting content from multi input (JoanFM, Mar 3, 2021)
33911a7  test: add tests to batching for segmenters (JoanFM, Mar 4, 2021)
dd99a39  Merge branch 'master' of https://github.com/jina-ai/jina into test-ge… (JoanFM, Mar 4, 2021)
7bdaa99  Merge branch 'master' of https://github.com/jina-ai/jina into test-ge… (JoanFM, Mar 4, 2021)
f3d1774  test: add explicit tests about batching for encoders (JoanFM, Mar 4, 2021)
28650bd  fix: single should handle non iterable input (JoanFM, Mar 4, 2021)
888b148  Merge branch 'master' of https://github.com/jina-ai/jina into test-ge… (JoanFM, Mar 5, 2021)
720c2a4  refactor: rename arguments and test output type (JoanFM, Mar 5, 2021)
3cd1dc8  Merge branch 'master' of https://github.com/jina-ai/jina into test-ge… (JoanFM, Mar 5, 2021)
650db8b  fix: fix black (JoanFM, Mar 5, 2021)
498135a  fix: update jina/executors/decorators.py (JoanFM, Mar 8, 2021)
a48a96c  fix: update jina/executors/decorators.py (JoanFM, Mar 8, 2021)
cef0f83  fix: update jina/types/sets/document.py (JoanFM, Mar 8, 2021)
0a822fe  fix: improve readability extract content (JoanFM, Mar 8, 2021)
b23c2d0  fix: update jina/types/sets/document.py (JoanFM, Mar 8, 2021)
58588cd  fix: update test_craft_executors_batching.py (JoanFM, Mar 8, 2021)
78238ac  Merge branch 'master' of https://github.com/jina-ai/jina into test-ge… (JoanFM, Mar 8, 2021)
d1ab9ec  Merge branch 'master' of https://github.com/jina-ai/jina into test-ge… (JoanFM, Mar 8, 2021)
96 changes: 88 additions & 8 deletions jina/executors/decorators.py
@@ -5,14 +5,14 @@

import inspect
from functools import wraps
from typing import Callable, Any, Union, Iterator, List, Optional, Dict
from typing import Callable, Any, Union, Iterator, List, Optional, Dict, Iterable

import numpy as np

from .metas import get_default_metas
from ..helper import batch_iterator, typename, convert_tuple_to_list
from ..logging import default_logger
from itertools import islice
from itertools import islice, chain


def as_aggregate_method(func: Callable) -> Callable:
@@ -199,14 +199,17 @@ def _get_total_size(full_data_size, batch_size, num_batch):
return total_size


def _merge_results_after_batching(final_result, merge_over_axis: int = 0):
def _merge_results_after_batching(
final_result, merge_over_axis: int = 0, flatten: bool = True
):
if len(final_result) == 1:
# the only result of one batch
return final_result[0]

if len(final_result) and merge_over_axis is not None:
if final_result:
if isinstance(final_result[0], np.ndarray):
final_result = np.concatenate(final_result, merge_over_axis)
if len(final_result[0].shape) > 1:
final_result = np.concatenate(final_result, merge_over_axis)
elif isinstance(final_result[0], tuple):
reduced_result = []
num_cols = len(final_result[0])
@@ -215,6 +218,8 @@ def _merge_results_after_batching(final_result, merge_over_axis: int = 0):
np.concatenate([row[col] for row in final_result], merge_over_axis)
)
final_result = tuple(reduced_result)
elif isinstance(final_result[0], list) and flatten:
final_result = list(chain.from_iterable(final_result))

if len(final_result):
return final_result
@@ -229,6 +234,7 @@ def batching(
slice_on: int = 1,
label_on: Optional[int] = None,
ordinal_idx_arg: Optional[int] = None,
flatten_output: bool = True,
) -> Any:
"""Split the input of a function into small batches and call :func:`func` on each batch
, collect the merged result and return. This is useful when the input is too big to fit into memory
@@ -244,6 +250,7 @@ def batching(
:param ordinal_idx_arg: the location of the ordinal indexes argument. Needed for classes
where the decorated function needs to know the ordinal indexes of the data in the batch
(not used when ``label_on`` is used)
:param flatten_output: If set to True, the results from different batches are chained and the return value is a single list of results. Otherwise, the return value is a list of lists, where each element contains the results from one batch. Note that if there is only one batch, the returned result is always flattened.
:return: the merged result as if :func:`func` had been run once on the whole input.

Example:
@@ -321,7 +328,9 @@ def arg_wrapper(*args, **kwargs):
if r is not None:
final_result.append(r)

return _merge_results_after_batching(final_result, merge_over_axis)
return _merge_results_after_batching(
final_result, merge_over_axis, flatten_output
)

return arg_wrapper
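A hypothetical usage sketch of the ``flatten_output`` flag documented above. The ``ToyCrafter`` class and its data are made up for illustration only, and passing ``batch_size`` through the decorator is assumed to work as in the existing docstring examples of this file:

from jina.executors.decorators import batching

class ToyCrafter:
    @batching(batch_size=2)  # flatten_output defaults to True
    def craft_flat(self, data):
        return [d.upper() for d in data]  # one list per batch

    @batching(batch_size=2, flatten_output=False)
    def craft_nested(self, data):
        return [d.upper() for d in data]

crafter = ToyCrafter()
print(crafter.craft_flat(['a', 'b', 'c', 'd']))    # expected: ['A', 'B', 'C', 'D']
print(crafter.craft_nested(['a', 'b', 'c', 'd']))  # expected: [['A', 'B'], ['C', 'D']]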

@@ -436,6 +445,7 @@ def single(
func: Callable[[Any], np.ndarray] = None,
merge_over_axis: int = 0,
slice_on: int = 1,
flatten_output: bool = True,
) -> Any:
"""
Guarantee that the input of a function is provided as a single instance and not in batches
@@ -444,6 +454,7 @@ def single(
:param merge_over_axis: merge over which axis into a single result
:param slice_on: the location of the data. When using inside a class,
``slice_on`` should take ``self`` into consideration.
:param flatten_output: whether a result consisting of a list of lists should be flattened in the output
:return: the merged result as if :func:`func` had been run once on the whole input.

Example:
@@ -458,9 +469,12 @@ def craft(self, text: str) -> Dict:
def _single(func):
@wraps(func)
def arg_wrapper(*args, **kwargs):
# priority: decorator > class_attribute
# by default data is in args[1] (self needs to be taken into account)
data = args[slice_on]

if not isinstance(data, Iterable):
return func(*args, **kwargs)

args = list(args)

default_logger.debug(f'batching disabled for {func.__qualname__}')
@@ -472,8 +486,74 @@ def arg_wrapper(*args, **kwargs):
if r is not None:
final_result.append(r)

return _merge_results_after_batching(
final_result, merge_over_axis, flatten_output
)

return arg_wrapper

if func:
return _single(func)
else:
return _single
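A hypothetical sketch of the non-iterable passthrough added above: when the sliced argument is not an ``Iterable``, ``func`` is called directly; otherwise it is called once per element and the results are merged. ``ToySquarer`` is made up for illustration only:

from jina.executors.decorators import single

class ToySquarer:
    @single
    def square(self, value):
        return value * value

s = ToySquarer()
print(s.square(3))          # non-iterable input, called once directly: expected 9
print(s.square([1, 2, 3]))  # iterable input, called per element: expected [1, 4, 9]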


def single_multi_input(
func: Callable[[Any], np.ndarray] = None,
merge_over_axis: int = 0,
slice_on: int = 1,
num_data: int = 1,
) -> Any:
"""Guarantee that the inputs of a function with more than one argument is provided as single instances and not in batches

:param func: function to decorate
:param merge_over_axis: merge over which axis into a single result
:param slice_on: the location of the data. When using inside a class,
``slice_on`` should take ``self`` into consideration.
:param num_data: the number of data arguments inside the arguments
:return: the merged result as if :func:`func` had been run once on the whole input.

.. warning::
data arguments will be taken starting from ``slice_on`` to ``slice_on + num_data``

Example:
.. highlight:: python
.. code-block:: python

class OneByOneCrafter:

@single_multi_input
def craft(self, text: str, id: str) -> Dict:
...
"""

def _single_multi_input(func):
@wraps(func)
def arg_wrapper(*args, **kwargs):
# by default data is in args[1:] (self needs to be taken into account)
args = list(args)
default_logger.debug(f'batching disabled for {func.__qualname__}')
data_iterators = [args[slice_on + i] for i in range(0, num_data)]

if not isinstance(data_iterators[0], Iterable):
return func(*args, **kwargs)

final_result = []
for i, instance in enumerate(data_iterators[0]):
args[slice_on] = instance
for idx in range(1, num_data):
args[slice_on + idx] = data_iterators[idx][i]

r = func(*args, **kwargs)

if r is not None:
final_result.append(r)

return _merge_results_after_batching(final_result, merge_over_axis)

return arg_wrapper

return _single(func)
if func:
return _single_multi_input(func)
else:
return _single_multi_input
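
A hypothetical usage sketch of the new ``single_multi_input`` decorator. The ``PairCrafter`` class and its data are made up for illustration; ``num_data=2`` tells the decorator that two data arguments follow ``self``:

from jina.executors.decorators import single_multi_input

class PairCrafter:
    @single_multi_input(num_data=2)
    def combine(self, texts, ids):
        return {'text': texts, 'id': ids}

crafter = PairCrafter()
print(crafter.combine(['a', 'b'], ['1', '2']))
# expected: [{'text': 'a', 'id': '1'}, {'text': 'b', 'id': '2'}]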