refactor(encoders): code style inheritance refactoring
hanxiao committed Aug 5, 2020
1 parent e3f47b4 commit 88730a3
Showing 20 changed files with 220 additions and 293 deletions.
12 changes: 6 additions & 6 deletions docs/chapters/all_exec.md
@@ -37,12 +37,12 @@ This version of Jina includes 80 Executors.
- `BaseEncoder`
- `BaseFrameworkExecutor`
- `BasePaddleExecutor`
- `BasePaddlehubEncoder`
- `BasePaddleEncoder`
- `BaseTextPaddlehubEncoder`
- `TextPaddlehubEncoder`
- `BaseCVPaddlehubEncoder`
- `BaseCVPaddleEncoder`
- `ImagePaddlehubEncoder`
- `VideoPaddlehubEncoder`
- `VideoPaddleEncoder`
- `BaseTFExecutor`
- `BaseTFEncoder`
- `BaseCVTFEncoder`
@@ -97,7 +97,7 @@ This version of Jina includes 80 Executors.
| `AnnoyIndexer` | `jina.executors.indexers.vector.nmslib` |
| `BM25Ranker` | `jina.executors.rankers.tfidf` |
| `BaseAudioEncoder` | `jina.executors.encoders` |
| `BaseCVPaddlehubEncoder` | `jina.executors.encoders.frameworks` |
| `BaseCVPaddleEncoder` | `jina.executors.encoders.frameworks` |
| `BaseCVTFEncoder` | `jina.executors.encoders.frameworks` |
| `BaseCVTorchEncoder` | |
| `BaseCVTorchEncoder` | `jina.executors.encoders.frameworks` |
@@ -116,7 +116,7 @@ This version of Jina includes 80 Executors.
| `BaseOnnxExecutor` | `jina.executors.frameworks` |
| `BasePaddleExecutor` | |
| `BasePaddleExecutor` | `jina.executors.frameworks` |
| `BasePaddlehubEncoder` | |
| `BasePaddleEncoder` | |
| `BasePbIndexer` | `jina.executors.indexers` |
| `Chunk2DocRanker` | `jina.executors.encoders` |
| `BaseSegmenter` | `jina.executors.crafters` |
@@ -173,5 +173,5 @@ This version of Jina includes 80 Executors.
| `TfIdfRanker` | `jina.executors.rankers.bi_match` |
| `TransformerTFEncoder` | |
| `TransformerTorchEncoder` | |
| `VideoPaddlehubEncoder` | `jina.executors.encoders.frameworks` |
| `VideoPaddleEncoder` | `jina.executors.encoders.frameworks` |
| `VideoTorchEncoder` | |
8 changes: 6 additions & 2 deletions jina/executors/clients.py
@@ -1,10 +1,12 @@
__copyright__ = "Copyright (c) 2020 Jina AI Limited. All rights reserved."
__license__ = "Apache-2.0"

from . import BaseExecutor
import grpc
from typing import Dict

import grpc

from . import BaseExecutor

if False:
from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import classification_pb2
@@ -16,6 +18,7 @@ class BaseClientExecutor(BaseExecutor):
:class:`BaseClientExecutor` is the base class for executors that wrap a client to another server.
"""

def __init__(self, host: str, port: str, timeout: int = -1, *args, **kwargs):
"""
:param host: the host address of the server
@@ -56,6 +59,7 @@ def get_output(self, response):
return np.array(response.result().outputs['output_feature'].float_val)
"""

def __init__(self, model_name: str, signature_name: str = 'serving_default', method_name: str = 'Predict',
*args, **kwargs):
"""
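As a point of reference, the snippet below is a minimal sketch (not part of this commit) of a subclass of `BaseClientExecutor`; the class name is made up, and it assumes the constructor shown above stores `host` and `port` as attributes.

import grpc

from jina.executors.clients import BaseClientExecutor


class MyGrpcClientExecutor(BaseClientExecutor):
    """Hypothetical executor that keeps a gRPC channel to a remote model server."""

    def post_init(self):
        # assumes the BaseClientExecutor constructor stored `host` and `port` as attributes
        self._channel = grpc.insecure_channel(f'{self.host}:{self.port}')

    def close(self):
        # release the channel when the executor shuts down
        self._channel.close()
        super().close()
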
153 changes: 4 additions & 149 deletions jina/executors/encoders/frameworks.py
@@ -3,10 +3,7 @@

import os

import numpy as np

from . import BaseEncoder
from ..decorators import batching, as_ndarray
from ..devices import OnnxDevice, PaddleDevice, TorchDevice, TFDevice
from ...helper import is_url

@@ -58,154 +55,12 @@ def _append_outputs(input_fn, outputs_name_to_append, output_fn):


class BaseTFEncoder(TFDevice, BaseEncoder):

def __init__(self, model_name: str = None, *args, **kwargs):
super().__init__(*args, **kwargs)
self.model_name = model_name
pass


class BaseTorchEncoder(TorchDevice, BaseEncoder):
pass

def __init__(self, model_name: str = None, *args, **kwargs):
super().__init__(*args, **kwargs)
self.model_name = model_name


class BasePaddlehubEncoder(PaddleDevice, BaseEncoder):

def __init__(self, model_name: str = None, *args, **kwargs):
super().__init__(*args, **kwargs)
self.model_name = model_name


class BaseTextTFEncoder(BaseTFEncoder):

def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
"""
:param data: an 1d array of string type (data.dtype.kind == 'U') in size B
:return: an ndarray of `B x D`
"""


class BaseTextTorchEncoder(BaseTorchEncoder):
def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
"""
:param data: an 1d array of string type (data.dtype.kind == 'U') in size B
:return: an ndarray of `B x D`
"""


class BaseTextPaddlehubEncoder(BasePaddlehubEncoder):
def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
"""
:param data: an 1d array of string type (data.dtype.kind == 'U') in size B
:return: an ndarray of `B x D`
"""


class BaseCVTFEncoder(BaseTFEncoder):
def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
"""
:param data: a `B x ([T] x D)` numpy ``ndarray``, `B` is the size of the batch
:return: a `B x D` numpy ``ndarray``
"""


class BaseCVTorchEncoder(BaseTorchEncoder):
""""
:class:`BaseTorchEncoder` implements the common part for :class:`ImageTorchEncoder` and :class:`VideoTorchEncoder`.
..warning::
:class:`BaseTorchEncoder` is not intented to be used to do the real encoding.
"""

def __init__(self, channel_axis: int = 1, *args, **kwargs):
super().__init__(*args, **kwargs)
self.channel_axis = channel_axis
self._default_channel_axis = 1

@batching
@as_ndarray
def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
if self.channel_axis != self._default_channel_axis:
data = np.moveaxis(data, self.channel_axis, self._default_channel_axis)
import torch
_input = torch.from_numpy(data.astype('float32'))
if self.on_gpu:
_input = _input.cuda()
_feature = self._get_features(_input).detach()
if self.on_gpu:
_feature = _feature.cpu()
_feature = _feature.numpy()
return self._get_pooling(_feature)

def _get_features(self, data):
raise NotImplementedError

def _get_pooling(self, feature_map):
return feature_map


class BaseCVPaddlehubEncoder(BasePaddlehubEncoder):
"""
:class:`BaseCVPaddlehubEncoder` implements the common parts for :class:`ImagePaddlehubEncoder` and
:class:`VideoPaddlehubEncoder`.
..warning::
:class:`BaseCVPaddlehubEncoder` is not intented to be used to do the real encoding.
"""

def __init__(self,
output_feature: str = None,
pool_strategy: str = None,
channel_axis: int = -3,
*args,
**kwargs):
super().__init__(*args, **kwargs)
self.pool_strategy = pool_strategy
self.outputs_name = output_feature
self.inputs_name = None
self.channel_axis = channel_axis
self._default_channel_axis = -3

def post_init(self):
super().post_init()
import paddlehub as hub
module = hub.Module(name=self.model_name)
inputs, outputs, self.model = module.context(trainable=False)
self.get_inputs_and_outputs_name(inputs, outputs)
self.exe = self.to_device()

def close(self):
self.exe.close()

def get_inputs_and_outputs_name(self, input_dict, output_dict):
raise NotImplementedError

@batching
@as_ndarray
def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
"""
:param data: a `B x T x (Channel x Height x Width)` numpy ``ndarray``, `B` is the size of the batch, `T` is the
number of frames
:return: a `B x D` numpy ``ndarray``, `D` is the output dimension
"""
if self.channel_axis != self._default_channel_axis:
data = np.moveaxis(data, self.channel_axis, self._default_channel_axis)
feature_map, *_ = self.exe.run(
program=self.model,
fetch_list=[self.outputs_name],
feed={self.inputs_name: data.astype('float32')},
return_numpy=True
)
if feature_map.ndim == 2 or self.pool_strategy is None:
return feature_map
return self.get_pooling(feature_map)

def get_pooling(self, data: 'np.ndarray', axis=None) -> 'np.ndarray':
_reduce_axis = tuple((i for i in range(len(data.shape)) if i > 1))
return getattr(np, self.pool_strategy)(data, axis=_reduce_axis)
class BasePaddleEncoder(PaddleDevice, BaseEncoder):
pass
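With the computer-vision logic moved out (see the new `jina/executors/encoders/image/__init__.py` below), the framework bases here are reduced to plain device-plus-encoder mixin stubs. The following is a rough sketch of how a concrete encoder would build on one of them; it is not from this commit, the class name and mean-pooling logic are illustrative, and `on_gpu` is assumed to be provided by the device mixin.

import numpy as np

from jina.executors.encoders.frameworks import BaseTorchEncoder


class MeanPoolTorchEncoder(BaseTorchEncoder):
    """Hypothetical encoder that mean-pools the trailing axis of a float batch."""

    def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
        import torch
        _input = torch.from_numpy(data.astype('float32'))
        if self.on_gpu:  # `on_gpu` assumed to come from the device mixin
            _input = _input.cuda()
        _output = _input.mean(dim=-1)
        if self.on_gpu:
            _output = _output.cpu()
        return _output.numpy()
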
105 changes: 105 additions & 0 deletions jina/executors/encoders/image/__init__.py
@@ -0,0 +1,105 @@
__copyright__ = "Copyright (c) 2020 Jina AI Limited. All rights reserved."
__license__ = "Apache-2.0"

import numpy as np

from ..frameworks import BaseTorchEncoder, BasePaddleEncoder
from ...decorators import batching, as_ndarray


class BaseCVTorchEncoder(BaseTorchEncoder):
""""
:class:`BaseTorchEncoder` implements the common part for :class:`ImageTorchEncoder` and :class:`VideoTorchEncoder`.
..warning::
:class:`BaseTorchEncoder` is not intented to be used to do the real encoding.
"""

def __init__(self, channel_axis: int = 1, *args, **kwargs):
super().__init__(*args, **kwargs)
self.channel_axis = channel_axis
self._default_channel_axis = 1

@batching
@as_ndarray
def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
if self.channel_axis != self._default_channel_axis:
data = np.moveaxis(data, self.channel_axis, self._default_channel_axis)
import torch
_input = torch.from_numpy(data.astype('float32'))
if self.on_gpu:
_input = _input.cuda()
_feature = self._get_features(_input).detach()
if self.on_gpu:
_feature = _feature.cpu()
_feature = _feature.numpy()
return self._get_pooling(_feature)

def _get_features(self, data):
raise NotImplementedError

def _get_pooling(self, feature_map):
return feature_map


class BaseCVPaddleEncoder(BasePaddleEncoder):
"""
:class:`BaseCVPaddleEncoder` implements the common parts for :class:`ImagePaddlehubEncoder` and
:class:`VideoPaddleEncoder`.
.. warning::
    :class:`BaseCVPaddleEncoder` is not intended to do the real encoding.
"""

def __init__(self,
model_name: str,
output_feature: str = None,
pool_strategy: str = None,
channel_axis: int = -3,
*args,
**kwargs):
super().__init__(*args, **kwargs)
self.pool_strategy = pool_strategy
self.outputs_name = output_feature
self.inputs_name = None
self.channel_axis = channel_axis
self._default_channel_axis = -3

def post_init(self):
super().post_init()
import paddlehub as hub
module = hub.Module(name=self.model_name)
inputs, outputs, self.model = module.context(trainable=False)
self.get_inputs_and_outputs_name(inputs, outputs)
self.exe = self.to_device()

def close(self):
self.exe.close()

def get_inputs_and_outputs_name(self, input_dict, output_dict):
raise NotImplementedError

@batching
@as_ndarray
def encode(self, data: 'np.ndarray', *args, **kwargs) -> 'np.ndarray':
"""
:param data: a `B x T x (Channel x Height x Width)` numpy ``ndarray``, `B` is the size of the batch, `T` is the
number of frames
:return: a `B x D` numpy ``ndarray``, `D` is the output dimension
"""
if self.channel_axis != self._default_channel_axis:
data = np.moveaxis(data, self.channel_axis, self._default_channel_axis)
feature_map, *_ = self.exe.run(
program=self.model,
fetch_list=[self.outputs_name],
feed={self.inputs_name: data.astype('float32')},
return_numpy=True
)
if feature_map.ndim == 2 or self.pool_strategy is None:
return feature_map
return self.get_pooling(feature_map)

def get_pooling(self, data: 'np.ndarray', axis=None) -> 'np.ndarray':
_reduce_axis = tuple((i for i in range(len(data.shape)) if i > 1))
return getattr(np, self.pool_strategy)(data, axis=_reduce_axis)
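Both classes above leave the model-specific hooks abstract (`_get_features`/`_get_pooling` on the Torch side, `get_inputs_and_outputs_name` on the Paddle side). Below is a minimal CPU-only sketch of a Torch subclass, assuming `torchvision` is installed; the model choice, class name and pooling are illustrative and not part of this commit.

from jina.executors.encoders.image import BaseCVTorchEncoder


class MobileNetImageEncoder(BaseCVTorchEncoder):
    """Hypothetical image encoder backed by torchvision's MobileNetV2 feature extractor."""

    def post_init(self):
        super().post_init()
        import torchvision.models as models
        # CPU-only sketch; GPU placement of the model is omitted for brevity
        self.model = models.mobilenet_v2(pretrained=True).features
        self.model.eval()

    def _get_features(self, data):
        # `data` arrives as a B x C x H x W float tensor from the base `encode`
        return self.model(data)

    def _get_pooling(self, feature_map):
        # the base class passes a numpy array; average over the spatial axes -> B x D
        return feature_map.mean(axis=(2, 3))
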
4 changes: 2 additions & 2 deletions jina/executors/encoders/image/bigtransfer.py
@@ -3,11 +3,11 @@

import numpy as np

from ..frameworks import BaseCVTFEncoder
from ..frameworks import BaseTFEncoder
from ...decorators import batching, as_ndarray


class BiTImageEncoder(BaseCVTFEncoder):
class BiTImageEncoder(BaseTFEncoder):
"""
:class:`BiTImageEncoder` is Big Transfer (BiT) presented by Google (https://github.com/google-research/big_transfer),
this class uses a pretrained BiT model to encode data from an ndarray, potentially B x (Channel x Height x Width) into a
34 changes: 0 additions & 34 deletions jina/executors/encoders/image/customtfkeras.py

This file was deleted.
