Skip to content
Permalink
Browse files

refactor(base): component renamed to components

  • Loading branch information...
hanxiao committed Aug 16, 2019
1 parent defe9b1 commit 66d020bd8e59770dd31cfa872da912df2228bf98
Showing with 100 additions and 100 deletions.
  1. +1 −1 README.md
  2. +2 −2 docs/chapter/yaml-config.md
  3. +24 −24 gnes/base/__init__.py
  4. +1 −1 gnes/composer/base.py
  5. +5 −5 gnes/encoder/base.py
  6. +1 −1 gnes/encoder/text/bert.py
  7. +4 −4 gnes/indexer/base.py
  8. +5 −5 gnes/preprocessor/base.py
  9. +2 −2 tests/test_image_encoder.py
  10. +10 −10 tests/test_load_dump_pipeline.py
  11. +2 −2 tests/test_onnx_image_encoder.py
  12. +1 −1 tests/test_pipelinepreprocess.py
  13. +2 −2 tests/test_pretrain_encoder.py
  14. +2 −2 tests/test_yaml.py
  15. +2 −2 tests/yaml/base-elmo-nes.yml
  16. +2 −2 tests/yaml/base-encoder.yml
  17. +2 −2 tests/yaml/base-eu-nes.yml
  18. +2 −2 tests/yaml/base-flair-nes.yml
  19. +2 −2 tests/yaml/base-gpt-nes.yml
  20. +2 −2 tests/yaml/base-gpt2-nes.yml
  21. +1 −1 tests/yaml/base-indexer.yml
  22. +1 −1 tests/yaml/base-indexer2.yml
  23. +1 −1 tests/yaml/base-indexer4.yml
  24. +2 −2 tests/yaml/base-nes.yml
  25. +1 −1 tests/yaml/bert-binary-encoder.yml
  26. +1 −1 tests/yaml/dummy-pipeline.yml
  27. +1 −1 tests/yaml/elmo-binary-encoder.yml
  28. +1 −1 tests/yaml/flair-binary-encoder.yml
  29. +1 −1 tests/yaml/gpt-binary-encoder.yml
  30. +1 −1 tests/yaml/gpt2-binary-encoder.yml
  31. +1 −1 tests/yaml/hash-encoder.yml
  32. +1 −1 tests/yaml/lopq-encoder-2-np.yml
  33. +1 −1 tests/yaml/lopq-encoder-2-tf.yml
  34. +1 −1 tests/yaml/lopq-encoder-3.yml
  35. +1 −1 tests/yaml/lopq-encoder.yml
  36. +1 −1 tests/yaml/resize-image-prep.yml
  37. +1 −1 tutorials/component-yaml-spec.md
  38. +2 −2 yaml-example/component/encoder.bas-pca.yml
  39. +2 −2 yaml-example/component/encoder.bas.yml
  40. +1 −1 yaml-example/component/encoder.inception.yml
  41. +1 −1 yaml-example/component/encoder.resnet.yml
  42. +1 −1 yaml-example/component/encoder.vgg.yml
  43. +1 −1 yaml-example/component/encoder.w2v.yml
@@ -275,7 +275,7 @@ gnes_config:

```yaml
!PipelineEncoder
component:
components:
- !GPT2Encoder
parameter:
model_dir: $GPT2_CI_MODEL
@@ -22,7 +22,7 @@ Together they define the behavior of a GNES system. Roughly speaking,

```yaml
!PipelineEncoder
component:
components:
- !Word2VecEncoder
parameter:
model_dir: /ext_data/sgns.wiki.bigram-char.refine
@@ -40,7 +40,7 @@ One can also append extra component to this pipeline, e.g. adding quantization.

```yaml
!PipelineEncoder
component:
components:
- !Word2VecEncoder
parameter:
model_dir: /ext_data/sgns.wiki.bigram-char.refine
@@ -361,61 +361,61 @@ def _copy_from(self, x: 'TrainableBase') -> None:
class CompositionalTrainableBase(TrainableBase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._component = None # type: List[T]
self._components = None # type: List[T]

@property
def component(self) -> Union[List[T], Dict[str, T]]:
return self._component
def components(self) -> Union[List[T], Dict[str, T]]:
return self._components

@property
def is_pipeline(self):
return isinstance(self.component, list)
return isinstance(self.components, list)

@component.setter
def component(self, comps: Callable[[], Union[list, dict]]):
@components.setter
def components(self, comps: Callable[[], Union[list, dict]]):
if not callable(comps):
raise TypeError('component must be a callable function that returns '
raise TypeError('components must be a callable function that returns '
'a List[BaseEncoder]')
if not getattr(self, 'init_from_yaml', False):
self._component = comps()
self._components = comps()
else:
self.logger.info('component is omitted from construction, '
self.logger.info('components is omitted from construction, '
'as it is initialized from yaml config')

def close(self):
super().close()
# pipeline
if isinstance(self.component, list):
for be in self.component:
if isinstance(self.components, list):
for be in self.components:
be.close()
# no typology
elif isinstance(self.component, dict):
for be in self.component.values():
elif isinstance(self.components, dict):
for be in self.components.values():
be.close()
elif self.component is None:
elif self.components is None:
pass
else:
raise TypeError('component must be dict or list, received %s' % type(self.component))
raise TypeError('components must be dict or list, received %s' % type(self.components))

def _copy_from(self, x: T):
if isinstance(self.component, list):
for be1, be2 in zip(self.component, x.component):
if isinstance(self.components, list):
for be1, be2 in zip(self.components, x.components):
be1._copy_from(be2)
elif isinstance(self.component, dict):
for k, v in self.component.items():
v._copy_from(x.component[k])
elif isinstance(self.components, dict):
for k, v in self.components.items():
v._copy_from(x.components[k])
else:
raise TypeError('component must be dict or list, received %s' % type(self.component))
raise TypeError('components must be dict or list, received %s' % type(self.components))

@classmethod
def to_yaml(cls, representer, data):
tmp = super()._dump_instance_to_yaml(data)
tmp['component'] = data.component
tmp['components'] = data.components
return representer.represent_mapping('!' + cls.__name__, tmp)

@classmethod
def from_yaml(cls, constructor, node):
obj, data, from_dump = super()._get_instance_from_yaml(constructor, node)
if not from_dump and 'component' in data:
obj.component = lambda: data['component']
if not from_dump and 'components' in data:
obj.components = lambda: data['components']
return obj
@@ -126,7 +126,7 @@ def __init__(self, args):
else:
raise ValueError(comp)
else:
self.logger.error('yaml file defines an empty graph! no "component" field exists!')
self.logger.error('yaml file defines an empty graph! no "services" field exists!')

def check_fields(self, comp: Dict) -> bool:
if 'name' not in comp:
@@ -70,16 +70,16 @@ def encode(self, data: np.ndarray, *args, **kwargs) -> bytes:

class PipelineEncoder(CompositionalTrainableBase):
def encode(self, data: Any, *args, **kwargs) -> Any:
if not self.component:
if not self.components:
raise NotImplementedError
for be in self.component:
for be in self.components:
data = be.encode(data, *args, **kwargs)
return data

def train(self, data, *args, **kwargs):
if not self.component:
if not self.components:
raise NotImplementedError
for idx, be in enumerate(self.component):
for idx, be in enumerate(self.components):
be.train(data, *args, **kwargs)
if idx + 1 < len(self.component):
if idx + 1 < len(self.components):
data = be.encode(data, *args, **kwargs)
@@ -45,7 +45,7 @@ def close(self):

class BertEncoderWithServer(CompositionalTrainableBase):
def encode(self, text: List[str], *args, **kwargs) -> np.ndarray:
return self.component['bert_client'].encode(text, *args, **kwargs)
return self.components['bert_client'].encode(text, *args, **kwargs)


class BertEncoderServer(BaseTextEncoder):
@@ -72,11 +72,11 @@ def normalize_score(self, *args, **kwargs):
class JointIndexer(CompositionalTrainableBase):

@property
def component(self):
def components(self):
return self._component

@component.setter
def component(self, comps: Callable[[], Union[list, dict]]):
@components.setter
def components(self, comps: Callable[[], Union[list, dict]]):
if not callable(comps):
raise TypeError('component must be a callable function that returns '
'a List[BaseIndexer]')
@@ -88,7 +88,7 @@ def component(self, comps: Callable[[], Union[list, dict]]):

self._binary_indexer = None
self._doc_indexer = None
for c in self.component:
for c in self.components:
if isinstance(c, BaseVectorIndexer):
self._binary_indexer = c
elif isinstance(c, BaseTextIndexer):
@@ -43,17 +43,17 @@ def apply(self, doc: 'gnes_pb2.Document') -> None:

class PipelinePreprocessor(CompositionalTrainableBase):
def apply(self, doc: 'gnes_pb2.Document') -> None:
if not self.component:
if not self.components:
raise NotImplementedError
for be in self.component:
for be in self.components:
be.apply(doc)

def train(self, data, *args, **kwargs):
if not self.component:
if not self.components:
raise NotImplementedError
for idx, be in enumerate(self.component):
for idx, be in enumerate(self.components):
be.train(data, *args, **kwargs)
if idx + 1 < len(self.component):
if idx + 1 < len(self.components):
data = be.apply(data, *args, **kwargs)


@@ -21,8 +21,8 @@ def img_process_for_test(dirname):

test_img_all_preprocessor = []
pipline_prep1 = PipelinePreprocessor()
pipline_prep1.component = lambda: [BaseUnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
ResizeChunkPreprocessor()]
pipline_prep1.components = lambda: [BaseUnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
ResizeChunkPreprocessor()]
for preprocessor in [pipline_prep1,
VanillaSlidingPreprocessor()]:
test_img_copy = copy.deepcopy(test_img)
@@ -44,7 +44,7 @@ def test_name_warning(self):
d1.name = ''
d2.name = ''
d3 = PipelineEncoder()
d3.component = lambda: [d1, d2]
d3.components = lambda: [d1, d2]
d3.name = 'dummy-pipeline'
d3.work_dir = './'
d3.dump()
@@ -65,17 +65,17 @@ def test_dummytf(self):
self.assertTrue(d2.is_trained)

d3 = PipelineEncoder()
d3.component = lambda: [d1, d2]
d3.components = lambda: [d1, d2]
self.assertEqual(d3.encode(1), 3)
self.assertFalse(d3.is_trained)
self.assertTrue(d3.component[0].is_trained)
self.assertTrue(d3.component[1].is_trained)
self.assertTrue(d3.components[0].is_trained)
self.assertTrue(d3.components[1].is_trained)

d3.dump()
d31 = BaseEncoder.load(d3.dump_full_path)
self.assertFalse(d31.is_trained)
self.assertTrue(d31.component[0].is_trained)
self.assertTrue(d31.component[1].is_trained)
self.assertTrue(d31.components[0].is_trained)
self.assertTrue(d31.components[1].is_trained)

d3.work_dir = self.dirname
d3.name = 'dummy-pipeline'
@@ -85,13 +85,13 @@ def test_dummytf(self):

d4 = PipelineEncoder.load(d3.dump_full_path)
self.assertTrue(d4.is_trained)
self.assertTrue(d4.component[0].is_trained)
self.assertTrue(d4.component[1].is_trained)
self.assertTrue(d4.components[0].is_trained)
self.assertTrue(d4.components[1].is_trained)

d4 = PipelineEncoder.load_yaml(d3.yaml_full_path)
self.assertTrue(d4.is_trained)
self.assertTrue(d4.component[0].is_trained)
self.assertTrue(d4.component[1].is_trained)
self.assertTrue(d4.components[0].is_trained)
self.assertTrue(d4.components[1].is_trained)

self.assertEqual(d4.encode(4), 6)

@@ -20,8 +20,8 @@ def img_process_for_test(dirname):

test_img_all_preprocessor = []
pipline_prep1 = PipelinePreprocessor()
pipline_prep1.component = lambda: [BaseUnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
ResizeChunkPreprocessor()]
pipline_prep1.components = lambda: [BaseUnaryPreprocessor(doc_type=gnes_pb2.Document.IMAGE),
ResizeChunkPreprocessor()]
for preprocessor in [pipline_prep1,
VanillaSlidingPreprocessor()]:
test_img_copy = copy.deepcopy(test_img)
@@ -30,7 +30,7 @@ def tearDown(self):

def test_pipelinepreproces(self):
p3 = PipelinePreprocessor()
p3.component = lambda: [P1(), P2()]
p3.components = lambda: [P1(), P2()]
d = gnes_pb2.Document()
d.doc_id = 1
p3.apply(d)
@@ -44,13 +44,13 @@ def train(self, data, *args, **kwargs):
class _LOPQEncoder(PipelineEncoder):
def __init__(self):
super().__init__()
self.component = lambda: [_PCAEncoder(), _PQEncoder()]
self.components = lambda: [_PCAEncoder(), _PQEncoder()]


class _BertBinaryEncoder(PipelineEncoder):
def __init__(self):
super().__init__()
self.component = lambda: [_BertEncoder(), _LOPQEncoder()]
self.components = lambda: [_BertEncoder(), _LOPQEncoder()]


class TestDocument(unittest.TestCase):
@@ -36,8 +36,8 @@ class dummyPipeline(PipelineEncoder):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.component = lambda: [foo1(*args, **kwargs),
foo2(*args, **kwargs), ]
self.components = lambda: [foo1(*args, **kwargs),
foo2(*args, **kwargs), ]


class TestYaml(unittest.TestCase):
@@ -1,8 +1,8 @@
!GNES
component:
components:
encoder:
!PipelineEncoder
component:
components:
- !ElmoEncoder
parameter:
model_dir: $ELMO_CI_MODEL
@@ -1,7 +1,7 @@
!PipelineEncoder
component:
components:
- !BertEncoderWithServer
component:
components:
bert_server:
!BertEncoderServer
parameter:
@@ -1,8 +1,8 @@
!GNES
component:
components:
encoder:
!PipelineEncoder
component:
components:
- !BertEncoder
parameter:
kwargs:
@@ -1,8 +1,8 @@
!GNES
component:
components:
encoder:
!PipelineEncoder
component:
components:
- !FlairEncoder
parameter:
model_dir: $FLAIR_CI_MODEL
@@ -1,8 +1,8 @@
!GNES
component:
components:
encoder:
!PipelineEncoder
component:
components:
- !GPTEncoder
parameter:
model_dir: $GPT_CI_MODEL
@@ -1,8 +1,8 @@
!GNES
component:
components:
encoder:
!PipelineEncoder
component:
components:
- !GPT2Encoder
parameter:
model_dir: $GPT2_CI_MODEL
@@ -1,5 +1,5 @@
!JointIndexer
component:
components:
- !BIndexer
parameter:
num_bytes: 2
@@ -1,5 +1,5 @@
!JointIndexer
component:
components:
- !NumpyIndexer
parameter:
num_bytes: 2
@@ -1,5 +1,5 @@
!JointIndexer
component:
components:
- !NumpyIndexer
parameter:
num_bytes: 2

0 comments on commit 66d020b

Please sign in to comment.
You can’t perform that action at this time.