Skip to content
Permalink
Browse files

fix(ci): fix unit tests for modules

  • Loading branch information...
hanxiao committed Aug 21, 2019
1 parent 27dc34d commit 944b8c092650e63327854a32bcb6509d2163a4b1
@@ -278,7 +278,7 @@ Now let's see what the YAML config says. First impression, it is pretty intuitiv
<summary>Preprocessor config: text-prep.yml (click to expand...)</summary>

```yaml
!TextPreprocessor
!PunctSplitPreprocessor
parameters:
start_doc_id: 0
random_doc_id: True
@@ -33,7 +33,7 @@
'FFmpegVideoSegmentor': 'video.ffmpeg',
'ShotDetectPreprocessor': 'video.shotdetect',
'AudioVanilla': 'audio.audio_vanilla',
'BaseAudioPreprocessor': 'base'
'BaseAudioPreprocessor': 'base',
'RawChunkPreprocessor': 'base'
}

@@ -8,4 +8,3 @@ def __init__(self, bar: int, *args, **kwargs):
self.is_trained = True
self.bar = bar
self.logger.info('look at me, I override the original GNES faiss indexer')

@@ -8,4 +8,3 @@ def __init__(self, bar: int, *args, **kwargs):
self.is_trained = True
self.bar = bar
self.logger.info('look at me, I override the overrided faiss indexer!!!')

@@ -1,10 +1,8 @@
import os
import shutil
import unittest

import numpy as np

from gnes.helper import touch_dir
from gnes.indexer.vector.annoy import AnnoyIndexer


@@ -43,4 +43,4 @@ def test_video_preprocessor_service_realdata(self):
self.assertGreater(len(d.chunks), 0)
for _ in range(len(d.chunks)):
shape = blob2array(d.chunks[_].blob).shape
self.assertEqual(len(shape), 1)
self.assertEqual(len(shape), 1)
@@ -22,7 +22,7 @@ def setUp(self):
[3, 2, 1, 2]]).astype(np.uint8)

self.toy_exp = [[(234, 0, 1., 1,), (123, 1, 1., 1)], [(432, 0, 1., 1), (1, 0, 1., 1)],
[(234, 0, 1., 0.75), (123, 1, 1., 0.75)]]
[(234, 0, 1., 0.75), (123, 1, 1., 0.75)]]
self.weights = [1.] * len(self.toy_label)

dirname = os.path.dirname(__file__)
@@ -48,5 +48,3 @@ def test_query_docs(self):
res = self.db.query(query_list)
num_non_empty = sum(1 for d in res if d)
self.assertEqual(num_non_empty, 1)


@@ -1,10 +1,10 @@
import copy
import os
import unittest

from gnes.preprocessor.base import BasePreprocessor
from gnes.preprocessor.video.ffmpeg import FFmpegVideoSegmentor
from gnes.proto import gnes_pb2
import copy


class TestPartition(unittest.TestCase):
@@ -39,4 +39,4 @@ def test_dump_load(self):

def tearDown(self):
if os.path.exists(self.dump_path):
os.remove(self.dump_path)
os.remove(self.dump_path)
@@ -31,12 +31,12 @@ def test_train_pred(self):

out = m.encode(self.test_data)
self.assertEqual(self.x, out.shape[0])
self.assertEqual(self.num_idx+self.num_bytes, out.shape[1])
self.assertEqual(self.num_idx + self.num_bytes, out.shape[1])
self.assertEqual(np.uint32, out.dtype)

def test_yaml_load(self):
pca_hash = PipelineEncoder.load_yaml(self.hash_yaml)
pca_hash.train(self.test_data)
out = pca_hash.encode(self.test_data)
self.assertEqual(self.x, out.shape[0])
self.assertEqual(self.num_idx+self.num_bytes, out.shape[1])
self.assertEqual(self.num_idx + self.num_bytes, out.shape[1])
@@ -1,8 +1,10 @@
import os
import unittest

import numpy as np

from gnes.indexer.vector.hbindexer import HBIndexer
import shutil


class TestMHIndexer(unittest.TestCase):

@@ -13,7 +15,7 @@ def setUp(self):
self.n = 100

self.test_label = [(_, 1) for _ in range(self.n)]
t = np.random.randint(0, 100, size=[self.n, self.n_idx+self.num_bytes])
t = np.random.randint(0, 100, size=[self.n, self.n_idx + self.num_bytes])
self.test_data = t.astype(np.uint32)
self.weights = [1.] * len(self.test_label)
self.data_path = 'test_path'
@@ -3,7 +3,7 @@
import unittest
import zipfile

from gnes.encoder.image.base import BasePytorchEncoder
from gnes.encoder.base import BaseEncoder
from gnes.preprocessor.base import UnaryPreprocessor, PipelinePreprocessor
from gnes.preprocessor.image.resize import ResizeChunkPreprocessor
from gnes.preprocessor.image.sliding_window import VanillaSlidingPreprocessor
@@ -45,43 +45,43 @@ def setUp(self):
self.mobilenet_yaml = os.path.join(dirname, 'yaml', 'mobilenet-encoder.yml')

def test_vgg_encoding(self):
self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
self.encoder = BaseEncoder.load_yaml(self.vgg_yaml)
for test_img in self.test_img:
vec = self.encoder.encode(test_img)
print("the length of data now is:", len(test_img))
self.assertEqual(vec.shape[0], len(test_img))
self.assertEqual(vec.shape[1], 4096)

def test_resnet_encoding(self):
self.encoder = BasePytorchEncoder.load_yaml(self.res_yaml)
self.encoder = BaseEncoder.load_yaml(self.res_yaml)
for test_img in self.test_img:
vec = self.encoder.encode(test_img)
print("the length of data now is:", len(test_img))
self.assertEqual(vec.shape[0], len(test_img))
self.assertEqual(vec.shape[1], 2048)

def test_inception_encoding(self):
self.encoder = BasePytorchEncoder.load_yaml(self.inception_yaml)
self.encoder = BaseEncoder.load_yaml(self.inception_yaml)
for test_img in self.test_img:
vec = self.encoder.encode(test_img)
print("the length of data now is:", len(test_img))
self.assertEqual(vec.shape[0], len(test_img))
self.assertEqual(vec.shape[1], 2048)

def test_mobilenet_encoding(self):
self.encoder = BasePytorchEncoder.load_yaml(self.mobilenet_yaml)
self.encoder = BaseEncoder.load_yaml(self.mobilenet_yaml)
for test_img in self.test_img:
vec = self.encoder.encode(test_img)
print("the length of data now is:", len(test_img))
self.assertEqual(vec.shape[0], len(test_img))
self.assertEqual(vec.shape[1], 1280)

def test_dump_load(self):
self.encoder = BasePytorchEncoder.load_yaml(self.vgg_yaml)
self.encoder = BaseEncoder.load_yaml(self.vgg_yaml)

self.encoder.dump(self.dump_path)

vgg_encoder2 = BasePytorchEncoder.load(self.dump_path)
vgg_encoder2 = BaseEncoder.load(self.dump_path)

for test_img in self.test_img:
vec = vgg_encoder2.encode(test_img)
@@ -31,4 +31,4 @@ def test_mfcc_encoding(self):
vec = self.encoder.encode(self.audios)
self.assertEqual(len(vec.shape), 2)
self.assertEqual(vec.shape[0], len(self.audios))
self.assertEqual(vec.shape[1] % self.encoder.n_mfcc, 0)
self.assertEqual(vec.shape[1] % self.encoder.n_mfcc, 0)
@@ -9,6 +9,7 @@
from gnes.preprocessor.image.sliding_window import VanillaSlidingPreprocessor
from gnes.proto import gnes_pb2, blob2array


def img_process_for_test(dirname):
zipfile_ = zipfile.ZipFile(os.path.join(dirname, 'imgs/test.zip'))
all_bytes = [zipfile_.open(v).read() for v in zipfile_.namelist()]
@@ -31,6 +32,7 @@ def img_process_for_test(dirname):
for img in test_img_copy for chunk in img.chunks])
return test_img_all_preprocessor


class TestONNXImageEncoder(unittest.TestCase):

def setUp(self):
@@ -3,6 +3,7 @@

from gnes.encoder.text.torch_transformers import TorchTransformersEncoder


class TestTorchTransformersEncoder(unittest.TestCase):

def setUp(self):
@@ -17,7 +17,7 @@ def setUp(self):
self.publish_router_yaml = '!PublishRouter {parameters: {num_part: 2}}'
self.batch_router_yaml = '!DocBatchRouter {gnes_config: {batch_size: 2}}'
self.reduce_router_yaml = 'BaseReduceRouter'
self.chunk_router_yaml = 'ChunkToDocumentRouter'
self.chunk_router_yaml = 'ChunkToDocRouter'
self.chunk_sum_yaml = 'ChunkSumRouter'
self.doc_router_yaml = 'DocFillRouter'
self.doc_sum_yaml = 'DocSumRouter'
@@ -65,7 +65,7 @@ def test_video_cut_by_frame(self):
r = client.recv_message()
for d in r.request.index.docs:
self.assertGreater(len(d.chunks), 0)
for _ in range(len(d.chunks)-1):
for _ in range(len(d.chunks) - 1):
shape = blob2array(d.chunks[_].blob).shape
self.assertEqual(shape, (30, 168, 192, 3))
shape = blob2array(d.chunks[-1].blob).shape
@@ -1,4 +1,4 @@
!TextPreprocessor
!PunctSplitPreprocessor
parameters:
start_doc_id: 0
random_doc_id: True
@@ -1 +1 @@
!ChunkToDocumentRouter {}
!ChunkToDocRouter {}
@@ -1,4 +1,4 @@
!TextPreprocessor
!PunctSplitPreprocessor
parameters:
start_doc_id: 0
random_doc_id: True
@@ -65,10 +65,9 @@ In this example, we define a `BasePytorchEncoder` that loads a pretrained VGG16
|`!CLS`| Component Type |
|---|---|
|`!BasePreprocessor`|Preprocessor|
|`!TextPreprocessor`|Preprocessor|
|`!PunctSplitPreprocessor`|Preprocessor|
|`!BaseImagePreprocessor`|Preprocessor|
|`!BaseTextPreprocessor`|Preprocessor|
|`!BaseSlidingPreprocessor`|Preprocessor|
|`!VanillaSlidingPreprocessor`|Preprocessor|
|`!WeightedSlidingPreprocessor`|Preprocessor|
|`!SegmentPreprocessor`|Preprocessor|
@@ -110,7 +109,7 @@ In this example, we define a `BasePytorchEncoder` that loads a pretrained VGG16
|`!BaseRouter`|Router|
|`!BaseMapRouter`|Router|
|`!BaseReduceRouter`|Router|
|`!ChunkToDocumentRouter`|Router|
|`!ChunkToDocRouter`|Router|
|`!DocFillRouter`|Router|
|`!ConcatEmbedRouter`|Router|
|`!PublishRouter`|Router|
@@ -216,7 +215,7 @@ Note that how we defines a map under `kwargs` to describe the arguments, they wi
The examples above are all about encoders. In fact, every component, including encoders, preprocessors, routers, and indexers, can be described with YAML and loaded into GNES. For example,
```yaml
!TextPreprocessor
!PunctSplitPreprocessor
parameters:
start_doc_id: 0
random_doc_id: True
@@ -1,4 +1,4 @@
!TextPreprocessor
!PunctSplitPreprocessor
parameters:
start_doc_id: 0
random_doc_id: True

0 comments on commit 944b8c0

Please sign in to comment.
You can’t perform that action at this time.