Skip to content

Commit

Permalink
fix: benchmark dump test (#2307)
Browse files: browse the repository at this point in the history
  • Loading branch information
cristianmtr committed Apr 14, 2021
1 parent 042691d commit 03b6f21
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 14 deletions.
4 changes: 3 additions & 1 deletion jina/types/message/__init__.py
Expand Up @@ -126,7 +126,9 @@ def is_data_request(self) -> bool:
:return: boolean which states if data is requested
"""
return self.envelope.request_type != 'ControlRequest' or self.request.propagate
return (
self.envelope.request_type != 'ControlRequest' or self.request.propagate
) and self.envelope.request_type != 'DumpRequest'

def _add_envelope(
self,
Expand Down
28 changes: 15 additions & 13 deletions tests/integration/dump/test_dump_dbms.py
Expand Up @@ -74,13 +74,17 @@ def assert_dump_data(dump_path, docs, shards, pea_id):
)

# assert with Indexers
# noinspection PyTypeChecker
# TODO currently metas are only passed to the parent Compound, not to the inner components
cp: CompoundQueryExecutor = BaseQueryIndexer.load_config(
'indexer_query.yml',
pea_id=pea_id,
metas={'workspace': os.path.join(dump_path, 'new_ws'), 'dump_path': dump_path},
)
with TimeContext(f'### reloading {len(docs_expected)}'):
# noinspection PyTypeChecker
cp: CompoundQueryExecutor = BaseQueryIndexer.load_config(
'indexer_query.yml',
pea_id=pea_id,
metas={
'workspace': os.path.join(dump_path, 'new_ws'),
'dump_path': dump_path,
},
)
for c in cp.components:
assert c.size == len(docs_expected)

Expand All @@ -107,7 +111,7 @@ def path_size(dump_path):
@pytest.mark.parametrize('shards', [6, 3, 1])
@pytest.mark.parametrize('nr_docs', [7])
@pytest.mark.parametrize('emb_size', [10])
def test_dump_keyvalue(tmpdir, shards, nr_docs, emb_size, benchmark=False):
def test_dump_keyvalue(tmpdir, shards, nr_docs, emb_size, run_basic=False):
docs = list(get_documents(nr=nr_docs, index_start=0, emb_size=emb_size))
assert len(docs) == nr_docs
nr_search = 1
Expand All @@ -132,7 +136,7 @@ def _validate_results_nonempty(resp):
def error_callback(resp):
raise Exception('error callback called')

if benchmark:
if run_basic:
basic_benchmark(
tmpdir, docs, _validate_results_nonempty, error_callback, nr_search
)
Expand All @@ -144,8 +148,7 @@ def error_callback(resp):
flow_dbms.index(docs)

with TimeContext(f'### dumping {len(docs)} docs'):
# TODO move to control request approach
flow_dbms.dump('indexer_dbms', dump_path, shards=shards, timeout=120)
flow_dbms.dump('indexer_dbms', dump_path, shards=shards, timeout=-1)

dir_size = path_size(dump_path)
print(f'### dump path size: {dir_size} MBs')
Expand All @@ -160,8 +163,7 @@ def error_callback(resp):
'GITHUB_WORKFLOW' in os.environ, reason='skip the benchmark test on github workflow'
)
def test_benchmark(tmpdir):
# TODO 10000 seems to break the test
nr_docs = 8000
nr_docs = 100000
return test_dump_keyvalue(
tmpdir, shards=1, nr_docs=nr_docs, emb_size=128, benchmark=True
tmpdir, shards=1, nr_docs=nr_docs, emb_size=128, run_basic=True
)

0 comments on commit 03b6f21

Please sign in to comment.