Skip to content

Commit

Permalink
test: empty or corrupt docs (#1700)
Browse files Browse the repository at this point in the history
* test: empty or corrupt docs

* test: add only embedding

* test: add mime types

* test: add malformed and some refactor

* test: add test for using diff doc fields

* test: add test for wrong shape in search
  • Loading branch information
cristianmtr committed Jan 19, 2021
1 parent 5f557e5 commit 23f1ac8
Show file tree
Hide file tree
Showing 8 changed files with 469 additions and 5 deletions.
10 changes: 5 additions & 5 deletions jina/executors/indexers/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,13 @@ def update(self, keys: Sequence[int], values: Sequence[bytes], *args, **kwargs)
if getattr(keys, 'size', len(keys)):
# expects np array for computing shapes
keys = np.array(list(keys))
self._delete(keys)
self._delete(keys, keys_precomputed=True)
self.add(np.array(keys), np.array(values))

def _delete(self, keys):
def _delete(self, keys, keys_precomputed):
# could be empty
if keys_precomputed is not True:
keys = self._filter_nonexistent_keys(keys, self.ext2int_id.keys(), self.save_abspath)
# please do not use "if keys:", it won't work on both sequence and ndarray
if getattr(keys, 'size', len(keys)):
# expects np array for computing shapes
Expand All @@ -149,9 +151,7 @@ def _delete(self, keys):
self._size -= 1

def delete(self, keys: Sequence[int], *args, **kwargs) -> None:
if kwargs.get('keys_precomputed') is not True:
keys = self._filter_nonexistent_keys(keys, self.ext2int_id.keys(), self.save_abspath)
self._delete(keys)
self._delete(keys, keys_precomputed=False)

def get_query_handler(self) -> Optional['np.ndarray']:
"""Open a gzip file and load it as a numpy ndarray
Expand Down
Empty file.
9 changes: 9 additions & 0 deletions tests/integration/crud_corrupted_docs/flow-parallel.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
!Flow
# Flow used by the corrupted-docs CRUD integration tests: two indexer pods
# (`vector`, `kv`) whose outputs are joined by `last`.
pods:
  vector:
    # No explicit `needs` here — presumably attaches to the gateway as the
    # first pod of the flow; confirm against the Flow YAML semantics.
    uses: vector.yml
  kv:
    uses: kv.yml
    # Attach to the gateway explicitly instead of chaining after `vector`.
    needs: [gateway]
  last:
    # Join point: depends on both indexer pods.
    needs: [kv, vector]
4 changes: 4 additions & 0 deletions tests/integration/crud_corrupted_docs/flow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
!Flow
# Single-pod flow: one `indexer` pod configured by index.yml.
pods:
  indexer:
    uses: index.yml
62 changes: 62 additions & 0 deletions tests/integration/crud_corrupted_docs/index.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
!CompoundIndexer
# Compound indexer pairing a vector index (`vecidx`) with a key-value
# document index (`docidx`). All three `workspace` entries point at the same
# $JINA_CORRUPTED_DOCS_TEST_DIR placeholder (presumably substituted from the
# environment by the test; confirm against the test module).
components:
  - !NumpyIndexer
    with:
      index_filename: vec.gz
      metric: cosine
    metas:
      name: vecidx
      workspace: $JINA_CORRUPTED_DOCS_TEST_DIR
  - !BinaryPbIndexer
    with:
      index_filename: doc.gz
    metas:
      name: docidx
      workspace: $JINA_CORRUPTED_DOCS_TEST_DIR
metas:
  name: chunk_indexer
  workspace: $JINA_CORRUPTED_DOCS_TEST_DIR
requests:
  on:
    # Every request type is routed to both components, vector index first.
    UpdateRequest:
      - !VectorIndexDriver
        with:
          method: update
          executor: vecidx
          traversal_paths: [ 'r' ]
      - !KVIndexDriver
        with:
          method: update
          executor: docidx
          traversal_paths: [ 'r' ]
    DeleteRequest:
      - !VectorIndexDriver
        with:
          method: delete
          executor: vecidx
          traversal_paths: [ 'r' ]
      - !KVIndexDriver
        with:
          method: delete
          executor: docidx
          traversal_paths: [ 'r' ]
    IndexRequest:
      # No `method` is set for indexing, so the drivers use their default
      # (presumably `add`; confirm against the driver definitions).
      - !VectorIndexDriver
        with:
          executor: vecidx
          traversal_paths: ['r']
      - !KVIndexDriver
        with:
          executor: docidx
          traversal_paths: ['r']
    SearchRequest:
      - !VectorSearchDriver
        with:
          executor: vecidx
          top_k: $JINA_TOPK
          traversal_paths: ['r']
      # The KV lookup traverses 'm' rather than 'r' (presumably the matches
      # produced by the vector search above; confirm against Jina's
      # traversal-path documentation).
      - !KVSearchDriver
        with:
          executor: docidx
          top_k: $JINA_TOPK
          traversal_paths: ['m']
32 changes: 32 additions & 0 deletions tests/integration/crud_corrupted_docs/kv.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
!BinaryPbIndexer
# Stand-alone key-value document indexer (`docidx`) used as the `kv` pod.
# `workspace` uses the $JINA_CORRUPTED_DOCS_TEST_DIR placeholder (presumably
# substituted from the environment by the test; confirm against the test
# module).
with:
  index_filename: doc.gz
metas:
  name: docidx
  workspace: $JINA_CORRUPTED_DOCS_TEST_DIR
requests:
  on:
    UpdateRequest:
      - !KVIndexDriver
        with:
          method: update
          executor: docidx
          traversal_paths: [ 'r' ]
    DeleteRequest:
      - !KVIndexDriver
        with:
          method: delete
          executor: docidx
          traversal_paths: [ 'r' ]
    IndexRequest:
      # No `method` is set for indexing, so the driver uses its default
      # (presumably `add`; confirm against the driver definition).
      - !KVIndexDriver
        with:
          executor: docidx
          traversal_paths: ['r']
    SearchRequest:
      # Search traverses 'm' while the write paths above traverse 'r'.
      - !KVSearchDriver
        with:
          executor: docidx
          top_k: $JINA_TOPK
          traversal_paths: ['m']

0 comments on commit 23f1ac8

Please sign in to comment.