Skip to content

Commit

Permalink
fix: add delete for cached prop
Browse files Browse the repository at this point in the history
  • Loading branch information
cristianmtr committed Mar 10, 2021
1 parent 18b1db9 commit 37c0de8
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 10 deletions.
9 changes: 4 additions & 5 deletions jina/executors/indexers/keyvalue.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,13 @@ def update(
keys, values = self._filter_nonexistent_keys_values(
keys, values, self.query_handler.header.keys()
)
del self.query_handler
self.handler_mutex = False
if keys:
self._delete(keys)
self.add(keys, values)

def _delete(self, keys: Iterable[str]) -> None:
self.query_handler.close()
self.handler_mutex = False
for key in keys:
self.write_handler.header.write(
np.array(
Expand All @@ -229,9 +229,6 @@ def _delete(self, keys: Iterable[str]) -> None:
],
).tobytes()
)

if self.query_handler:
del self.query_handler.header[key]
self._size -= 1

def delete(self, keys: Iterable[str], *args, **kwargs) -> None:
Expand All @@ -242,6 +239,8 @@ def delete(self, keys: Iterable[str], *args, **kwargs) -> None:
:param kwargs: keyword arguments
"""
keys = self._filter_nonexistent_keys(keys, self.query_handler.header.keys())
del self.query_handler
self.handler_mutex = False
if keys:
self._delete(keys)

Expand Down
8 changes: 4 additions & 4 deletions jina/executors/indexers/vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def _post_clean_memmap(self, valid):
self.key_bytes = valid_key_bytes
self._size = len(valid)
self.valid_indices = valid
del self.__dict__['CACHED__int2ext_id']
del self.__dict__['CACHED__ext2int_id']
del self._int2ext_id
del self._ext2int_id

def _clean_memmap(self):
# clean up the underlying matrix of entries marked for deletion
Expand All @@ -105,7 +105,7 @@ def _clean_memmap(self):
self.handler_mutex = False
# force the raw_ndarray (underlying matrix) to re-read from disk
# (needed when there were writing ops to be flushed)
del self.__dict__['CACHED__raw_ndarray']
del self._raw_ndarray
filtered = self._raw_ndarray[self.valid_indices]
# we need an intermediary file
tmp_path = self.index_abspath + 'tmp'
Expand All @@ -123,7 +123,7 @@ def _clean_memmap(self):
os.remove(self.index_abspath)
os.rename(tmp_path, self.index_abspath)
# force it to re-read again from the new file
del self.__dict__['CACHED__raw_ndarray']
del self._raw_ndarray

def __getstate__(self):
# called on pickle save
Expand Down
7 changes: 7 additions & 0 deletions jina/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,13 @@ def __get__(self, obj, cls):
value = obj.__dict__[f'CACHED_{self.func.__name__}'] = self.func(obj)
return value

def __delete__(self, obj):
    """Discard the value cached for this property on ``obj``.

    The value is looked up in ``obj.__dict__`` under the key
    ``CACHED_<wrapped function name>``. If a non-``None`` value is cached and
    it has a ``close`` attribute, ``close()`` is called on it. Deleting a
    property that has nothing cached is a no-op.

    :param obj: the instance whose cached value is dropped
    """
    # Remove the entry *before* closing: the cache is cleared even when
    # ``close()`` raises, and an entry that holds ``None`` is removed too.
    cached_value = obj.__dict__.pop(f'CACHED_{self.func.__name__}', None)
    if cached_value is not None and hasattr(cached_value, 'close'):
        cached_value.close()


def get_now_timestamp():
"""
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/executors/indexers/test_binary_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def test_binarypb_update_twice(test_metas, delete_on_dump):

# benchmark only
@pytest.mark.skipif(
'GITHUB_WORKFLOW' in os.environ, reason='skip the network test on github workflow'
'GITHUB_WORKFLOW' in os.environ, reason='skip the benchmark test on github workflow'
)
@pytest.mark.parametrize('delete_on_dump', [True, False])
def test_binarypb_benchmark(test_metas, delete_on_dump):
Expand Down

0 comments on commit 37c0de8

Please sign in to comment.