Skip to content

Commit

Permalink
ENH Add check_call_in_cache method to check cache without calling fun…
Browse files Browse the repository at this point in the history
…ction (#820)

Co-authored-by: cottrell <david@g18e.com>
Co-authored-by: Olivier Grisel <olivier.grisel@gmail.com>
Co-authored-by: Thomas Moreau <thomas.moreau.2010@gmail.com>
  • Loading branch information
4 people committed Aug 24, 2021
1 parent 0fa2cb9 commit 0426d6b
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 8 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Expand Up @@ -17,6 +17,9 @@ Development version
1.0.1
-----

- Add ``check_call_in_cache`` method to ``MemorizedFunc`` to check whether a
call is already cached without calling the function.
https://github.com/joblib/joblib/pull/820

- dask: avoid redundant scattering of large arguments to make a more
efficient use of the network resources and avoid crashing dask with
"OSError: [Errno 55] No buffer space available"
Expand Down
2 changes: 1 addition & 1 deletion doc/memory.rst
Expand Up @@ -427,7 +427,7 @@ objects that, in addition of behaving like normal functions, expose
methods useful for cache exploration and management.

.. autoclass:: MemorizedFunc
:members: __init__, call, clear
:members: __init__, call, clear, check_call_in_cache


..
Expand Down
30 changes: 23 additions & 7 deletions joblib/memory.py
Expand Up @@ -19,7 +19,6 @@
import traceback
import warnings
import inspect
import sys
import weakref

from tokenize import open as open_py_source
Expand All @@ -33,7 +32,6 @@
from ._store_backends import StoreBackendBase, FileSystemStoreBackend



FIRST_LINE_TEXT = "# first line:"

# TODO: The following object should have a data store object as a sub
Expand Down Expand Up @@ -136,7 +134,6 @@ def _store_backend_factory(backend, location, verbose=0, backend_options=None):
"supported by joblib. Returning None instead.".format(
location.__class__.__name__), UserWarning)


return None


Expand Down Expand Up @@ -361,6 +358,12 @@ def clear(self, warn=True):
# Argument "warn" is for compatibility with MemorizedFunc.clear
pass

def call(self, *args, **kwargs):
    """Call the wrapped function and return its result unchanged.

    All positional and keyword arguments are forwarded as-is to
    ``self.func``.
    """
    wrapped = self.func
    return wrapped(*args, **kwargs)

def check_call_in_cache(self, *args, **kwargs):
    """Report whether a call with these arguments is cached.

    This implementation ignores its arguments and always answers
    ``False``.

    Returns
    -------
    is_call_in_cache: bool
        Always ``False``.
    """
    return False


###############################################################################
# class `MemorizedFunc`
Expand Down Expand Up @@ -606,6 +609,21 @@ def __getstate__(self):

return state

def check_call_in_cache(self, *args, **kwargs):
    """Check if function call is in the memory cache.

    The function itself is never called: the only work performed is
    the inspection of the function and the hashing of the arguments
    needed to look the call up in the store backend.

    Returns
    -------
    is_call_in_cache: bool
        Whether or not the result of the function has been cached
        for the input arguments that have been passed.
    """
    identifiers = self._get_output_identifiers(*args, **kwargs)
    func_id, args_id = identifiers
    item_path = (func_id, args_id)
    return self.store_backend.contains_item(item_path)

# ------------------------------------------------------------------------
# Private interface
# ------------------------------------------------------------------------
Expand Down Expand Up @@ -683,8 +701,8 @@ def _check_previous_func_code(self, stacklevel=2):
extract_first_line(
self.store_backend.get_cached_func_code([func_id]))
except (IOError, OSError): # some backend can also raise OSError
self._write_func_code(func_code, first_line)
return False
self._write_func_code(func_code, first_line)
return False
if old_func_code == func_code:
return True

Expand Down Expand Up @@ -821,8 +839,6 @@ def _persist_input(self, duration, args, kwargs, this_duration_limit=0.5):
% this_duration, stacklevel=5)
return metadata

# XXX: Need a method to check if results are available.

# ------------------------------------------------------------------------
# Private `object` interface
# ------------------------------------------------------------------------
Expand Down
13 changes: 13 additions & 0 deletions joblib/test/test_memory.py
Expand Up @@ -609,6 +609,19 @@ def test_persistence(tmpdir):
gp(1)


def test_check_call_in_cache(tmpdir):
    # Cover both ways of obtaining a caching wrapper around ``f``: building
    # a MemorizedFunc directly and going through ``Memory.cache``.
    cached_funcs = (
        MemorizedFunc(f, tmpdir.strpath),
        Memory(location=tmpdir.strpath, verbose=0).cache(f),
    )
    for func in cached_funcs:
        # Before any call, the arguments must be reported as not cached,
        # and the answer must be a real bool (not merely falsy).
        in_cache = func.check_call_in_cache(2)
        assert not in_cache
        assert isinstance(in_cache, bool)

        # After an actual call, the same arguments must be reported as
        # cached.
        assert func(2) == 5
        in_cache = func.check_call_in_cache(2)
        assert in_cache
        assert isinstance(in_cache, bool)

        # Clear the cache before moving on to the next wrapper (both use
        # the same location).
        func.clear()


def test_call_and_shelve(tmpdir):
# Test MemorizedFunc outputting a reference to cache.

Expand Down

0 comments on commit 0426d6b

Please sign in to comment.