Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial hashing of func code w/ ast #1102

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions joblib/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import inspect
import sys
import weakref
import hashlib
import ast

from tokenize import open as open_py_source

Expand Down Expand Up @@ -620,10 +622,21 @@ def _get_output_identifiers(self, *args, **kwargs):
argument_hash = self._get_argument_hash(*args, **kwargs)
return func_id, argument_hash

@staticmethod
def _hash_ast(func_code):
_ = ast.parse(func_code)
_ = ast.dump(_)
_ = _.encode()
_ = hashlib.md5(_).hexdigest()
return _

def _hash_func(self):
    """Hash a function to key the online cache.

    The key is built from the AST of the function's source code (via
    ``self._hash_ast``). When that source cannot be parsed, the hash of
    the function's ``__code__`` attribute is used instead.

    Returns
    -------
    tuple
        A one-element tuple holding the hash of the function code.
    """
    try:
        func_code_h = self._hash_ast(get_func_code(self.func)[0])
    except (SyntaxError, ValueError):
        # get_func_code returned something ast.parse rejects: invalid
        # source (SyntaxError) or, on some Python versions, source
        # containing null bytes (ValueError).
        func_code_h = hash(getattr(self.func, '__code__', None))
    return (func_code_h,)

def _write_func_code(self, func_code, first_line):
""" Write the function code and the filename to a file.
Expand Down Expand Up @@ -656,6 +669,23 @@ def _check_previous_func_code(self, stacklevel=2):
stacklevel is the depth at which this function is called, to
issue useful warnings to the user.
"""
curr_func_code, source_file, first_line = self.func_code_info
func_id = _build_func_identifier(self.func)
#if not self.store_backend.contains_path([func_id]): # doesn't work: .object_exists is not available
try:
self.store_backend.get_cached_func_code([func_id])
except:
self._write_func_code(curr_func_code, first_line)
return False
stored_func_code = self.store_backend.get_cached_func_code([func_id])
if self._hash_ast(curr_func_code) == self._hash_ast(stored_func_code):
return True
else:
# i don't think this mutation should be in a method called 'check_something'
self._write_func_code(curr_func_code, first_line)
return False
# remove the below???

# First check if our function is in the in-memory store.
# Using the in-memory store not only makes things faster, but it
# also renders us robust to variations of the files when the
Expand Down