Merge pull request #21 from jaraco/feature/refresh-implementation

Refresh the implementation
jaraco · Dec 13, 2023 · 50b7fa7 · 50b7fa7
2 parents 084a5c4 + 9ed6464
commit 50b7fa7
Show file tree

Hide file tree

Showing 2 changed files with 158 additions and 109 deletions.
diff --git a/backports/functools_lru_cache.py b/backports/functools_lru_cache.py
@@ -26,6 +26,12 @@ def update_wrapper(
 
 
 class _HashedSeq(list):
+    """This class guarantees that hash() will be called no more than once
+    per element.  This is important because the lru_cache() will hash
+    the key multiple times on a cache miss.
+
+    """
+
     __slots__ = 'hashvalue'
 
     def __init__(self, tup, hash=hash):
@@ -41,45 +47,57 @@ def _make_key(
     kwds,
     typed,
     kwd_mark=(object(),),
-    fasttypes=set([int, str, frozenset, type(None)]),
-    sorted=sorted,
+    fasttypes={int, str},
     tuple=tuple,
     type=type,
     len=len,
 ):
-    'Make a cache key from optionally typed positional and keyword arguments'
+    """Make a cache key from optionally typed positional and keyword arguments
+
+    The key is constructed in a way that is as flat as possible rather than
+    as a nested structure that would take more memory.
+
+    If there is only a single argument and its data type is known to cache
+    its hash value, then that argument is returned without a wrapper.  This
+    saves space and improves lookup speed.
+
+    """
+    # All of the code below relies on kwds preserving the order input by the user.
+    # Formerly, we sorted() the kwds before looping.  The new way is *much*
+    # faster; however, it means that f(x=1, y=2) will now be treated as a
+    # distinct call from f(y=2, x=1) which will be cached separately.
     key = args
     if kwds:
-        sorted_items = sorted(kwds.items())
         key += kwd_mark
-        for item in sorted_items:
+        for item in kwds.items():
             key += item
     if typed:
         key += tuple(type(v) for v in args)
         if kwds:
-            key += tuple(type(v) for k, v in sorted_items)
+            key += tuple(type(v) for v in kwds.values())
     elif len(key) == 1 and type(key[0]) in fasttypes:
         return key[0]
     return _HashedSeq(key)
 
 
-def lru_cache(maxsize=100, typed=False):  # noqa: C901
+def lru_cache(maxsize=128, typed=False):
     """Least-recently-used cache decorator.
 
     If *maxsize* is set to None, the LRU features are disabled and the cache
     can grow without bound.
 
     If *typed* is True, arguments of different types will be cached separately.
-    For example, f(3.0) and f(3) will be treated as distinct calls with
-    distinct results.
+    For example, f(decimal.Decimal("3.0")) and f(3.0) will be treated as
+    distinct calls with distinct results. Some types such as str and int may
+    be cached separately even when typed is false.
 
     Arguments to the cached function must be hashable.
 
-    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
-    f.cache_info().  Clear the cache and statistics with f.cache_clear().
+    View the cache statistics named tuple (hits, misses, maxsize, currsize)
+    with f.cache_info().  Clear the cache and statistics with f.cache_clear().
     Access the underlying function with f.__wrapped__.
 
-    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
+    See:  https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU)
 
     """
 
@@ -88,108 +106,138 @@ def lru_cache(maxsize=100, typed=False):  # noqa: C901
     # The internals of the lru_cache are encapsulated for thread safety and
     # to allow the implementation to change (including a possible C version).
 
+    if isinstance(maxsize, int):
+        # Negative maxsize is treated as 0
+        if maxsize < 0:
+            maxsize = 0
+    elif callable(maxsize) and isinstance(typed, bool):
+        # The user_function was passed in directly via the maxsize argument
+        user_function, maxsize = maxsize, 128
+        wrapper = _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo)
+        wrapper.cache_parameters = lambda: {'maxsize': maxsize, 'typed': typed}
+        return update_wrapper(wrapper, user_function)
+    elif maxsize is not None:
+        raise TypeError('Expected first argument to be an integer, a callable, or None')
+
     def decorating_function(user_function):
-        cache = dict()
-        stats = [0, 0]  # make statistics updateable non-locally
-        HITS, MISSES = 0, 1  # names for the stats fields
-        make_key = _make_key
-        cache_get = cache.get  # bound method to lookup key or return None
-        _len = len  # localize the global len() function
-        lock = RLock()  # because linkedlist updates aren't threadsafe
-        root = []  # root of the circular doubly linked list
-        root[:] = [root, root, None, None]  # initialize by pointing to self
-        nonlocal_root = [root]  # make updateable non-locally
-        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
-
-        if maxsize == 0:
-
-            def wrapper(*args, **kwds):
-                # no caching, just do a statistics update after a successful call
-                result = user_function(*args, **kwds)
-                stats[MISSES] += 1
-                return result
+        wrapper = _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo)
+        wrapper.cache_parameters = lambda: {'maxsize': maxsize, 'typed': typed}
+        return update_wrapper(wrapper, user_function)
 
-        elif maxsize is None:
+    return decorating_function
 
-            def wrapper(*args, **kwds):
-                # simple caching without ordering or size limit
-                key = make_key(args, kwds, typed)
-                result = cache_get(
-                    key, root
-                )  # root used here as a unique not-found sentinel
-                if result is not root:
-                    stats[HITS] += 1
-                    return result
-                result = user_function(*args, **kwds)
-                cache[key] = result
-                stats[MISSES] += 1
-                return result
 
-        else:
-
-            def wrapper(*args, **kwds):
-                # size limited caching that tracks accesses by recency
-                key = make_key(args, kwds, typed) if kwds or typed else args
-                with lock:
-                    link = cache_get(key)
-                    if link is not None:
-                        # record recent use of the key by moving it
-                        # to the front of the list
-                        (root,) = nonlocal_root
-                        link_prev, link_next, key, result = link
-                        link_prev[NEXT] = link_next
-                        link_next[PREV] = link_prev
-                        last = root[PREV]
-                        last[NEXT] = root[PREV] = link
-                        link[PREV] = last
-                        link[NEXT] = root
-                        stats[HITS] += 1
-                        return result
-                result = user_function(*args, **kwds)
-                with lock:
-                    (root,) = nonlocal_root
-                    if key in cache:
-                        # getting here means that this same key was added to the
-                        # cache while the lock was released.  since the link
-                        # update is already done, we need only return the
-                        # computed result and update the count of misses.
-                        pass
-                    elif _len(cache) >= maxsize:
-                        # use the old root to store the new key and result
-                        oldroot = root
-                        oldroot[KEY] = key
-                        oldroot[RESULT] = result
-                        # empty the oldest link and make it the new root
-                        root = nonlocal_root[0] = oldroot[NEXT]
-                        oldkey = root[KEY]
-                        root[KEY] = root[RESULT] = None
-                        # now update the cache dictionary for the new links
-                        del cache[oldkey]
-                        cache[key] = oldroot
-                    else:
-                        # put result in a new link at the front of the list
-                        last = root[PREV]
-                        link = [last, root, key, result]
-                        last[NEXT] = root[PREV] = cache[key] = link
-                    stats[MISSES] += 1
+def _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo):
+    # Constants shared by all lru cache instances:
+    sentinel = object()  # unique object used to signal cache misses
+    make_key = _make_key  # build a key from the function arguments
+    PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
+
+    cache = {}
+    hits = misses = 0
+    full = False
+    cache_get = cache.get  # bound method to look up a key or return None
+    cache_len = cache.__len__  # get cache size without calling len()
+    lock = RLock()  # because linkedlist updates aren't threadsafe
+    root = []  # root of the circular doubly linked list
+    root[:] = [root, root, None, None]  # initialize by pointing to self
+
+    if maxsize == 0:
+
+        def wrapper(*args, **kwds):
+            # No caching -- just a statistics update
+            nonlocal misses
+            misses += 1
+            result = user_function(*args, **kwds)
+            return result
+
+    elif maxsize is None:
+
+        def wrapper(*args, **kwds):
+            # Simple caching without ordering or size limit
+            nonlocal hits, misses
+            key = make_key(args, kwds, typed)
+            result = cache_get(key, sentinel)
+            if result is not sentinel:
+                hits += 1
                 return result
+            misses += 1
+            result = user_function(*args, **kwds)
+            cache[key] = result
+            return result
 
-        def cache_info():
-            """Report cache statistics"""
-            with lock:
-                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
+    else:
 
-        def cache_clear():
-            """Clear the cache and cache statistics"""
+        def wrapper(*args, **kwds):
+            # Size limited caching that tracks accesses by recency
+            nonlocal root, hits, misses, full
+            key = make_key(args, kwds, typed)
             with lock:
-                cache.clear()
-                root = nonlocal_root[0]
-                root[:] = [root, root, None, None]
-                stats[:] = [0, 0]
-
-        wrapper.__wrapped__ = user_function
-        wrapper.cache_info = cache_info
-        wrapper.cache_clear = cache_clear
-        return update_wrapper(wrapper, user_function)
-
-    return decorating_function
+                link = cache_get(key)
+                if link is not None:
+                    # Move the link to the front of the circular queue
+                    link_prev, link_next, _key, result = link
+                    link_prev[NEXT] = link_next
+                    link_next[PREV] = link_prev
+                    last = root[PREV]
+                    last[NEXT] = root[PREV] = link
+                    link[PREV] = last
+                    link[NEXT] = root
+                    hits += 1
+                    return result
+                misses += 1
+            result = user_function(*args, **kwds)
+            with lock:
+                if key in cache:
+                    # Getting here means that this same key was added to the
+                    # cache while the lock was released.  Since the link
+                    # update is already done and the miss was counted before
+                    # the call, we need only return the computed result.
+                    pass
+                elif full:
+                    # Use the old root to store the new key and result.
+                    oldroot = root
+                    oldroot[KEY] = key
+                    oldroot[RESULT] = result
+                    # Empty the oldest link and make it the new root.
+                    # Keep a reference to the old key and old result to
+                    # prevent their ref counts from going to zero during the
+                    # update. That will prevent potentially arbitrary object
+                    # clean-up code (i.e. __del__) from running while we're
+                    # still adjusting the links.
+                    root = oldroot[NEXT]
+                    oldkey = root[KEY]
+                    root[KEY] = root[RESULT] = None
+                    # Now update the cache dictionary.
+                    del cache[oldkey]
+                    # Save the potentially reentrant cache[key] assignment
+                    # for last, after the root and links have been put in
+                    # a consistent state.
+                    cache[key] = oldroot
+                else:
+                    # Put result in a new link at the front of the queue.
+                    last = root[PREV]
+                    link = [last, root, key, result]
+                    last[NEXT] = root[PREV] = cache[key] = link
+                    # Use the cache_len bound method instead of the len() function
+                    # which could potentially be wrapped in an lru_cache itself.
+                    full = cache_len() >= maxsize
+            return result
+
+    def cache_info():
+        """Report cache statistics"""
+        with lock:
+            return _CacheInfo(hits, misses, maxsize, cache_len())
+
+    def cache_clear():
+        """Clear the cache and cache statistics"""
+        nonlocal hits, misses, full
+        with lock:
+            cache.clear()
+            root[:] = [root, root, None, None]
+            hits = misses = 0
+            full = False
+
+    wrapper.cache_info = cache_info
+    wrapper.cache_clear = cache_clear
+    return wrapper
diff --git a/newsfragments/+7e6fa2bb.feature.rst b/newsfragments/+7e6fa2bb.feature.rst
@@ -0,0 +1 @@
+Refreshed implementation from CPython.