From 137cc452e2d273e9bd7caa2b7c4ae9a752c89163 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Wed, 10 Mar 2021 18:41:59 +0300 Subject: [PATCH] WIP --- aiomisc/cache/base.py | 78 ++++----------- aiomisc/cache/lfu.py | 100 +++++++++++++++---- aiomisc/cache/lru.py | 226 +++++++++++++++++++++++++++++++++++++++--- requirements.dev.txt | 1 - setup.py | 1 - 5 files changed, 307 insertions(+), 99 deletions(-) diff --git a/aiomisc/cache/base.py b/aiomisc/cache/base.py index f9f3c985..bccd0741 100644 --- a/aiomisc/cache/base.py +++ b/aiomisc/cache/base.py @@ -1,73 +1,29 @@ -import asyncio -import threading from abc import ABC, abstractmethod -from typing import Any, Union, Hashable, Dict, Optional - -from llist import dllist, dllistnode +from typing import Any, Hashable class CacheBase(ABC): - def __init__(self, max_size: int = 0): - self._max_size: int = max_size - self._loop: Optional[asyncio.AbstractEventLoop] = None - self.usages: dllist = dllist() - self.cache: Dict[Hashable, Any] = dict() - self.lock = threading.RLock() - - @property - def is_overflow(self) -> bool: - if self._max_size == 0: - return False - - if self._max_size < len(self.usages): - return True + __slots__ = "max_size", - return False - - @property - def loop(self) -> asyncio.AbstractEventLoop: - if self._loop is None: - self._loop = asyncio.get_event_loop() - return self._loop + def __init__(self, max_size: int = 0): + self.max_size = max_size @abstractmethod - def _on_set(self, node: dllistnode) -> None: - pass - - def _on_expires(self, node: dllistnode) -> None: - pass + def get(self, key: Hashable): + raise NotImplementedError @abstractmethod - def _on_get(self, node: dllistnode) -> Any: - pass - - def __contains__(self, item: Hashable) -> bool: - return item in self.cache + def remove(self, key: Hashable): + raise NotImplementedError - def get(self, item: Hashable) -> Any: - with self.lock: - node: dllistnode = self.cache[item] - self.loop.call_soon(self._on_get, node) - return node.value[1] - - def expire(self, node: dllistnode): - with self.lock: - item, value = node.value - node: Optional[dllistnode] = self.cache.pop(item, None) - - if node is None: - return - - self.loop.call_soon(self._on_expires, node) - self.usages.remove(node) - - def set(self, item: Hashable, value: Any, - expiration: Union[int, float] = None) -> None: - with self.lock: - node: dllistnode = self.usages.append((item, value)) - self.cache[item] = node + @abstractmethod + def set(self, key: Hashable, value: Any): + raise NotImplementedError - self.loop.call_soon(self._on_set, node) + @abstractmethod + def __contains__(self, key: Hashable): + raise NotImplementedError - if expiration is not None: - self.loop.call_later(expiration, self.expire, node) + @abstractmethod + def __len__(self): + raise NotImplementedError diff --git a/aiomisc/cache/lfu.py b/aiomisc/cache/lfu.py index b0c2d4fd..e4580399 100644 --- a/aiomisc/cache/lfu.py +++ b/aiomisc/cache/lfu.py @@ -1,10 +1,11 @@ -from dataclasses import dataclass from threading import RLock -from typing import Any, Hashable, Optional, Set +from typing import Any, Hashable, Optional, Dict, Set +from aiomisc.cache.base import CacheBase +# noinspection PyShadowingBuiltins class Node: - __slots__ = ('prev', 'next', 'items') + __slots__ = 'prev', 'next', 'items' def __init__(self, prev: "Node" = None, next: "Node" = None, items: Optional[Set["Item"]] = None): @@ -13,14 +14,16 @@ def __init__(self, prev: "Node" = None, next: "Node" = None, self.items = items or set() -@dataclass(frozen=True) class Item: - node: Node - key: Hashable - value: Any + __slots__ = 'node', 'key', 'value' + def __init__(self, node: Node, key: Hashable, value: Any): + self.node: Node = node + self.key: Hashable = key + self.value: Any = value -class LFUCache: + +class LFUCache(CacheBase): """ LFU cache implementation @@ -29,20 +32,37 @@ class LFUCache: >>> assert "foo" in lfu >>> lfu.get('foo') 'bar' + + >>> lfu.remove('foo') + >>> assert "foo" not in lfu + >>> lfu.get("foo") + Traceback (most recent call last): + ... + KeyError: 'foo' + >>> lfu.remove("foo") + >>> lfu.set("bar", "foo") >>> lfu.set("spam", "egg") - + >>> lfu.set("foo", "bar") + >>> lfu.get("foo") + 'bar' + >>> lfu.get("spam") + 'egg' + >>> assert len(lfu) == 3 + >>> lfu.set("egg", "spam") + >>> assert len(lfu) == 3 """ - def __init__(self, max_size: int = 0): - self.cache = dict() - self.usages: Node = Node(prev=None, next=None, items=set()) - self.lock = RLock() - self.size = 0 - self.max_size = max_size + __slots__ = "cache", "usages", "lock" + + def __init__(self, max_size: int): + super().__init__(max_size) + self.cache: Dict[Hashable, Item] = dict() + self.usages: Node = Node(prev=None, next=None) + self.lock: RLock = RLock() def _create_node(self) -> Node: - node = Node(prev=self.usages, next=None, items=set()) + node = Node(prev=self.usages, next=None) self.usages.next = node return node @@ -55,11 +75,7 @@ def _update_usage(self, item: Item): new_node = self._create_node() old_node.items.remove(item) - item = Item( - node=new_node, - key=item.key, - value=item.value, - ) + item.node = new_node new_node.items.add(item) self.cache[item.key] = item @@ -69,11 +85,29 @@ def _update_usage(self, item: Item): self.usages = new_node self.usages.prev = None + def _remove_item(self, item: Item): + with self.lock: + if item.key in self.cache: + self.cache.pop(item.key, None) + + if item in item.node.items: + item.node.items.remove(item) + + item.node = None + def get(self, key: Hashable): item: Item = self.cache[key] self._update_usage(item) return item.value + def remove(self, key: Hashable): + with self.lock: + item: Optional[Item] = self.cache.pop(key, None) + if item is None: + return + + self._remove_item(item) + def set(self, key: Hashable, value: Any): with self.lock: node: Optional[Node] = self.usages @@ -85,8 +119,30 @@ def set(self, key: Hashable, value: Any): node.items.add(item) self.cache[key] = item - def __contains__(self, key) -> Any: + if self._is_overflow(): + self._on_overflow() + + def _on_overflow(self): + with self.lock: + while self._is_overflow(): + if not self.usages.items: + if self.usages.next is not None: + self.usages.next.prev = None + self.usages = self.usages.next + else: + self.usages = Node(prev=None, next=None) + + item = self.usages.items.pop() + self._remove_item(item) + + def _is_overflow(self) -> bool: + return len(self.cache) > self.max_size + + def __contains__(self, key: Hashable) -> Any: if key in self.cache: self._update_usage(self.cache[key]) return True return False + + def __len__(self): + return len(self.cache) diff --git a/aiomisc/cache/lru.py b/aiomisc/cache/lru.py index b8669ee0..17b4558c 100644 --- a/aiomisc/cache/lru.py +++ b/aiomisc/cache/lru.py @@ -1,24 +1,222 @@ -from typing import Any +from threading import RLock +from typing import Any, Hashable, Dict, Optional +from aiomisc.cache.base import CacheBase -from llist import dllistnode -from .base import CacheBase +# noinspection PyShadowingBuiltins +class Node: + __slots__ = 'prev', 'next', 'item', '_restricted' + + def __init__(self, prev: "Node" = None, next: "Node" = None, + item: Optional["Item"] = None): + self.prev = prev + self.next = next + self.item = item + + +class Item: + __slots__ = 'node', 'key', 'value', '_restricted' + + def __init__(self, node: Node, key: Hashable, value: Any): + self.node: Node = node + self.key: Hashable = key + self.value: Any = value class LRUCache(CacheBase): - def _on_set(self, node: dllistnode) -> None: - if not self.is_overflow: + """ + LRU cache implementation + + >>> lfu = LRUCache(3) + >>> lfu.set("foo", "bar") + >>> assert "foo" in lfu + >>> lfu.get('foo') + 'bar' + + >>> lfu.remove('foo') + >>> assert "foo" not in lfu + >>> lfu.get("foo") + Traceback (most recent call last): + ... + KeyError: 'foo' + >>> lfu.remove("foo") + + >>> lfu.set("bar", "foo") + >>> lfu.set("spam", "egg") + >>> lfu.set("foo", "bar") + >>> lfu.get("foo") + 'bar' + >>> lfu.get("spam") + 'egg' + >>> assert len(lfu) == 3 + >>> lfu.set("egg", "spam") + >>> assert len(lfu) == 3, str(len(lfu)) + " is not 3" + """ + + __slots__ = 'lock', 'cache', 'last', 'first' + + def __init__(self, max_size: int): + super().__init__(max_size) + self.cache: Dict[Hashable, Item] = dict() + self.first: Optional[Node] = None + self.last: Optional[Node] = None + self.lock: RLock = RLock() + + @staticmethod + def _node_remove(node: Node): + if node.next is None: return - self._on_overflow() - def _on_get(self, node: dllistnode) -> Any: - with self.lock: - self.usages.remove(node) - self.usages.appendright(node) + node.prev.next = node.next + node.prev = None + node.next = None + + @staticmethod + def _node_append_left(parent: Node, node: Node): + """ + Appends node before the parent node + + before: + + ... <-> [parent] <-> ... + + after: + + ... <-> [node] <-> [parent] <-> ... + + """ + node.next = parent + + if parent.prev is not None: + node.prev = parent.prev + + parent.prev = node + + @staticmethod + def _node_append_right(parent: Node, node: Node): + """ + Appends node after parent node + + before: + + ... <-> [parent] <-> ... + + after: + + ... <-> [parent] <-> [node] <-> ... + + """ + node.prev = parent + + if parent.next is not None: + node.next = parent.next + + parent.next = node + + @staticmethod + def _node_swap(a: Node, b: Node): + """ + Swaps two Nodes and change neighbor links + + Example: doubly linked list looks like: + + [x] <-> [a] <-> [z] <-> [b] <-> [y] + node x node a node z node b node y + p n p n p n p n p n + ---------------------------------------------- + - a x z a b z y b - + + After swap should looks like: + + [x] <-> [b] <-> [z] <-> [a] <-> [y] + node x node b node z node a node y + p n p n p n p n p n + ---------------------------------------------- + - b x z b a z y a - + + That's means we should make 8 changes + + # 4 for "a" and "b" + a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next + + # 4 for neighbors + x.next, z.prev, z.next, y.prev = b, b, a, a + + After general case is should be: + + a.prev.next, a.next.prev, b.prev.next, b.next.prev = b, b, a, a + a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next + + """ + # store original links + a_prev, a_next, b_prev, b_next = a.prev, a.next, b.prev, b.next + + if a_prev is not None: + a_prev.next = b + a.prev = b_prev + + if a_next is not None: + a_next.prev = b + a.next = b_next + + if b_next is not None: + b_next.prev = a + b.next = b_next + + if b_prev is not None: + b_prev.next = a + b.prev = a_prev def _on_overflow(self): with self.lock: - while self.is_overflow: - node: dllistnode = self.usages.popleft() - item, value = node.value - self.cache.pop(item) + while self._is_overflow(): + node = self.first + if self.first is None: + return + + self.first = self.first.next + self.remove(node.item.key) + + if self.first is None: + self.first = Node(prev=None, next=None) + + def _is_overflow(self) -> bool: + return len(self.cache) > self.max_size + + def get(self, key: Hashable): + item = self.cache[key] + + with self.lock: + self._node_swap(item.node, self.last) + + return item.value + + def remove(self, key: Hashable): + with self.lock: + item: Optional[Item] = self.cache.pop(key, None) + if item is None: + return + self._node_remove(item.node) + + def set(self, key: Hashable, value: Any): + with self.lock: + node = Node(prev=self.last) + item = Item(node=node, key=key, value=value) + node.item = item + self.cache[key] = item + + if self.last is None and self.first is None: + self.last = self.first = node + return + + self._node_append_right(self.last, node) + self.last = node + + if self._is_overflow(): + self._on_overflow() + + def __contains__(self, key: Hashable): + return key in self.cache + + def __len__(self): + return len(self.cache) diff --git a/requirements.dev.txt b/requirements.dev.txt index df0a41a9..21896883 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -7,7 +7,6 @@ coveralls croniter~=0.3.34 fastapi freezegun<1.1 -llist==0.6 mypy~=0.782 pylava pytest diff --git a/setup.py b/setup.py index 87ed3790..e631235a 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,6 @@ def load_requirements(fname): 'raven': ['raven-aiohttp'], 'uvloop': ['uvloop>=0.14,<1'], 'cron': ['croniter~=0.3.34'], - 'lfu': ['llist==0.6'], ':python_version < "3.7"': 'async-generator', }, entry_points={