From 34c82225cf4ea7b4d95f96f60681956a4452885a Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 9 Mar 2021 00:45:40 +0300 Subject: [PATCH 1/5] WIP --- aiomisc/cache/__init__.py | 1 + aiomisc/cache/base.py | 73 ++++++++++++++++++++++++++++++++++++ aiomisc/cache/lfu.py | 79 +++++++++++++++++++++++++++++++++++++++ aiomisc/cache/lru.py | 24 ++++++++++++ requirements.dev.txt | 5 ++- setup.py | 1 + 6 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 aiomisc/cache/__init__.py create mode 100644 aiomisc/cache/base.py create mode 100644 aiomisc/cache/lfu.py create mode 100644 aiomisc/cache/lru.py diff --git a/aiomisc/cache/__init__.py b/aiomisc/cache/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/aiomisc/cache/__init__.py @@ -0,0 +1 @@ + diff --git a/aiomisc/cache/base.py b/aiomisc/cache/base.py new file mode 100644 index 00000000..f9f3c985 --- /dev/null +++ b/aiomisc/cache/base.py @@ -0,0 +1,73 @@ +import asyncio +import threading +from abc import ABC, abstractmethod +from typing import Any, Union, Hashable, Dict, Optional + +from llist import dllist, dllistnode + + +class CacheBase(ABC): + def __init__(self, max_size: int = 0): + self._max_size: int = max_size + self._loop: Optional[asyncio.AbstractEventLoop] = None + self.usages: dllist = dllist() + self.cache: Dict[Hashable, Any] = dict() + self.lock = threading.RLock() + + @property + def is_overflow(self) -> bool: + if self._max_size == 0: + return False + + if self._max_size < len(self.usages): + return True + + return False + + @property + def loop(self) -> asyncio.AbstractEventLoop: + if self._loop is None: + self._loop = asyncio.get_event_loop() + return self._loop + + @abstractmethod + def _on_set(self, node: dllistnode) -> None: + pass + + def _on_expires(self, node: dllistnode) -> None: + pass + + @abstractmethod + def _on_get(self, node: dllistnode) -> Any: + pass + + def __contains__(self, item: Hashable) -> bool: + return item in self.cache + + def get(self, item: Hashable) -> Any: + with self.lock: + node: dllistnode = self.cache[item] + self.loop.call_soon(self._on_get, node) + return node.value[1] + + def expire(self, node: dllistnode): + with self.lock: + item, value = node.value + node: Optional[dllistnode] = self.cache.pop(item, None) + + if node is None: + return + + self.loop.call_soon(self._on_expires, node) + self.usages.remove(node) + + def set(self, item: Hashable, value: Any, + expiration: Union[int, float] = None) -> None: + with self.lock: + node: dllistnode = self.usages.append((item, value)) + self.cache[item] = node + + self.loop.call_soon(self._on_set, node) + + if expiration is not None: + self.loop.call_later(expiration, self.expire, node) diff --git a/aiomisc/cache/lfu.py b/aiomisc/cache/lfu.py new file mode 100644 index 00000000..a953dd03 --- /dev/null +++ b/aiomisc/cache/lfu.py @@ -0,0 +1,79 @@ +from dataclasses import dataclass +from threading import RLock +from typing import Any, Hashable, Optional + +from llist import dllistnode, dllist + + +@dataclass(frozen=True) +class FrequencyItem: + node: dllistnode + key: Hashable + value: Any + + +class LFUCache: + """ + LFU cache implementation + + >>> lfu = LFUCache(3) + >>> lfu.set("foo", "bar") + >>> assert "foo" in lfu + >>> lfu.get('foo') + 'bar' + >>> lfu.set("bar", "foo") + >>> lfu.set("spam", "egg") + + """ + + def __init__(self, max_size: int = 0): + self.cache = dict() + self.usages = dllist() + self.lock = RLock() + self.size = 0 + self.max_size = max_size + + def _create_node(self) -> dllistnode: + return self.usages.append(set([])) + + def _update_usage(self, item: FrequencyItem): + with self.lock: + old_node = item.node + new_node = item.node.next + + if new_node is None: + new_node = self._create_node() + + old_node.value.remove(item) + item = FrequencyItem( + node=new_node, + key=item.key, + value=item.value, + ) + new_node.value.add(item) + self.cache[item.key] = item + + if not old_node.value: + self.usages.remove(old_node) + + def get(self, key: Hashable): + item: FrequencyItem = self.cache[key] + self._update_usage(item) + return item.value + + def set(self, key: Hashable, value: Any): + with self.lock: + node: Optional[dllistnode] = self.usages.first + + if node is None: + node = self._create_node() + + item = FrequencyItem(node=node, key=key, value=value) + node.value.add(item) + self.cache[key] = item + + def __contains__(self, key) -> Any: + if key in self.cache: + self._update_usage(self.cache[key]) + return True + return False diff --git a/aiomisc/cache/lru.py b/aiomisc/cache/lru.py new file mode 100644 index 00000000..b8669ee0 --- /dev/null +++ b/aiomisc/cache/lru.py @@ -0,0 +1,24 @@ +from typing import Any + +from llist import dllistnode + +from .base import CacheBase + + +class LRUCache(CacheBase): + def _on_set(self, node: dllistnode) -> None: + if not self.is_overflow: + return + self._on_overflow() + + def _on_get(self, node: dllistnode) -> Any: + with self.lock: + self.usages.remove(node) + self.usages.appendright(node) + + def _on_overflow(self): + with self.lock: + while self.is_overflow: + node: dllistnode = self.usages.popleft() + item, value = node.value + self.cache.pop(item) diff --git a/requirements.dev.txt b/requirements.dev.txt index 6631c80c..df0a41a9 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,5 +1,5 @@ -aiohttp<4 aiohttp-asgi +aiohttp<4 async-generator async-timeout coverage==4.5.1 @@ -7,13 +7,14 @@ coveralls croniter~=0.3.34 fastapi freezegun<1.1 +llist==0.6 mypy~=0.782 pylava pytest pytest-cov~=2.5.1 pytest-freezegun~=0.4.2 -sphinx>=3.5.1 sphinx-autobuild sphinx-intl +sphinx>=3.5.1 timeout-decorator tox>=2.4 diff --git a/setup.py b/setup.py index e631235a..87ed3790 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,7 @@ def load_requirements(fname): 'raven': ['raven-aiohttp'], 'uvloop': ['uvloop>=0.14,<1'], 'cron': ['croniter~=0.3.34'], + 'lfu': ['llist==0.6'], ':python_version < "3.7"': 'async-generator', }, entry_points={ From 2d81206985a8ca9258a636a868cf08f8bec570cc Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 9 Mar 2021 00:56:22 +0300 Subject: [PATCH 2/5] WIP --- aiomisc/cache/lfu.py | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/aiomisc/cache/lfu.py b/aiomisc/cache/lfu.py index a953dd03..f69777d5 100644 --- a/aiomisc/cache/lfu.py +++ b/aiomisc/cache/lfu.py @@ -1,13 +1,18 @@ from dataclasses import dataclass from threading import RLock -from typing import Any, Hashable, Optional +from typing import Any, Hashable, Optional, Set -from llist import dllistnode, dllist + +@dataclass(unsafe_hash=True) +class Node: + prev: Optional["Node"] + next: Optional["Node"] + items: Set["Item"] @dataclass(frozen=True) -class FrequencyItem: - node: dllistnode +class Item: + node: Node key: Hashable value: Any @@ -28,15 +33,17 @@ class LFUCache: def __init__(self, max_size: int = 0): self.cache = dict() - self.usages = dllist() + self.usages: Node = Node(prev=None, next=None, items=set()) self.lock = RLock() self.size = 0 self.max_size = max_size - def _create_node(self) -> dllistnode: - return self.usages.append(set([])) + def _create_node(self) -> Node: + node = Node(prev=self.usages, next=None, items=set()) + self.usages.next = node + return node - def _update_usage(self, item: FrequencyItem): + def _update_usage(self, item: Item): with self.lock: old_node = item.node new_node = item.node.next @@ -44,32 +51,35 @@ def _update_usage(self, item: FrequencyItem): if new_node is None: new_node = self._create_node() - old_node.value.remove(item) - item = FrequencyItem( + old_node.items.remove(item) + item = Item( node=new_node, key=item.key, value=item.value, ) - new_node.value.add(item) + new_node.items.add(item) self.cache[item.key] = item - if not old_node.value: - self.usages.remove(old_node) + if not old_node.items: + old_node.next = None + old_node.prev = None + self.usages = new_node + self.usages.prev = None def get(self, key: Hashable): - item: FrequencyItem = self.cache[key] + item: Item = self.cache[key] self._update_usage(item) return item.value def set(self, key: Hashable, value: Any): with self.lock: - node: Optional[dllistnode] = self.usages.first + node: Optional[Node] = self.usages if node is None: node = self._create_node() - item = FrequencyItem(node=node, key=key, value=value) - node.value.add(item) + item = Item(node=node, key=key, value=value) + node.items.add(item) self.cache[key] = item def __contains__(self, key) -> Any: From 604d0297162053b0984aff1a5548aff771b6adfa Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Tue, 9 Mar 2021 00:59:03 +0300 Subject: [PATCH 3/5] WIP --- aiomisc/cache/lfu.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/aiomisc/cache/lfu.py b/aiomisc/cache/lfu.py index f69777d5..b0c2d4fd 100644 --- a/aiomisc/cache/lfu.py +++ b/aiomisc/cache/lfu.py @@ -3,11 +3,14 @@ from typing import Any, Hashable, Optional, Set -@dataclass(unsafe_hash=True) class Node: - prev: Optional["Node"] - next: Optional["Node"] - items: Set["Item"] + __slots__ = ('prev', 'next', 'items') + + def __init__(self, prev: "Node" = None, next: "Node" = None, + items: Optional[Set["Item"]] = None): + self.prev = prev + self.next = next + self.items = items or set() @dataclass(frozen=True) From 13681be5c918af854916b75a130f227c8d6a64a3 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Wed, 10 Mar 2021 18:41:59 +0300 Subject: [PATCH 4/5] WIP --- aiomisc/cache/base.py | 78 ++++----------- aiomisc/cache/lfu.py | 100 +++++++++++++++---- aiomisc/cache/lru.py | 226 +++++++++++++++++++++++++++++++++++++++--- requirements.dev.txt | 1 - setup.py | 1 - 5 files changed, 307 insertions(+), 99 deletions(-) diff --git a/aiomisc/cache/base.py b/aiomisc/cache/base.py index f9f3c985..bccd0741 100644 --- a/aiomisc/cache/base.py +++ b/aiomisc/cache/base.py @@ -1,73 +1,29 @@ -import asyncio -import threading from abc import ABC, abstractmethod -from typing import Any, Union, Hashable, Dict, Optional - -from llist import dllist, dllistnode +from typing import Any, Hashable class CacheBase(ABC): - def __init__(self, max_size: int = 0): - self._max_size: int = max_size - self._loop: Optional[asyncio.AbstractEventLoop] = None - self.usages: dllist = dllist() - self.cache: Dict[Hashable, Any] = dict() - self.lock = threading.RLock() - - @property - def is_overflow(self) -> bool: - if self._max_size == 0: - return False - - if self._max_size < len(self.usages): - return True + __slots__ = "max_size", - return False - - @property - def loop(self) -> asyncio.AbstractEventLoop: - if self._loop is None: - self._loop = asyncio.get_event_loop() - return self._loop + def __init__(self, max_size: int = 0): + self.max_size = max_size @abstractmethod - def _on_set(self, node: dllistnode) -> None: - pass - - def _on_expires(self, node: dllistnode) -> None: - pass + def get(self, key: Hashable): + raise NotImplementedError @abstractmethod - def _on_get(self, node: dllistnode) -> Any: - pass - - def __contains__(self, item: Hashable) -> bool: - return item in self.cache + def remove(self, key: Hashable): + raise NotImplementedError - def get(self, item: Hashable) -> Any: - with self.lock: - node: dllistnode = self.cache[item] - self.loop.call_soon(self._on_get, node) - return node.value[1] - - def expire(self, node: dllistnode): - with self.lock: - item, value = node.value - node: Optional[dllistnode] = self.cache.pop(item, None) - - if node is None: - return - - self.loop.call_soon(self._on_expires, node) - self.usages.remove(node) - - def set(self, item: Hashable, value: Any, - expiration: Union[int, float] = None) -> None: - with self.lock: - node: dllistnode = self.usages.append((item, value)) - self.cache[item] = node + @abstractmethod + def set(self, key: Hashable, value: Any): + raise NotImplementedError - self.loop.call_soon(self._on_set, node) + @abstractmethod + def __contains__(self, key: Hashable): + raise NotImplementedError - if expiration is not None: - self.loop.call_later(expiration, self.expire, node) + @abstractmethod + def __len__(self): + raise NotImplementedError diff --git a/aiomisc/cache/lfu.py b/aiomisc/cache/lfu.py index b0c2d4fd..e4580399 100644 --- a/aiomisc/cache/lfu.py +++ b/aiomisc/cache/lfu.py @@ -1,10 +1,11 @@ -from dataclasses import dataclass from threading import RLock -from typing import Any, Hashable, Optional, Set +from typing import Any, Hashable, Optional, Dict, Set +from aiomisc.cache.base import CacheBase +# noinspection PyShadowingBuiltins class Node: - __slots__ = ('prev', 'next', 'items') + __slots__ = 'prev', 'next', 'items' def __init__(self, prev: "Node" = None, next: "Node" = None, items: Optional[Set["Item"]] = None): @@ -13,14 +14,16 @@ def __init__(self, prev: "Node" = None, next: "Node" = None, self.items = items or set() -@dataclass(frozen=True) class Item: - node: Node - key: Hashable - value: Any + __slots__ = 'node', 'key', 'value' + def __init__(self, node: Node, key: Hashable, value: Any): + self.node: Node = node + self.key: Hashable = key + self.value: Any = value -class LFUCache: + +class LFUCache(CacheBase): """ LFU cache implementation @@ -29,20 +32,37 @@ class LFUCache: >>> assert "foo" in lfu >>> lfu.get('foo') 'bar' + + >>> lfu.remove('foo') + >>> assert "foo" not in lfu + >>> lfu.get("foo") + Traceback (most recent call last): + ... + KeyError: 'foo' + >>> lfu.remove("foo") + >>> lfu.set("bar", "foo") >>> lfu.set("spam", "egg") - + >>> lfu.set("foo", "bar") + >>> lfu.get("foo") + 'bar' + >>> lfu.get("spam") + 'egg' + >>> assert len(lfu) == 3 + >>> lfu.set("egg", "spam") + >>> assert len(lfu) == 3 """ - def __init__(self, max_size: int = 0): - self.cache = dict() - self.usages: Node = Node(prev=None, next=None, items=set()) - self.lock = RLock() - self.size = 0 - self.max_size = max_size + __slots__ = "cache", "usages", "lock" + + def __init__(self, max_size: int): + super().__init__(max_size) + self.cache: Dict[Hashable, Item] = dict() + self.usages: Node = Node(prev=None, next=None) + self.lock: RLock = RLock() def _create_node(self) -> Node: - node = Node(prev=self.usages, next=None, items=set()) + node = Node(prev=self.usages, next=None) self.usages.next = node return node @@ -55,11 +75,7 @@ def _update_usage(self, item: Item): new_node = self._create_node() old_node.items.remove(item) - item = Item( - node=new_node, - key=item.key, - value=item.value, - ) + item.node = new_node new_node.items.add(item) self.cache[item.key] = item @@ -69,11 +85,29 @@ def _update_usage(self, item: Item): self.usages = new_node self.usages.prev = None + def _remove_item(self, item: Item): + with self.lock: + if item.key in self.cache: + self.cache.pop(item.key, None) + + if item in item.node.items: + item.node.items.remove(item) + + item.node = None + def get(self, key: Hashable): item: Item = self.cache[key] self._update_usage(item) return item.value + def remove(self, key: Hashable): + with self.lock: + item: Optional[Item] = self.cache.pop(key, None) + if item is None: + return + + self._remove_item(item) + def set(self, key: Hashable, value: Any): with self.lock: node: Optional[Node] = self.usages @@ -85,8 +119,30 @@ def set(self, key: Hashable, value: Any): node.items.add(item) self.cache[key] = item - def __contains__(self, key) -> Any: + if self._is_overflow(): + self._on_overflow() + + def _on_overflow(self): + with self.lock: + while self._is_overflow(): + if not self.usages.items: + if self.usages.next is not None: + self.usages.next.prev = None + self.usages = self.usages.next + else: + self.usages = Node(prev=None, next=None) + + item = self.usages.items.pop() + self._remove_item(item) + + def _is_overflow(self) -> bool: + return len(self.cache) > self.max_size + + def __contains__(self, key: Hashable) -> Any: if key in self.cache: self._update_usage(self.cache[key]) return True return False + + def __len__(self): + return len(self.cache) diff --git a/aiomisc/cache/lru.py b/aiomisc/cache/lru.py index b8669ee0..17b4558c 100644 --- a/aiomisc/cache/lru.py +++ b/aiomisc/cache/lru.py @@ -1,24 +1,222 @@ -from typing import Any +from threading import RLock +from typing import Any, Hashable, Dict, Optional +from aiomisc.cache.base import CacheBase -from llist import dllistnode -from .base import CacheBase +# noinspection PyShadowingBuiltins +class Node: + __slots__ = 'prev', 'next', 'item', '_restricted' + + def __init__(self, prev: "Node" = None, next: "Node" = None, + item: Optional["Item"] = None): + self.prev = prev + self.next = next + self.item = item + + +class Item: + __slots__ = 'node', 'key', 'value', '_restricted' + + def __init__(self, node: Node, key: Hashable, value: Any): + self.node: Node = node + self.key: Hashable = key + self.value: Any = value class LRUCache(CacheBase): - def _on_set(self, node: dllistnode) -> None: - if not self.is_overflow: + """ + LRU cache implementation + + >>> lfu = LRUCache(3) + >>> lfu.set("foo", "bar") + >>> assert "foo" in lfu + >>> lfu.get('foo') + 'bar' + + >>> lfu.remove('foo') + >>> assert "foo" not in lfu + >>> lfu.get("foo") + Traceback (most recent call last): + ... + KeyError: 'foo' + >>> lfu.remove("foo") + + >>> lfu.set("bar", "foo") + >>> lfu.set("spam", "egg") + >>> lfu.set("foo", "bar") + >>> lfu.get("foo") + 'bar' + >>> lfu.get("spam") + 'egg' + >>> assert len(lfu) == 3 + >>> lfu.set("egg", "spam") + >>> assert len(lfu) == 3, str(len(lfu)) + " is not 3" + """ + + __slots__ = 'lock', 'cache', 'last', 'first' + + def __init__(self, max_size: int): + super().__init__(max_size) + self.cache: Dict[Hashable, Item] = dict() + self.first: Optional[Node] = None + self.last: Optional[Node] = None + self.lock: RLock = RLock() + + @staticmethod + def _node_remove(node: Node): + if node.next is None: return - self._on_overflow() - def _on_get(self, node: dllistnode) -> Any: - with self.lock: - self.usages.remove(node) - self.usages.appendright(node) + node.prev.next = node.next + node.prev = None + node.next = None + + @staticmethod + def _node_append_left(parent: Node, node: Node): + """ + Appends node before the parent node + + before: + + ... <-> [parent] <-> ... + + after: + + ... <-> [node] <-> [parent] <-> ... + + """ + node.next = parent + + if parent.prev is not None: + node.prev = parent.prev + + parent.prev = node + + @staticmethod + def _node_append_right(parent: Node, node: Node): + """ + Appends node after parent node + + before: + + ... <-> [parent] <-> ... + + after: + + ... <-> [parent] <-> [node] <-> ... + + """ + node.prev = parent + + if parent.next is not None: + node.next = parent.next + + parent.next = node + + @staticmethod + def _node_swap(a: Node, b: Node): + """ + Swaps two Nodes and change neighbor links + + Example: doubly linked list looks like: + + [x] <-> [a] <-> [z] <-> [b] <-> [y] + node x node a node z node b node y + p n p n p n p n p n + ---------------------------------------------- + - a x z a b z y b - + + After swap should looks like: + + [x] <-> [b] <-> [z] <-> [a] <-> [y] + node x node b node z node a node y + p n p n p n p n p n + ---------------------------------------------- + - b x z b a z y a - + + That's means we should make 8 changes + + # 4 for "a" and "b" + a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next + + # 4 for neighbors + x.next, z.prev, z.next, y.prev = b, b, a, a + + After general case is should be: + + a.prev.next, a.next.prev, b.prev.next, b.next.prev = b, b, a, a + a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next + + """ + # store original links + a_prev, a_next, b_prev, b_next = a.prev, a.next, b.prev, b.next + + if a_prev is not None: + a_prev.next = b + a.prev = b_prev + + if a_next is not None: + a_next.prev = b + a.next = b_next + + if b_next is not None: + b_next.prev = a + b.next = b_next + + if b_prev is not None: + b_prev.next = a + b.prev = a_prev def _on_overflow(self): with self.lock: - while self.is_overflow: - node: dllistnode = self.usages.popleft() - item, value = node.value - self.cache.pop(item) + while self._is_overflow(): + node = self.first + if self.first is None: + return + + self.first = self.first.next + self.remove(node.item.key) + + if self.first is None: + self.first = Node(prev=None, next=None) + + def _is_overflow(self) -> bool: + return len(self.cache) > self.max_size + + def get(self, key: Hashable): + item = self.cache[key] + + with self.lock: + self._node_swap(item.node, self.last) + + return item.value + + def remove(self, key: Hashable): + with self.lock: + item: Optional[Item] = self.cache.pop(key, None) + if item is None: + return + self._node_remove(item.node) + + def set(self, key: Hashable, value: Any): + with self.lock: + node = Node(prev=self.last) + item = Item(node=node, key=key, value=value) + node.item = item + self.cache[key] = item + + if self.last is None and self.first is None: + self.last = self.first = node + return + + self._node_append_right(self.last, node) + self.last = node + + if self._is_overflow(): + self._on_overflow() + + def __contains__(self, key: Hashable): + return key in self.cache + + def __len__(self): + return len(self.cache) diff --git a/requirements.dev.txt b/requirements.dev.txt index df0a41a9..21896883 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -7,7 +7,6 @@ coveralls croniter~=0.3.34 fastapi freezegun<1.1 -llist==0.6 mypy~=0.782 pylava pytest diff --git a/setup.py b/setup.py index 87ed3790..e631235a 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,6 @@ def load_requirements(fname): 'raven': ['raven-aiohttp'], 'uvloop': ['uvloop>=0.14,<1'], 'cron': ['croniter~=0.3.34'], - 'lfu': ['llist==0.6'], ':python_version < "3.7"': 'async-generator', }, entry_points={ From c2f304bc6f9345f5e48bf66264f67733ac20f2a9 Mon Sep 17 00:00:00 2001 From: Dmitry Orlov Date: Fri, 26 Mar 2021 10:56:06 +0300 Subject: [PATCH 5/5] WIP --- aiomisc/cache/base.py | 13 +- aiomisc/cache/dllist.py | 239 +++++++++++++++++++++++++++++++++++++ aiomisc/cache/lfu.py | 120 +++++++------------ aiomisc/cache/lru.py | 189 ++++++----------------------- modd.conf | 7 ++ requirements.dev.txt | 1 + tests/cache/__init__.py | 1 + tests/cache/test_dllist.py | 144 ++++++++++++++++++++++ tests/cache/test_lfu.py | 34 ++++++ 9 files changed, 510 insertions(+), 238 deletions(-) create mode 100644 aiomisc/cache/dllist.py create mode 100644 modd.conf create mode 100644 tests/cache/__init__.py create mode 100644 tests/cache/test_dllist.py create mode 100644 tests/cache/test_lfu.py diff --git a/aiomisc/cache/base.py b/aiomisc/cache/base.py index bccd0741..1edb9ad4 100644 --- a/aiomisc/cache/base.py +++ b/aiomisc/cache/base.py @@ -1,12 +1,17 @@ from abc import ABC, abstractmethod -from typing import Any, Hashable +from typing import Any, Hashable, Dict -class CacheBase(ABC): - __slots__ = "max_size", +class CachePolicy(ABC): + __slots__ = "max_size", "cache" - def __init__(self, max_size: int = 0): + def __init__(self, cache: Dict[Hashable, Any], max_size: int = 0): self.max_size = max_size + self.cache = cache + self._on_init() + + def _on_init(self): + pass @abstractmethod def get(self, key: Hashable): diff --git a/aiomisc/cache/dllist.py b/aiomisc/cache/dllist.py new file mode 100644 index 00000000..e86791ef --- /dev/null +++ b/aiomisc/cache/dllist.py @@ -0,0 +1,239 @@ +from multiprocessing import RLock +from typing import Optional, Any, Hashable, Type + + +class Node: + __slots__ = 'prev', 'next', 'value', 'parent' + + prev: Optional["Node"] + next: Optional["Node"] + parent: "DLList" + + def __init__(self, parent: "DLList", prev: "Node" = None, + next: "Node" = None): + self.parent = parent + self.prev = prev + self.next = next + self.value = None + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} " \ + f"{id(self)}: next={id(self.next)} " \ + f"prev={id(self.prev)}>" + + def remove(self): + if self.next is None: + return + + with self.parent.lock: + self.prev.next = self.next + self.prev = None + self.next = None + + def append_left(self, node: "Node") -> "Node": + """ + Appends node before the parent node + + before: + + ... <-> [self] <-> ... + + after: + + ... <-> [node] <-> [self] <-> ... + + """ + with self.parent.lock: + self.parent.nodes.add(node) + node.next = self + + if self.prev is not None: + node.prev = self.prev + + self.prev = node + return node + + def append_right(self, node: "Node") -> "Node": + """ + Appends node after parent node + + before: + + ... <-> [self] <-> ... + + after: + + ... <-> [self] <-> [node] <-> ... + + """ + + with self.parent.lock: + self.parent.nodes.add(node) + node.prev = self + + if self.next is not None: + node.next = self.next + + self.next = node + return node + + def swap(self, other: "Node"): + """ + Swaps two Nodes and change neighbor links + + Example: doubly linked list looks like: + + [x] <-> [self] <-> [z] <-> [other] <-> [y] + node x node self node z node other node y + p n p n p n p n p n + ------------------------------------------------------- + - self x z self other z y other - + + After swap should looks like: + + [x] <-> [other] <-> [z] <-> [self] <-> [y] + node x node other node z node self node y + p n p n p n p n p n + -------------------------------------------------------- + - other x z other b z y other - + + That's means we should make 8 changes + + # 4 for "a" and "b" + a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next + + # 4 for neighbors + x.next, z.prev, z.next, y.prev = b, b, a, a + + After general case is should be: + + a.prev.next, a.next.prev, b.prev.next, b.next.prev = b, b, a, a + a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next + + """ + with self.parent.lock: + # store original links + self_prev, self_next, other_prev, other_next = ( + self.prev, self.next, other.prev, other.next + ) + + if self_prev is not None: + self_prev.next = other + self.prev = other_prev + + if self_next is not None: + self_next.prev = other + self.next = other_next + + if other_next is not None: + other_next.prev = self + other.next = other_next + + if other_prev is not None: + other_prev.next = self + other.prev = self_prev + + first_set = False + last_set = False + + if not last_set and self is self.parent.last: + self.parent.last = other + last_set = True + + if not first_set and self is self.parent.first: + self.parent.first = other + first_set = True + + if not last_set and other is self.parent.last: + self.parent.last = self + + if not first_set and other is self.parent.first: + self.parent.first = self + + +class Item: + __slots__ = 'node', 'key', 'value' + + node: Node + key: Hashable + value: Any + + def __init__(self, node: Node, key: Hashable, value: Any): + self.node = node + self.key = key + self.value = value + + +class DLList: + __slots__ = 'first', 'last', 'lock', 'nodes' + + NODE_CLASS: Type[Node] = Node + ITEM_CLASS: Type[Item] = Item + + first: Optional[NODE_CLASS] + last: Optional[NODE_CLASS] + + def __init__(self): + self.lock = RLock() + self.first = None + self.last = None + self.nodes = set() + + def __len__(self): + return len(self.nodes) + + def __contains__(self, item: NODE_CLASS): + return item in self.nodes + + def __iter__(self): + with self.lock: + first = self.first + while first is not None: + yield first + first = first.next + + def _create_node(self, *args, **kwargs): + node = self.NODE_CLASS(self, *args, **kwargs) + self.nodes.add(node) + return node + + def remove(self, node: NODE_CLASS): + if node not in self.nodes: + raise ValueError(f"Node {node!r} is not part of {self!r}") + + with self.lock: + self.nodes.remove(node) + if node.prev is not None: + node.prev.next = node.next + if node.next is not None: + node.next.prev = node.prev + + if self.first is node: + self.first = node.next + + if self.last is node: + self.last = node.prev + + def create_left(self) -> NODE_CLASS: + with self.lock: + if self.first is None: + self.first = self._create_node() + self.last = self.first + return self.first + + node = self._create_node(next=self.first) + self.first.prev = node + self.first = node + return node + + def create_right(self) -> NODE_CLASS: + with self.lock: + if self.first is None: + self.last = self._create_node() + self.first = self.last + return self.first + + node = self._create_node(prev=self.last) + self.last.next = node + self.last = node + return node diff --git a/aiomisc/cache/lfu.py b/aiomisc/cache/lfu.py index e4580399..465aa2e9 100644 --- a/aiomisc/cache/lfu.py +++ b/aiomisc/cache/lfu.py @@ -1,33 +1,17 @@ from threading import RLock from typing import Any, Hashable, Optional, Dict, Set -from aiomisc.cache.base import CacheBase +from aiomisc.cache.base import CachePolicy +from llist import dllist, dllistnode +from aiomisc.cache.dllist import Item -# noinspection PyShadowingBuiltins -class Node: - __slots__ = 'prev', 'next', 'items' - def __init__(self, prev: "Node" = None, next: "Node" = None, - items: Optional[Set["Item"]] = None): - self.prev = prev - self.next = next - self.items = items or set() - - -class Item: - __slots__ = 'node', 'key', 'value' - - def __init__(self, node: Node, key: Hashable, value: Any): - self.node: Node = node - self.key: Hashable = key - self.value: Any = value - - -class LFUCache(CacheBase): +class LFUCachePolicy(CachePolicy): """ LFU cache implementation - >>> lfu = LFUCache(3) + >>> cache = {} + >>> lfu = LFUCachePolicy(cache, 3) >>> lfu.set("foo", "bar") >>> assert "foo" in lfu >>> lfu.get('foo') @@ -53,47 +37,49 @@ class LFUCache(CacheBase): >>> assert len(lfu) == 3 """ - __slots__ = "cache", "usages", "lock" + __slots__ = "usages", "lock" - def __init__(self, max_size: int): - super().__init__(max_size) - self.cache: Dict[Hashable, Item] = dict() - self.usages: Node = Node(prev=None, next=None) + def _on_init(self): + self.usages: dllist = dllist() self.lock: RLock = RLock() - def _create_node(self) -> Node: - node = Node(prev=self.usages, next=None) - self.usages.next = node - return node - def _update_usage(self, item: Item): with self.lock: - old_node = item.node - new_node = item.node.next - - if new_node is None: - new_node = self._create_node() + pass - old_node.items.remove(item) - item.node = new_node - new_node.items.add(item) - self.cache[item.key] = item + def _item_remove(self, item: Item): + with self.lock: + if item in item.node.value: + item.node.value.remove(item) - if not old_node.items: - old_node.next = None - old_node.prev = None - self.usages = new_node - self.usages.prev = None + item.node = None - def _remove_item(self, item: Item): + def _on_overflow(self): with self.lock: - if item.key in self.cache: + while self._is_overflow(): + if not self.usages.value: + if self.usages.next is not None: + self.usages.next.prev = None + self.usages = self.usages.next + else: + self.usages = Node(prev=None, next=None) + continue + + item = self.usages.value.pop() self.cache.pop(item.key, None) + self._item_remove(item) - if item in item.node.items: - item.node.items.remove(item) + def _is_overflow(self) -> bool: + return len(self.cache) > self.max_size - item.node = None + def __contains__(self, key: Hashable) -> Any: + if key in self.cache: + self._update_usage(self.cache[key]) + return True + return False + + def __len__(self): + return len(self.cache) def get(self, key: Hashable): item: Item = self.cache[key] @@ -106,43 +92,19 @@ def remove(self, key: Hashable): if item is None: return - self._remove_item(item) + self._item_remove(item) def set(self, key: Hashable, value: Any): with self.lock: node: Optional[Node] = self.usages if node is None: - node = self._create_node() + node = Node() + self.usages = node item = Item(node=node, key=key, value=value) - node.items.add(item) + node.value.add(item) self.cache[key] = item if self._is_overflow(): self._on_overflow() - - def _on_overflow(self): - with self.lock: - while self._is_overflow(): - if not self.usages.items: - if self.usages.next is not None: - self.usages.next.prev = None - self.usages = self.usages.next - else: - self.usages = Node(prev=None, next=None) - - item = self.usages.items.pop() - self._remove_item(item) - - def _is_overflow(self) -> bool: - return len(self.cache) > self.max_size - - def __contains__(self, key: Hashable) -> Any: - if key in self.cache: - self._update_usage(self.cache[key]) - return True - return False - - def __len__(self): - return len(self.cache) diff --git a/aiomisc/cache/lru.py b/aiomisc/cache/lru.py index 17b4558c..6eb0c58b 100644 --- a/aiomisc/cache/lru.py +++ b/aiomisc/cache/lru.py @@ -1,33 +1,18 @@ from threading import RLock from typing import Any, Hashable, Dict, Optional -from aiomisc.cache.base import CacheBase +from llist import dllist, dllistnode -# noinspection PyShadowingBuiltins -class Node: - __slots__ = 'prev', 'next', 'item', '_restricted' +from aiomisc.cache.base import CachePolicy +from aiomisc.cache.dllist import Item - def __init__(self, prev: "Node" = None, next: "Node" = None, - item: Optional["Item"] = None): - self.prev = prev - self.next = next - self.item = item - -class Item: - __slots__ = 'node', 'key', 'value', '_restricted' - - def __init__(self, node: Node, key: Hashable, value: Any): - self.node: Node = node - self.key: Hashable = key - self.value: Any = value - - -class LRUCache(CacheBase): +class LRUCachePolicy(CachePolicy): """ LRU cache implementation - >>> lfu = LRUCache(3) + >>> cache = {} + >>> lfu = LRUCachePolicy(cache, 3) >>> lfu.set("foo", "bar") >>> assert "foo" in lfu >>> lfu.get('foo') @@ -53,164 +38,58 @@ class LRUCache(CacheBase): >>> assert len(lfu) == 3, str(len(lfu)) + " is not 3" """ - __slots__ = 'lock', 'cache', 'last', 'first' - - def __init__(self, max_size: int): - super().__init__(max_size) - self.cache: Dict[Hashable, Item] = dict() - self.first: Optional[Node] = None - self.last: Optional[Node] = None - self.lock: RLock = RLock() - - @staticmethod - def _node_remove(node: Node): - if node.next is None: - return - - node.prev.next = node.next - node.prev = None - node.next = None - - @staticmethod - def _node_append_left(parent: Node, node: Node): - """ - Appends node before the parent node - - before: - - ... <-> [parent] <-> ... - - after: - - ... <-> [node] <-> [parent] <-> ... - - """ - node.next = parent - - if parent.prev is not None: - node.prev = parent.prev - - parent.prev = node - - @staticmethod - def _node_append_right(parent: Node, node: Node): - """ - Appends node after parent node - - before: - - ... <-> [parent] <-> ... - - after: - - ... <-> [parent] <-> [node] <-> ... - - """ - node.prev = parent + lock: RLock + usages: dllist - if parent.next is not None: - node.next = parent.next + __slots__ = 'lock', 'usages' - parent.next = node - - @staticmethod - def _node_swap(a: Node, b: Node): - """ - Swaps two Nodes and change neighbor links - - Example: doubly linked list looks like: - - [x] <-> [a] <-> [z] <-> [b] <-> [y] - node x node a node z node b node y - p n p n p n p n p n - ---------------------------------------------- - - a x z a b z y b - - - After swap should looks like: - - [x] <-> [b] <-> [z] <-> [a] <-> [y] - node x node b node z node a node y - p n p n p n p n p n - ---------------------------------------------- - - b x z b a z y a - - - That's means we should make 8 changes - - # 4 for "a" and "b" - a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next - - # 4 for neighbors - x.next, z.prev, z.next, y.prev = b, b, a, a - - After general case is should be: - - a.prev.next, a.next.prev, b.prev.next, b.next.prev = b, b, a, a - a.prev, a.next, b.prev, b.next = b.prev, b.next, a.prev, a.next - - """ - # store original links - a_prev, a_next, b_prev, b_next = a.prev, a.next, b.prev, b.next - - if a_prev is not None: - a_prev.next = b - a.prev = b_prev - - if a_next is not None: - a_next.prev = b - a.next = b_next - - if b_next is not None: - b_next.prev = a - b.next = b_next - - if b_prev is not None: - b_prev.next = a - b.prev = a_prev + def _on_init(self): + self.usages: dllist = dllist() + self.lock: RLock = RLock() def _on_overflow(self): with self.lock: while self._is_overflow(): - node = self.first - if self.first is None: + node: Optional[dllistnode] = self.usages.popleft() + + if node is None: return - self.first = self.first.next - self.remove(node.item.key) + item: Item = node.value + self.cache.pop(item.key, None) - if self.first is None: - self.first = Node(prev=None, next=None) + del item.node + del item.key + del item.value def _is_overflow(self) -> bool: return len(self.cache) > self.max_size def get(self, key: Hashable): - item = self.cache[key] + item: dllistnode = self.cache[key] with self.lock: - self._node_swap(item.node, self.last) + self.usages.remove(item) + self.usages.appendright(item.value) - return item.value + return item.value.value def remove(self, key: Hashable): with self.lock: - item: Optional[Item] = self.cache.pop(key, None) - if item is None: + node: Optional[dllistnode] = self.cache.pop(key, None) + + if node is None: return - self._node_remove(item.node) + + self.usages.remove(node) + print(node) def set(self, key: Hashable, value: Any): with self.lock: - node = Node(prev=self.last) - item = Item(node=node, key=key, value=value) - node.item = item - self.cache[key] = item - - if self.last is None and self.first is None: - self.last = self.first = node - return - - self._node_append_right(self.last, node) - self.last = node + item = Item(node=None, key=key, value=value) + node = self.usages.appendright(item) + item.node = node + self.cache[key] = node if self._is_overflow(): self._on_overflow() diff --git a/modd.conf b/modd.conf new file mode 100644 index 00000000..e52b567f --- /dev/null +++ b/modd.conf @@ -0,0 +1,7 @@ +**/*.py { + prep: pytest -sx tests/cache/test_dllist.py --cov aiomisc.cache --cov-report=term-missing +} + +requirements.* { + prep: pip install -Ue ".[develop]" +} diff --git a/requirements.dev.txt b/requirements.dev.txt index 21896883..86fdbb0f 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -11,6 +11,7 @@ mypy~=0.782 pylava pytest pytest-cov~=2.5.1 +pytest-subtests pytest-freezegun~=0.4.2 sphinx-autobuild sphinx-intl diff --git a/tests/cache/__init__.py b/tests/cache/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/cache/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/cache/test_dllist.py b/tests/cache/test_dllist.py new file mode 100644 index 00000000..de682bc5 --- /dev/null +++ b/tests/cache/test_dllist.py @@ -0,0 +1,144 @@ +from typing import List + +import pytest + +from aiomisc.cache.dllist import DLList, Node + + +def test_simple(subtests): + dllist = DLList() + node_class = DLList.NODE_CLASS + + with subtests.test("blank object"): + assert len(dllist) == 0 + assert dllist.first is None + assert dllist.last is None + + with subtests.test("one node"): + node1 = dllist.create_right() + assert isinstance(node1, node_class) + assert len(dllist) == 1 + assert dllist.first is node1 + assert dllist.last is node1 + assert node1.next is None + assert node1.prev is None + + with subtests.test("two nodes"): + node2 = dllist.create_right() + assert isinstance(node2, node_class) + assert dllist.first is node1 + assert dllist.last is node2 + assert node1.next is node2 + assert dllist.first.next is node2 + + with subtests.test("three nodes"): + node3 = dllist.create_left() + assert isinstance(node3, node_class) + assert dllist.first is node3 + assert dllist.first.next is node1 + + assert dllist.last is node2 + assert node3.next is node1 + assert node1.next is node2 + assert node2.prev is node1 + assert node1.prev is node3 + assert dllist.last is node2 + + with subtests.test("remove node"): + first = dllist.first + while first: + with subtests.test("remove first"): + dllist.remove(dllist.first) + assert dllist.first is not first + assert first not in dllist + first = dllist.first + + +@pytest.fixture +def nodes() -> list: + return [] + + +@pytest.fixture +def dllist(nodes): + dllist = DLList() + for i in range(10): + nodes.append(dllist.create_right()) + + return dllist + + +ITERATIONS = 10 + + +@pytest.mark.parametrize("node_idx", list(range(ITERATIONS))) +def test_remove(node_idx: int, nodes: List[Node], dllist: DLList): + node = nodes.pop(node_idx) + assert node in dllist + dllist.remove(node) + assert node not in dllist + + for idx, item in enumerate(dllist): + assert nodes[idx] is item + + +def test_remove_first(subtests, nodes: List[Node], dllist: DLList): + first = dllist.first + counter = 0 + while first is not None: + with subtests.test(f"iteration={counter}"): + counter += 1 + dllist.remove(dllist.first) + assert dllist.first is not first + assert first not in dllist + first = dllist.first + + assert len(dllist) == (ITERATIONS - counter) + for idx, item in enumerate(dllist): + assert nodes[counter:][idx] is item + + +def test_remove_last(subtests, nodes: List[Node], dllist: DLList): + last = dllist.last + counter = 0 + while last is not None: + with subtests.test(f"iteration={counter}"): + counter += 1 + dllist.remove(dllist.last) + assert dllist.last is not last + assert last not in dllist + last = dllist.last + + assert len(dllist) == (ITERATIONS - counter) + + for idx, item in enumerate(dllist): + assert nodes[idx] is item + + +def test_node_repr_recursion(nodes: List[Node]): + for node in nodes: + assert str(id(node)) in repr(node) + assert str(id(node.next)) in repr(node) + assert str(id(node.prev)) in repr(node) + + +def test_node_swap(dllist): + a, b = dllist.first, dllist.last + a_next = a.next + a_prev = a.prev + b_next = b.next + b_prev = b.prev + + assert dllist.first is a + assert dllist.last is b + + dllist.first.swap(dllist.last) + + assert dllist.first is not a + assert dllist.last is not b + + assert b_next is not b.next + assert a_next is not a.next + assert a_prev is not a.prev + assert b_prev is not b.prev + diff --git a/tests/cache/test_lfu.py b/tests/cache/test_lfu.py new file mode 100644 index 00000000..6abde0d6 --- /dev/null +++ b/tests/cache/test_lfu.py @@ -0,0 +1,34 @@ +from aiomisc.cache.lfu import LFUCachePolicy + + +def test_simple(): + test_data = [(chr(i), i) for i in range(97, 127)] + + lfu = LFUCachePolicy(max_size=10) + + for key, value in test_data[:10]: + lfu.set(key, value) + + lfu.get(key) + + assert key in lfu + + assert len(lfu) == 10 + + lfu.set("foo", "bar") + + assert len(lfu) == 10 + + lfu.remove("foo") + + for key, value in test_data[:10]: + lfu.remove(key) + assert key not in lfu + + assert len(lfu) == 0 + assert len(lfu.cache) == 0 + + node = lfu.usages + while node is not None: + assert len(node.items) == 0 + node = node.next