From b62105ba9da8a2a5fc393aae333f34abb3d48267 Mon Sep 17 00:00:00 2001 From: Max Hutchinson Date: Thu, 19 Feb 2015 18:30:40 +0000 Subject: [PATCH 1/4] Filenames are stored along with keys, making old chests more portable. --- chest/core.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/chest/core.py b/chest/core.py index c1b1b31..803857d 100644 --- a/chest/core.py +++ b/chest/core.py @@ -82,7 +82,7 @@ def __init__(self, data=None, path=None, available_memory=None, # In memory storage self.inmem = data or dict() # A set of keys held both in memory or on disk - self._keys = set() + self._keys = {} # Was a path given or no? If not we'll clean up the directory later self._explicitly_given_path = path is not None # Diretory where the on-disk data will be held @@ -101,7 +101,7 @@ def __init__(self, data=None, path=None, available_memory=None, keyfile = os.path.join(self.path, '.keys') if os.path.exists(keyfile): with open(keyfile, mode='r'+self.mode) as f: - self._keys = set(self.load(f)) + self._keys = dict(self.load(f)) self.lock = Lock() @@ -154,7 +154,7 @@ def __getitem__(self, key): if key in self.inmem: value = self.inmem[key] else: - if key not in self._keys: + if key not in self._keys.keys(): raise KeyError("Key not found: %s" % key) self.get_from_disk(key) @@ -180,15 +180,15 @@ def __delitem__(self, key): if os.path.exists(fn): os.remove(fn) - self._keys.remove(key) + del self._keys[key] def __setitem__(self, key, value): with self.lock: - if key in self._keys: + if key in self._keys.keys(): del self[key] self.inmem[key] = value - self._keys.add(key) + self._keys[key] = self.key_to_filename(key) self._update_lru(key) with self.lock: @@ -202,13 +202,13 @@ def __del__(self): self.drop() # pragma: no cover def __iter__(self): - return iter(self._keys) + return iter(self._keys.keys()) def __len__(self): - return len(self._keys) + return len(self._keys.keys()) def __contains__(self, key): - return key in self._keys + 
return key in self._keys.keys() @property def memory_usage(self): @@ -242,7 +242,7 @@ def drop(self): def write_keys(self): fn = os.path.join(self.path, '.keys') with open(fn, mode='w'+self.mode) as f: - self.dump(list(self._keys), f) + self.dump(list(iter(self._keys.items())), f) def flush(self): """ Flush all in-memory storage to disk """ @@ -270,18 +270,18 @@ def update(self, other, overwrite=True): # if already flushed, then this does nothing self.flush() other.flush() - for key in other._keys: + for key in other._keys.keys(): if key in self._keys and overwrite: del self[key] elif key in self._keys and not overwrite: continue - old_fn = os.path.join(other.path, other._key_to_filename(key)) + old_fn = other._keys[key] new_fn = os.path.join(self.path, self._key_to_filename(key)) dir = os.path.dirname(new_fn) if not os.path.exists(dir): os.makedirs(dir) os.link(old_fn, new_fn) - self._keys.add(key) + self._keys[key] = new_fn def nbytes(o): From 76afc74b08de3e9ace034270ab96ae1e11531caf Mon Sep 17 00:00:00 2001 From: Max Hutchinson Date: Thu, 19 Feb 2015 12:38:14 -0600 Subject: [PATCH 2/4] Replaced calls to key_to_filename with _keys[key] lookups. --- chest/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chest/core.py b/chest/core.py index 803857d..30e6339 100644 --- a/chest/core.py +++ b/chest/core.py @@ -123,7 +123,7 @@ def key_to_filename(self, key): def move_to_disk(self, key): """ Move data from memory onto disk """ self._on_overflow(key) - fn = self.key_to_filename(key) + fn = self._keys[key] if not os.path.exists(fn): # Only write if it doesn't exist. 
dir = os.path.dirname(fn) if not os.path.exists(dir): @@ -143,7 +143,7 @@ def get_from_disk(self, key): self._on_miss(key) - fn = self.key_to_filename(key) + fn = self._keys[key] with open(fn, mode='r'+self.mode) as f: value = self.load(f) @@ -176,7 +176,7 @@ def __delitem__(self, key): if key in self.heap: del self.heap[key] - fn = self.key_to_filename(key) + fn = self._keys[key] if os.path.exists(fn): os.remove(fn) @@ -275,7 +275,7 @@ def update(self, other, overwrite=True): del self[key] elif key in self._keys and not overwrite: continue - old_fn = other._keys[key] + old_fn = other._keys[key] new_fn = os.path.join(self.path, self._key_to_filename(key)) dir = os.path.dirname(new_fn) if not os.path.exists(dir): From c7628169fdf8aff5c90413cb4504d539b18b6b81 Mon Sep 17 00:00:00 2001 From: Max Hutchinson Date: Thu, 19 Feb 2015 19:28:14 -0600 Subject: [PATCH 3/4] Removing redundant (and possibly poorly performing) keys() and iter() When the dict is treated like a set, it uses the set of keys. Also, list() knows how to handle dictionary view objects without an intermediate cast. 
--- chest/core.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/chest/core.py b/chest/core.py index 30e6339..a65ceb7 100644 --- a/chest/core.py +++ b/chest/core.py @@ -154,7 +154,7 @@ def __getitem__(self, key): if key in self.inmem: value = self.inmem[key] else: - if key not in self._keys.keys(): + if key not in self._keys: raise KeyError("Key not found: %s" % key) self.get_from_disk(key) @@ -184,7 +184,7 @@ def __delitem__(self, key): def __setitem__(self, key, value): with self.lock: - if key in self._keys.keys(): + if key in self._keys: del self[key] self.inmem[key] = value @@ -202,13 +202,13 @@ def __del__(self): self.drop() # pragma: no cover def __iter__(self): - return iter(self._keys.keys()) + return iter(self._keys) def __len__(self): - return len(self._keys.keys()) + return len(self._keys) def __contains__(self, key): - return key in self._keys.keys() + return key in self._keys @property def memory_usage(self): @@ -242,7 +242,7 @@ def drop(self): def write_keys(self): fn = os.path.join(self.path, '.keys') with open(fn, mode='w'+self.mode) as f: - self.dump(list(iter(self._keys.items())), f) + self.dump(list(self._keys.items()), f) def flush(self): """ Flush all in-memory storage to disk """ @@ -270,7 +270,7 @@ def update(self, other, overwrite=True): # if already flushed, then this does nothing self.flush() other.flush() - for key in other._keys.keys(): + for key in other._keys: if key in self._keys and overwrite: del self[key] elif key in self._keys and not overwrite: From 5ade9b49dff7a95acfb59f98e2eabc1d3b75262e Mon Sep 17 00:00:00 2001 From: Max Hutchinson Date: Fri, 20 Feb 2015 10:06:41 -0600 Subject: [PATCH 4/4] Adding a test where the key_to_filename changes. 
--- chest/tests/test_core.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/chest/tests/test_core.py b/chest/tests/test_core.py index e66003e..53bc27d 100644 --- a/chest/tests/test_core.py +++ b/chest/tests/test_core.py @@ -8,6 +8,7 @@ import numpy as np from chest.utils import raises import time +import hashlib @contextmanager @@ -27,6 +28,11 @@ def tmp_chest(*args, **kwargs): pass +def my_key_to_fname(key): + fname = str(hashlib.md5(str(key).encode()).hexdigest()) + return fname + + def test_basic(): with tmp_chest() as c: c[1] = 'one' @@ -384,3 +390,13 @@ def test_nested_files_with_tuples(): c['a', 'b', 'c', 'd', 'e'] = 5 c.flush() assert c['a', 'b', 'c', 'd', 'e'] == 5 + + +def test_store_fnames(): + with tmp_chest(key_to_filename=my_key_to_fname) as c1: + c1[('spam', 'eggs')] = 'spam and eggs' + c1.flush() + with tmp_chest() as c2: + c2.update(c1) + c2.flush() + assert c2[('spam', 'eggs')] == 'spam and eggs'