diff --git a/chest/core.py b/chest/core.py index c1b1b31..a65ceb7 100644 --- a/chest/core.py +++ b/chest/core.py @@ -82,7 +82,7 @@ def __init__(self, data=None, path=None, available_memory=None, # In memory storage self.inmem = data or dict() # A set of keys held both in memory or on disk - self._keys = set() + self._keys = {} # Was a path given or no? If not we'll clean up the directory later self._explicitly_given_path = path is not None # Diretory where the on-disk data will be held @@ -101,7 +101,7 @@ def __init__(self, data=None, path=None, available_memory=None, keyfile = os.path.join(self.path, '.keys') if os.path.exists(keyfile): with open(keyfile, mode='r'+self.mode) as f: - self._keys = set(self.load(f)) + self._keys = dict(self.load(f)) self.lock = Lock() @@ -123,7 +123,7 @@ def key_to_filename(self, key): def move_to_disk(self, key): """ Move data from memory onto disk """ self._on_overflow(key) - fn = self.key_to_filename(key) + fn = self._keys[key] if not os.path.exists(fn): # Only write if it doesn't exist. dir = os.path.dirname(fn) if not os.path.exists(dir): @@ -143,7 +143,7 @@ def get_from_disk(self, key): self._on_miss(key) - fn = self.key_to_filename(key) + fn = self._keys[key] with open(fn, mode='r'+self.mode) as f: value = self.load(f) @@ -176,11 +176,11 @@ def __delitem__(self, key): if key in self.heap: del self.heap[key] - fn = self.key_to_filename(key) + fn = self._keys[key] if os.path.exists(fn): os.remove(fn) - self._keys.remove(key) + del self._keys[key] def __setitem__(self, key, value): with self.lock: @@ -188,7 +188,7 @@ def __setitem__(self, key, value): del self[key] self.inmem[key] = value - self._keys.add(key) + self._keys[key] = self.key_to_filename(key) self._update_lru(key) with self.lock: @@ -242,7 +242,7 @@ def drop(self): def write_keys(self): fn = os.path.join(self.path, '.keys') with open(fn, mode='w'+self.mode) as f: - self.dump(list(self._keys), f) + self.dump(list(self._keys.items()), f) def flush(self): """ Flush all in-memory storage to disk """ @@ -275,13 +275,13 @@ def update(self, other, overwrite=True): del self[key] elif key in self._keys and not overwrite: continue - old_fn = os.path.join(other.path, other._key_to_filename(key)) + old_fn = other._keys[key] new_fn = os.path.join(self.path, self._key_to_filename(key)) dir = os.path.dirname(new_fn) if not os.path.exists(dir): os.makedirs(dir) os.link(old_fn, new_fn) - self._keys.add(key) + self._keys[key] = new_fn def nbytes(o): diff --git a/chest/tests/test_core.py b/chest/tests/test_core.py index e66003e..53bc27d 100644 --- a/chest/tests/test_core.py +++ b/chest/tests/test_core.py @@ -8,6 +8,7 @@ import numpy as np from chest.utils import raises import time +import hashlib @contextmanager @@ -27,6 +28,11 @@ def tmp_chest(*args, **kwargs): pass +def my_key_to_fname(key): + fname = str(hashlib.md5(str(key).encode()).hexdigest()) + return fname + + def test_basic(): with tmp_chest() as c: c[1] = 'one' @@ -384,3 +390,13 @@ def test_nested_files_with_tuples(): c['a', 'b', 'c', 'd', 'e'] = 5 c.flush() assert c['a', 'b', 'c', 'd', 'e'] == 5 + + +def test_store_fnames(): + with tmp_chest(key_to_filename=my_key_to_fname) as c1: + c1[('spam', 'eggs')] = 'spam and eggs' + c1.flush() + with tmp_chest() as c2: + c2.update(c1) + c2.flush() + assert c2[('spam', 'eggs')] == 'spam and eggs'