diff --git a/doc/source/quickstart.rst b/doc/source/quickstart.rst index 5669eed..cb0cccb 100644 --- a/doc/source/quickstart.rst +++ b/doc/source/quickstart.rst @@ -18,7 +18,7 @@ Quick start >>> del m['a_sample_key'] >>> print(m.keys()) ['other_key'] - + The contents of the dictionary are stored to disk. For example, in another python interpreter: :: @@ -27,7 +27,7 @@ The contents of the dictionary are stored to disk. For example, in another pytho >>> m = mmapdict('/tmp/test.mmdpickle') >>> print(m['other_key']) [1, 2, 3] - + It is also possible to open the file in read-only mode, in which case any modification will fail: @@ -42,7 +42,7 @@ It is also possible to open the file in read-only mode, in which case any modifi raise io.UnsupportedOperation('not writable') io.UnsupportedOperation: not writable - + Of course, the main interest is to store numpy arrays: :: @@ -50,13 +50,13 @@ Of course, the main interest is to store numpy arrays: >>> import numpy as np >>> from mmappickle.dict import mmapdict >>> m = mmapdict('/tmp/test.mmdpickle') - >>> m['test'] = np.array([1,2,3],dtype=np.uint8) + >>> m['test'] = np.array([1,2,3], dtype=np.uint8) >>> m['test'][1] = 4 >>> print(m['test']) [1 4 3] >>> print(type(m['test'])) - + As you can see, the ``m['test']`` is now memory-mapped. This means that its content is not loaded in memory, but instead accessed directly from the file. Unfortunately, the array has to exist in order to serialize it to the ``mmapdict``. If the array exceed the available memory, this won't work. Instead one should use stubs: @@ -64,7 +64,7 @@ Unfortunately, the array has to exist in order to serialize it to the ``mmapdict :: >>> from mmappickle.stubs import EmptyNDArray - >>> m['test_large']=EmptyNDArray((300,300,300)) + >>> m['test_large'] = EmptyNDArray((300,300,300)) >>> print(type(m['test_large'])) @@ -78,7 +78,7 @@ Finally, one last useful trick is the :meth:`mmappickle.mmapdict.vacuum` method. >>> #Here, /tmp/test.mmdpickle still occupies ~216M of hard disk >>> m.vacuum() >>> #Now the disk space has been reclaimed. - + .. warning :: When running :meth:`mmappickle.mmapdict.vacuum`, it is crucial that there are no other references to the file content, either in this process or in other. diff --git a/mmappickle/dict.py b/mmappickle/dict.py index ecc8efb..ca098d1 100644 --- a/mmappickle/dict.py +++ b/mmappickle/dict.py @@ -1,17 +1,20 @@ import os -import io, pickle, struct +import io +import pickle +import struct import warnings import weakref -from .utils import * +from .utils import * + class _header: """The file header is at the beginning of the file. - + It consists in the following pickle ops: - + :: - + PROTO 4 (pickle version 4 header) FRAME BININT <_file_version_number:32> POP (version of the pickle dict, 1) @@ -22,24 +25,23 @@ class _header: _file_version_number = 1 _frame_length = 13 _commit_number_position = 18 - - - def __init__(self, mmapdict, _real_header_starts_at = 0): + + def __init__(self, mmapdict, _real_header_starts_at=0): """ :param mmapdict: mmapdict object containing the data :param _real_header_starts_at: Offset of the header (normally not used) - """ + """ self._mmapdict = weakref.ref(mmapdict) self._real_header_starts_at = _real_header_starts_at - - #Check if we have a valid header + + # Check if we have a valid header if not self.exists: self.write_initial() - + @property def _file(self): return self._mmapdict()._file - + @property @save_file_position def exists(self): @@ -51,7 +53,7 @@ def exists(self): if len(newvalue) == 0: return False return True - + @require_writable @save_file_position def write_initial(self): @@ -59,15 +61,14 @@ def write_initial(self): data = pickle.BININT + struct.pack(' POP") return False @@ -103,71 +104,71 @@ def is_valid(self): if file_version_number_read != self._file_version_number: warnings.warn("File has the wrong version number {} (should be {})".format(file_version_number_read, self._file_version_number)) return False - + if frame_contents[6] != pickle.BININT[0] or frame_contents[11] != pickle.POP[0]: warnings.warn("FRAME doesn't containt BININT POP") return False - + if frame_contents[-1] != pickle.MARK[0]: warnings.warn("FRAME doesn't end with a MARK") return False - + return True - + @property @save_file_position def commit_number(self): """Commit number (revision) in the file""" self._file.seek(self._real_header_starts_at + self._commit_number_position, io.SEEK_SET) return struct.unpack(' SHORT_BINUNICODE <<< data >>> BININT POP (max memo index of this part) NEWTRUE|POP POP (if NEWTRUE POP: entry is valid, else entry is deactivated.) - """ + """ + def __init__(self, mmapdict, offset): """ :param mmapdict: mmapdict object containing the data @@ -219,44 +222,44 @@ def __init__(self, mmapdict, offset): self._mmapdict = weakref.ref(mmapdict) self._offset = offset self._exists = self._exists_initial - #Cache for non-written entries + # Cache for non-written entries self._cache = { 'valid': True, - #key, data_length, memomaxidx + # key, data_length, memomaxidx } - + def __len__(self): """:returns: the length of the key-value data""" return self._frame_length + 9 - + @property def offset(self): """:returns: the offset in the file of the key-value data""" return self._offset - + @property def end_offset(self): """:returns: the end-offset in the file of the key-value data""" return self._offset + len(self) - + @property def _file(self): return self._mmapdict()._file - + @property @save_file_position def _frame_length(self): """ - + :returns: the frame length for this _kvdata. - + This is done either by reading it in the file, or by computing it if it doesn't exist""" - + if not self._exists: return 2 + self.key_length + self.data_length + 1 + 4 + 1 + 1 + 1 self._file.seek(self._offset + 1, io.SEEK_SET) return struct.unpack(' end_offset: print("Incomplete frame starting at {}".format(frame_start)) self._file.seek(frame_start) valid = False break - + if frame_id == 1: print("[header]") self._file.seek(frame_start + frame_length + 9, io.SEEK_SET) continue - + first_data = self._file.read(1) - + if first_data == pickle.DICT: self._file.seek(frame_start + frame_length + 9 - 1, io.SEEK_SET) terminator = self._file.read(1) @@ -797,22 +799,22 @@ def fsck(self): print("[terminator (invalid)]") self._file.seek(frame_start, io.SEEK_SET) break - + if first_data != pickle.SHORT_BINUNICODE: print("[Unknown stuff starting with {}]".format(first_data)) self._file.seek(frame_start, io.SEEK_SET) valid = False break - + key_length = self._file.read(1)[0] - + print("Frame [{}]".format(self._file.read(key_length).decode('utf8'))) self._file.seek(frame_start + frame_length + 9, io.SEEK_SET) - + self._file.truncate() self._terminator.write() return valid - + if __name__ == '__main__': import sys @@ -820,5 +822,5 @@ def fsck(self): if len(sys.argv) != 2: print("If run directly, this script requires exactly one argument, and converts it to the latest mmapdict format.") sys.exit(1) - + m = mmapdict(sys.argv[1]) diff --git a/mmappickle/picklers/__init__.py b/mmappickle/picklers/__init__.py index 2b057d0..d1fba81 100644 --- a/mmappickle/picklers/__init__.py +++ b/mmappickle/picklers/__init__.py @@ -8,5 +8,5 @@ __all__.append('ArrayPickler') __all__.append('MaskedArrayPickler') except ImportError: - #No numpy, just ignore what would not be loadable + # No numpy, just ignore what would not be loadable pass diff --git a/mmappickle/picklers/base.py b/mmappickle/picklers/base.py index 964869a..e2e48d3 100644 --- a/mmappickle/picklers/base.py +++ b/mmappickle/picklers/base.py @@ -3,76 +3,77 @@ import pickletools import weakref -from ..utils import * +from ..utils import * + class BasePickler: """Picklers will be attempted in decreasing priority order""" priority = 0 - + def __init__(self, parent_object): self._parent_object = weakref.ref(parent_object) - + @property def _file(self): return self._parent_object()._file - + @save_file_position def is_valid(self, offset, length): """ Return True if object starting at offset in f is valid. - + File position is kept. """ return False - + def is_picklable(self, obj): """ Return True if object can be pickled with this pickler """ return False - + @save_file_position def read(self, offset, length): """Return the unpickled object read from offset, and the length read. The file position is kept.""" raise NotImplementedError("Should be subclassed") - + @save_file_position - def write(self, obj, offset, memo_start_idx = 0): + def write(self, obj, offset, memo_start_idx=0): """ Write the pickled object to the file stream, the file position is kept. - + Returns a tuple (number of bytes, last memo index)""" raise NotImplementedError("Should be subclassed") - + def _pickle_load_fix(self, p): """Load a pickle object from p, adding the header and the terminator. Returns the object.""" - p = pickle.PROTO + struct.pack('= 256: raise ValueError("dtype length should be less than 256") self._file.seek(offset, io.SEEK_SET) retlength = 0 retlength += self._file.write(self._header) - - #Write a 64-bits long bytes string + + # Write a 64-bits long bytes string retlength += self._file.write(pickle.BINBYTES8) - #skip the (yet) unknown size + # skip the (yet) unknown size self._file.seek(8, io.SEEK_CUR) - - #Write to file + + # Write to file startpos = self._file.tell() obj.tofile(self._file) endpos = self._file.tell() - + retlength += (endpos - startpos) - - #Write length + + # Write length self._file.seek(startpos - 8, io.SEEK_SET) retlength += self._file.write(struct.pack('=3.4', - #Numpy is required to have memmap array, but it still makes sense to use this module - #without it, so it is not a requirement per-se. + # Numpy is required to have memmap array, but it still makes sense to use this module + # without it, so it is not a requirement per-se. 'install_requires': [] + platform_requirements, } diff --git a/tests/test_dict.py b/tests/test_dict.py index 08e8434..0177e42 100644 --- a/tests/test_dict.py +++ b/tests/test_dict.py @@ -12,45 +12,46 @@ from mmappickle.picklers.numpy import ArrayPickler, MaskedArrayPickler from mmappickle.stubs.numpy import EmptyNDArray + class TestDictBase(unittest.TestCase): def test_creation(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) self.assertTrue(m._header.is_valid()) - + def test_commit_number(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) self.assertEqual(m._header.commit_number, 0) self.assertTrue(m._header.is_valid()) - + m._header.commit_number = 32 self.assertEqual(m._header.commit_number, 32) self.assertTrue(m._header.is_valid()) - + m._header.commit_number = 465468 self.assertEqual(m._header.commit_number, 465468) self.assertTrue(m._header.is_valid()) - + with self.assertRaises(TypeError): m._header.commit_number = 'a' self.assertTrue(m._header.is_valid()) - + def test_valid_pickle(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) f.seek(0) - + d = pickle.load(f) self.assertDictEqual(d, {}) - + def test_destructor(self): import weakref with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) m_ref = weakref.ref(m) del m - + self.assertIsNone(m_ref(), "Reference to object is still valid, something is wrong (using object instance instead of weakref.ref?)") def test_nonexistent(self): @@ -58,120 +59,130 @@ def test_nonexistent(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.close() os.unlink(f.name) - + with self.assertRaises(FileNotFoundError): - m = mmapdict(f.name, True, picklers = [GenericPickler]) - #This works and re-create the file - m = mmapdict(f.name, False, picklers = [GenericPickler]) - + m = mmapdict(f.name, True, picklers=[GenericPickler]) + # This works and re-create the file + m = mmapdict(f.name, False, picklers=[GenericPickler]) + del m os.unlink(f.name) - + def test_notafile(self): import os with self.assertRaises(TypeError): - d = mmapdict({}, picklers = [GenericPickler]) - - + d = mmapdict({}, picklers=[GenericPickler]) + def test_readonly(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.close() - - m = mmapdict(f.name, picklers = [GenericPickler]) + + m = mmapdict(f.name, picklers=[GenericPickler]) m['test1'] = 234 self.assertTrue(m.writable) - - m2 = mmapdict(f.name, True, picklers = [GenericPickler]) + + m2 = mmapdict(f.name, True, picklers=[GenericPickler]) self.assertFalse(m2.writable) with self.assertRaises(io.UnsupportedOperation): - m2['test1'] = 123 + m2['test1'] = 123 with self.assertRaises(io.UnsupportedOperation): m2['test2'] = 123 with self.assertRaises(io.UnsupportedOperation): del m2['test1'] - + def test_convert(self): with tempfile.NamedTemporaryFile() as f: v = {'abc': 123} pickle.dump(v, f) f.flush() - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) assert dict(m) == v - + def test_put_opcodes(self): - import string, itertools + import string + import itertools d_vals = {} for s in itertools.product(string.ascii_letters, string.ascii_letters): s = ''.join(s) d_vals[s] = s - + with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) for i in range(5): m['test-{}'.format(i)] = d_vals for i in range(5): self.assertDictEqual(m['test-{}'.format(i)], d_vals) - - - - + def _test_bad_file(self, d, exc=None): with tempfile.NamedTemporaryFile(delete=False) as f: f.write(d) f.flush() f.close() - + if exc is not None: with self.assertRaises(exc): - m = mmapdict(f.name, picklers = [GenericPickler]) + m = mmapdict(f.name, picklers=[GenericPickler]) else: - m = mmapdict(f.name, picklers = [GenericPickler]) - + m = mmapdict(f.name, picklers=[GenericPickler]) + os.unlink(f.name) - + def test_bad_file_header_1(self): self._test_bad_file(b'\x80', ValueError) + def test_bad_file_header_2(self): self._test_bad_file(b'\x81\x04', ValueError) + def test_bad_file_header_3(self): self._test_bad_file(b'\x80\x03', ValueError) + def test_bad_file_header_4(self): self._test_bad_file(b'\x80\x04\x00', ValueError) + def test_bad_file_header_5(self): self._test_bad_file(b'\x80\x04\x95', ValueError) + def test_bad_file_header_6(self): self._test_bad_file(b'\x80\x04\x95\x00\x00\x00\x00\x00\x00\x00\x00', ValueError) + def test_bad_file_header_7(self): self._test_bad_file(b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00', ValueError) + def test_bad_file_header_8(self): self._test_bad_file(b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00111110111111111', ValueError) + def test_bad_file_header_9(self): self._test_bad_file(b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00J11111111111111', ValueError) + def test_bad_file_header_10(self): self._test_bad_file(b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00J11110111111111', ValueError) + def test_bad_file_header_11(self): self._test_bad_file(b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00J\x01\x00\x00\x000X\x01\x00\x00\x0001', ValueError) + def test_bad_file_header_12(self): self._test_bad_file(b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00J\x01\x00\x00\x000J\x01\x00\x00\x0011', ValueError) + def test_bad_file_header_13(self): self._test_bad_file(b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00J\x01\x00\x00\x000J\x01\x00\x00\x0001', ValueError) - - + def test_bad_terminator(self): valid_header = b'\x80\x04\x95\x0d\x00\x00\x00\x00\x00\x00\x00J\x01\x00\x00\x000J\x01\x00\x00\x000(' + + class TestKvdata(unittest.TestCase): - #Since kvdata is fairly complex, it is tested individually - + # Since kvdata is fairly complex, it is tested individually + class DictMock: class TerminatorMock: def write(self): pass - + def __init__(self, file): self._file = file self._terminator = self.TerminatorMock() - + def test_cache(self): from mmappickle.dict import _kvdata with tempfile.TemporaryFile() as f: @@ -179,28 +190,28 @@ def test_cache(self): k = _kvdata(d, 0) k.data_length = 34 with self.assertRaises(ValueError): - k.data_length = -1 + k.data_length = -1 self.assertEqual(k.data_length, 34) - - k = _kvdata(d, 0) #restart + + k = _kvdata(d, 0) # restart k.memomaxidx = 1234 with self.assertRaises(ValueError): k.memomaxidx = -1 self.assertEqual(k.memomaxidx, 1234) - - k = _kvdata(d, 0) #restart + + k = _kvdata(d, 0) # restart k.key = "test" self.assertEqual(k.key, "test") - - k = _kvdata(d, 0) #restart + + k = _kvdata(d, 0) # restart with self.assertRaises(TypeError): k.valid = 1 - self.assertEqual(k.valid, True) #should be valid by default + self.assertEqual(k.valid, True) # should be valid by default k.valid = False self.assertEqual(k.valid, False) k.valid = True self.assertEqual(k.valid, True) - + def test_1(self): from mmappickle.dict import _kvdata with tempfile.TemporaryFile() as f: @@ -208,27 +219,27 @@ def test_1(self): k = _kvdata(d, 0) k.key = 'test' with self.assertRaises(ValueError): - k.data_length = -1 + k.data_length = -1 k.data_length = 10 k.memomaxidx = 5 - + self.assertEqual(k.key, 'test') self.assertEqual(k.data_length, 10) self.assertEqual(k.memomaxidx, 5) self.assertEqual(k.valid, True) - + + k._write_if_allowed() k._write_if_allowed() - k._write_if_allowed() - + k.valid = False self.assertEqual(k.key, 'test') self.assertEqual(k.data_length, 10) self.assertEqual(k.memomaxidx, 5) self.assertEqual(k.valid, False) - + k._write_if_allowed() k._write_if_allowed() - + def test_2(self): from mmappickle.dict import _kvdata with tempfile.TemporaryFile() as f: @@ -237,19 +248,18 @@ def test_2(self): k.valid = False k.key = 'test' with self.assertRaises(ValueError): - k.data_length = -1 + k.data_length = -1 k.data_length = 10 k.memomaxidx = 5 - - + self.assertEqual(k.key, 'test') self.assertEqual(k.data_length, 10) self.assertEqual(k.memomaxidx, 5) self.assertEqual(k.valid, False) - + k._write_if_allowed() k._write_if_allowed() - + def test_pickle(self): from mmappickle.dict import _kvdata with tempfile.TemporaryFile() as f: @@ -263,190 +273,192 @@ def test_pickle(self): k.memomaxidx = 0 f.seek(0, io.SEEK_END) f.write(pickle.DICT + pickle.STOP) - + f.seek(0, io.SEEK_SET) - self.assertDictEqual(pickle.load(f), {'test': True,}) - + self.assertDictEqual(pickle.load(f), {'test': True, }) + k.valid = False f.seek(0, io.SEEK_SET) self.assertDictEqual(pickle.load(f), {}) - + k.valid = True f.seek(0, io.SEEK_SET) - self.assertDictEqual(pickle.load(f), {'test': True,}) - + self.assertDictEqual(pickle.load(f), {'test': True, }) + with self.assertRaises(RuntimeError): k.data_length = 123 - + with self.assertRaises(AttributeError): k.data_offset = 123 - + with self.assertRaises(RuntimeError): k.key = 'ABC' - + with self.assertRaises(AttributeError): k.key_length = 123 - + with self.assertRaises(RuntimeError): k.memomaxidx = 123 - + + class TestDict(unittest.TestCase): def _dump_file(self, f): f.seek(0, io.SEEK_SET) pickletools.dis(f) - + def test_notpicklable(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = []) + m = mmapdict(f, picklers=[]) with self.assertRaises(TypeError): m['test'] = 'abc' - + def test_notreadable(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) m['test'] = 'abc' - - m = mmapdict(f, picklers = []) + + m = mmapdict(f, picklers=[]) with self.assertRaises(ValueError): m['test'] == 'abc' - + def test_nonexistentkey(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) with self.assertRaises(KeyError): del m['test'] with self.assertRaises(KeyError): m['test'] - + def test_empty(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) - + m = mmapdict(f, picklers=[GenericPickler]) + f.seek(0) d = pickle.load(f) self.assertDictEqual(d, {}) - + def test_store_simple(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) m['test'] = 'abc' with self.assertRaises(TypeError): m[1] = 'aaa' - + self.assertEqual(m['test'], 'abc') d = pickle.load(f) - self.assertDictEqual(d, {'test': 'abc',}) - + self.assertDictEqual(d, {'test': 'abc', }) + def test_readonly(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.close() - - m = mmapdict(f.name, picklers = [GenericPickler]) + + m = mmapdict(f.name, picklers=[GenericPickler]) m['test'] = 'abc' self.assertEqual(m['test'], 'abc') del m - - - m2 = mmapdict(f.name, readonly = True, picklers = [GenericPickler]) + + m2 = mmapdict(f.name, readonly=True, picklers=[GenericPickler]) self.assertEqual(m2['test'], 'abc') del m2 - + os.unlink(f.name) - + def test_store_ref(self): with tempfile.TemporaryFile() as f: obj = "1234" obj2 = "machin" - dict_a = {obj: obj, '3': obj,} - dict_b = {obj: obj2, obj2: obj2, 2: 4, 4: obj,} + dict_a = {obj: obj, '3': obj, } + dict_b = {obj: obj2, obj2: obj2, 2: 4, 4: obj, } m = mmapdict(f) m['dict_a'] = dict_a m['dict_b'] = dict_b - + self.assertEqual(m['dict_a'], dict_a) self.assertEqual(m['dict_b'], dict_b) d = pickle.load(f) - self.assertDictEqual(d, {'dict_a': dict_a, 'dict_b': dict_b,}) - + self.assertDictEqual(d, {'dict_a': dict_a, 'dict_b': dict_b, }) + import collections od = collections.OrderedDict() od['obj'] = obj od['obj2'] = obj2 od[obj] = 3 m['od'] = od - - #This should not fail, but will have no effect + + # This should not fail, but will have no effect m['od']['machin'] = 'abc' - + self.assertEqual(m['od'], od) f.seek(0, io.SEEK_SET) d = pickle.load(f) - + m['od'] = 'abc' - + self.assertEqual(m['od'], 'abc') - + f.seek(0, io.SEEK_SET) - d = pickle.load(f) + d = pickle.load(f) self.assertEqual(d['od'], 'abc') - - #self._dump_file(f) - + + # self._dump_file(f) + def test_delitem(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) m['test'] = 'abc' self.assertEqual(m['test'], 'abc') - + del m['test'] with self.assertRaises(KeyError): print(m['test']) - - #reset + + # reset m = mmapdict(f) with self.assertRaises(KeyError): print(m['test']) - + f.seek(0, io.SEEK_SET) d = pickle.load(f) self.assertDictEqual(d, {}) - + + class TestDictNumpyArray(unittest.TestCase): def _dump_file(self, f): f.seek(0, io.SEEK_SET) - pickletools.dis(f) + pickletools.dis(f) + def test_store_simple(self): with tempfile.TemporaryFile() as f: data = numpy.array([[1, 2, 3], [4, 5, 6]]) - m = mmapdict(f, picklers = [ArrayPickler, GenericPickler]) + m = mmapdict(f, picklers=[ArrayPickler, GenericPickler]) m['test'] = data self.assertIsInstance(m['test'], numpy.memmap) numpy.testing.assert_array_equal(m['test'], data) f.seek(0) d = pickle.load(f) numpy.testing.assert_array_equal(d['test'], data) - + def test_store_empty(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [ArrayPickler]) - m['test'] = EmptyNDArray((10, 9), dtype = numpy.float32) - + m = mmapdict(f, picklers=[ArrayPickler]) + m['test'] = EmptyNDArray((10, 9), dtype=numpy.float32) + self.assertIsInstance(m['test'], numpy.memmap) self.assertEqual(m['test'].shape, (10, 9)) - + def test_store_dims(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [ArrayPickler]) + m = mmapdict(f, picklers=[ArrayPickler]) for i in range(1, 9): - m['test{}'.format(i)] = numpy.zeros(tuple([1]*i), dtype = numpy.float32) - + m['test{}'.format(i)] = numpy.zeros(tuple([1]*i), dtype=numpy.float32) + for i in range(1, 9): self.assertEqual(m['test{}'.format(i)].ndim, i) - + def test_store_masked(self): with tempfile.TemporaryFile() as f: data = numpy.ma.MaskedArray([[1, 2, 3], [4, 5, 6]], [[False, True, False], [True, False, True]]) - m = mmapdict(f, picklers = [MaskedArrayPickler]) + m = mmapdict(f, picklers=[MaskedArrayPickler]) m['test'] = data self.assertIsInstance(m['test'].data, numpy.memmap) self.assertIsInstance(m['test'].mask, numpy.memmap) @@ -454,12 +466,12 @@ def test_store_masked(self): f.seek(0) d = pickle.load(f) numpy.testing.assert_array_equal(d['test'], data) - + def test_masked_bug(self): with tempfile.TemporaryFile() as f: data = numpy.random.rand(100) data = numpy.ma.masked_invalid(data) - m = mmapdict(f, picklers = [MaskedArrayPickler]) + m = mmapdict(f, picklers=[MaskedArrayPickler]) m['test'] = data self.assertIsInstance(m['test'].data, numpy.memmap) self.assertIsInstance(m['test'].mask, numpy.memmap) @@ -467,81 +479,85 @@ def test_masked_bug(self): f.seek(0) d = pickle.load(f) numpy.testing.assert_array_equal(d['test'], data) - + def test_masked_empty(self): with tempfile.TemporaryFile() as f: data = numpy.ma.zeros([2, 3], dtype=numpy.int64) - m = mmapdict(f, picklers = [MaskedArrayPickler, ArrayPickler, GenericPickler]) - m['test'] = data + m = mmapdict(f, picklers=[MaskedArrayPickler, ArrayPickler, GenericPickler]) + m['test'] = data self.assertIsInstance(m['test'], numpy.ma.masked_array) self.assertIsInstance(m['test'].data, numpy.memmap) - self.assertIsInstance(m['test'].mask, numpy.memmap) + self.assertIsInstance(m['test'].mask, numpy.memmap) numpy.testing.assert_array_equal(m['test'], data) d = pickle.load(f) numpy.testing.assert_array_equal(d['test'], data) - + def test_readonly(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.close() - - m = mmapdict(f.name, picklers = [ArrayPickler]) - m['test'] = numpy.zeros((1, ), dtype = numpy.float32) - - m2 = mmapdict(f.name, True, picklers = [ArrayPickler]) + + m = mmapdict(f.name, picklers=[ArrayPickler]) + m['test'] = numpy.zeros((1, ), dtype=numpy.float32) + + m2 = mmapdict(f.name, True, picklers=[ArrayPickler]) with self.assertRaises(ValueError): m2['test'][0] = 2 - + del m del m2 os.unlink(f.name) - + def _tc_increment(args): m, idx = args m['value'][idx] += 1 - + + class TestConcurrent(unittest.TestCase): def test_concurrent_1(self): with tempfile.NamedTemporaryFile(delete=False) as f: f.close() - + m = mmapdict(f.name) m['value'] = numpy.zeros((4, ), numpy.float) - - import multiprocessing, itertools + + import multiprocessing + import itertools with multiprocessing.Pool(4) as p: p.map(_tc_increment, itertools.product([m], range(4))) - + self.assertEqual(m['value'][0], 1) self.assertEqual(m['value'][1], 1) self.assertEqual(m['value'][2], 1) self.assertEqual(m['value'][3], 1) - + del m - + os.unlink(f.name) - + def test_concurrent_mmapdict_pickle(self): - #This is not a real test, but it fixes the converage computation since the previous test in not counted + # This is not a real test, but it fixes the converage computation since the previous test in not counted with tempfile.NamedTemporaryFile(delete=False) as f: f.close() - + m = mmapdict(f.name) - + m2 = pickle.loads(pickle.dumps(m)) - + del m del m2 - + os.unlink(f.name) + class TestVacuum(unittest.TestCase): def _dump_file(self, f): f.seek(0, io.SEEK_SET) - pickletools.dis(f) + pickletools.dis(f) + def test_vacuum(self): with tempfile.TemporaryFile() as f: - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) m['a'] = 1 m['b'] = 2 m['c'] = 3 @@ -550,49 +566,51 @@ def test_vacuum(self): m['f'] = 6 m['g'] = 7 m['h'] = 8 - + f.seek(0, io.SEEK_END) fsizebefore = f.tell() - + del m['b'] del m['d'] del m['f'] del m['g'] - + f.seek(0, io.SEEK_SET) valid_dict = pickle.load(f) - + self.assertDictEqual(dict(m), valid_dict) - + f.seek(0, io.SEEK_END) fsizeafterdel = f.tell() - + self.assertEqual(fsizebefore, fsizeafterdel) old_commit_number = m.commit_number - + m.vacuum() - + f.seek(0, io.SEEK_END) fsizeaftervacuum = f.tell() - + self.assertNotEqual(fsizebefore, fsizeaftervacuum) self.assertNotEqual(old_commit_number, m.commit_number) self.assertDictEqual(dict(m), valid_dict) - + m.commit_number = 143 m.vacuum() self.assertEqual(143, m.commit_number) - - #We have to do something otherwise the vacuum won't do anything + + # We have to do something otherwise the vacuum won't do anything del m['a'] m.commit_number = 0 m.vacuum() self.assertNotEqual(0, m.commit_number) - + + class TestConvert(unittest.TestCase): def _dump_file(self, f): f.seek(0, io.SEEK_SET) - pickletools.dis(f) + pickletools.dis(f) + def test_convert(self): with tempfile.TemporaryFile() as f: d = { @@ -601,66 +619,63 @@ def test_convert(self): 'c': 'test', } pickle.dump(d, f) - - m = mmapdict(f, picklers = [GenericPickler]) + + m = mmapdict(f, picklers=[GenericPickler]) self.assertDictEqual(dict(m), d) - + def test_convert_not_possible(self): with tempfile.TemporaryFile() as f: d = 'abc' pickle.dump(d, f) - + with self.assertRaises(ValueError): - m = mmapdict(f, picklers = [GenericPickler]) + m = mmapdict(f, picklers=[GenericPickler]) - def test_broken(self): with tempfile.TemporaryFile() as f: m = mmapdict(f) m['a'] = 1 - #self.assertTrue(m.fsck()) + # self.assertTrue(m.fsck()) m['b'] = (1, 2, 3) m['c'] = 'test' - + f.seek(0, io.SEEK_END) - original_size = f.tell() + original_size = f.tell() original_dict = dict(m) - - #self.assertTrue(m.fsck()) - + + # self.assertTrue(m.fsck()) + f.seek(0, io.SEEK_END) - + self.assertDictEqual(original_dict, dict(m)) - self.assertEqual(original_size, f.tell()) + self.assertEqual(original_size, f.tell()) for i in range(1, 13): del m - + f.truncate(original_size-i) - + m = mmapdict(f) m.fsck() - + f.seek(0, io.SEEK_END) - + self.assertDictEqual(original_dict, dict(m)) self.assertEqual(original_size, f.tell()) - - #We loose one key... + + # We loose one key... del original_dict['c'] for i in range(14, 20): del m - + f.truncate(original_size-i) - + m = mmapdict(f) m.fsck() - + f.seek(0, io.SEEK_END) - + self.assertDictEqual(original_dict, dict(m)) - - - - + + if __name__ == '__main__': unittest.main() diff --git a/tests/test_module.py b/tests/test_module.py index 99bb5a1..b7b1ae0 100644 --- a/tests/test_module.py +++ b/tests/test_module.py @@ -1,23 +1,26 @@ import unittest import re + class TestModule(unittest.TestCase): def test_version_is_canonical(self): import mmappickle - #Regex from PEP-440 - self.assertIsNotNone(re.match(r'^([1-9]\d*!)?(0|[1-9]\d*)(\.(0|[1-9]\d*))*((a|b|rc)(0|[1-9]\d*))?(\.post(0|[1-9]\d*))?(\.dev(0|[1-9]\d*))?$', mmappickle.__version__)) - + # Regex from PEP-440 + self.assertIsNotNone( + re.match(r'^([1-9]\d*!)?(0|[1-9]\d*)(\.(0|[1-9]\d*))*((a|b|rc)(0|[1-9]\d*))?(\.post(0|[1-9]\d*))?(\.dev(0|[1-9]\d*))?$', mmappickle.__version__)) + def test_picklersdiscovery(self): from mmappickle.dict import mmapdict import tempfile from mmappickle.picklers import GenericPickler + class TestPickler(GenericPickler): pass - + with tempfile.TemporaryFile() as f: m = mmapdict(f) - self.assertIn(True, [x.__class__.__name__ == 'TestPickler' for x in m._picklers]) + self.assertIn(True, [x.__class__.__name__ == 'TestPickler' for x in m._picklers]) + if __name__ == '__main__': unittest.main() - diff --git a/tests/test_without_numpy.py b/tests/test_without_numpy.py index 6e837eb..f17f765 100644 --- a/tests/test_without_numpy.py +++ b/tests/test_without_numpy.py @@ -2,6 +2,7 @@ import re import sys + class NumpyFailsToLoadImporter: def find_module(self, fullname, path=None): if fullname.startswith('numpy.') or fullname == 'numpy': @@ -10,17 +11,18 @@ def find_module(self, fullname, path=None): def load_module(self, fullname): raise ImportError(fullname) - + + sys.meta_path.insert(0, NumpyFailsToLoadImporter()) class TestModule(unittest.TestCase): def test_import_without_numpy(self): - #For some reason, this test doesn't work in coverage mode... - #(i.e. execution paths are not followed) - import mmappickle, mmappickle.stubs + # For some reason, this test doesn't work in coverage mode... + # (i.e. execution paths are not followed) + import mmappickle + import mmappickle.stubs + if __name__ == '__main__': unittest.main() - -