Skip to content

Commit

Permalink
API: zipped -> compress
Browse files Browse the repository at this point in the history
I might want to use algorithms unrelated to 'zip' one day.
  • Loading branch information
GaelVaroquaux committed Dec 28, 2011
1 parent fc22b70 commit 5a243eb
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 33 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Expand Up @@ -5,6 +5,12 @@ Latest changes
Release 0.5.7
---------------

Gael Varoquaux
2011-12-26

ENH: Add a compress option to Memory
API: Memory no longer accepts save_npy

Kenneth C. Arnold and Gael Varoquaux
2011-12-22

Expand Down
34 changes: 17 additions & 17 deletions joblib/memory.py
Expand Up @@ -96,8 +96,8 @@ class MemorizedFunc(Logger):
The memmapping mode used when loading from cache
numpy arrays. See numpy.load for the meaning of the
arguments.
zipped: boolean
Whether to zip the stored data on disk. Note that zipped
compress: boolean
Whether to compress the stored data on disk. Note that compressed
arrays cannot be read by memmapping.
verbose: int, optional
The verbosity flag, controls messages that are issued as
Expand All @@ -108,7 +108,7 @@ class MemorizedFunc(Logger):
#-------------------------------------------------------------------------

def __init__(self, func, cachedir, ignore=None, mmap_mode=None,
zipped=False, verbose=1, timestamp=None):
compress=False, verbose=1, timestamp=None):
"""
Parameters
----------
Expand All @@ -134,9 +134,9 @@ def __init__(self, func, cachedir, ignore=None, mmap_mode=None,
self.cachedir = cachedir
self.func = func
self.mmap_mode = mmap_mode
self.zipped = zipped
if zipped and mmap_mode is not None:
warnings.warn('Zipped results cannot be memmapped',
self.compress = compress
if compress and mmap_mode is not None:
warnings.warn('Compressed results cannot be memmapped',
stacklevel=2)
if timestamp is None:
timestamp = time.time()
Expand Down Expand Up @@ -190,7 +190,7 @@ def __reduce__(self):
In addition, when unpickling, we run the __init__
"""
return (self.__class__, (self.func, self.cachedir, self.ignore,
self.mmap_mode, self.zipped, self._verbose))
self.mmap_mode, self.compress, self._verbose))

#-------------------------------------------------------------------------
# Private interface
Expand Down Expand Up @@ -368,7 +368,7 @@ def _persist_output(self, output, dir):
try:
mkdirp(dir)
filename = os.path.join(dir, 'output.pkl')
numpy_pickle.dump(output, filename, zipped=self.zipped)
numpy_pickle.dump(output, filename, compress=self.compress)
except OSError:
" Race condition in the creation of the directory "

Expand Down Expand Up @@ -437,7 +437,7 @@ class Memory(Logger):
# Public interface
#-------------------------------------------------------------------------

def __init__(self, cachedir, mmap_mode=None, zipped=False, verbose=1):
def __init__(self, cachedir, mmap_mode=None, compress=False, verbose=1):
"""
Parameters
----------
Expand All @@ -449,9 +449,9 @@ def __init__(self, cachedir, mmap_mode=None, zipped=False, verbose=1):
The memmapping mode used when loading from cache
numpy arrays. See numpy.load for the meaning of the
arguments.
zipped: boolean
Whether to zip the stored data on disk. Note that zipped
arrays cannot be read by memmapping.
compress: boolean
Whether to compress the stored data on disk. Note that
compressed arrays cannot be read by memmapping.
verbose: int, optional
Verbosity flag, controls the debug messages that are issued
as functions are re-evaluated.
Expand All @@ -461,9 +461,9 @@ def __init__(self, cachedir, mmap_mode=None, zipped=False, verbose=1):
self._verbose = verbose
self.mmap_mode = mmap_mode
self.timestamp = time.time()
self.zipped = zipped
if zipped and mmap_mode is not None:
warnings.warn('Zipped results cannot be memmapped',
self.compress = compress
if compress and mmap_mode is not None:
warnings.warn('Compressed results cannot be memmapped',
stacklevel=2)
if cachedir is None:
self.cachedir = None
Expand Down Expand Up @@ -513,7 +513,7 @@ def cache(self, func=None, ignore=None, verbose=None,
return MemorizedFunc(func, cachedir=self.cachedir,
mmap_mode=mmap_mode,
ignore=ignore,
zipped=self.zipped,
compress=self.compress,
verbose=verbose,
timestamp=self.timestamp)

Expand Down Expand Up @@ -554,4 +554,4 @@ def __reduce__(self):
"""
# We need to remove 'joblib' from the end of cachedir
return (self.__class__, (self.cachedir[:-7],
self.mmap_mode, self.zipped, self._verbose))
self.mmap_mode, self.compress, self._verbose))
16 changes: 8 additions & 8 deletions joblib/numpy_pickle.py
Expand Up @@ -168,7 +168,7 @@ def _open_file(self, name):
###############################################################################
# Utility functions

def dump(value, filename, zipped=False):
def dump(value, filename, compress=False):
""" Persist an arbitrary Python object into a filename, with numpy arrays
saved as separate .npy files.
Expand All @@ -178,25 +178,25 @@ def dump(value, filename, zipped=False):
The object to store to disk
filename: string
The name of the file in which it is to be stored
zipped: boolean, optional
compress: boolean, optional
Whether to compress the data on the disk or not
Returns
-------
filenames: list of strings
The list of file names in which the data is stored. If zipped
is false, each array is stored in a different file.
The list of file names in which the data is stored. If
compress is false, each array is stored in a different file.
See Also
--------
joblib.load : corresponding loader
Notes
-----
zipped file take extra disk space during the dump, and extra
Compressed files take extra disk space during the dump, and extra
memory during loading.
"""
if zipped:
if compress:
return _dump_zipped(value, filename)
else:
return _dump(value, filename)
Expand Down Expand Up @@ -249,8 +249,8 @@ def load(filename, mmap_mode=None):
The name of the file from which to load the object
mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
If not None, the arrays are memory-mapped from the disk. This
mode has not effect for zipped files. Note that in this
case the reconstructed object might not longer match exactly
mode has no effect for compressed files. Note that in this
case the reconstructed object might no longer match exactly
the originally pickled object.
Returns
Expand Down
4 changes: 2 additions & 2 deletions joblib/test/test_memory.py
Expand Up @@ -100,8 +100,8 @@ def f(l):
yield test

# Now test clearing
for zipped in (False, True):
memory = Memory(cachedir=env['dir'], verbose=0, zipped=zipped)
for compress in (False, True):
memory = Memory(cachedir=env['dir'], verbose=0, compress=compress)
# First clear the cache directory, to check that our code can
# handle that
# NOTE: this line would raise an exception, as the database file is still
Expand Down
12 changes: 6 additions & 6 deletions joblib/test/test_numpy_pickle.py
Expand Up @@ -113,9 +113,9 @@ def test_standard_types():
#""" Test pickling and saving with standard types.
#"""
filename = env['filename']
for zipped in [True, False]:
for compress in [True, False]:
for member in typelist:
numpy_pickle.dump(member, filename, zipped=zipped)
numpy_pickle.dump(member, filename, compress=compress)
_member = numpy_pickle.load(filename)
# We compare the pickled instance to the reloaded one only if it
# can be compared to a copied one
Expand All @@ -127,10 +127,10 @@ def test_standard_types():
def test_numpy_persistence():
filename = env['filename']
a = np.random.random(10)
for zipped in [True, False]:
for compress in [True, False]:
for obj in (a,), (a, a), [a, a, a]:
filenames = numpy_pickle.dump(obj, filename, zipped=zipped)
if not zipped:
filenames = numpy_pickle.dump(obj, filename, compress=compress)
if not compress:
# Check that one file was created per array
yield nose.tools.assert_equal, len(filenames), len(obj) + 1
# Check that these files do exist
Expand All @@ -151,7 +151,7 @@ def test_numpy_persistence():

# Now test with array subclasses
obj = np.matrix(np.zeros(10))
filenames = numpy_pickle.dump(obj, filename, zipped=zipped)
filenames = numpy_pickle.dump(obj, filename, compress=compress)
obj_ = numpy_pickle.load(filename)
yield nose.tools.assert_true, isinstance(obj_, np.matrix)

Expand Down

0 comments on commit 5a243eb

Please sign in to comment.