Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 423 lines (361 sloc) 15.333 kb
074b5f9 @GaelVaroquaux ENH: The numpy pickler is now working.
GaelVaroquaux authored
1 """
04e0b93 @GaelVaroquaux DOC: improve doc/docstrings
GaelVaroquaux authored
2 Utilities for fast persistence of big data, with optional compression.
074b5f9 @GaelVaroquaux ENH: The numpy pickler is now working.
GaelVaroquaux authored
3 """
4
764d761 @rgommers STY: PEP8 cleanup.
rgommers authored
5 # Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
074b5f9 @GaelVaroquaux ENH: The numpy pickler is now working.
GaelVaroquaux authored
6 # Copyright (c) 2009 Gael Varoquaux
7 # License: BSD Style, 3 clauses.
8
9 import pickle
10 import traceback
764d761 @rgommers STY: PEP8 cleanup.
rgommers authored
11 import sys
12 import os
83e8b26 @GaelVaroquaux ENH: Faster compression with raw ZLib
GaelVaroquaux authored
13 import zlib
dbcf1e6 @GaelVaroquaux ENH: same function to dump/load zipped + unzipped
GaelVaroquaux authored
14 import warnings
78289c9 @fabianp Py3K compatibility.
fabianp authored
15
83e8b26 @GaelVaroquaux ENH: Faster compression with raw ZLib
GaelVaroquaux authored
16 if sys.version_info[0] >= 3:
6ee10bc @GaelVaroquaux BUG: fix python 2.5/3 incompatibilities
GaelVaroquaux authored
17 from io import BytesIO
78289c9 @fabianp Py3K compatibility.
fabianp authored
18 from pickle import _Unpickler as Unpickler
690a604 @amueller COSMIT pep8
amueller authored
19
83e8b26 @GaelVaroquaux ENH: Faster compression with raw ZLib
GaelVaroquaux authored
20 def asbytes(s):
21 if isinstance(s, bytes):
22 return s
23 return s.encode('latin1')
78289c9 @fabianp Py3K compatibility.
fabianp authored
24 else:
83e8b26 @GaelVaroquaux ENH: Faster compression with raw ZLib
GaelVaroquaux authored
25 try:
26 from io import BytesIO
27 except ImportError:
28 # BytesIO has been added in Python 2.5
29 from cStringIO import StringIO as BytesIO
78289c9 @fabianp Py3K compatibility.
fabianp authored
30 from pickle import Unpickler
83e8b26 @GaelVaroquaux ENH: Faster compression with raw ZLib
GaelVaroquaux authored
31 asbytes = str
074b5f9 @GaelVaroquaux ENH: The numpy pickler is now working.
GaelVaroquaux authored
32
690a604 @amueller COSMIT pep8
amueller authored
33 _MEGA = 2 ** 20
34 _MAX_LEN = len(hex(2 ** 64))
83e8b26 @GaelVaroquaux ENH: Faster compression with raw ZLib
GaelVaroquaux authored
35
36 # To detect file types
37 _ZFILE_PREFIX = asbytes('ZF')
4b718d1 @GaelVaroquaux ENH: add zip-based pickling
GaelVaroquaux authored
38
690a604 @amueller COSMIT pep8
amueller authored
39
764d761 @rgommers STY: PEP8 cleanup.
rgommers authored
###############################################################################
# Compressed file with Zlib

def _read_magic(file_handle):
    """Peek at the magic signature identifying a file as a Zfile.

    The handle is rewound afterwards, since pickling expects
    file-handles positioned at the beginning of the file.
    """
    signature = file_handle.read(len(_ZFILE_PREFIX))
    # Pickling needs file-handles at the beginning of the file
    file_handle.seek(0)
    return signature
51
52
4b404a1 @GaelVaroquaux Clean Up
GaelVaroquaux authored
def read_zfile(file_handle):
    """Read the z-file and return the content as a string

    Z-files are raw data compressed with zlib used internally by joblib
    for persistence. Backward compatibility is not guaranteed. Do not
    use for external purposes.
    """
    file_handle.seek(0)
    assert _read_magic(file_handle) == _ZFILE_PREFIX, \
        "File does not have the right magic"
    # The header stores the magic followed by the payload length in hex,
    # padded to a fixed width.
    header = file_handle.read(len(_ZFILE_PREFIX) + _MAX_LEN)
    expected_size = int(header[len(_ZFILE_PREFIX):], 16)
    # Telling zlib the decompressed size up front lets it allocate the
    # output buffer in a single shot.
    data = zlib.decompress(file_handle.read(), 15, expected_size)
    assert len(data) == expected_size, (
        "Incorrect data length while decompressing %s."
        "The file could be corrupted." % file_handle)
    return data
73
74
def write_zfile(file_handle, data, compress=1):
    """Write the data in the given file as a Z-file.

    Z-files are raw data compressed with zlib used internally by joblib
    for persistence. Backward compatibility is not guaranteed. Do not
    use for external purposes.
    """
    file_handle.write(_ZFILE_PREFIX)
    hex_length = hex(len(data))
    if sys.version_info[0] < 3 and type(hex_length) is long:
        # Python 2 appends a trailing 'L' to the hex repr of longs;
        # strip it so the header stays parseable.
        hex_length = hex_length[:-1]
    # Store the length of the data, padded to the fixed header width
    file_handle.write(hex_length.ljust(_MAX_LEN))
    file_handle.write(zlib.compress(data, compress))
83e8b26 @GaelVaroquaux ENH: Faster compression with raw ZLib
GaelVaroquaux authored
90
91
92 ###############################################################################
54f4d7e @GaelVaroquaux ENH: optimize pickler for non-contiguous
GaelVaroquaux authored
93 # Utility objects for persistence.
94
class NDArrayWrapper(object):
    """An object to be persisted instead of numpy arrays.

    The only thing this object does is to carry the filename in which
    the array has been persisted, and the array subclass.
    """
    def __init__(self, filename, subclass):
        "Store the useful information for later"
        self.filename = filename
        self.subclass = subclass

    def read(self, unpickler):
        "Reconstruct the array"
        filename = os.path.join(unpickler._dirname, self.filename)
        # Load the array from the disk.
        # BUG FIX: the version gate used a plain string comparison
        # ("1.10" < "1.3" lexically), which wrongly disabled mmap_mode
        # on modern numpy releases. Compare numerically instead.
        np_version = []
        for part in unpickler.np.__version__.split('.')[:2]:
            digits = ''.join(c for c in part if c.isdigit())
            np_version.append(int(digits) if digits else 0)
        if tuple(np_version) >= (1, 3):
            array = unpickler.np.load(filename,
                                mmap_mode=unpickler.mmap_mode)
        else:
            # Numpy does not have mmap_mode before 1.3
            array = unpickler.np.load(filename)
        # Reconstruct subclasses. This does not work with old
        # versions of numpy
        if (hasattr(array, '__array_prepare__')
            and not self.subclass in (unpickler.np.ndarray,
                                      unpickler.np.memmap)):
            # We need to reconstruct another subclass
            new_array = unpickler.np.core.multiarray._reconstruct(
                    self.subclass, (0,), 'b')
            new_array.__array_prepare__(array)
            array = new_array
        return array
127
128
0934f6e @GaelVaroquaux BUG: compress out-of-pickle arrays
GaelVaroquaux authored
class ZNDArrayWrapper(NDArrayWrapper):
    """An object to be persisted instead of numpy arrays.

    This object stores the Zfile filename in which the data array has
    been persisted, and the meta information to retrieve it.

    The reason that we store the raw buffer data of the array and
    the meta information, rather than array representation routine
    (tostring) is that it enables us to use completely the strided
    model to avoid memory copies (a and a.T store as fast). In
    addition saving the heavy information separately can avoid
    creating large temporary buffers when unpickling data with
    large arrays.
    """
    def __init__(self, filename, init_args, state):
        "Store the useful information for later"
        self.filename = filename
        self.state = state
        self.init_args = init_args

    def read(self, unpickler):
        "Reconstruct the array from the meta-information and the z-file"
        # Here we are simply reproducing the unpickling mechanism for
        # numpy arrays
        filename = os.path.join(unpickler._dirname, self.filename)
        array = unpickler.np.core.multiarray._reconstruct(*self.init_args)
        # BUG FIX: the z-file handle was opened and never closed,
        # leaking a file descriptor per array read.
        zfile_handle = open(filename, 'rb')
        try:
            data = read_zfile(zfile_handle)
        finally:
            zfile_handle.close()
        # The raw data buffer is the last entry of the ndarray state
        state = self.state + (data,)
        array.__setstate__(state)
        return array
160
161
162 ###############################################################################
074b5f9 @GaelVaroquaux ENH: The numpy pickler is now working.
GaelVaroquaux authored
163 # Pickler classes
164
class NumpyPickler(pickle.Pickler):
    """A pickler to persist big data efficiently.

    The main features of this object are:

     * persistence of numpy arrays in separate .npy files, for which
       I/O is fast.

     * optional compression using Zlib, with a special care on
       avoiding temporaries.
    """

    def __init__(self, filename, compress=0, cache_size=100):
        self._filename = filename
        self._filenames = [filename, ]
        self.cache_size = cache_size
        self.compress = compress
        if not self.compress:
            self.file = open(filename, 'wb')
        else:
            # When compressing, the pickle is buffered in memory and
            # written to disk as a single z-file in close()
            self.file = BytesIO()
        # Count the number of npy files that we have created:
        self._npy_counter = 0
        pickle.Pickler.__init__(self, self.file,
                                protocol=pickle.HIGHEST_PROTOCOL)
        # delayed import of numpy, to avoid tight coupling
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def _write_array(self, array, filename):
        """Persist `array` in its own file and return a light-weight
        placeholder container to be pickled in its place."""
        if not self.compress:
            self.np.save(filename, array)
            container = NDArrayWrapper(os.path.basename(filename),
                                       type(array))
        else:
            filename += '.z'
            # Efficient compressed storage:
            # The meta data is stored in the container, and the core
            # numerics in a z-file
            _, init_args, state = array.__reduce__()
            # the last entry of 'state' is the data itself
            zfile = open(filename, 'wb')
            try:
                write_zfile(zfile, state[-1],
                            compress=self.compress)
            finally:
                # Close the z-file even if compression fails
                zfile.close()
            state = state[:-1]
            container = ZNDArrayWrapper(os.path.basename(filename),
                                        init_args, state)
        return container, filename

    def save(self, obj):
        """ Subclass the save method, to save ndarray subclasses in npy
            files, rather than pickling them. Of course, this is a
            total abuse of the Pickler class.
        """
        if self.np is not None and type(obj) in (self.np.ndarray,
                            self.np.matrix, self.np.memmap):
            size = obj.size * obj.itemsize
            if self.compress and size < self.cache_size * _MEGA:
                # When compressing, as we are not writing directly to the
                # disk, it is more efficient to use standard pickling
                if type(obj) is self.np.memmap:
                    # Pickling doesn't work with memmaped arrays
                    obj = self.np.asarray(obj)
                return pickle.Pickler.save(self, obj)
            self._npy_counter += 1
            try:
                filename = '%s_%02i.npy' % (self._filename,
                                            self._npy_counter)
                # This converts the array in a container
                obj, filename = self._write_array(obj, filename)
                self._filenames.append(filename)
            except Exception:
                # BUG FIX: was a Python-2-only `print` statement, which
                # is a SyntaxError on Python 3 despite the module's Py3
                # compatibility branches; also narrowed the bare except.
                self._npy_counter -= 1
                # XXX: We should have a logging mechanism
                print('Failed to save %s to .npy file:\n%s' % (
                    type(obj),
                    traceback.format_exc()))
        return pickle.Pickler.save(self, obj)

    def close(self):
        if self.compress:
            # Flush the in-memory pickle to disk as a single z-file
            zfile = open(self._filename, 'wb')
            try:
                write_zfile(zfile,
                            self.file.getvalue(), self.compress)
            finally:
                zfile.close()
4b404a1 @GaelVaroquaux Clean Up
GaelVaroquaux authored
254
074b5f9 @GaelVaroquaux ENH: The numpy pickler is now working.
GaelVaroquaux authored
255
78289c9 @fabianp Py3K compatibility.
fabianp authored
class NumpyUnpickler(Unpickler):
    """A subclass of the Unpickler to unpickle our numpy pickles.
    """
    dispatch = Unpickler.dispatch.copy()

    def __init__(self, filename, file_handle, mmap_mode=None):
        self._filename = os.path.basename(filename)
        self._dirname = os.path.dirname(filename)
        self.mmap_mode = mmap_mode
        self.file_handle = self._open_pickle(file_handle)
        Unpickler.__init__(self, self.file_handle)
        # delayed import of numpy, to avoid tight coupling
        try:
            import numpy as np
        except ImportError:
            np = None
        self.np = np

    def _open_pickle(self, file_handle):
        # Hook for subclasses (e.g. to decompress on the fly)
        return file_handle

    def load_build(self):
        """ This method is called to set the state of a newly created
            object.

            We capture it to replace our place-holder objects,
            NDArrayWrapper, by the array we are interested in. We
            replace them directly in the stack of pickler.
        """
        Unpickler.load_build(self)
        if isinstance(self.stack[-1], NDArrayWrapper):
            if self.np is None:
                raise ImportError('Trying to unpickle an ndarray, '
                        "but numpy didn't import correctly")
            nd_array_wrapper = self.stack.pop()
            array = nd_array_wrapper.read(self)
            self.stack.append(array)

    # Be careful to register our new method.
    # BUG FIX: on Python 3 the _Unpickler dispatch table is keyed by
    # integer opcodes while pickle.BUILD is a bytes object, so the old
    # `dispatch[pickle.BUILD]` entry was never hit and load_build never
    # ran. Indexing with BUILD[0] yields 'b' on Python 2 (str) and the
    # int 98 on Python 3, working on both.
    dispatch[pickle.BUILD[0]] = load_build
295
296
3a42862 @GaelVaroquaux ENH: fast unziper
GaelVaroquaux authored
class ZipNumpyUnpickler(NumpyUnpickler):
    """A subclass of our Unpickler to unpickle on the fly from
    compressed storage."""

    def __init__(self, filename, file_handle):
        # Compressed pickles can never be memory-mapped
        NumpyUnpickler.__init__(self, filename, file_handle, mmap_mode=None)

    def _open_pickle(self, file_handle):
        # Inflate the whole z-file in memory and unpickle from the buffer
        return BytesIO(read_zfile(file_handle))
ee90bb5 @GaelVaroquaux MISC: Clean up code
GaelVaroquaux authored
308
3a42862 @GaelVaroquaux ENH: fast unziper
GaelVaroquaux authored
309
764d761 @rgommers STY: PEP8 cleanup.
rgommers authored
310 ###############################################################################
074b5f9 @GaelVaroquaux ENH: The numpy pickler is now working.
GaelVaroquaux authored
311 # Utility functions
312
f4aa261 @GaelVaroquaux MISC: Tweek defaults
GaelVaroquaux authored
def dump(value, filename, compress=0, cache_size=100):
    """Fast persistence of an arbitrary Python object into a file, with
    dedicated storage for numpy arrays.

    Parameters
    -----------
    value: any Python object
        The object to store to disk
    filename: string
        The name of the file in which it is to be stored
    compress: integer from 0 to 9, optional
        Optional compression level for the data. 0 is no compression.
        Higher means more compression, but also slower read and
        write times. Using a value of 3 is often a good compromise.
        See the notes for more details.
    cache_size: positive number, optional
        Fixes the order of magnitude (in megabytes) of the cache used
        for in-memory compression. Note that this is just an order of
        magnitude estimate and that for big arrays, the code will go
        over this value at dump and at load time.

    Returns
    -------
    filenames: list of strings
        The list of file names in which the data is stored. If
        compress is false, each array is stored in a different file.

    See Also
    --------
    joblib.load : corresponding loader

    Notes
    -----
    Memmapping on load cannot be used for compressed files. Thus
    using compression can significantly slow down loading. In
    addition, compressed files take extra memory during
    dump and load.
    """
    # BUG FIX: `basestring` does not exist on Python 3, so this guard
    # used to crash with a NameError instead of the intended ValueError.
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3
    if not isinstance(filename, string_types):
        # People keep inverting arguments, and the resulting error is
        # incomprehensible
        raise ValueError(
              'Second argument should be a filename, %s (type %s) was given'
              % (filename, type(filename))
            )
    try:
        pickler = NumpyPickler(filename, compress=compress,
                               cache_size=cache_size)
        pickler.dump(value)
        pickler.close()
    finally:
        # Make sure the on-disk (or in-memory) pickle buffer is flushed
        # and closed even when dumping fails half-way.
        if 'pickler' in locals() and hasattr(pickler, 'file'):
            pickler.file.flush()
            pickler.file.close()
    return pickler._filenames
368
369
dbcf1e6 @GaelVaroquaux ENH: same function to dump/load zipped + unzipped
GaelVaroquaux authored
def load(filename, mmap_mode=None):
    """Reconstruct a Python object from a file persisted with joblib.dump.

    Parameters
    -----------
    filename: string
        The name of the file from which to load the object
    mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, the arrays are memory-mapped from the disk. This
        mode has no effect for compressed files. Note that in this
        case the reconstructed object might no longer match exactly
        the originally pickled object.

    Returns
    -------
    result: any Python object
        The object stored in the file.

    See Also
    --------
    joblib.dump : function to save an object

    Notes
    -----

    This function can load numpy array files saved separately during the
    dump. If the mmap_mode argument is given, it is passed to np.load and
    arrays are loaded as memmaps. As a consequence, the reconstructed
    object might not match the original pickled object. Note that if the
    file was saved with compression, the arrays cannot be memmaped.
    """
    file_handle = open(filename, 'rb')
    # We are careful to open the file handle early and keep it open to
    # avoid race-conditions on renames. That said, if data are stored in
    # companion files, moving the directory will create a race when
    # joblib tries to access the companion files.
    if _read_magic(file_handle) == _ZFILE_PREFIX:
        if mmap_mode is not None:
            warnings.warn('file "%(filename)s" appears to be a zip, '
                          'ignoring mmap_mode "%(mmap_mode)s" flag passed'
                          % locals(), Warning, stacklevel=2)
        unpickler = ZipNumpyUnpickler(filename, file_handle=file_handle)
    else:
        unpickler = NumpyUnpickler(filename,
                                   file_handle=file_handle,
                                   mmap_mode=mmap_mode)

    try:
        obj = unpickler.load()
    finally:
        if hasattr(unpickler, 'file_handle'):
            unpickler.file_handle.close()
        # BUG FIX: for compressed files, unpickler.file_handle is the
        # in-memory BytesIO produced by _open_pickle, so the on-disk
        # handle opened above was leaked. Close it explicitly.
        if not file_handle.closed:
            file_handle.close()
    return obj
Something went wrong with that request. Please try again.