-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
npz.py
242 lines (198 loc) · 8.91 KB
/
npz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import numpy
import six
from chainer.backends import _cpu
from chainer.backends import cuda
from chainer.backends import intel64
from chainer import serializer
import chainerx
# For historical reasons, NPZ serializers in Chainer allow pickle despite their
# potential security issues. This behavior may be changed in future.
# `numpy.save` and `numpy.load` have `allow_pickle` option. `numpy.savez` and
# `numpy.savez_compressed` do not have an option to disable pickle.
# Before NumPy 1.10, pickle was always allowed. Since NumPy 1.16.3, pickle is
# not allowed by default.
# Extra keyword arguments handed to `numpy.load` by `load_npz`. The
# `allow_pickle` keyword is only passed on NumPy 1.10.0 or later; on older
# releases the dictionary stays empty.
_allow_pickle_kwargs = (
    {'allow_pickle': True}
    if numpy.lib.NumpyVersion(numpy.__version__) >= '1.10.0'
    else {})
class DictionarySerializer(serializer.Serializer):

    """Serializer that writes into a flat dictionary.

    This is the standard serializer in Chainer. An object hierarchy is
    flattened into a single dictionary whose keys are the ``/``-separated
    paths to the objects in that hierarchy.

    .. note::
        Despite its name, this serializer DOES NOT write the object into
        external files. It only builds a flat dictionary of arrays that can
        be fed into :func:`numpy.savez` and :func:`numpy.savez_compressed`.
        If you use this serializer directly, you have to pass the resulting
        dictionary to one of these functions yourself.

    Args:
        target (dict): The dictionary that this serializer saves the objects
            to. If target is None, then a new dictionary is created.
        path (str): The base path in the hierarchy that this serializer
            indicates.

    Attributes:
        ~DictionarySerializer.target (dict): The target dictionary.
            Once the serialization completes, this dictionary can be fed into
            :func:`numpy.savez` or :func:`numpy.savez_compressed` to serialize
            it in the NPZ format.

    """

    def __init__(self, target=None, path=''):
        self.path = path
        self.target = target if target is not None else {}

    def __getitem__(self, key):
        # The child writes into the very same dictionary; only the key prefix
        # grows by one path component.
        component = key.strip('/')
        child_path = self.path + component + '/'
        return DictionarySerializer(self.target, child_path)

    def __call__(self, key, value):
        name = key.lstrip('/')
        if value is None:
            # ``None`` is stored as an array wrapping ``None``.
            stored = numpy.asarray(None)
        else:
            stored = _cpu._to_cpu(value)
        self.target[self.path + name] = stored
        return value
def serialize(obj):
    """Builds a flat dictionary from a serializable object.

    Args:
        obj: Object to be serialized. It must support serialization protocol.

    Returns:
        dict: The target dictionary of a fresh :class:`DictionarySerializer`
        after ``obj`` has been saved through it.

    """
    dict_serializer = DictionarySerializer()
    dict_serializer.save(obj)
    return dict_serializer.target
def save_npz(file, obj, compression=True):
    """Writes a single object to a file in NPZ format.

    This is a short-cut function to save only one object into an NPZ file.

    Args:
        file (str or file-like): Target file to write to. A string is opened
            as a path in binary write mode.
        obj: Object to be serialized. It must support serialization protocol.
            If it is a dictionary object, the serialization will be skipped
            and the dictionary is written as it is.
        compression (bool): If ``True``, compression in the resulting zip file
            is enabled.

    .. seealso::
        :func:`chainer.serializers.load_npz`

    """
    if isinstance(file, six.string_types):
        # A path was given: open it here and re-enter with the file object.
        with open(file, 'wb') as opened:
            save_npz(opened, obj, compression)
        return

    arrays = obj
    if not isinstance(obj, dict):
        dict_serializer = DictionarySerializer()
        dict_serializer.save(obj)
        arrays = dict_serializer.target

    write = numpy.savez_compressed if compression else numpy.savez
    write(file, **arrays)
class NpzDeserializer(serializer.Deserializer):

    """Deserializer for NPZ format.

    This is the standard deserializer in Chainer. This deserializer can be used
    to read an object serialized by :func:`save_npz`.

    Args:
        npz: `npz` file object.
        path: The base path that the deserialization starts from.
        strict (bool): If ``True``, the deserializer raises an error when an
            expected value is not found in the given NPZ file. Otherwise,
            it ignores the value and skips deserialization.
        ignore_names (string, callable or list of them):
            If callable, it is a function that takes the name of a parameter
            or a persistent and returns ``True`` when it needs to be skipped.
            If string, this is the name of a parameter or persistent that is
            going to be skipped.
            This can also be a list (or tuple) of callables and strings that
            behave as described above.
            Names are compared against, and callables are called with, the
            full key, i.e. the base path followed by the entry name.

    """

    def __init__(self, npz, path='', strict=True, ignore_names=None):
        self.npz = npz
        self.path = path
        self.strict = strict
        if ignore_names is None:
            ignore_names = []
        self.ignore_names = ignore_names

    def __getitem__(self, key):
        # The child reads from the same archive with the same options; only
        # the key prefix grows by one path component.
        key = key.strip('/')
        return NpzDeserializer(
            self.npz, self.path + key + '/', strict=self.strict,
            ignore_names=self.ignore_names)

    def __call__(self, key, value):
        """Loads the entry stored under ``key`` into ``value``.

        Args:
            key (str): Name of the entry, relative to the base path.
            value: Destination. Arrays are overwritten in place; ``None``
                makes the stored array be returned as it is; any other
                object is treated as a scalar and a new value of the same
                type is built from the stored data.

        Returns:
            ``value`` (unchanged when the entry is skipped), the stored array
            when ``value`` is ``None``, ``None`` when ``None`` was stored, or
            the newly built scalar.

        Raises:
            ValueError: If an element of ``ignore_names`` is neither a string
                nor a callable.
            TypeError: If stored numeric data cannot be safely cast to the
                type of a scalar ``value``.

        """
        key = self.path + key.lstrip('/')
        if not self.strict and key not in self.npz:
            return value

        # A single string/callable is treated like a one-element sequence.
        if isinstance(self.ignore_names, (tuple, list)):
            ignore_names = self.ignore_names
        else:
            ignore_names = (self.ignore_names,)
        for ignore_name in ignore_names:
            # `six.string_types` (as used by `save_npz`) rather than `str`,
            # so that unicode names are also accepted on Python 2.
            if isinstance(ignore_name, six.string_types):
                if key == ignore_name:
                    return value
            elif callable(ignore_name):
                if ignore_name(key):
                    return value
            else:
                raise ValueError(
                    'ignore_names needs to be a callable, string or '
                    'list of them.')

        dataset = self.npz[key]
        # `DictionarySerializer` stores ``None`` as ``numpy.asarray(None)``;
        # indexing with an empty tuple unwraps such an entry.
        if dataset[()] is None:
            return None

        if value is None:
            return dataset
        if isinstance(value, chainerx.ndarray):
            value_view = chainerx.to_numpy(value, copy=False)
            numpy.copyto(value_view, dataset)
        elif isinstance(value, numpy.ndarray):
            numpy.copyto(value, dataset)
        elif isinstance(value, cuda.ndarray):
            value.set(numpy.asarray(dataset, dtype=value.dtype))
        elif isinstance(value, intel64.mdarray):
            intel64.ideep.basic_copyto(value, numpy.asarray(dataset))
        else:
            # Scalar destination: rebuild it from the stored data, refusing
            # numeric conversions that are not safe.
            value_type = type(value)
            dataset_arr = numpy.asarray(dataset)
            if (issubclass(dataset_arr.dtype.type, numpy.number)
                    and not (issubclass(dataset_arr.dtype.type, numpy.integer)
                             and value_type in six.integer_types)
                    # Casting a `numpy.integer` scalar by `int()` case above is
                    # safe as `int()` gives unlimited precision integer (it's
                    # also true for `long()`/`int()` on Python 2). For such a
                    # case, the check below may be too strict. For example,
                    # `numpy.can_cast(numpy.int64, int)`, which checks cast-
                    # ability to `dtype(int)`, gives `False` on a platform
                    # whose `dtype(int)` is `numpy.int32` like Windows/x64.
                    and not numpy.can_cast(
                        dataset_arr.dtype, value_type, casting='safe')):
                raise TypeError(
                    'Cannot safely deserialize from numpy array with dtype={} '
                    'into a variable of type {}.'.format(
                        dataset.dtype, type(value)))
            value = value_type(dataset_arr)
        return value
def load_npz(file, obj, path='', strict=True, ignore_names=None):
    """Reads a single object back from a file in NPZ format.

    This is a short-cut function to load from an `.npz` file that contains only
    one object.

    .. note::
        On NumPy 1.10.0 or later the file is opened with
        ``allow_pickle=True``. Unpickling can execute arbitrary code, so only
        load files from sources you trust.

    Args:
        file (str or file-like): File to be loaded.
        obj: Object to be deserialized. It must support serialization protocol.
        path (str): The path in the hierarchy of the serialized data under
            which the data is to be loaded. The default behavior (blank) will
            load all data under the root path.
        strict (bool): If ``True``, the deserializer raises an error when an
            expected value is not found in the given NPZ file. Otherwise,
            it ignores the value and skips deserialization.
        ignore_names (string, callable or list of them):
            If callable, it is a function that takes the name of a parameter
            or a persistent and returns ``True`` when it needs to be skipped.
            If string, this is the name of a parameter or persistent that is
            going to be skipped.
            This can also be a list of callables and strings that behave as
            described above.

    .. seealso::
        :func:`chainer.serializers.save_npz`

    """
    # NOTE: pickle is allowed here for historical reasons; see the note in
    # the docstring about untrusted files.
    with numpy.load(file, **_allow_pickle_kwargs) as npz_file:
        deserializer = NpzDeserializer(
            npz_file, path=path, strict=strict, ignore_names=ignore_names)
        deserializer.load(obj)