diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
new file mode 100644
index 000000000..f58e4c634
--- /dev/null
+++ b/.github/workflows/black.yml
@@ -0,0 +1,11 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+      - uses: psf/black@stable
diff --git a/docs/conf.py b/docs/conf.py
index 2cc12af72..dbd930276 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -17,9 +17,9 @@
 
 # -- Project information -----------------------------------------------------
 
-project = 'RosettaSciIO'
-copyright = '2022, HyperSpy Developers'
-author = 'HyperSpy Developers'
+project = "RosettaSciIO"
+copyright = "2022, HyperSpy Developers"
+author = "HyperSpy Developers"
 
 
 # -- General configuration ---------------------------------------------------
@@ -28,9 +28,9 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.githubpages',
-    'sphinx.ext.intersphinx',
-    'sphinxcontrib.towncrier',
+    "sphinx.ext.githubpages",
+    "sphinx.ext.intersphinx",
+    "sphinxcontrib.towncrier",
 ]
 
 intersphinx_mapping = {
@@ -40,12 +40,12 @@
 }
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 
 # -- Options for HTML output -------------------------------------------------
@@ -53,12 +53,12 @@
 # The theme to use for HTML and HTML Help pages. See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'pydata_sphinx_theme'
+html_theme = "pydata_sphinx_theme"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 html_theme_options = {
     "icon_links": [
@@ -81,7 +81,7 @@
             "icon": "",
             # The type of image to be used (see below for details)
             "type": "local",
-        }
+        },
     ],
     "logo": {
         "text": "RosettaSciIO",
@@ -104,7 +104,7 @@
 
 # -- Options for towncrier_draft extension -----------------------------------
 
 # Options: draft/sphinx-version/sphinx-release
-towncrier_draft_autoversion_mode = 'draft'
+towncrier_draft_autoversion_mode = "draft"
 towncrier_draft_include_empty = False
 towncrier_draft_working_directory = ".."
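The new psf/black@stable job only checks formatting; the quote normalization it enforces (visible throughout docs/conf.py above) can be reproduced locally. A minimal sketch using Black's public Python API, assuming black is installed in the environment and with an illustrative snippet string:

```python
# Sketch of the quote normalization the Lint job above enforces.
# black.format_str and black.Mode are Black's public Python API;
# the snippet is illustrative, not taken from the diff.
import black

snippet = "project = 'RosettaSciIO'\n"
print(black.format_str(snippet, mode=black.Mode()))
# -> project = "RosettaSciIO"
```

In CI, the psf/black@stable step fails the job when any file would be reformatted, which is what the push/pull_request triggers above gate on.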
diff --git a/rsciio/__init__.py b/rsciio/__init__.py index af1c5d8bd..9f436f212 100644 --- a/rsciio/__init__.py +++ b/rsciio/__init__.py @@ -30,9 +30,7 @@ for sub, _, _ in os.walk(here): specsf = os.path.join(sub, "specifications.yaml") if os.path.isfile(specsf): - with open(specsf, 'r') as stream: + with open(specsf, "r") as stream: specs = yaml.safe_load(stream) specs["api"] = "rsciio.%s.api" % os.path.split(sub)[1] IO_PLUGINS.append(specs) - - diff --git a/rsciio/_hierarchical.py b/rsciio/_hierarchical.py index cbf159602..bc3afe49c 100644 --- a/rsciio/_hierarchical.py +++ b/rsciio/_hierarchical.py @@ -33,12 +33,11 @@ default_version = Version(version) -not_valid_format = 'The file is not a valid HyperSpy hdf5 file' +not_valid_format = "The file is not a valid HyperSpy hdf5 file" _logger = logging.getLogger(__name__) - def get_signal_chunks(shape, dtype, signal_axes=None, target_size=1e6): """ Function that calculates chunks for the signal, preferably at least one @@ -63,11 +62,10 @@ def get_signal_chunks(shape, dtype, signal_axes=None, target_size=1e6): # largely based on the guess_chunk in h5py bytes_per_signal = np.prod([shape[i] for i in signal_axes]) * typesize signals_per_chunk = int(np.floor_divide(target_size, bytes_per_signal)) - navigation_axes = tuple(i for i in range(len(shape)) if i not in - signal_axes) + navigation_axes = tuple(i for i in range(len(shape)) if i not in signal_axes) num_nav_axes = len(navigation_axes) num_signals = np.prod([shape[i] for i in navigation_axes]) - if signals_per_chunk < 2 or num_nav_axes==0: + if signals_per_chunk < 2 or num_nav_axes == 0: # signal is larger than chunk max chunks = [s if i in signal_axes else 1 for i, s in enumerate(shape)] return tuple(chunks) @@ -87,13 +85,24 @@ def get_signal_chunks(shape, dtype, signal_axes=None, target_size=1e6): # than some axes sizes. If that is the case, the value must be # recomputed at the next iteration after having added the "offending" # axes to `small_idx` - nav_axes_chunks = int(np.floor((signals_per_chunk / np.prod(small_sizes))**(1 / (num_nav_axes - len(small_sizes))))) + nav_axes_chunks = int( + np.floor( + (signals_per_chunk / np.prod(small_sizes)) + ** (1 / (num_nav_axes - len(small_sizes))) + ) + ) for index, size in enumerate(shape): - if index not in (list(signal_axes) + small_idx) and size < nav_axes_chunks: + if ( + index not in (list(signal_axes) + small_idx) + and size < nav_axes_chunks + ): small_idx.append(index) small_sizes.append(size) iterate = True - chunks = [s if i in signal_axes or i in small_idx else nav_axes_chunks for i, s in enumerate(shape)] + chunks = [ + s if i in signal_axes or i in small_idx else nav_axes_chunks + for i, s in enumerate(shape) + ] return tuple(int(x) for x in chunks) @@ -122,7 +131,8 @@ def __init__(self, file): "This file was written using a newer version of the " f"HyperSpy {self._file_type} file format. I will attempt to " "load it, but, if I fail, it is likely that I will be more " - "successful at this and other tasks if you upgrade me.") + "successful at this and other tasks if you upgrade me." 
+ ) def get_format_version(self): """Return the format version.""" @@ -166,37 +176,35 @@ def read(self, lazy): models_with_signals = [] standalone_models = [] - if 'Analysis/models' in self.file: + if "Analysis/models" in self.file: try: - m_gr = self.file['Analysis/models'] + m_gr = self.file["Analysis/models"] for model_name in m_gr: - if '_signal' in m_gr[model_name].attrs: - key = m_gr[model_name].attrs['_signal'] + if "_signal" in m_gr[model_name].attrs: + key = m_gr[model_name].attrs["_signal"] # del m_gr[model_name].attrs['_signal'] - res = self._group2dict( - m_gr[model_name], - lazy=lazy) - del res['_signal'] + res = self._group2dict(m_gr[model_name], lazy=lazy) + del res["_signal"] models_with_signals.append((key, {model_name: res})) else: standalone_models.append( - {model_name: self._group2dict( - m_gr[model_name], lazy=lazy)}) + {model_name: self._group2dict(m_gr[model_name], lazy=lazy)} + ) except TypeError: raise IOError(not_valid_format) experiments = [] exp_dict_list = [] - if 'Experiments' in self.file: - for ds in self.file['Experiments']: - if isinstance(self.file['Experiments'][ds], self.Group): - if 'data' in self.file['Experiments'][ds]: + if "Experiments" in self.file: + for ds in self.file["Experiments"]: + if isinstance(self.file["Experiments"][ds], self.Group): + if "data" in self.file["Experiments"][ds]: experiments.append(ds) # Parse the file for experiment in experiments: - exg = self.file['Experiments'][experiment] + exg = self.file["Experiments"][experiment] exp = self.group2signaldict(exg, lazy) # assign correct models, if found: _tmp = {} @@ -204,7 +212,7 @@ def read(self, lazy): if key == exg.name: _tmp.update(_dict) models_with_signals.remove((key, _dict)) - exp['models'] = _tmp + exp["models"] = _tmp exp_dict_list.append(exp) @@ -214,7 +222,7 @@ def read(self, lazy): exp_dict_list.extend(standalone_models) if not len(exp_dict_list): - raise IOError(f'This is not a valid {self._file_type} file.') + raise IOError(f"This is not a valid {self._file_type} file.") return exp_dict_list @@ -243,11 +251,10 @@ def group2signaldict(self, group, lazy=False): metadata = "metadata" original_metadata = "original_metadata" - exp = {'metadata': self._group2dict( - group[metadata], lazy=lazy), - 'original_metadata': self._group2dict( - group[original_metadata], lazy=lazy), - 'attributes': {} + exp = { + "metadata": self._group2dict(group[metadata], lazy=lazy), + "original_metadata": self._group2dict(group[original_metadata], lazy=lazy), + "attributes": {}, } if "package" in group.attrs: # HyperSpy version is >= 1.5 @@ -260,7 +267,7 @@ def group2signaldict(self, group, lazy=False): exp["package"] = "" exp["package_version"] = "" - data = group['data'] + data = group["data"] try: ragged_shape = group["ragged_shapes"] new_data = np.empty(shape=data.shape, dtype=object) @@ -271,14 +278,14 @@ def group2signaldict(self, group, lazy=False): pass if lazy: data = da.from_array(data, chunks=data.chunks) - exp['attributes']['_lazy'] = True + exp["attributes"]["_lazy"] = True else: data = np.asanyarray(data) - exp['data'] = data + exp["data"] = data axes = [] - for i in range(len(exp['data'].shape)): + for i in range(len(exp["data"].shape)): try: - axes.append(self._group2dict(group[f'axis-{i}'])) + axes.append(self._group2dict(group[f"axis-{i}"])) axis = axes[-1] for key, item in axis.items(): if isinstance(item, np.bool_): @@ -287,80 +294,89 @@ def group2signaldict(self, group, lazy=False): axis[key] = ensure_unicode(item) except KeyError: break - if len(axes) != 
len(exp['data'].shape): # broke from the previous loop + if len(axes) != len(exp["data"].shape): # broke from the previous loop try: - axes = [i for k, i in sorted(iter(self._group2dict( - group['_list_' + str(len(exp['data'].shape)) + '_axes'], - lazy=lazy).items()))] + axes = [ + i + for k, i in sorted( + iter( + self._group2dict( + group["_list_" + str(len(exp["data"].shape)) + "_axes"], + lazy=lazy, + ).items() + ) + ) + ] except KeyError: raise IOError(not_valid_format) - exp['axes'] = axes - if 'learning_results' in group.keys(): - exp['attributes']['learning_results'] = \ - self._group2dict( - group['learning_results'], - lazy=lazy) - if 'peak_learning_results' in group.keys(): - exp['attributes']['peak_learning_results'] = \ - self._group2dict( - group['peak_learning_results'], - lazy=lazy) + exp["axes"] = axes + if "learning_results" in group.keys(): + exp["attributes"]["learning_results"] = self._group2dict( + group["learning_results"], lazy=lazy + ) + if "peak_learning_results" in group.keys(): + exp["attributes"]["peak_learning_results"] = self._group2dict( + group["peak_learning_results"], lazy=lazy + ) # If the title was not defined on writing the Experiment is # then called __unnamed__. The next "if" simply sets the title # back to the empty string if "General" in exp["metadata"] and "title" in exp["metadata"]["General"]: - if '__unnamed__' == exp['metadata']['General']['title']: - exp['metadata']["General"]['title'] = '' + if "__unnamed__" == exp["metadata"]["General"]["title"]: + exp["metadata"]["General"]["title"] = "" if self.version < Version("1.1"): # Load the decomposition results written with the old name, # mva_results - if 'mva_results' in group.keys(): - exp['attributes']['learning_results'] = self._group2dict( - group['mva_results'], lazy=lazy) - if 'peak_mva_results' in group.keys(): - exp['attributes']['peak_learning_results'] = self._group2dict( - group['peak_mva_results'], lazy=lazy) + if "mva_results" in group.keys(): + exp["attributes"]["learning_results"] = self._group2dict( + group["mva_results"], lazy=lazy + ) + if "peak_mva_results" in group.keys(): + exp["attributes"]["peak_learning_results"] = self._group2dict( + group["peak_mva_results"], lazy=lazy + ) # Replace the old signal and name keys with their current names - if 'signal' in exp['metadata']: + if "signal" in exp["metadata"]: if "Signal" not in exp["metadata"]: exp["metadata"]["Signal"] = {} - exp['metadata']["Signal"]['signal_type'] = \ - exp['metadata']['signal'] - del exp['metadata']['signal'] + exp["metadata"]["Signal"]["signal_type"] = exp["metadata"]["signal"] + del exp["metadata"]["signal"] - if 'name' in exp['metadata']: + if "name" in exp["metadata"]: if "General" not in exp["metadata"]: exp["metadata"]["General"] = {} - exp['metadata']['General']['title'] = \ - exp['metadata']['name'] - del exp['metadata']['name'] + exp["metadata"]["General"]["title"] = exp["metadata"]["name"] + del exp["metadata"]["name"] if self.version < Version("1.2"): - if '_internal_parameters' in exp['metadata']: - exp['metadata']['_HyperSpy'] = \ - exp['metadata']['_internal_parameters'] - del exp['metadata']['_internal_parameters'] - if 'stacking_history' in exp['metadata']['_HyperSpy']: - exp['metadata']['_HyperSpy']["Stacking_history"] = \ - exp['metadata']['_HyperSpy']['stacking_history'] - del exp['metadata']['_HyperSpy']["stacking_history"] - if 'folding' in exp['metadata']['_HyperSpy']: - exp['metadata']['_HyperSpy']["Folding"] = \ - exp['metadata']['_HyperSpy']['folding'] - del 
exp['metadata']['_HyperSpy']["folding"] - if 'Variance_estimation' in exp['metadata']: + if "_internal_parameters" in exp["metadata"]: + exp["metadata"]["_HyperSpy"] = exp["metadata"]["_internal_parameters"] + del exp["metadata"]["_internal_parameters"] + if "stacking_history" in exp["metadata"]["_HyperSpy"]: + exp["metadata"]["_HyperSpy"]["Stacking_history"] = exp["metadata"][ + "_HyperSpy" + ]["stacking_history"] + del exp["metadata"]["_HyperSpy"]["stacking_history"] + if "folding" in exp["metadata"]["_HyperSpy"]: + exp["metadata"]["_HyperSpy"]["Folding"] = exp["metadata"][ + "_HyperSpy" + ]["folding"] + del exp["metadata"]["_HyperSpy"]["folding"] + if "Variance_estimation" in exp["metadata"]: if "Noise_properties" not in exp["metadata"]: exp["metadata"]["Noise_properties"] = {} - exp['metadata']['Noise_properties']["Variance_linear_model"] = \ - exp['metadata']['Variance_estimation'] - del exp['metadata']['Variance_estimation'] + exp["metadata"]["Noise_properties"]["Variance_linear_model"] = exp[ + "metadata" + ]["Variance_estimation"] + del exp["metadata"]["Variance_estimation"] if "TEM" in exp["metadata"]: if "Acquisition_instrument" not in exp["metadata"]: exp["metadata"]["Acquisition_instrument"] = {} - exp["metadata"]["Acquisition_instrument"]["TEM"] = \ - exp["metadata"]["TEM"] + exp["metadata"]["Acquisition_instrument"]["TEM"] = exp["metadata"][ + "TEM" + ] del exp["metadata"]["TEM"] tem = exp["metadata"]["Acquisition_instrument"]["TEM"] if "EELS" in tem: @@ -391,8 +407,9 @@ def group2signaldict(self, group, lazy=False): if "SEM" in exp["metadata"]: if "Acquisition_instrument" not in exp["metadata"]: exp["metadata"]["Acquisition_instrument"] = {} - exp["metadata"]["Acquisition_instrument"]["SEM"] = \ - exp["metadata"]["SEM"] + exp["metadata"]["Acquisition_instrument"]["SEM"] = exp["metadata"][ + "SEM" + ] del exp["metadata"]["SEM"] sem = exp["metadata"]["Acquisition_instrument"]["SEM"] if "EDS" in sem: @@ -404,10 +421,13 @@ def group2signaldict(self, group, lazy=False): del sem["EDS"] del sem - if "Sample" in exp["metadata"] and "Xray_lines" in exp[ - "metadata"]["Sample"]: - exp["metadata"]["Sample"]["xray_lines"] = exp[ - "metadata"]["Sample"]["Xray_lines"] + if ( + "Sample" in exp["metadata"] + and "Xray_lines" in exp["metadata"]["Sample"] + ): + exp["metadata"]["Sample"]["xray_lines"] = exp["metadata"]["Sample"][ + "Xray_lines" + ] del exp["metadata"]["Sample"]["Xray_lines"] for key in ["title", "date", "time", "original_filename"]: @@ -442,8 +462,7 @@ def group2signaldict(self, group, lazy=False): exposure = "exposure_time" if exposure is not None: if "Detector" not in tem: - tem["Detector"] = {"Camera": { - "exposure": tem[exposure]}} + tem["Detector"] = {"Camera": {"exposure": tem[exposure]}} tem["Detector"]["Camera"] = {"exposure": tem[exposure]} del tem[exposure] # Move tilt_stage to Stage.tilt_alpha @@ -462,7 +481,7 @@ def _group2dict(self, group, dictionary=None, lazy=False): if isinstance(value, bytes): value = value.decode() if isinstance(value, (np.string_, str)): - if value == '_None_': + if value == "_None_": value = None elif isinstance(value, np.bool_): value = bool(value) @@ -472,38 +491,37 @@ def _group2dict(self, group, dictionary=None, lazy=False): if value.dtype.str.endswith("U1"): value = value.tolist() # skip signals - these are handled below. 
- if key.startswith('_sig_'): + if key.startswith("_sig_"): pass - elif key.startswith('_list_empty_'): - dictionary[key[len('_list_empty_'):]] = [] - elif key.startswith('_tuple_empty_'): - dictionary[key[len('_tuple_empty_'):]] = () - elif key.startswith('_bs_'): - dictionary[key[len('_bs_'):]] = value.tobytes() + elif key.startswith("_list_empty_"): + dictionary[key[len("_list_empty_") :]] = [] + elif key.startswith("_tuple_empty_"): + dictionary[key[len("_tuple_empty_") :]] = () + elif key.startswith("_bs_"): + dictionary[key[len("_bs_") :]] = value.tobytes() # The following two elif stataments enable reading date and time from # v < 2 of HyperSpy's metadata specifications - elif key.startswith('_datetime_date'): + elif key.startswith("_datetime_date"): date_iso = datetime.date( - *ast.literal_eval(value[value.index("("):])).isoformat() + *ast.literal_eval(value[value.index("(") :]) + ).isoformat() dictionary[key.replace("_datetime_", "")] = date_iso - elif key.startswith('_datetime_time'): + elif key.startswith("_datetime_time"): date_iso = datetime.time( - *ast.literal_eval(value[value.index("("):])).isoformat() + *ast.literal_eval(value[value.index("(") :]) + ).isoformat() dictionary[key.replace("_datetime_", "")] = date_iso else: dictionary[key] = value if not isinstance(group, self.Dataset): for key in group.keys(): - if key.startswith('_sig_'): - dictionary[key] = ( - self.group2signaldict( - group[key])) + if key.startswith("_sig_"): + dictionary[key] = self.group2signaldict(group[key]) elif isinstance(group[key], self.Dataset): dat = group[key] kn = key if key.startswith("_list_"): - if (h5py.check_string_dtype(dat.dtype) and - hasattr(dat, 'asstr')): + if h5py.check_string_dtype(dat.dtype) and hasattr(dat, "asstr"): # h5py 3.0 and newer # https://docs.h5py.org/en/3.0.0/strings.html dat = dat.asstr()[:] @@ -528,28 +546,32 @@ def _group2dict(self, group, dictionary=None, lazy=False): else: ans = np.array(dat) dictionary[kn] = ans - elif key.startswith('_hspy_AxesManager_'): + elif key.startswith("_hspy_AxesManager_"): dictionary[key] = [ - i for k, i in sorted( - iter(self._group2dict(group[key], lazy=lazy).items()))] - elif key.startswith('_list_'): - dictionary[key[7 + key[6:].find('_'):]] = \ - [i for k, i in sorted(iter( - self._group2dict( - group[key], lazy=lazy).items() - ))] - elif key.startswith('_tuple_'): - dictionary[key[8 + key[7:].find('_'):]] = tuple( - [i for k, i in sorted(iter( - self._group2dict( - group[key], lazy=lazy).items() - ))]) + i + for k, i in sorted( + iter(self._group2dict(group[key], lazy=lazy).items()) + ) + ] + elif key.startswith("_list_"): + dictionary[key[7 + key[6:].find("_") :]] = [ + i + for k, i in sorted( + iter(self._group2dict(group[key], lazy=lazy).items()) + ) + ] + elif key.startswith("_tuple_"): + dictionary[key[8 + key[7:].find("_") :]] = tuple( + [ + i + for k, i in sorted( + iter(self._group2dict(group[key], lazy=lazy).items()) + ) + ] + ) else: dictionary[key] = {} - self._group2dict( - group[key], - dictionary[key], - lazy=lazy) + self._group2dict(group[key], dictionary[key], lazy=lazy) return dictionary @@ -559,6 +581,7 @@ class HierarchicalWriter: An object used to simplify and organize the process for writing a Hierarchical signal, such as hspy/zspy format. 
""" + target_size = 1e6 def __init__(self, file, signal, group, **kwds): @@ -586,17 +609,14 @@ def __init__(self, file, signal, group, **kwds): @staticmethod def _get_object_dset(*args, **kwargs): # pragma: no cover - raise NotImplementedError( - "This method must be implemented by subclasses.") + raise NotImplementedError("This method must be implemented by subclasses.") @staticmethod def _store_data(*arg): # pragma: no cover - raise NotImplementedError( - "This method must be implemented by subclasses.") + raise NotImplementedError("This method must be implemented by subclasses.") @classmethod - def overwrite_dataset(cls, group, data, key, signal_axes=None, - chunks=None, **kwds): + def overwrite_dataset(cls, group, data, key, signal_axes=None, chunks=None, **kwds): """ Overwrites a dataset into a hierarchical structure following the h5py API. @@ -630,23 +650,26 @@ def overwrite_dataset(cls, group, data, key, signal_axes=None, # optimise the chunking to contain at least one signal per chunk chunks = get_signal_chunks( data.shape, data.dtype, signal_axes, cls.target_size - ) - if np.issubdtype(data.dtype, np.dtype('U')): + ) + if np.issubdtype(data.dtype, np.dtype("U")): # Saving numpy unicode type is not supported in h5py - data = data.astype(np.dtype('S')) + data = data.astype(np.dtype("S")) - if data.dtype == np.dtype('O'): + if data.dtype == np.dtype("O"): dset = cls._get_object_dset(group, data, key, chunks, **kwds) else: got_data = False while not got_data: try: these_kwds = kwds.copy() - these_kwds.update(dict(shape=data.shape, - dtype=data.dtype, - exact=True, - chunks=chunks, - )) + these_kwds.update( + dict( + shape=data.shape, + dtype=data.dtype, + exact=True, + chunks=chunks, + ) + ) # If chunks is True, the `chunks` attribute of `dset` below # contains the chunk shape guessed by h5py @@ -658,30 +681,31 @@ def overwrite_dataset(cls, group, data, key, signal_axes=None, del group[key] _logger.info(f"Chunks used for saving: {chunks}") - if data.dtype == np.dtype('O'): + if data.dtype == np.dtype("O"): new_data = np.empty(shape=data.shape, dtype=object) shapes = np.empty(shape=data.shape, dtype=object) for i in np.ndindex(data.shape): new_data[i] = data[i].ravel() shapes[i] = np.array(data[i].shape) - shape_dset = cls._get_object_dset(group, shapes, "ragged_shapes", shapes.shape, **kwds) - cls._store_data(shapes, shape_dset, group, 'ragged_shapes', chunks=shapes.shape) + shape_dset = cls._get_object_dset( + group, shapes, "ragged_shapes", shapes.shape, **kwds + ) + cls._store_data( + shapes, shape_dset, group, "ragged_shapes", chunks=shapes.shape + ) cls._store_data(new_data, dset, group, key, chunks) else: cls._store_data(data, dset, group, key, chunks) def write(self): - self.write_signal(self.signal, - self.group, - **self.kwds) + self.write_signal(self.signal, self.group, **self.kwds) - def write_signal(self, signal, group, write_dataset=True, chunks=None, - **kwds): + def write_signal(self, signal, group, write_dataset=True, chunks=None, **kwds): "Writes a hyperspy signal to a hdf5 group" group.attrs.update(signal["package_info"]) for i, axis_dict in enumerate(signal["axes"]): - group_name = f'axis-{i}' + group_name = f"axis-{i}" # delete existing group in case the file have been open in 'a' mode # and we are saving a different type of axis, to avoid having # incompatible axis attributes from previously saved axis. 
@@ -697,30 +721,30 @@ def write_signal(self, signal, group, write_dataset=True, chunks=None, self.overwrite_dataset( group, signal["data"], - 'data', - signal_axes=[idx for idx, axis in enumerate(signal["axes"]) if not axis["navigate"]], + "data", + signal_axes=[ + idx + for idx, axis in enumerate(signal["axes"]) + if not axis["navigate"] + ], chunks=chunks, - **kwds - ) + **kwds, + ) if default_version < Version("1.2"): - metadata_dict["_internal_parameters"] = \ - metadata_dict.pop("_HyperSpy") + metadata_dict["_internal_parameters"] = metadata_dict.pop("_HyperSpy") self.dict2group(metadata_dict, mapped_par, **kwds) original_par = group.require_group("original_metadata") - self.dict2group(signal["original_metadata"], original_par, - **kwds) - learning_results = group.require_group('learning_results') - self.dict2group(signal["learning_results"], - learning_results, **kwds) + self.dict2group(signal["original_metadata"], original_par, **kwds) + learning_results = group.require_group("learning_results") + self.dict2group(signal["learning_results"], learning_results, **kwds) if signal["models"]: - model_group = self.file.require_group('Analysis/models') - self.dict2group(signal["models"], - model_group, **kwds) + model_group = self.file.require_group("Analysis/models") + self.dict2group(signal["models"], model_group, **kwds) for model in model_group.values(): - model.attrs['_signal'] = group.name + model.attrs["_signal"] = group.name def dict2group(self, dictionary, group, **kwds): "Recursive writer of dicts and signals" @@ -732,14 +756,14 @@ def dict2group(self, dictionary, group, **kwds): self.overwrite_dataset(group, value, key, **kwds) elif value is None: - group.attrs[key] = '_None_' + group.attrs[key] = "_None_" elif isinstance(value, bytes): try: # binary string if has any null characters (otherwise not # supported by hdf5) - value.index(b'\x00') - group.attrs['_bs_' + key] = np.void(value) + value.index(b"\x00") + group.attrs["_bs_" + key] = np.void(value) except ValueError: group.attrs[key] = value.decode() @@ -748,15 +772,15 @@ def dict2group(self, dictionary, group, **kwds): elif isinstance(value, list): if len(value): - self.parse_structure(key, group, value, '_list_', **kwds) + self.parse_structure(key, group, value, "_list_", **kwds) else: - group.attrs['_list_empty_' + key] = '_None_' + group.attrs["_list_empty_" + key] = "_None_" elif isinstance(value, tuple): if len(value): - self.parse_structure(key, group, value, '_tuple_', **kwds) + self.parse_structure(key, group, value, "_tuple_", **kwds) else: - group.attrs['_tuple_empty_' + key] = '_None_' + group.attrs["_tuple_empty_" + key] = "_None_" else: try: @@ -764,7 +788,8 @@ def dict2group(self, dictionary, group, **kwds): except BaseException: _logger.exception( "The writer could not write the following " - f"information in the file: {key} : {value}") + f"information in the file: {key} : {value}" + ) def parse_structure(self, key, group, value, _type, **kwds): try: @@ -778,22 +803,20 @@ def parse_structure(self, key, group, value, _type, **kwds): except ValueError: tmp = np.array([[0]]) - if tmp.dtype == np.dtype('O') or tmp.ndim != 1: - self.dict2group(dict(zip( - [str(i) for i in range(len(value))], value)), - group.require_group(_type + str(len(value)) + '_' + key), - **kwds) + if tmp.dtype == np.dtype("O") or tmp.ndim != 1: + self.dict2group( + dict(zip([str(i) for i in range(len(value))], value)), + group.require_group(_type + str(len(value)) + "_" + key), + **kwds, + ) elif tmp.dtype.type is np.unicode_: if _type + key 
in group: del group[_type + key] - group.create_dataset(_type + key, - shape=tmp.shape, - **self.unicode_kwds, - **kwds) + group.create_dataset( + _type + key, shape=tmp.shape, **self.unicode_kwds, **kwds + ) group[_type + key][:] = tmp[:] else: if _type + key in group: del group[_type + key] - group.create_dataset(_type + key, - data=tmp, - **kwds) + group.create_dataset(_type + key, data=tmp, **kwds) diff --git a/rsciio/blockfile/api.py b/rsciio/blockfile/api.py index c5e965d57..e0639f994 100644 --- a/rsciio/blockfile/api.py +++ b/rsciio/blockfile/api.py @@ -238,7 +238,7 @@ def file_reader(filename, endianess="<", mmap_mode=None, lazy=False, **kwds): if header["SDP"]: SDP = 100.0 / header["SDP"] else: - SDP = 1 # Set default scale to 1 + SDP = 1 # Set default scale to 1 original_metadata = {"blockfile_header": header} # Get data: @@ -367,7 +367,7 @@ def file_writer(filename, signal, **kwds): elif scale_strategy == "minmax": minimum = signal["data"].min() maximum = signal["data"].max() - if signal['attributes']['_lazy']: + if signal["attributes"]["_lazy"]: minimum, maximum = dask.compute(minimum, maximum) original_scale = (minimum, maximum) elif scale_strategy == "crop": @@ -422,7 +422,11 @@ def file_writer(filename, signal, **kwds): file_location = f.tell() if scale_strategy is not None: - array_data = rescale_intensity(signal["data"], in_range=original_scale, out_range=np.uint8,) + array_data = rescale_intensity( + signal["data"], + in_range=original_scale, + out_range=np.uint8, + ) else: array_data = signal["data"] array_data = array_data.astype(endianess + "u1") @@ -442,7 +446,7 @@ def file_writer(filename, signal, **kwds): ) file_memmap["MAGIC"] = magics file_memmap["ID"] = ids - if signal['attributes']['_lazy']: + if signal["attributes"]["_lazy"]: cm = ProgressBar if show_progressbar else dummy_context_manager with cm(): array_data.store(file_memmap["IMG"]) diff --git a/rsciio/bruker/api.py b/rsciio/bruker/api.py index 665969ec0..cef795092 100644 --- a/rsciio/bruker/api.py +++ b/rsciio/bruker/api.py @@ -53,12 +53,15 @@ try: from rsciio.bruker import unbcf_fast + fast_unbcf = True _logger.info("The fast cython based bcf unpacking library were found") except ImportError: # pragma: no cover fast_unbcf = False - _logger.info("""unbcf_fast library is not present... -Falling back to slow python only backend.""") + _logger.info( + """unbcf_fast library is not present... +Falling back to slow python only backend.""" + ) # define re with two capturing groups with comma in between # firstgroup looks for numeric value after (the '>' char) with or @@ -66,7 +69,7 @@ # closing <\tag> (the '<' char); '([Ee]-?\d*)' part (optionally a third group) # checks for scientific notation (e.g. 8,843E-7 -> 'E-7'); # compiled pattern is binary, as raw xml string is binary.: -fix_dec_patterns = re.compile(b'(>-?\\d+),(\\d*([Ee]-?\\d*)?<)') +fix_dec_patterns = re.compile(b"(>-?\\d+),(\\d*([Ee]-?\\d*)?<)") class Container(object): @@ -90,14 +93,24 @@ class SFSTreeItem(object): def __init__(self, item_raw_string, parent): self.sfs = parent - self._pointer_to_pointer_table, self.size, create_time, \ - mod_time, some_time, self.permissions, \ - self.parent, _, self.is_dir, _, name, _ = strct_unp( - ' 1: next_chunk = self._pointer_to_pointer_table temp_string = io.BytesIO() for dummy1 in range(n_of_chunks): fn.seek(self.sfs.chunksize * next_chunk + 0x118) - next_chunk = strct_unp(' 0: @@ -196,7 +211,7 @@ def _iter_read_chunks(self, first=0): chunks -- the number of chunks to read. 
(default False) """ last = self.size_in_chunks - with open(self.sfs.filename, 'rb') as fn: + with open(self.sfs.filename, "rb") as fn: for idx in range(first, last - 1): fn.seek(self.pointers[idx]) yield fn.read(self.sfs.usable_chunk) @@ -208,7 +223,7 @@ def _iter_read_chunks(self, first=0): yield fn.read(self.sfs.usable_chunk) def setup_compression_metadata(self): - """ parse and setup the number of compression chunks + """parse and setup the number of compression chunks and uncompressed chunk size as class attributes. @@ -216,16 +231,18 @@ def setup_compression_metadata(self): self.uncompressed_blk_size, self.no_of_compr_blk """ - with open(self.sfs.filename, 'rb') as fn: + with open(self.sfs.filename, "rb") as fn: fn.seek(self.pointers[0]) # AACS signature, uncompressed size, undef var, number of blocks - aacs, uc_size, _, n_of_blocks = strct_unp('
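The Bruker changes above (and the SFS-chunk code cut off at the end of this excerpt) are pure reformatting of the reader internals. For orientation, a hypothetical usage sketch of the plugin's public entry point; the filename is a placeholder and the dictionary keys follow the RosettaSciIO signal-dictionary convention used elsewhere in this diff:

```python
from rsciio.bruker.api import file_reader

# "spectrum_image.bcf" is a placeholder path; file_reader returns a list of
# signal dictionaries with "data", "axes", "metadata" and "original_metadata".
signals = file_reader("spectrum_image.bcf")
for sig in signals:
    print(sig["data"].shape, sig["metadata"].get("General", {}).get("title"))
```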