Skip to content

Commit

Permalink
Merge pull request #2874 from jat255/ENH_file_io_metadata
Browse files Browse the repository at this point in the history
Add file reading metadata to Signals
  • Loading branch information
ericpre committed Apr 2, 2022
2 parents a1b206b + b4198af commit 4d15a15
Show file tree
Hide file tree
Showing 15 changed files with 306 additions and 15 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ hyperspy/misc/etc/test_compilers.obj
*nbc
*nbi
hyperspy/tests/io/edax_files.zip
.python-version

### Code ###
# Visual Studio Code - https://code.visualstudio.com/
Expand Down
53 changes: 53 additions & 0 deletions doc/user_guide/metadata_structure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ in the following sections of this chapter.
│ ├── y (mm)
│ └── z (mm)
├── General
│ ├── FileIO
│ │ ├── 0
│ │ │ ├── operation
│ │ │ ├── hyperspy_version
│ │ │ ├── io_plugin
│ │ │ └── timestamp
│ │ ├── 1
│ │ │ ├── operation
│ │ │ ├── hyperspy_version
│ │ │ ├── io_plugin
│ │ │ └── timestamp
│ │ └── ...
│ ├── authors
│ ├── date
│ ├── doi
Expand Down Expand Up @@ -155,6 +167,47 @@ notes

Notes about the data.

.. _general-file-metadata:

FileIO
------

Contains information about the software packages and versions used any time the
Signal was created by reading the original data format (added in HyperSpy
v1.7) or saved by one of HyperSpy's IO tools. If the signal is saved to one
of the ``hspy``, ``zspy`` or ``nxs`` formats, the metadata within the ``FileIO``
node will represent a history of the software configurations used when the
conversion was made from the proprietary/original format to HyperSpy's
format, as well as any time the signal was subsequently loaded from and saved
to disk. Under the ``FileIO`` node will be one or more nodes named ``0``,
``1``, ``2``, etc., each with the following structure:

operation
type: Str

This value will be either ``"load"`` or ``"save"`` to indicate whether
this node represents a load-from-disk or a save-to-disk operation, respectively.

hyperspy_version
type: Str

The version number of the HyperSpy software used to extract a Signal from
this data file or save this Signal to disk.

io_plugin
type: Str

The specific input/output plugin used to originally extract this data file
into a HyperSpy Signal or save it to disk -- will be of the form
``hyperspy.io_plugins.<plugin_name>``.

timestamp
type: Str

The timestamp of the computer running the data loading/saving process (in a
timezone-aware format). The timestamp will be in ISO 8601 format, as
produced by the ``isoformat()`` method of the ``datetime`` class.

Acquisition_instrument
======================

Expand Down
23 changes: 23 additions & 0 deletions hyperspy/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
from inspect import isgenerator
from pathlib import Path
from collections.abc import MutableMapping
from datetime import datetime

from hyperspy import __version__ as hs_version
from hyperspy.drawing.marker import markers_metadata_dict_to_markers
from hyperspy.exceptions import VisibleDeprecationWarning
from hyperspy.misc.io.tools import ensure_directory
Expand Down Expand Up @@ -538,6 +540,7 @@ def load_with_reader(
if signal_type is not None:
signal_dict['metadata']["Signal"]['signal_type'] = signal_type
signal = dict2signal(signal_dict, lazy=lazy)
signal = _add_file_load_save_metadata('load', signal, reader)
path = _parse_path(filename)
folder, filename = os.path.split(os.path.abspath(path))
filename, extension = os.path.splitext(filename)
Expand Down Expand Up @@ -843,6 +846,7 @@ def save(filename, signal, overwrite=None, **kwds):
if write:
# Pass as a string for now, pathlib.Path not
# properly supported in io_plugins
signal = _add_file_load_save_metadata('save', signal, writer)
if not isinstance(filename, MutableMapping):
writer.file_writer(str(filename), signal, **kwds)
_logger.info(f'{filename} was created')
Expand All @@ -856,3 +860,22 @@ def save(filename, signal, overwrite=None, **kwds):
signal.tmp_parameters.set_item('folder', file.parent)
signal.tmp_parameters.set_item('filename', file.stem)
signal.tmp_parameters.set_item('extension', extension)


def _add_file_load_save_metadata(operation, signal, io_plugin):
    """Append a load/save record to ``signal.metadata.General.FileIO``.

    The record is stored under the next free integer index, i.e. one more
    than the largest index already present, or ``0`` when there is none.

    Parameters
    ----------
    operation : str
        Value stored under ``operation``; the callers in this module pass
        ``'load'`` or ``'save'``.
    signal : object
        Object whose ``metadata`` attribute provides ``get_item`` and
        ``set_item``. Its metadata is modified in place.
    io_plugin : module
        The IO plugin module that performed the operation. Its dotted
        module name is stored under ``io_plugin``.

    Returns
    -------
    object
        The same ``signal`` object that was passed in.
    """
    mdata_dict = {
        'operation': operation,
        # ``__name__`` is defined on every module, whereas ``__loader__.name``
        # only exists for some loader types (and ``__loader__`` may be None).
        'io_plugin': io_plugin.__name__,
        'hyperspy_version': hs_version,
        # local time with UTC offset, ISO 8601 formatted
        'timestamp': datetime.now().astimezone().isoformat(),
    }

    # Find the next free index under General.FileIO. Keys may carry a
    # ``Number_`` prefix, which is stripped before parsing.
    next_index = 0
    for key in signal.metadata.get_item('General.FileIO', {}).keys():
        try:
            index = int(key.replace('Number_', ''))
        except ValueError:
            # An unexpected, non-numeric entry must not prevent the file
            # from being loaded or saved; it is ignored for numbering.
            continue
        next_index = max(next_index, index + 1)

    signal.metadata.set_item(f"General.FileIO.{next_index}", mdata_dict)

    return signal
12 changes: 12 additions & 0 deletions hyperspy/tests/io/test_blockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,18 @@ def test_save_load_cycle(save_path, convert_units):
sig_reload.metadata.General.original_filename = (
signal.metadata.General.original_filename
)
# assert file reading tests here, then delete so we can compare
# entire metadata structure at once:
plugin = 'hyperspy.io_plugins.blockfile'
assert signal.metadata.General.FileIO.Number_0.operation == 'load'
assert signal.metadata.General.FileIO.Number_0.io_plugin == plugin
assert signal.metadata.General.FileIO.Number_1.operation == 'save'
assert signal.metadata.General.FileIO.Number_1.io_plugin == plugin
assert sig_reload.metadata.General.FileIO.Number_0.operation == 'load'
assert sig_reload.metadata.General.FileIO.Number_0.io_plugin == plugin
del signal.metadata.General.FileIO
del sig_reload.metadata.General.FileIO

assert_deep_almost_equal(
signal.metadata.as_dictionary(), sig_reload.metadata.as_dictionary()
)
Expand Down
12 changes: 11 additions & 1 deletion hyperspy/tests/io/test_bruker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import pytest

from hyperspy import __version__ as hs_version
from hyperspy import signals
from hyperspy.io import load
from hyperspy.misc.test_utils import assert_deep_almost_equal
Expand Down Expand Up @@ -129,7 +130,14 @@ def test_hyperspy_wrap():
'30x30_instructively_packed_16bit_compressed.bcf',
'title': 'EDX',
'date': '2018-10-04',
'time': '13:02:07'},
'time': '13:02:07',
'FileIO': {
'0': {
'operation': 'load',
'hyperspy_version': hs_version,
'io_plugin': 'hyperspy.io_plugins.bruker',
}
}},
'Sample': {
'name': 'chevkinite',
'elements': ['Al', 'C', 'Ca', 'Ce', 'Fe', 'Gd', 'K', 'Mg', 'Na',
Expand All @@ -153,6 +161,8 @@ def test_hyperspy_wrap():
with open(filename_omd) as fn:
# original_metadata:
omd_ref = json.load(fn)
# delete FileIO timestamp since it's runtime dependent
del hype.metadata.General.FileIO.Number_0.timestamp
assert_deep_almost_equal(hype.metadata.as_dictionary(), md_ref)
assert_deep_almost_equal(hype.original_metadata.as_dictionary(), omd_ref)
assert hype.metadata.General.date == "2018-10-04"
Expand Down
24 changes: 22 additions & 2 deletions hyperspy/tests/io/test_dm3.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import numpy as np
import pytest

from hyperspy import __version__ as hs_version
from hyperspy.io import load
from hyperspy.io_plugins.digital_micrograph import (DigitalMicrographReader,
ImageObject)
Expand Down Expand Up @@ -485,7 +486,16 @@ def test_multi_signal():
'original_filename': 'multi_signal.dm3',
'date': '2019-12-10',
'time': '15:32:41',
'authors': 'JohnDoe'},
'authors': 'JohnDoe',
'FileIO': {
'0': {
'operation': 'load',
'hyperspy_version': hs_version,
'io_plugin':
'hyperspy.io_plugins.digital_micrograph'
}
}
},
'Signal': {'signal_type': '',
'quantity': 'Intensity',
'Noise_properties': {
Expand Down Expand Up @@ -519,7 +529,14 @@ def test_multi_signal():
'original_axes_manager': None}},
'General': {
'title': 'Plot',
'original_filename': 'multi_signal.dm3'},
'original_filename': 'multi_signal.dm3',
'FileIO': {
'0': {
'operation': 'load',
'hyperspy_version': hs_version,
'io_plugin': 'hyperspy.io_plugins.digital_micrograph'
}
}},
'Signal': {
'signal_type': '',
'quantity': 'Intensity',
Expand All @@ -528,6 +545,9 @@ def test_multi_signal():
'gain_factor': 1.0,
'gain_offset': 0.0}}}
}
# remove timestamps from metadata since these are runtime dependent
del s1.metadata.General.FileIO.Number_0.timestamp
del s2.metadata.General.FileIO.Number_0.timestamp

# make sure the metadata dictionaries are as we expect
assert s1.metadata.as_dictionary() == s1_md_truth
Expand Down
13 changes: 12 additions & 1 deletion hyperspy/tests/io/test_emd.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import tempfile
import shutil

from hyperspy import __version__ as hs_version
from hyperspy.io import load
from hyperspy.misc.test_utils import assert_deep_almost_equal
from hyperspy.signals import (BaseSignal, EDSTEMSpectrum, Signal1D, Signal2D,
Expand Down Expand Up @@ -341,7 +342,15 @@ def test_fei_emd_image(self, lazy):
'date': '2017-03-06',
'time': '09:56:41',
'time_zone': 'BST',
'title': 'HAADF'},
'title': 'HAADF',
'FileIO': {
'0': {
'operation': 'load',
'hyperspy_version': hs_version,
'io_plugin': 'hyperspy.io_plugins.emd'
}
}
},
'Signal': {'signal_type': ''},
'_HyperSpy': {'Folding': {'original_axes_manager': None,
'original_shape': None,
Expand All @@ -358,6 +367,8 @@ def test_fei_emd_image(self, lazy):

signal = load(os.path.join(self.fei_files_path, 'fei_emd_image.emd'),
lazy=lazy)
# delete timestamp from metadata since it's runtime dependent
del signal.metadata.General.FileIO.Number_0.timestamp
if lazy:
assert signal._lazy
signal.compute(close_file=True)
Expand Down
13 changes: 12 additions & 1 deletion hyperspy/tests/io/test_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import numpy as np
import pytest

from hyperspy import __version__ as hs_version
from hyperspy.io import load
from hyperspy.axes import DataAxis, UniformDataAxis, FunctionalDataAxis, AxesManager
from hyperspy.signal import BaseSignal
Expand Down Expand Up @@ -370,7 +371,15 @@ def test_metadata_update_to_v3_1(self):
'General': {'date': '2014-07-09',
'original_filename': 'test_diffraction_pattern.dm3',
'time': '18:56:37',
'title': 'test_diffraction_pattern'},
'title': 'test_diffraction_pattern',
'FileIO': {
'0': {
'operation': 'load',
'hyperspy_version': hs_version,
'io_plugin': 'hyperspy.io_plugins.hspy'
}
}
},
'Signal': {'Noise_properties': {'Variance_linear_model': {'gain_factor': 1.0,
'gain_offset': 0.0}},
'quantity': 'Intensity',
Expand All @@ -380,6 +389,8 @@ def test_metadata_update_to_v3_1(self):
'signal_unfolded': False,
'unfolded': False}}}
s = load(my_path / "hdf5_files" / 'example2_v3.1.hspy')
# delete timestamp from metadata since it's runtime dependent
del s.metadata.General.FileIO.Number_0.timestamp
assert_deep_almost_equal(s.metadata.as_dictionary(), md)


Expand Down
47 changes: 47 additions & 0 deletions hyperspy/tests/io/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from hyperspy.signals import Signal1D
from hyperspy.axes import DataAxis
from hyperspy.io_plugins import io_plugins
from hyperspy import __version__ as hs_version
from hyperspy.misc.test_utils import assert_deep_almost_equal


FULLFILENAME = Path(__file__).resolve().parent.joinpath("test_io_overwriting.hspy")
Expand Down Expand Up @@ -260,3 +262,48 @@ def test_load_original_metadata():

t = hs.load(Path(dirpath, "temp.hspy"), load_original_metadata=False)
assert t.original_metadata.as_dictionary() == {}


def test_load_save_filereader_metadata():
    """Check that ``General.FileIO`` records accumulate over load/save/load.

    Loading an msa file must create record 0, saving must append record 1,
    and loading the saved file must append record 2. Timestamps are removed
    before comparing because they depend on when the test runs.
    """
    steps = (
        ('0', 'hyperspy.io_plugins.msa', 'load'),
        ('1', 'hyperspy.io_plugins.hspy', 'save'),
        ('2', 'hyperspy.io_plugins.hspy', 'load'),
    )
    expected = {
        key: {
            'io_plugin': plugin,
            'operation': operation,
            'hyperspy_version': hs_version,
        }
        for key, plugin, operation in steps
    }

    msa_file = os.path.join(
        os.path.dirname(__file__), "msa_files", "example1.msa")
    s = hs.load(msa_file)
    load_record = s.metadata.General.FileIO.Number_0
    assert load_record.io_plugin == 'hyperspy.io_plugins.msa'
    assert load_record.operation == 'load'
    assert load_record.hyperspy_version == hs_version

    with tempfile.TemporaryDirectory() as dirpath:
        s.save(os.path.join(dirpath, "temp"))

        # Original signal: the load record plus the save record just added.
        saved_io = s.metadata.General.FileIO
        for node_name in ("Number_0", "Number_1"):
            delattr(getattr(saved_io, node_name), "timestamp")
        assert saved_io.Number_0.as_dictionary() == expected['0']
        assert saved_io.Number_1.as_dictionary() == expected['1']

        # Reloaded signal: the persisted history plus its own load record.
        t = hs.load(Path(dirpath, "temp.hspy"))
        reloaded_io = t.metadata.General.FileIO
        for node_name in ("Number_0", "Number_1", "Number_2"):
            delattr(getattr(reloaded_io, node_name), "timestamp")
        assert reloaded_io.as_dictionary() == expected
17 changes: 15 additions & 2 deletions hyperspy/tests/io/test_jeol.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import numpy as np

import hyperspy.api as hs
from hyperspy import __version__ as hs_version


def teardown_module(module):
Expand Down Expand Up @@ -257,11 +258,23 @@ def test_load_eds_file(filename_as_string):
assert axis.offset == -0.00176612
assert axis.scale == 0.0100004

# delete timestamp from metadata since it's runtime dependent
del s.metadata.General.FileIO.Number_0.timestamp

md_dict = s.metadata.as_dictionary()
assert md_dict['General'] == {'original_filename': 'met03.EDS',
'time': '14:14:51',
'date':'2018-06-25',
'title': 'EDX'}
'date': '2018-06-25',
'title': 'EDX',
'FileIO': {
'0': {
'operation': 'load',
'hyperspy_version': hs_version,
'io_plugin':
'hyperspy.io_plugins.jeol'
}
}
}
TEM_dict = md_dict['Acquisition_instrument']['TEM']
assert TEM_dict == {'beam_energy': 200.0,
'Detector': {'EDS': {'azimuth_angle': 90.0,
Expand Down

0 comments on commit 4d15a15

Please sign in to comment.