Merge pull request #2910 from sem-geologist/patch-2

FIX-ENH update cutoff_at_kV behavior for bruker.py reader
hyperspy · Mar 29, 2022 · cbb6be0 · cbb6be0
2 parents c7dc06c + 3c54e79
commit cbb6be0
Show file tree

Hide file tree

Showing 6 changed files with 85 additions and 30 deletions.
diff --git a/doc/user_guide/io.rst b/doc/user_guide/io.rst
@@ -1049,39 +1049,49 @@ Extra loading arguments
   The underlying method of downsampling is unchangeable: sum. Differently than
   ``block_reduce`` from skimage.measure it is memory efficient (does not creates
   intermediate arrays, works inplace).
-- ``cutoff_at_kV`` : if set (can be int or float >= 0) can be used either to crop
-  or enlarge energy (or channels) range at max values (default None).
+- ``cutoff_at_kV`` : if set (can be None, int, float (kV), one of 'zealous'
+  or 'auto') can be used either to crop or enlarge energy (or number of
+  channels) range at max values. It can be used to conserve memory or enlarge
+  the range if needed to match the size of another file. Default value is None
+  (which does not influence size). Numerical values should be in kV.
+  'zealous' truncates to the last non zero channel (this option
+  should not be used for stacks, as low beam current EDS can have different
+  last non zero channel per slice). 'auto' truncates channels to SEM/TEM
+  acceleration voltage or energy at last channel, whichever is smaller.
+  If the hv info is missing or hv is off (0 kV), it falls back to the
+  full channel range.
 
 Example of loading reduced (downsampled, and with energy range cropped)
 "spectrum only" data from bcf (original shape: 80keV EDS range (4096 channels),
-100x75 pixels):
+100x75 pixels; SEM acceleration voltage: 20kV):
 
 .. code-block:: python
 
     >>> hs.load("sample80kv.bcf", select_type='spectrum', downsample=2, cutoff_at_kV=10)
     <EDSSEMSpectrum, title: EDX, dimensions: (50, 38|595)>
 
-load the same file without extra arguments:
+load the same file, limiting the array size to the SEM acceleration voltage:
 
 .. code-block:: python
 
-    >>> hs.load("sample80kv.bcf")
+    >>> hs.load("sample80kv.bcf", cutoff_at_kV='auto')
     [<Signal2D, title: BSE, dimensions: (|100, 75)>,
     <Signal2D, title: SE, dimensions: (|100, 75)>,
-    <EDSSEMSpectrum, title: EDX, dimensions: (100, 75|1095)>]
+    <EDSSEMSpectrum, title: EDX, dimensions: (100, 75|1024)>]
 
 The loaded array energy dimension can by forced to be larger than the data
 recorded by setting the 'cutoff_at_kV' kwarg to higher value:
 
 .. code-block:: python
 
-    >>> hs.load("sample80kv.bcf", cutoff_at_kV=80)
+    >>> hs.load("sample80kv.bcf", cutoff_at_kV=60)
     [<Signal2D, title: BSE, dimensions: (|100, 75)>,
     <Signal2D, title: SE, dimensions: (|100, 75)>,
-    <EDSSEMSpectrum, title: EDX, dimensions: (100, 75|4096)>]
+    <EDSSEMSpectrum, title: EDX, dimensions: (100, 75|3072)>]
 
-Note that setting downsample to >1 currently locks out using SEM imagery
-as navigator in the plotting.
+Loading without setting the cutoff_at_kV value would return data with all 4096
+channels. Note that setting downsample to >1 currently locks out using SEM
+images for navigation in the plotting.
 
 .. _spx-format:
 

diff --git a/hyperspy/io.py b/hyperspy/io.py
@@ -236,10 +236,18 @@ def load(filenames=None,
         pixel. This allows to improve signal and conserve the memory with the
         cost of lower resolution.
     cutoff_at_kV : None, int, float, optional
-        For Bruker bcf files, if set to numerical (default is None),
-        bcf is parsed into array with depth cutoff at coresponding given energy.
+        For Bruker bcf and Jeol files, if set to a number (default is None), the
+        hypermap is parsed into an array with depth cut off at the set energy value.
         This allows to conserve the memory by cutting-off unused spectral
         tails, or force enlargement of the spectra size.
+        Bruker bcf reader accepts additional values for semi-automatic cutoff.
+        "zealous" value truncates to the last non zero channel (this option
+        should not be used for stacks, as low beam current EDS can have different
+        last non zero channel per slice).
+        "auto" truncates channels to SEM/TEM acceleration voltage or
+        energy at last channel, whichever is smaller.
+        If the hv info is missing or hv is off (0 kV), it falls back to the
+        full channel range.
     select_type : 'spectrum_image', 'image', 'single_spectrum', None, optional
         If None (default), all data are loaded.
         For Bruker bcf and Velox emd files: if one of 'spectrum_image', 'image'

diff --git a/hyperspy/io_plugins/bruker.py b/hyperspy/io_plugins/bruker.py
@@ -555,6 +555,10 @@ def __init__(self, spectrum):
         # main data:
         self.data = np.fromstring(spectrum.find('./Channels').text,
                                   dtype='Q', sep=",")
+
+    def last_non_zero_channel(self):
+        """return index of last nonzero channel"""
+        return self.data.nonzero()[0][-1]
 
     def energy_to_channel(self, energy, kV=True):
         """ convert energy to channel index,
@@ -581,9 +585,10 @@ class HyperHeader(object):
     If Bcf is version 2, the bcf can contain stacks
     of hypermaps - thus header part  can contain multiply sum eds spectras
     and it's metadata per hypermap slice which can be selected using index.
-    Bcf can record number of imagery from different
-    imagining detectors (BSE, SEI, ARGUS, etc...): access to imagery
-    is throught image index.
+    Bcf can record a number of images from different single-dimensional value
+    detectors (BSE, SEI, ARGUS, etc...). Images representing signals are
+    internally ordered and the right signal image can be accessed using the
+    image index (do not confuse with dataset index).
     """
 
     def __init__(self, xml_str, indexes, instrument=None):
@@ -754,24 +759,30 @@ def _set_sum_edx(self, root, indexes):
                 "./SpectrumData{0}/ClassInstance".format(str(i)))
             self.spectra_data[i] = EDXSpectrum(spec_node)
 
-    def estimate_map_channels(self, index=0):
-        """Estimate minimal size of energy axis so any spectra from any pixel
-        would not be truncated.
+    def get_consistent_min_channels(self, index=0):
+        """Estimate consistent minimal size of energy axis by comparing energy
+        at last recorded channel vs electron beam potential and return channel
+        number corresponding to least energy. This method is safe to use with
+        sliced datasets (consistent between slices) which were acquired using
+        the same electron potential.
 
         Parameters
         ----------
         index : int
-            Index of the map if multiply hypermaps are present in the same bcf.
+            Index of the map if multiple hypermaps are present in the same bcf.
 
         Returns
         -------
         optimal channel number
         """
-        bruker_hv_range = self.spectra_data[index].amplification / 1000
-        if self.hv >= bruker_hv_range:
-            return self.spectra_data[index].data.shape[0]
-        else:
+        eds_max_energy = self.spectra_data[index].amplification / 1000  # in kV
+        if hasattr(self, "hv") and (self.hv > 0) and (self.hv < eds_max_energy):
             return self.spectra_data[index].energy_to_channel(self.hv)
+        if (not hasattr(self, "hv")) or (self.hv == 0):
+            logging.warn('bcf header contains no node for electron beam '
+                         'voltage or such node is absent.\n'
+                         'Using full range of recorded channels.')
+        return self.spectra_data[index].data.shape[0]
 
     def estimate_map_depth(self, index=0, downsample=1, for_numpy=False):
         """Estimate minimal dtype of array using cumulative spectra
@@ -939,9 +950,12 @@ def parse_hypermap(self, index=None,
             skimage.measure, the parser populates reduced array by suming
             results of pixels, thus having lower memory requiriments. Default
             is 1.
-        cutoff_at_kV : None or float
-            Value in keV to truncate the array at. Helps reducing size of
-            array. Default is None.
+        cutoff_at_kV : None, float, int or str
+            Value or method to truncate the array at energy in kV. Helps reducing size of the returned
+            array. Default value is None (does not truncate). Numerical value should be in kV.
+            Two methods for automatic cutoff are available:
+              "zealous" - truncates to the last non zero channel (should not be used for stacks).
+              "auto" - truncates to hv of electron microscope (good for stacks if hv is consistent).
         lazy : bool
             It True, returns dask.array otherwise a numpy.array. Default is
             False.
@@ -953,14 +967,20 @@ def parse_hypermap(self, index=None,
         """
         if index is None:
             index = self.def_index
+
         if type(cutoff_at_kV) in (int, float):
             eds = self.header.spectra_data[index]
-            max_chan = eds.energy_to_channel(cutoff_at_kV)
-        else:
-            max_chan = self.header.estimate_map_channels(index=index)
+            n_channels = eds.energy_to_channel(cutoff_at_kV)
+        elif cutoff_at_kV == 'zealous':
+            n_channels = self.header.spectra_data[index].last_non_zero_channel() + 1
+        elif cutoff_at_kV == 'auto':
+            n_channels = self.header.get_consistent_min_channels(index=index)
+        else:  # None
+            n_channels = self.header.spectra_data[index].data.size
+
         shape = (ceil(self.header.image.height / downsample),
                  ceil(self.header.image.width / downsample),
-                 max_chan)
+                 n_channels)
         sfs_file = SFS_reader(self.filename)
         vrt_file_hand = sfs_file.get_file(
             'EDSDatabase/SpectrumData' + str(index))

diff --git a/hyperspy/tests/io/test_bruker.py b/hyperspy/tests/io/test_bruker.py
@@ -59,6 +59,20 @@ def test_load_16bit_reduced():
     assert str(hype.data.dtype)[0] == 'u'
 
 
+def test_load_16bit_cutoff_zealous():
+    filename = os.path.join(my_path, 'bruker_data', test_files[0])
+    print('testing downsampled 16bit bcf with cutoff_at_kV=zealous...')
+    hype = load(filename, cutoff_at_kV="zealous", select_type="spectrum_image")
+    assert hype.data.shape == (30, 30, 2048)
+
+
+def test_load_16bit_cutoff_auto():
+    filename = os.path.join(my_path, 'bruker_data', test_files[0])
+    print('testing downsampled 16bit bcf with cutoff_at_kV=auto...')
+    hype = load(filename, cutoff_at_kV="auto", select_type="spectrum_image")
+    assert hype.data.shape == (30, 30, 2048)
+
+
 def test_load_8bit():
     for bcffile in test_files[1:3]:
         filename = os.path.join(my_path, 'bruker_data', bcffile)

diff --git a/upcoming_changes/2898.bugfix.rst b/upcoming_changes/2898.bugfix.rst
@@ -0,0 +1 @@
+When loading Bruker Bcf, cutoff_at_kV=None does no cutoff
diff --git a/upcoming_changes/2910.api.rst b/upcoming_changes/2910.api.rst
@@ -0,0 +1,2 @@
+* When loading Bruker bcf, cutoff_at_kV=None (default) no longer applies an automatic cutoff.
+* New acceptable values "zealous" and "auto" do automatic cutoff.