# Import libraries for interactively working in the notebook

In [2]:
#Base modules
import sys
import os
from datetime import datetime as dati

#Basic 3rd party packages
import h5py
import numpy as np
import json

#GUI elements
import tkinter as Tk
from tkinter import filedialog

#For working with images
from PIL import Image
from matplotlib import cm
import matplotlib.pyplot as plt
from matplotlib_scalebar.scalebar import ScaleBar, SI_LENGTH_RECIPROCAL

#for generating some random numbers
import random

In [3]:
%matplotlib notebook

# Custom I/O module (`modules.io`) with helper functions for Velox EMD files

In [4]:
import modules.io as io
import importlib


In [13]:
importlib.reload(io)

<module 'modules.io' from '/Users/nielscautaerts/Documents/Projects/BigMax/3_Software/VeloxMeta/TEMMETA/modules/io.py'>

# Creating dummy hdf5 file

In [None]:
# Write a small throw-away HDF5 file ("example.hdf5" in the working directory)
# containing two groups, one dataset in each and a handful of attributes.
with h5py.File("example.hdf5", "w") as nf:
    g1 = nf.create_group("Group1")
    # the second group is created through a nested path
    g2 = nf.create_group("Group2/NestedGroup1")
    size = (100, 100)
    # random 100x100 array standing in for an image
    data = np.random.rand(*size)
    ds1 = g1.create_dataset("RandomImage", data = data)
    # a dataset does not have to be numeric: here it holds a single string
    ds2 = g2.create_dataset("RandomThing", data = "Random string")
    # attach attributes of different kinds (str, list, int) to groups and datasets
    g1.attrs["Attribute1"] = "Whatwhat"
    g1.attrs["Listatr"] = [0, 1, 2, 4]
    g2.attrs["Bla"] = 2
    g2.attrs["hmm"] = "Boo"
    ds1.attrs["What"] = "This is a dataset"
    ds2.attrs["teehee"] = "What is this"

# Visual file opening

In [6]:
f = io.open_emd_gui()

# CLI file opening

In [5]:
filename = os.path.expanduser("~/ownCloud/emdData&Reader/TestData/April2019/1520 EDS-HAADF 20190410.emd")

In [6]:
f = h5py.File(filename, 'r')

# Inspection of HDF5 node

In [7]:
help(io.scan_hdf5_node)

Help on function scan_hdf5_node in module modules.io:

scan_hdf5_node(hdf5_node, recursive: bool = True, full_path: bool = False, see_info: bool = True, tab_step: int = 4)
    Print the structure of an HDF5 node (can be root).
    
    Args:
        hdf5_node : The node to be investigated
        recursive (bool): if true it traverses all subgroups. If not it only prints top level of the current node.
        full_path (bool): print the entire group path of groups and datasets or only its name
        see_info (bool): print the array size and datatype next to datasets.
        tab_step (int): how many spaces to indent each level



In [8]:
io.scan_hdf5_node(f)

/
    Application
        Velox
             - DisplayLayout  ((1,), object)
    Data
        Image
            1c887212b79849869d14fd2b03db2481
                 - Data  ((512, 512, 1), float32)
                 - FrameLookupTable  ((1,), uint32)
                 - Metadata  ((60000, 1), uint8)
            515e962de04c4529b1e83a6c7ed565c7
                 - Data  ((512, 512, 1), float32)
                 - FrameLookupTable  ((1,), uint32)
                 - Metadata  ((60000, 1), uint8)
            84c474d76cea419399ee5eac5d9838ac
                 - Data  ((512, 512, 239), uint16)
                 - FrameLookupTable  ((239,), uint32)
                 - Metadata  ((60000, 239), uint8)
            ae260c6bc7ff47898f3321228d18ddbd
                 - Data  ((512, 512, 1), float32)
                 - FrameLookupTable  ((1,), uint32)
                 - Metadata  ((60000, 1), uint8)
        Line
             - fde442a54d7d4654b6386a1657c3629c  ((1,), object)
        Spectrum
            ac8fa

In [9]:
io.scan_hdf5_node(f["Operations"], full_path = True, recursive = False)

/Operations
    /Operations/DisplayLevelsOperation
    /Operations/EDSInputOperation
    /Operations/ImageQuantificationOperation
    /Operations/IntensityProfileOperation
    /Operations/MixOperation
     - /Operations/Operations  ((1,), object)
    /Operations/StemInputOperation


# Dealing with metadata

In [14]:
help(io.get_det_uuid)
help(io.get_meta_dict)
help(io.get_meta_dict_det_no)
help(io.print_pretty)
help(io.write_meta_json)    
help(io.read_meta_json)

Help on function get_det_uuid in module modules.io:

get_det_uuid(f: h5py._hl.files.File, sig: str, det_no: int)
    Get the UUID key for the detector signal based on a detector index number
    
    Args:
        f (h5py._hl.files.File): the HDF5 file opened with h5py.File
        sig (str):     Provide signal type (Data subgroup) as a string
                       ["Image", "Line", "Spectrum", "SpectrumImage", "Spectrumstream"]
        det_no (int):  The detector number, since they have UUID names. If not provided, turned into 0.
    
    Returns:
        str: the uuid that can be used as a key to access data and metadate

Help on function get_meta_dict in module modules.io:

get_meta_dict(f: h5py._hl.files.File, sig: str, det: str, frame: int = 0) -> dict
    General importing function of EMD metadata.
    
    Args:
        f (h5py._hl.files.File): the HDF5 file opened with h5py.File
        sig (str):     Provide signal type (Data subgroup) as a string
                       ["Ima

## Examples

In [119]:
meta_test = io.get_meta_dict_det_no(f, "Image", det_no=0, frame = 0)
meta_test2 = io.get_meta_dict_det_no(f, "Image", det_no=2, frame = 0)

In [17]:
io.print_pretty(meta_test)

{
    "Acquisition": {
        "AcquisitionDatetime": {
            "DateTime": "0"
        },
        "AcquisitionStartDatetime": {
            "DateTime": "1554902425"
        },
        "BeamType": "",
        "SourceType": "Monochromator"
    },
    "BinaryResult": {
        "AcquisitionUnit": "",
        "CompositionType": "",
        "Detector": "SuperXG1",
        "Encoding": "",
        "Offset": {
            "x": "-9.016565482561257e-009",
            "y": "-9.016565482561257e-009"
        },
        "PixelSize": {
            "height": "3.522095891625491e-011",
            "width": "3.522095891625491e-011"
        },
        "PixelUnitX": "m",
        "PixelUnitY": "m"
    },
    "Core": {
        "MetadataDefinitionVersion": "7.9",
        "MetadataSchemaVersion": "v1/2013/07",
        "guid": "00000000000000000000000000000000"
    },
    "CustomProperties": {
        "Aperture[C1].Name": {
            "type": "string",
            "value": "2000"
        },
        "Apertu

In [121]:
print(meta_test["Core"]["MetadataDefinitionVersion"])

7.9


In [21]:
# Accessing and converting the data in the metadata.
# The metadata dictionary stores its values as strings, so the timestamp is cast to int first.
timestamp = int(meta_test["Acquisition"]["AcquisitionStartDatetime"]["DateTime"])
print("Timestamp: ", timestamp)
# fromtimestamp() converts to the local timezone of the machine running the notebook,
# so the printed DateTime can differ between machines.
starttime = dati.fromtimestamp(timestamp)
print("DateTime: ", starttime)

Timestamp:  1554902425
DateTime:  2019-04-10 15:20:25


# Working with Data

## Data parsers

In [10]:
def get_data(f: h5py._hl.files.File, sig: str, det: str):
    """
    Look up the "Data" entry of one detector inside an EMD file.

    Args:
        f (h5py._hl.files.File): the HDF5 file opened with h5py.File
        sig (str):  signal type, i.e. the name of the subgroup of "Data"
                    ["Image", "Line", "Spectrum", "SpectrumImage", "SpectrumStream"]
        det (str):  key of the detector group inside the signal group
                    (the UUID string, not the detector index)

    Returns:
        The object stored at f["Data"][sig][det]["Data"]
        (an h5py._hl.dataset.Dataset when f is an h5py file).
    """
    detector_group = f['Data'][sig][det]
    return detector_group['Data']

def get_image_data_det_no(f: h5py._hl.files.File, det_no: int):
    """
    Fetch EMD image data by detector index instead of by UUID.

    The index is first translated to the detector UUID with get_det_uuid,
    after which the data is looked up in the "Image" signal group with get_data.

    Args:
        f (h5py._hl.files.File): the HDF5 file opened with h5py.File
        det_no (int):  The detector index, which is translated to a UUID str.

    Returns:
        Whatever get_data returns for that detector (an h5py._hl.dataset.Dataset).
    """
    return get_data(f, "Image", get_det_uuid(f, "Image", det_no))
    

    

In [142]:
# Short names for the helpers from modules.io that the functions below call.
# Note: this rebinds get_data, so a get_data defined earlier in the notebook
# is replaced by the version from modules.io from here on.
get_det_uuid = io.get_det_uuid
get_data = io.get_data
get_meta_dict = io.get_meta_dict

def get_spectrum_stream_acqset(f: h5py._hl.files.File, det: str):
    """
    Read the acquisition settings of a spectrum stream.

    Args:
        f (h5py._hl.files.File): the HDF5 file opened with h5py.File
        det (str): key (UUID str) of the detector group in "Data/SpectrumStream"

    Returns:
        The first entry of the "AcquisitionSettings" dataset, decoded with json.loads.
    """
    settings_entries = f["Data/SpectrumStream"][det]["AcquisitionSettings"]
    return json.loads(settings_entries[0])

def get_spectrum_stream_flut(f: h5py._hl.files.File, det: str):
    """
    Read the frame location table of a spectrum stream.

    Args:
        f (h5py._hl.files.File): the HDF5 file opened with h5py.File
        det (str): key (UUID str) of the detector group in "Data/SpectrumStream"

    Returns:
        Column 0 of the "FrameLocationTable" dataset, read into memory.
    """
    table = f["Data/SpectrumStream"][det]["FrameLocationTable"]
    return table[:, 0]


def get_detector_property(meta: dict, prop: str, exact_match: int = False):
    """
    Look up a property of the detector that recorded the data described by `meta`.

    The detector name is taken from meta["BinaryResult"]["Detector"] and compared
    against the "DetectorName" of every entry in meta["Detectors"]; the property of
    the first entry that matches is returned.

    Args:
        meta (dict): metadata dictionary of a dataset
        prop (str): key of the property to read from the matching detector entry
        exact_match: if truthy the names must be equal, otherwise it is enough
                     that the name is contained in the entry's "DetectorName"

    Returns:
        The value stored under `prop`, or None when no detector entry matches.
    """
    wanted = meta["BinaryResult"]["Detector"]
    for entry in meta["Detectors"].values():
        candidate = entry["DetectorName"]
        matched = (wanted == candidate) if exact_match else (wanted in candidate)
        if matched:
            return entry[prop]
    return None

from scipy.sparse import csc_matrix, csr_matrix, dok_matrix
import time

def timeit(method):
    """
    Decorator that measures how long each call of `method` takes.

    If the call receives a ``log_time`` keyword argument (a dict), the elapsed time
    in whole milliseconds is stored in that dict under the key given by the
    ``log_name`` keyword argument, or under the upper-cased function name when
    ``log_name`` is absent. Otherwise the timing is printed.

    Note that ``log_time`` and ``log_name`` are also forwarded to `method`,
    so the decorated function has to accept them when they are used.

    Args:
        method: the function to time

    Returns:
        A wrapper with the same name and docstring as `method` that returns
        the result of `method` unchanged.
    """
    import functools  # local import so the cell does not depend on the import cell

    @functools.wraps(method)  # keep __name__/__doc__ so help() shows the real function
    def timed(*args, **kw):
        # perf_counter is monotonic, unlike time.time() which can jump with clock changes
        ts = time.perf_counter()
        result = method(*args, **kw)
        te = time.perf_counter()
        elapsed_ms = (te - ts) * 1000
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int(elapsed_ms)
        else:
            print('%r  %2.2f ms' % (method.__name__, elapsed_ms))
        return result
    return timed
    

def convert_stream_to_sparse(d1d: np.ndarray, dim: tuple, dv: int = 65535, compress_type: str = 'dok'):
    """
    Turn a 1D spectrum stream into a (pixel index x channel) matrix of counts.

    Every element of the stream that differs from `dv` is treated as a count and
    its value is the channel number; elements equal to `dv` are counters that
    advance the pixel index. The row of a count is its position in the stream
    minus the number of counts before it, minus one.

    Args:
        d1d (numpy.ndarray): the 1D stream
        dim (tuple): shape of the resulting matrix, (number of pixels, number of channels)
        dv (int): the value in the stream that is interpreted as the counter. Default = 65535
        compress_type (str): 'none' (dense numpy array), 'dok', 'csc' or 'csr'

    Returns:
        numpy.ndarray or scipy.sparse dok/csc/csr matrix, depending on `compress_type`

    Raises:
        ValueError: if `compress_type` is not one of the four recognized values.
    """
    # reject a bad compression type before doing the (slow) conversion loop
    if compress_type not in ('none', 'dok', 'csc', 'csr'):
        raise ValueError("Not recognized compression type, should be none, dok, csc or csr")

    # dok is used while filling because it allows cheap element-wise updates
    temp = dok_matrix(dim, dtype=np.int16)

    # positions in the stream where a count is registered (anything but the counter value)
    cinx = np.argwhere(d1d != dv)[:, 0]
    # pixel index of every count: stream position minus the counts that came before it,
    # minus one. NOTE(review): a count that precedes the first counter gets index -1
    # and therefore ends up in the last row - confirm the stream always starts with a counter.
    pixind = cinx - np.arange(len(cinx)) - 1

    # put every count in the right bin
    for i, j in zip(cinx, pixind):
        chan = d1d[i]  # the channel number = the value stored at the index
        temp[j, chan] += 1

    if compress_type == 'none':
        return temp.toarray()
    if compress_type == 'csc':
        return temp.tocsc()
    if compress_type == 'csr':
        # tocsr must be called; previously the bound method itself was returned
        return temp.tocsr()
    return temp  # 'dok'
        
    
def get_frame_limits(frm, flut):
    '''
    Get the first index of a frame and the first index of the frame after it.

    Args:
        frm (int): frame number
        flut: the frame lookup table, holding the start index of every frame

    Returns:
        tuple: (start of frame `frm`, start of the next frame); the second
        element is None when there is no entry for the next frame.
    '''
    assert isinstance(frm, int), "Must provide valid frame index"
    start = flut[frm]  # a frame number beyond the table raises IndexError here
    following = frm + 1
    # the table has no entry after the last frame, so its end is unknown
    stop = flut[following] if following < len(flut) else None
    return start, stop

def get_frame_indexes(frm, flut, totln = None):
    '''
    Get all stream indexes that belong to one frame.

    Args:
        frm (int): frame number
        flut: the frame lookup table, holding the start index of every frame
        totln (int): total length of the stream; required for the last frame

    Returns:
        numpy.ndarray: the indexes from the start of the frame up to (not including)
        the start of the next frame, or up to `totln` for the last frame.
    '''
    assert isinstance(frm, int), "Must provide valid frame index"
    first, stop = get_frame_limits(frm, flut)
    if stop is not None:
        return np.arange(first, stop)
    # last frame: the lookup table doesn't know where it ends, the caller must say
    assert isinstance(totln, int), "For the last frame, the total length of the stream must be provided"
    return np.arange(first, totln)

def get_frames_indexes(frms, flut, totln = None):
    '''
    Get all stream indexes that belong to a set of frames.

    Calls get_frame_indexes for every frame and joins the results in the
    order the frames are given.

    Args:
        frms: iterable of frame numbers (int)
        flut: the frame lookup table, holding the start index of every frame
        totln (int): total length of the stream; required if the last frame is included

    Returns:
        numpy.ndarray: integer array of indexes (empty when `frms` is empty)
    '''
    per_frame = [get_frame_indexes(frm, flut, totln = totln) for frm in frms]
    if not per_frame:
        # np.concatenate refuses an empty list; keep an integer dtype so the
        # result can still be used for indexing
        return np.array([], dtype=int)
    # one concatenate instead of repeated np.append, which copies on every call
    return np.concatenate(per_frame).astype(int, copy=False)
    
def translate_stream_frame(d: h5py._hl.dataset.Dataset, flut: np.ndarray, 
                           xs: int, ys: int, cs: int, frm: int, 
                           dv: int = 65535, compress_type: str = 'none'):
    '''
    Convert one frame of a spectrum stream to a 2D array or compressed matrix.

    The rows represent a pixel index, the columns represent the channel and the
    values are the counts. Generally for EDX data only a few thousand counts are
    registered per frame, so the data is very sparse.

    Args:
        d (h5py._hl.dataset.Dataset) : a SpectrumStream dataset read from an emd file
        flut (numpy.ndarray) : a frame lookup table also read from the emd file
        xs (int): size of the scanning grid in the x-direction
        ys (int): size of the scanning grid in the y-direction
        cs (int): number of channels
        frm (int): frame number
        dv (int): the number in the data that should be interpreted as a counter. Default = 65535
        compress_type (str): the type of compression, passed on to convert_stream_to_sparse

    Returns:
        numpy.ndarray or scipy.sparse.csc_matrix or scipy.sparse.csr_matrix or scipy.sparse.dok_matrix
    '''
    start, stop = get_frame_limits(frm, flut)
    # cut the frame out of the long stream; stop is None for the last frame,
    # in which case the slice simply runs to the end
    frame_stream = d[start:stop].flatten()
    n_pixels = xs*ys
    return convert_stream_to_sparse(frame_stream, (n_pixels, cs), dv = dv, compress_type = compress_type)
    

import concurrent.futures as cf

@timeit
def get_spectrum_stream_frames(f: h5py._hl.files.File, det_no: int = 0, 
                               frames: list = [], re_all = False, one_matrix = True, compress_type = "dok"):
    '''
    Read a spectrum stream from an EMD file and convert it to (pixel x channel) count matrices.

    Args:
        f (h5py._hl.files.File): the HDF5 file opened with h5py.File
        det_no (int): index of the spectrum stream detector, translated to a UUID str
        frames (list): frame numbers to convert; when empty all frames are converted
        re_all (bool): if True also return the raw dataset, metadata and frame table
        one_matrix (bool): if True return a single matrix with the selected frames stacked
                           along the rows (xs*ys rows per frame); if False return a list
                           with one matrix per frame, in the order of `frames`
        compress_type (str): 'none', 'dok', 'csc' or 'csr', see convert_stream_to_sparse

    Returns:
        The matrix (one_matrix = True) or list of matrices (one_matrix = False).
        With re_all = True a tuple (matrices, dataset, metadata of frame 0,
        metadata of frame 1, frame lookup table).
    '''
    sig = "SpectrumStream"
    #get the uuid of the dataset based on the index det_no
    det = get_det_uuid(f, sig, det_no)
    #get the data
    d = get_data(f, sig, det)
    #get the corresponding metadata
    md1 = get_meta_dict(f, sig, det, frame = 0)
    md2 = get_meta_dict(f, sig, det, frame = 1)
    #get the acquisition parameters
    acq = get_spectrum_stream_acqset(f, det)
    #get the frame table
    flut = get_spectrum_stream_flut(f, det)
    #the acquisition settings store numbers as strings
    chan = int(acq["bincount"]) #number of channels
    xs = int(acq['RasterScanDefinition']['Width'])
    ys = int(acq['RasterScanDefinition']['Height'])

    if one_matrix:
        #one matrix containing all selected frames
        if frames:
            #read every requested frame as a contiguous slice, the same way
            #translate_stream_frame does, and join them into one stream
            chunks = []
            for frm in frames:
                ix1, ix2 = get_frame_limits(frm, flut)
                chunks.append(d[ix1:ix2].flatten())
            d1d = np.concatenate(chunks)
            frame_dimension = len(frames)
        else:
            #no selection: use the whole stream
            d1d = d[:].flatten()
            frame_dimension = len(flut)

        specstr = convert_stream_to_sparse(d1d, (xs*ys*frame_dimension, chan), compress_type = compress_type)

    else:
        #a list of matrices, each one for a frame
        loopover = frames if frames else range(len(flut))

        with cf.ThreadPoolExecutor() as executor:
            futures = [executor.submit(translate_stream_frame, d, flut, xs, ys,
                                       cs = chan, frm = i, compress_type = compress_type) for i in loopover]
            #collect in submission order so that specstr[k] belongs to loopover[k];
            #as_completed would return the frames in whatever order they finish
            specstr = [future.result() for future in futures]

    if re_all:
        return specstr, d, md1, md2, flut
    return specstr
    



In [135]:
d.len()

63401928

In [124]:
duuid = get_det_uuid(f, "SpectrumStream", 0)
get_spectrum_stream_acqset(f, duuid)

{'encoding': 'uint16',
 'bincount': '4096',
 'StreamEncoding': 'uint16',
 'Size': '1048576',
 'RasterScanDefinition': {'Width': '512', 'Height': '512'}}

In [143]:
#d, md1, md2, flut = get_spectrum_stream_frames(f, 0)
allframes = get_spectrum_stream_frames(f, one_matrix = True)

'get_spectrum_stream_frames'  18258.94 ms


In [146]:
# Size of the result in MB. NOTE: sys.getsizeof only accounts for the object itself,
# not for objects it refers to, so this can underestimate the real memory use.
sys.getsizeof(allframes)/1000000

41.943168

In [None]:
class SpectrumStream(object):
    """
    A spectrum stream stored as a list of per-frame (pixel index x channel) count matrices.

    Args:
        lst (list): one matrix per frame (numpy arrays or scipy sparse matrices)
        xs (int): size of the scanning grid in the x-direction
        ys (int): size of the scanning grid in the y-direction
    """

    def __init__(self, lst, xs, ys):
        self._lst = lst
        self.xs = xs
        self.ys = ys

    def __getitem__(self, key):
        """
        Select counts with ``stream[x, y, channels, frame]``.

        NOTE(review): the original body of this method was unfinished; this is the
        most direct completion of it (pick the frame, translate x/y to row indexes,
        select rows and channel columns) - confirm it matches the intended use.

        Args:
            key (tuple): (x, y, channels, frame) where x and y are pixel coordinates
                         (scalars or sequences, combined as a grid), channels selects
                         the columns and frame is the index into the list of frames

        Returns:
            The selected rows and columns of the frame, in the frame's own matrix type.
        """
        x, y, channels, frames = key
        frame = self._lst[frames]
        inx = self._get_inx_from_xy(x, y)
        return frame[inx][:, channels]

    @property
    def tolist(self):
        """The underlying list of per-frame matrices (not a copy)."""
        return self._lst

    def get_frame_sum(self, comp_type: str = "none"):
        """
        Sum all frames element-wise.

        Args:
            comp_type (str): type of the result: 'none' (dense numpy array),
                             'dok', 'csc' or 'csr'

        Returns:
            numpy.ndarray or scipy sparse matrix with the summed counts

        Raises:
            ValueError: for an unknown `comp_type` or when there are no frames.
        """
        if comp_type not in ('none', 'dok', 'csc', 'csr'):
            raise ValueError("Not recognized compression type, should be none, dok, csc or csr")
        if len(self._lst) == 0:
            raise ValueError("There are no frames to sum")

        # start from a copy so that the stored frames are never modified or returned
        total = self._lst[0].copy()
        for frame in self._lst[1:]:
            total = total + frame

        if isinstance(total, np.ndarray):
            # frames were stored as dense arrays
            if comp_type == 'none':
                return total
            total = csr_matrix(total)

        if comp_type == 'none':
            return total.toarray()
        if comp_type == 'dok':
            return total.todok()
        if comp_type == 'csc':
            return total.tocsc()
        return total.tocsr()

    def _get_xy_from_inx(self, inx: np.ndarray):
        """Translate flat pixel indexes to a tuple (x, y) of coordinate arrays."""
        assert np.max(inx) < self.xs*self.ys, "An index is out of range"
        inx_ar = np.array(inx)
        # integer division keeps the indexes exact (no detour through floats)
        y, x = np.divmod(inx_ar, self.xs)
        return (x, y)

    def _get_inx_from_xy(self, x, y):
        """Translate pixel coordinates to flat pixel indexes.

        x and y are combined as a grid (every x with every y); the result is the
        flattened array of y*xs + x, ordered row by row.
        """
        assert np.max(x) < self.xs, "An x-index is outside the image range"
        assert np.max(y) < self.ys, "A y-index is outside the image range"
        X, Y = np.meshgrid(x,y)
        Xf = X.ravel()
        Yf = Y.ravel()
        return Yf*self.xs+Xf

In [113]:
x, y = get_xy_from_inx(np.arange(512*512), 512, 512)

In [41]:
io.print_pretty(md)

{
    "Acquisition": {
        "AcquisitionDatetime": {
            "DateTime": "0"
        },
        "AcquisitionStartDatetime": {
            "DateTime": "1554902425"
        },
        "BeamType": "",
        "SourceType": "Monochromator"
    },
    "BinaryResult": {
        "AcquisitionUnit": "",
        "CompositionType": "",
        "Detector": "SuperXG1",
        "Encoding": "",
        "Offset": {
            "x": "-9.016565482561257e-009",
            "y": "-9.016565482561257e-009"
        },
        "PixelSize": {
            "height": "3.522095891625491e-011",
            "width": "3.522095891625491e-011"
        },
        "PixelUnitX": "m",
        "PixelUnitY": "m"
    },
    "Core": {
        "MetadataDefinitionVersion": "7.9",
        "MetadataSchemaVersion": "v1/2013/07",
        "guid": "00000000000000000000000000000000"
    },
    "CustomProperties": {
        "Aperture[C1].Name": {
            "type": "string",
            "value": "2000"
        },
        "Apertu

## Data plotters and exporters

#### Images

In [None]:
def save_single_image(imgdata: np.ndarray, filename:str , metadata: dict, 
               scale_bar: bool = True, show_fig: bool = False, dpi: int = 100, save_meta: bool = True,
               sb_settings = {"location":'lower right', "color" : 'k', "length_fraction" : 0.15}, **kwargs):
    """
    Save an image array to a file without axes or margins, optionally with a scale bar.

    Args:
        imgdata (numpy.ndarray): the image, rows x columns
        filename (str): path of the image file to write; the extension selects the format
        metadata (dict): metadata dictionary of the image; the pixel size and unit for
                         the scale bar are read from its "BinaryResult" entry
        scale_bar (bool): draw a scale bar on the image
        show_fig (bool): show the figure after saving instead of closing it
        dpi (int): resolution; together with the figure size it makes one array
                   element correspond to one pixel of the saved image
        save_meta (bool): also write the metadata to a json file with the same base name
        sb_settings (dict): keyword arguments passed to ScaleBar (only read, not modified)
        **kwargs: passed on to imshow (e.g. cmap)
    """
    #figsize is (width, height) while the array shape is (rows, columns)
    height, width = imgdata.shape[:2]
    fig = plt.figure(frameon=False, figsize = (width/dpi, height/dpi))
    #an axes object that fills the entire figure, without ticks or frame
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)
    #plot the image that was passed in (not a notebook global)
    ax.imshow(imgdata, **kwargs)

    if scale_bar:
        #get scale bar info from metadata, where values are stored as strings
        px = float(metadata["BinaryResult"]["PixelSize"]["width"])
        unit = metadata["BinaryResult"]["PixelUnitX"]
        #check the units and adjust sb accordingly
        if unit == '1/m':
            #reciprocal space: convert 1/m to 1/nm
            px = px*10**(-9)
            scalebar = ScaleBar(px, '1/nm', SI_LENGTH_RECIPROCAL, **sb_settings)
        else:
            scalebar = ScaleBar(px, unit, **sb_settings)
        ax.add_artist(scalebar)

    #save and show/close this specific figure rather than whichever one is current
    fig.savefig(filename, dpi = dpi)

    if show_fig:
        plt.show()
    else:
        plt.close(fig)

    if save_meta:
        #save the metadata to a json file with the same name as the image
        path, ext = os.path.splitext(filename)
        io.write_meta_json(path+".json", metadata)
    
    

In [None]:
# Example: take the first frame of image detector 0 and save it together with its metadata.
imdat = get_image_data_det_no(f, 0)[:,:,0]
# get_meta_dict_det_no is only available through the io module in this notebook
metdat = io.get_meta_dict_det_no(f, "Image", 0)
save_single_image(imdat, "testfigure.tiff", metdat, save_meta = True, cmap = "plasma")

In [None]:
# Two things prevented this from working: the image object was returned after the
# with-block had already closed it, and read_meta_json was called without the io prefix.
def load_image_tiff(filename: str, load_meta: bool = True):
    """
    Load an image file and, optionally, the metadata json saved next to it.

    Args:
        filename (str): path of the image file
        load_meta (bool): also read the json file with the same base name as the image

    Returns:
        tuple: (PIL image, metadata); metadata is None when load_meta is False
    """
    with Image.open(filename) as tif:
        #copy() loads the pixel data into a new image that stays usable
        #after the file is closed at the end of the with-block
        data = tif.copy()
    metadata = None
    if load_meta:
        path, ext = os.path.splitext(filename)
        metadata = io.read_meta_json(path+".json")
    return data, metadata

In [None]:
ima, met = load_image_tiff("testfigure.tiff")

#### Spectra

# Hyperspy parsers

In [None]:
#Higher level 3rd party packages
import numba as nb
import hyperspy.api as hs


In [None]:
fhs = hs.load(filename)

In [None]:
fhs

In [None]:
#hyperspy interpreted metadata
help(type(fhs[5]))

In [None]:
fhs[5].original_metadata.Acquisition