In [1]:
import pandas as pd
import numpy as np
import h5py
# Path of the HDF5 file explored in this notebook.
file_name = "threading_try2.h5"
# Mode "a" opens the file read/write and creates it if it does not exist.
# NOTE(review): no close() is visible in this section of the notebook;
# confirm the handle is closed elsewhere.
f = h5py.File(file_name, "a")

In [2]:
def create_list_of_datasets_in_h5file(h5_file):
    """
    Return the paths of all datasets found under an h5py file or group.

    The argument may be an open file or any group inside one
    (e.g., f['session001']).  Paths are relative to the object passed in, so
    for f['session001'] the leading 'session001' is not part of the output.

    Adapted from:
    https://stackoverflow.com/questions/44883175/how-to-list-all-datasets-in-h5py-file

    :param h5_file: open h5py file, or a group within it, to search recursively
    :return: list of dataset paths (str) in the order h5py visits them
    """
    keys = []

    def _collect(name, obj):
        # visititems passes the object itself, so it does not have to be
        # re-opened by name.  Returning None keeps the traversal going.
        if isinstance(obj, h5py.Dataset):
            keys.append(name)

    h5_file.visititems(_collect)
    return keys


def create_list_of_groups_in_node(node):
    """
    Return the paths of all groups found under an h5py file or group.

    Paths are relative to ``node``; ``node`` itself is not included.

    :param node: open h5py file, or a group within it, to search recursively
    :return: list of group paths (str) in the order h5py visits them
    """
    keys = []

    def _collect(name, obj):
        # visititems passes the object itself, so it does not have to be
        # re-opened by name.  Returning None keeps the traversal going.
        if isinstance(obj, h5py.Group):
            keys.append(name)

    node.visititems(_collect)
    return keys

In [3]:
# Collect every dataset path under the 'session001' node (paths are relative to that node).
dsets = create_list_of_datasets_in_h5file(f['session001'])

In [4]:
# Display the dataset paths collected for 'session001'.
dsets

['cal001/ins001/derived/absorbance_depth',
 'cal001/ins001/derived/absorbance_depth_timestamps',
 'cal001/ins001/derived/force_depth',
 'cal001/ins001/derived/force_depth_timestamps',
 'cal001/ins001/encoder/depths',
 'cal001/ins001/encoder/timestamps',
 'cal001/ins001/load_cell/forces',
 'cal001/ins001/load_cell/timestamps',
 'cal001/ins001/spectrometer1/derived/absorbances',
 'cal001/ins001/spectrometer1/spectra',
 'cal001/ins001/spectrometer1/timestamps',
 'cal001/ins001/spectrometer2/adc1s',
 'cal001/ins001/spectrometer2/adc2s',
 'cal001/ins001/spectrometer2/derived/absorbances',
 'cal001/ins001/spectrometer2/interferograms',
 'cal001/ins001/spectrometer2/opds',
 'cal001/ins001/spectrometer2/spectra',
 'cal001/ins001/spectrometer2/timestamps',
 'cal001/ins001/spectrometer2/wavenums',
 'cal001/ins002/derived/absorbance_depth',
 'cal001/ins002/derived/absorbance_depth_timestamps',
 'cal001/ins002/derived/force_depth',
 'cal001/ins002/derived/force_depth_timestamps',
 'cal001/ins002/e

In [5]:
# Collect every group path under the 'session001' node (paths are relative to that node).
groups = create_list_of_groups_in_node(f['session001'])

In [6]:
# Display the group paths collected for 'session001'.
groups

['cal001',
 'cal001/ins001',
 'cal001/ins001/derived',
 'cal001/ins001/encoder',
 'cal001/ins001/load_cell',
 'cal001/ins001/spectrometer1',
 'cal001/ins001/spectrometer1/derived',
 'cal001/ins001/spectrometer2',
 'cal001/ins001/spectrometer2/derived',
 'cal001/ins002',
 'cal001/ins002/derived',
 'cal001/ins002/encoder',
 'cal001/ins002/load_cell',
 'cal001/ins002/spectrometer1',
 'cal001/ins002/spectrometer1/derived',
 'cal001/ins002/spectrometer2',
 'cal001/ins002/spectrometer2/derived']

In [7]:
def create_list_of_items_in_node(item_type, h5_file, node):
    """
    Return the paths of all groups or all datasets beneath a node of an h5 file.

    Paths are relative to ``node``; ``node`` itself is not included.

    :param item_type: "group" to list groups, "dataset" to list datasets
    :param h5_file: open h5py file (or group) that contains ``node``
    :param node: path of the starting node inside ``h5_file`` ("/" for the root)
    :return: list of matching paths (str) in the order h5py visits them
    :raises ValueError: if ``item_type`` is neither "group" nor "dataset"
    """
    wanted_types = {"group": h5py.Group, "dataset": h5py.Dataset}
    if item_type not in wanted_types:
        # Fail early and explicitly; previously an unknown item_type surfaced
        # as a NameError inside the visit callback (or as a silent empty list
        # when the node had no children).
        raise ValueError(
            "item_type must be 'group' or 'dataset', got {!r}".format(item_type)
        )
    wanted = wanted_types[item_type]

    keys = []

    def _collect(name, obj):
        # visititems passes the object itself, so the node does not have to
        # be re-opened and indexed for every visited name.
        if isinstance(obj, wanted):
            keys.append(name)

    h5_file[node].visititems(_collect)
    return keys

In [8]:
# Same listings for 'session001' as above, this time through the generic helper.
groups001 = create_list_of_items_in_node("group", f, 'session001')
dsets001 = create_list_of_items_in_node("dataset", f, 'session001')

In [9]:
# Display the group paths found under 'session001'.
groups001

['cal001',
 'cal001/ins001',
 'cal001/ins001/derived',
 'cal001/ins001/encoder',
 'cal001/ins001/load_cell',
 'cal001/ins001/spectrometer1',
 'cal001/ins001/spectrometer1/derived',
 'cal001/ins001/spectrometer2',
 'cal001/ins001/spectrometer2/derived',
 'cal001/ins002',
 'cal001/ins002/derived',
 'cal001/ins002/encoder',
 'cal001/ins002/load_cell',
 'cal001/ins002/spectrometer1',
 'cal001/ins002/spectrometer1/derived',
 'cal001/ins002/spectrometer2',
 'cal001/ins002/spectrometer2/derived']

In [10]:
# Display the dataset paths found under 'session001'.
dsets001

['cal001/ins001/derived/absorbance_depth',
 'cal001/ins001/derived/absorbance_depth_timestamps',
 'cal001/ins001/derived/force_depth',
 'cal001/ins001/derived/force_depth_timestamps',
 'cal001/ins001/encoder/depths',
 'cal001/ins001/encoder/timestamps',
 'cal001/ins001/load_cell/forces',
 'cal001/ins001/load_cell/timestamps',
 'cal001/ins001/spectrometer1/derived/absorbances',
 'cal001/ins001/spectrometer1/spectra',
 'cal001/ins001/spectrometer1/timestamps',
 'cal001/ins001/spectrometer2/adc1s',
 'cal001/ins001/spectrometer2/adc2s',
 'cal001/ins001/spectrometer2/derived/absorbances',
 'cal001/ins001/spectrometer2/interferograms',
 'cal001/ins001/spectrometer2/opds',
 'cal001/ins001/spectrometer2/spectra',
 'cal001/ins001/spectrometer2/timestamps',
 'cal001/ins001/spectrometer2/wavenums',
 'cal001/ins002/derived/absorbance_depth',
 'cal001/ins002/derived/absorbance_depth_timestamps',
 'cal001/ins002/derived/force_depth',
 'cal001/ins002/derived/force_depth_timestamps',
 'cal001/ins002/e

In [11]:
# List every group in the file by starting the search at the root node "/".
all_groups = create_list_of_items_in_node("group", f, "/")

In [12]:
# Display the group paths found from the root of the file.
all_groups

['session001',
 'session001/cal001',
 'session001/cal001/ins001',
 'session001/cal001/ins001/derived',
 'session001/cal001/ins001/encoder',
 'session001/cal001/ins001/load_cell',
 'session001/cal001/ins001/spectrometer1',
 'session001/cal001/ins001/spectrometer1/derived',
 'session001/cal001/ins001/spectrometer2',
 'session001/cal001/ins001/spectrometer2/derived',
 'session001/cal001/ins002',
 'session001/cal001/ins002/derived',
 'session001/cal001/ins002/encoder',
 'session001/cal001/ins002/load_cell',
 'session001/cal001/ins002/spectrometer1',
 'session001/cal001/ins002/spectrometer1/derived',
 'session001/cal001/ins002/spectrometer2',
 'session001/cal001/ins002/spectrometer2/derived',
 'session002',
 'session002/cal001',
 'session002/cal001/ins001',
 'session002/cal001/ins001/derived',
 'session002/cal001/ins001/encoder',
 'session002/cal001/ins001/load_cell',
 'session002/cal001/ins001/spectrometer1',
 'session002/cal001/ins001/spectrometer1/derived',
 'session002/cal001/ins001/spe

In [13]:
def create_list_of_insertions_in_node(h5_file, node):
    """
    Return the paths of the insertion groups beneath a node of an h5 file.

    A group counts as an insertion when the last component of its path is
    six characters long and starts with 'ins' (e.g. 'cal001/ins001').

    :param h5_file: open h5py file (or group) that contains ``node``
    :param node: path of the starting node inside ``h5_file``
    :return: list of insertion group paths (str), relative to ``node``
    """
    all_groups = create_list_of_items_in_node("group", h5_file, node)
    insertions = []
    for group in all_groups:
        # Look at the final path component only.  Slicing the tail of the
        # whole path could match across a '/' (e.g. 'cal001/ins0/1') or in
        # the middle of a longer group name (e.g. 'bins001').
        name = group.rsplit("/", 1)[-1]
        if len(name) == 6 and name.startswith("ins"):
            insertions.append(group)
    return insertions


In [14]:
def create_session_insertion_dict(h5_file):
    """
    Map every top-level key of an h5 file to the insertions found beneath it.

    The resulting dictionary is printed and then returned.

    :param h5_file: open h5py file whose top-level keys are inspected
    :return: dict of top-level key -> list of insertion group paths
    """
    session_names = list(h5_file.keys())
    insertions_by_session = {
        session: create_list_of_insertions_in_node(h5_file, session)
        for session in session_names
    }
    print(insertions_by_session)
    return insertions_by_session


In [15]:
# Names of the top-level members of the file.
keys = list(f.keys())

In [16]:
# Display the top-level member names.
keys

['session001',
 'session002',
 'session003',
 'session004',
 'session005',
 'session006',
 'session007',
 'session008',
 'session009',
 'session010']

In [17]:
# Map each top-level key of the file to the insertion groups beneath it
# (the helper also prints the mapping).
sess_dict = create_session_insertion_dict(f)

{'session001': ['cal001/ins001', 'cal001/ins002'], 'session002': ['cal001/ins001'], 'session003': ['cal001/ins001', 'cal001/ins002', 'cal002/ins003', 'cal002/ins004'], 'session004': ['cal002/ins001', 'cal002/ins002'], 'session005': [], 'session006': ['cal001/ins001', 'cal001/ins002'], 'session007': ['cal001/ins001', 'cal001/ins002'], 'session008': ['cal001/ins001'], 'session009': ['cal001/ins001'], 'session010': ['cal001/ins001', 'cal001/ins002', 'cal001/ins003']}


In [18]:
def create_list_of_calibrations_in_node(h5_file, node):
    """
    Return the paths of the calibration groups beneath a node of an h5 file.

    A group counts as a calibration when the last component of its path is
    six characters long and starts with 'cal' (e.g. 'cal001').

    :param h5_file: open h5py file (or group) that contains ``node``
    :param node: path of the starting node inside ``h5_file``
    :return: list of calibration group paths (str), relative to ``node``
    """
    all_groups = create_list_of_items_in_node("group", h5_file, node)
    calibrations = []
    for group in all_groups:
        # Look at the final path component only.  Slicing the tail of the
        # whole path could match across a '/' (e.g. 'cal0/1') or in the
        # middle of a longer group name (e.g. 'recal001').
        name = group.rsplit("/", 1)[-1]
        if len(name) == 6 and name.startswith("cal"):
            calibrations.append(group)
    return calibrations



def create_session_calibration_dict(h5_file):
    """
    Map every top-level key of an h5 file to the calibrations found beneath it.

    The resulting dictionary is printed and then returned.

    :param h5_file: open h5py file whose top-level keys are inspected
    :return: dict of top-level key -> list of calibration group paths
    """
    session_names = list(h5_file.keys())
    calibrations_by_session = dict(
        (session, create_list_of_calibrations_in_node(h5_file, session))
        for session in session_names
    )
    print(calibrations_by_session)
    return calibrations_by_session


In [19]:
# Map each top-level key of the file to the calibration groups beneath it
# (the helper also prints the mapping).
cal_dict = create_session_calibration_dict(f)

{'session001': ['cal001'], 'session002': ['cal001'], 'session003': ['cal001', 'cal002'], 'session004': ['cal001', 'cal002'], 'session005': ['cal001'], 'session006': ['cal001'], 'session007': ['cal001'], 'session008': ['cal001'], 'session009': ['cal001'], 'session010': ['cal001']}
