In [None]:
import pandas as pd
import numpy as np
import h5py
import datetime
import matplotlib.pyplot as plt
from scipy import interpolate
# Relative path of the HDF5 file explored by the cells below.
file_name = "more4/spect_test6.h5"
# Open in read-only mode ("r"); the handle is reused by later cells and is not closed in the cells shown here.
h5_file = h5py.File(file_name, "r")

In [2]:
def create_list_of_datasets_in_h5file(h5_file):
    """
    Returns a list of the paths of all datasets in an h5 file.
    Can also be used to list all datasets under a given node (e.g., f['session001']).  In this case,
    the paths are relative to that node, so 'session001' is not included in the output paths.
    This comes from:
    https://stackoverflow.com/questions/44883175/how-to-list-all-datasets-in-h5py-file
    :param h5_file: an open h5py file, or any group inside one, to walk recursively
    :return: list of dataset path strings, in the order h5py visits them
    """
    dataset_paths = []

    def _collect(name, obj):
        # Implicitly returns None: h5py stops the traversal at the first
        # non-None value returned by the callback.
        if isinstance(obj, h5py.Dataset):
            dataset_paths.append(name)

    # visititems hands over the object together with its name, so each member
    # does not have to be looked up a second time by path.
    h5_file.visititems(_collect)
    return dataset_paths


def create_list_of_groups_in_node(node):
    """
    Returns a list of the paths of all groups found below a node.
    The walk is recursive and the paths are relative to ``node``.
    :param node: an open h5py file, or any group inside one, to walk
    :return: list of group path strings, in the order h5py visits them
    """
    group_paths = []

    def _collect(name, obj):
        # Implicitly returns None: h5py stops the traversal at the first
        # non-None value returned by the callback.
        if isinstance(obj, h5py.Group):
            group_paths.append(name)

    # visititems hands over the object together with its name, so each member
    # does not have to be looked up a second time by path.
    node.visititems(_collect)
    return group_paths

def create_list_of_items_in_node(item_type, h5_file, node):
    """
    Returns a list of the paths of all groups, or all datasets, below a node.
    The walk is recursive and the paths are relative to ``node``.
    :param item_type: "group" to collect groups, "dataset" to collect datasets
    :param h5_file: an open h5py file (or group) that contains ``node``
    :param node: path of the node to walk, relative to ``h5_file``
    :return: list of matching path strings, in the order h5py visits them
    :raises ValueError: if ``item_type`` is neither "group" nor "dataset"
    """
    if item_type == "group":
        wanted_type = h5py.Group
    elif item_type == "dataset":
        wanted_type = h5py.Dataset
    else:
        # Fail immediately instead of hitting an unbound name inside the
        # visit callback (or silently returning [] for an empty node).
        raise ValueError(
            f"item_type must be 'group' or 'dataset', got {item_type!r}"
        )

    matches = []

    def _collect(name, obj):
        # Implicitly returns None: h5py stops the traversal at the first
        # non-None value returned by the callback.
        if isinstance(obj, wanted_type):
            matches.append(name)

    h5_file[node].visititems(_collect)
    return matches

def create_list_of_insertions_in_node(h5_file, node):
    """
    Returns the paths of the insertion groups found below a node.
    A group path counts as an insertion when the three characters that precede
    its last three characters are 'ins' (e.g. 'cal001/ins001').
    :param h5_file: an open h5py file (or group) that contains ``node``
    :param node: path of the node to search, relative to ``h5_file``
    :return: list of matching group paths, relative to ``node``
    """
    group_paths = create_list_of_items_in_node("group", h5_file, node)
    return [path for path in group_paths if path[-6:-3] == 'ins']

def create_session_insertion_dict(h5_file):
    """
    Builds a mapping from each top-level key of the file to its insertions.
    The mapping is printed before it is returned.
    :param h5_file: an open h5py file
    :return: dict of top-level key -> list of insertion group paths
    """
    insertions_by_session = {
        session_name: create_list_of_insertions_in_node(h5_file, session_name)
        for session_name in list(h5_file.keys())
    }
    print(insertions_by_session)
    return insertions_by_session

def create_list_of_calibrations_in_node(h5_file, node):
    """
    Returns the paths of the calibration groups found below a node.
    A group path counts as a calibration when the three characters that
    precede its last three characters are 'cal' (e.g. 'cal001').
    :param h5_file: an open h5py file (or group) that contains ``node``
    :param node: path of the node to search, relative to ``h5_file``
    :return: list of matching group paths, relative to ``node``
    """
    group_paths = create_list_of_items_in_node("group", h5_file, node)
    return [path for path in group_paths if path[-6:-3] == 'cal']


def create_session_calibration_dict(h5_file):
    """
    Builds a mapping from each top-level key of the file to its calibrations.
    The mapping is printed before it is returned.
    :param h5_file: an open h5py file
    :return: dict of top-level key -> list of calibration group paths
    """
    calibrations_by_session = {
        session_name: create_list_of_calibrations_in_node(h5_file, session_name)
        for session_name in list(h5_file.keys())
    }
    print(calibrations_by_session)
    return calibrations_by_session

def get_wavelengths_vector(h5_file, session, insertion, spectrometer):
    """
    Returns the wavelengths vector of one spectrometer for an insertion.
    The vector is read from an attribute of the insertion group's parent group.
    :param h5_file: an open h5py file
    :param session: top-level session key, e.g. 'session001'
    :param insertion: path of the insertion group relative to the session
    :param spectrometer: "spectrometer1" or "spectrometer2"
    :return: a copy of the stored wavelengths vector
    :raises ValueError: if ``spectrometer`` is not one of the two known names
    """
    if spectrometer == "spectrometer1":
        vector_attr = "spec1_wavelengths_vector"
    elif spectrometer == "spectrometer2":
        vector_attr = "spec2_wavelengths_vector"
    else:
        # Previously this printed a message and then failed with an opaque
        # KeyError on the attribute ""; report the real problem instead.
        raise ValueError(
            "spectrometer not specified: expected 'spectrometer1' or "
            f"'spectrometer2', got {spectrometer!r}"
        )

    insertion_group = h5_file[f"{session}/{insertion}"]
    # The wavelengths are stored on the group that contains the insertion.
    wavelengths_vector = insertion_group.parent.attrs[vector_attr][:]
    return wavelengths_vector

def get_absorbance_spectra_in_insertion(h5_file, session, insertion):
    """
    Reads the 'spectrometer1/derived/absorbances' dataset of an insertion.
    The data that was read is printed before it is returned.
    :param h5_file: an open h5py file
    :param session: top-level session key, e.g. 'session001'
    :param insertion: path of the insertion group relative to the session
    :return: the full contents of the dataset
    """
    dataset_path = f"{session}/{insertion}/spectrometer1/derived/absorbances"
    spectra = h5_file[dataset_path][:]
    print(spectra)
    return spectra

def get_absorbance_spectra_and_depth_in_insertion(h5_file, session, insertion):
    """
    Splits the 'derived/absorbance_depth' dataset of an insertion by column.
    All columns except the last two are returned as the absorbance spectra,
    the second-to-last column as the forces and the last column as the depths.
    The spectra are printed before the function returns.
    :param h5_file: an open h5py file
    :param session: top-level session key, e.g. 'session001'
    :param insertion: path of the insertion group relative to the session
    :return: tuple ``(absorbance_spectra, depths, forces)``
    """
    table = h5_file[f"{session}/{insertion}/derived/absorbance_depth"]
    forces = table[:, -2]
    depths = table[:, -1]
    spectra = table[:, :-2]
    print(spectra)
    return spectra, depths, forces

def get_current_time():
    """
    Returns the current time as a numpy int64.
    The value is the naive ``datetime.datetime.now()`` reading converted to a
    numpy datetime64 and then reinterpreted as an integer count.
    :return: numpy.int64 timestamp
    """
    now = datetime.datetime.now()
    stamp = np.datetime64(now)
    return stamp.astype(np.int64)


def timestamp_list_to_pandas_timestamps(timestamp_list):
    """
    Converts a list of timestamps to a pandas Series of datetimes.
    The values are cast with the numpy dtype '<M8[us]' (microsecond datetime64).
    :param timestamp_list: list of timestamps to convert
    :return: pandas Series holding the converted values
    """
    return pd.Series(timestamp_list).astype('<M8[us]')

def find_zoomed_yvalues(wavelengths_vector, absorbance_spectrum,
                        wave_min=1700, wave_max=1800, margin=0.01):
    """
    Returns y-axis limits that frame a spectrum inside a wavelength window.
    The limits are the minimum and maximum of the spectrum strictly between
    ``wave_min`` and ``wave_max``, each pushed outward by ``margin`` times its
    own magnitude.
    :param wavelengths_vector: wavelengths, one per spectrum value
    :param absorbance_spectrum: spectrum values aligned with the wavelengths
    :param wave_min: exclusive lower bound of the window (default 1700)
    :param wave_max: exclusive upper bound of the window (default 1800)
    :param margin: fractional padding added on each side (default 0.01)
    :return: tuple ``(ymin, ymax)``
    :raises ValueError: if no wavelength lies inside the window
    """
    wavelengths = np.asarray(wavelengths_vector)
    spectrum = np.asarray(absorbance_spectrum)

    in_window = (wavelengths > wave_min) & (wavelengths < wave_max)
    if not in_window.any():
        raise ValueError(
            f"no wavelengths strictly between {wave_min} and {wave_max}"
        )

    window_values = spectrum[in_window]
    minval = window_values.min()
    maxval = window_values.max()
    # Pad by the magnitude of each extreme: scaling a negative value directly
    # would move the limit inward and clip the data.
    ymin = minval - (abs(minval) * margin)
    ymax = maxval + (abs(maxval) * margin)
    return ymin, ymax

In [3]:
# Show the top-level keys of the file; the saved output lists session001 through session027.
h5_file.keys()

<KeysViewHDF5 ['session001', 'session002', 'session003', 'session004', 'session005', 'session006', 'session007', 'session008', 'session009', 'session010', 'session011', 'session012', 'session013', 'session014', 'session015', 'session016', 'session017', 'session018', 'session019', 'session020', 'session021', 'session022', 'session023', 'session024', 'session025', 'session026', 'session027']>

In [4]:
# Handle to the last session listed in the keys output above.
session = h5_file['session027']

In [5]:
# Handle to the cal001 node inside session027.
cal = h5_file['session027/cal001']

In [6]:
# The bare repr only identifies the attributes object; it does not show the attribute names or values.
cal.attrs

<Attributes of HDF5 object at 140276225447632>

In [8]:
# Materialize every attribute of cal001 as (name, value) pairs.
# NOTE(review): this prints whole arrays such as 'dark_spectrum'; list(cal.attrs.keys()) would give a shorter overview.
list(cal.attrs.items())

[('calibration_start_time', 1617097635473025),
 ('dark_spectrum',
  array([4035.  , 4089.  , 4052.25, 4047.  , 4074.25, 4092.75, 4114.25,
         4106.25, 4020.25, 4119.  , 4040.25, 4143.75, 4063.  , 4084.  ,
         4025.5 , 4083.  , 4071.75, 4035.25, 4045.75, 4046.5 , 4025.5 ,
         4113.75, 4054.75, 4064.  , 4021.5 , 4082.25, 4120.  , 4052.25,
         4091.25, 4050.25, 4059.  , 4028.  , 4054.75, 4029.5 , 3981.5 ,
         4014.25, 4014.25, 4055.25, 4043.75, 4052.75, 4007.  , 4097.  ,
         4055.  , 4057.  , 4042.75, 4082.25, 4037.5 , 4102.25, 4025.  ,
         4029.75, 4101.5 , 4098.75, 4131.25, 4088.5 , 4091.5 , 4057.25,
         4022.75, 4075.5 , 4021.  , 4034.25, 3987.  , 4029.75, 3997.  ,
         4047.  , 4054.  , 4069.25, 4046.  , 4063.75, 4072.25, 4045.25,
         4016.  , 4012.  , 4109.25, 4151.75, 4078.75, 4069.75, 4076.  ,
         4079.75, 4099.5 , 4123.25, 4002.5 , 4142.5 , 4054.5 , 4120.25,
         4071.25, 4140.  , 4024.  , 4070.25, 4005.5 , 4069.  , 4028.75

In [9]:
# Handle to the ins001 node stored under session027/cal001.
ins = h5_file["session027/cal001/ins001"]

In [11]:
# List the dataset paths below ins002 (note: not the ins001 handle bound to `ins`); the saved output is an empty list.
create_list_of_items_in_node("dataset", h5_file, 'session027/cal001/ins002')

[]

In [12]:
# Direct children of the ins001 handle; the saved output shows a single 'derived' entry.
ins.keys()

<KeysViewHDF5 ['derived']>