In [1]:
import pandas as pd
import numpy as np
import h5py
import datetime
import matplotlib.pyplot as plt
from scipy import interpolate
# Relative path of the HDF5 file explored by this notebook.
file_name = "more/connect_two_spectrometers.h5"
# Open read-only.  The handle is a module-level global that the cells below
# index directly; no close() call appears in the visible cells.
h5_file = h5py.File(file_name, "r")

In [2]:
def create_list_of_datasets_in_h5file(h5_file):
    """
    Return the paths of all datasets found under an h5py file or group.

    The search is recursive.  Paths are relative to the object passed in, so
    when a sub-group is given (e.g., f['session001']) the 'session001' prefix
    is not included in the returned paths.

    Adapted from:
    https://stackoverflow.com/questions/44883175/how-to-list-all-datasets-in-h5py-file

    :param h5_file: an open h5py File or Group to search
    :return: list of dataset paths (str) in the order h5py visits them
    """
    dataset_paths = []

    def _collect(name, obj):
        # Implicitly returning None tells h5py to keep walking the tree.
        if isinstance(obj, h5py.Dataset):
            dataset_paths.append(name)

    # visititems passes each object along with its name, so every member does
    # not have to be looked up a second time, and the public h5py.Dataset
    # class replaces the private h5py._hl module path.
    h5_file.visititems(_collect)
    return dataset_paths


def create_list_of_groups_in_node(node):
    """
    Return the paths of all groups found (recursively) under an h5py node.

    Paths are relative to ``node``; ``node`` itself is not included.

    :param node: an open h5py File or Group to search
    :return: list of group paths (str) in the order h5py visits them
    """
    group_paths = []

    def _collect(name, obj):
        # Implicitly returning None tells h5py to keep walking the tree.
        if isinstance(obj, h5py.Group):
            group_paths.append(name)

    # visititems supplies the object itself, avoiding a second lookup per
    # member, and h5py.Group is the public name of the group class.
    node.visititems(_collect)
    return group_paths

def create_list_of_items_in_node(item_type, h5_file, node):
    """
    Return the paths of all groups or all datasets under ``h5_file[node]``.

    The search is recursive and the returned paths are relative to ``node``.

    :param item_type: "group" to list groups, "dataset" to list datasets
    :param h5_file: an open h5py File (or Group) that contains ``node``
    :param node: path of the node to search, relative to ``h5_file``
    :return: list of matching paths (str) in the order h5py visits them
    :raises ValueError: if ``item_type`` is neither "group" nor "dataset"
    """
    # Validate before touching the file: previously an unknown item_type left
    # the wanted type unbound, which surfaced as a NameError inside the visit
    # callback (or as a silently empty result for an empty node).
    if item_type not in ("group", "dataset"):
        raise ValueError(
            f"item_type must be 'group' or 'dataset', got {item_type!r}"
        )
    wanted_type = h5py.Group if item_type == "group" else h5py.Dataset

    matches = []

    def _collect(name, obj):
        # Implicitly returning None tells h5py to keep walking the tree.
        if isinstance(obj, wanted_type):
            matches.append(name)

    h5_file[node].visititems(_collect)
    return matches

def create_list_of_insertions_in_node(h5_file, node):
    """
    Return the group paths under ``node`` that end in 'ins' followed by
    exactly three more characters (e.g. 'cal001/ins001').

    :param h5_file: open h5py file containing ``node``
    :param node: path of the node whose groups are scanned
    :return: list of matching group paths, relative to ``node``
    """
    candidate_groups = create_list_of_items_in_node("group", h5_file, node)
    # The slice picks out the three characters that precede the last three.
    return [path for path in candidate_groups if path[-6:-3] == 'ins']

def create_session_insertion_dict(h5_file):
    """
    Map every top-level key of the file to the insertion groups found under it.

    The resulting dictionary is printed and then returned.

    :param h5_file: open h5py file (anything exposing ``keys()``)
    :return: dict of {top-level key: list of insertion group paths}
    """
    insertions_by_session = {
        session: create_list_of_insertions_in_node(h5_file, session)
        for session in list(h5_file.keys())
    }
    print(insertions_by_session)
    return insertions_by_session

def create_list_of_calibrations_in_node(h5_file, node):
    """
    Return the group paths under ``node`` that end in 'cal' followed by
    exactly three more characters (e.g. 'cal001').

    :param h5_file: open h5py file containing ``node``
    :param node: path of the node whose groups are scanned
    :return: list of matching group paths, relative to ``node``
    """
    candidate_groups = create_list_of_items_in_node("group", h5_file, node)
    # The slice picks out the three characters that precede the last three.
    return [path for path in candidate_groups if path[-6:-3] == 'cal']


def create_session_calibration_dict(h5_file):
    """
    Map every top-level key of the file to the calibration groups found under it.

    The resulting dictionary is printed and then returned.

    :param h5_file: open h5py file (anything exposing ``keys()``)
    :return: dict of {top-level key: list of calibration group paths}
    """
    calibrations_by_session = {
        session: create_list_of_calibrations_in_node(h5_file, session)
        for session in list(h5_file.keys())
    }
    print(calibrations_by_session)
    return calibrations_by_session

def get_wavelengths_vector(h5_file, session, insertion, spectrometer):
    """
    Return the wavelengths vector of one spectrometer for an insertion.

    The vector is read from an attribute on the *parent* group of
    ``h5_file[f"{session}/{insertion}"]``: 'spec1_wavelengths_vector' for
    "spectrometer1" and 'spec2_wavelengths_vector' for "spectrometer2".

    :param h5_file: open h5py file
    :param session: session path component, e.g. 'session006'
    :param insertion: insertion path below the session, e.g. 'cal001/ins001'
    :param spectrometer: "spectrometer1" or "spectrometer2"
    :return: the attribute's contents, sliced with ``[:]``
    :raises ValueError: if ``spectrometer`` is not one of the two known names
    """
    attr_by_spectrometer = {
        "spectrometer1": "spec1_wavelengths_vector",
        "spectrometer2": "spec2_wavelengths_vector",
    }
    # Fail with a message that names the problem.  Previously an unknown
    # spectrometer only printed a note and then looked up the attribute "",
    # which died with an unrelated-looking KeyError.
    if spectrometer not in attr_by_spectrometer:
        raise ValueError(
            f"spectrometer must be one of {sorted(attr_by_spectrometer)}, "
            f"got {spectrometer!r}"
        )
    vector_attr = attr_by_spectrometer[spectrometer]

    insertion_group = h5_file[f"{session}/{insertion}"]
    wavelengths_vector = insertion_group.parent.attrs[vector_attr][:]
    return wavelengths_vector

def get_absorbance_spectra_in_insertion(h5_file, session, insertion):
    """
    Read the 'spectrometer1/derived/absorbances' dataset of one insertion.

    The loaded data is printed and then returned.

    :param h5_file: open h5py file
    :param session: session path component
    :param insertion: insertion path below the session
    :return: the full contents of the dataset (sliced with ``[:]``)
    """
    dataset_path = "{}/{}/{}".format(
        session, insertion, "spectrometer1/derived/absorbances"
    )
    spectra = h5_file[dataset_path][:]
    print(spectra)
    return spectra

def get_absorbance_spectra_and_depth_in_insertion(h5_file, session, insertion):
    """
    Split the 'derived/absorbance_depth' dataset of one insertion into parts.

    The last column is returned as depths, the second-to-last as forces and
    every column before those two as the absorbance spectra.  The spectra
    are printed before returning.

    :param h5_file: open h5py file
    :param session: session path component
    :param insertion: insertion path below the session
    :return: tuple ``(absorbance_spectra, depths, forces)``
    """
    table = h5_file["{}/{}/{}".format(session, insertion, "derived/absorbance_depth")]
    spectra = table[:, :-2]
    force_column = table[:, -2]
    depth_column = table[:, -1]
    print(spectra)
    return spectra, depth_column, force_column

def get_current_time():
    """
    Return the current time from ``datetime.datetime.now()`` as a NumPy int64.

    The naive datetime is converted to ``np.datetime64`` and its underlying
    integer count is returned (NumPy converts a ``datetime.datetime`` to
    microsecond resolution).
    """
    now = datetime.datetime.now()
    stamp = np.datetime64(now)
    return stamp.astype(np.int64)


def timestamp_list_to_pandas_timestamps(timestamp_list):
    """
    Wrap a sequence of timestamps in a pandas Series cast to '<M8[us]'.

    :param timestamp_list: sequence of timestamp values
    :return: pandas Series of the values cast to a datetime64 dtype
    """
    raw_values = pd.Series(timestamp_list)
    return raw_values.astype('<M8[us]')

def find_zoomed_yvalues(wavelengths_vector, absorbance_spectrum,
                        low_nm=1700, high_nm=1800, margin=0.005):
    """
    Compute y-axis limits that frame a spectrum inside a wavelength window.

    The spectrum values whose wavelength lies strictly between ``low_nm`` and
    ``high_nm`` are selected; the returned limits are their minimum and
    maximum, each pushed outward by ``margin`` times its own magnitude.

    :param wavelengths_vector: wavelengths, one per spectrum sample
    :param absorbance_spectrum: spectrum values aligned with the wavelengths
    :param low_nm: exclusive lower bound of the window (default 1700)
    :param high_nm: exclusive upper bound of the window (default 1800)
    :param margin: fractional padding applied to each limit (default 0.005)
    :return: tuple ``(ymin, ymax)``
    :raises ValueError: if no wavelength falls inside the window
    """
    wavelengths = np.asarray(wavelengths_vector)
    spectrum = np.asarray(absorbance_spectrum)

    in_window = (wavelengths > low_nm) & (wavelengths < high_nm)
    if not in_window.any():
        # Same exception type NumPy raises for min() of an empty selection,
        # but with a message that says what actually went wrong.
        raise ValueError(
            f"no wavelengths fall strictly between {low_nm} and {high_nm}"
        )

    window_values = spectrum[in_window]
    minval = window_values.min()
    maxval = window_values.max()
    # Pad by the magnitude so the limits always move outward.  Padding by the
    # signed value pulled the limits inward for negative values, which cut
    # off the very data the zoom is meant to frame.
    ymin = minval - abs(minval) * margin
    ymax = maxval + abs(maxval) * margin
    return ymin, ymax

In [3]:
# Show the path of every dataset in the open file.
create_list_of_datasets_in_h5file(h5_file)

['session002/cal001/ins001/derived/absorbance_depth',
 'session002/cal001/ins001/derived/absorbance_depth_timestamps',
 'session002/cal001/ins001/derived/depth_increment_data',
 'session002/cal001/ins001/derived/force_depth',
 'session002/cal001/ins001/derived/force_depth_timestamps',
 'session002/cal001/ins001/encoder/depths',
 'session002/cal001/ins001/encoder/timestamps',
 'session002/cal001/ins001/load_cell/forces',
 'session002/cal001/ins001/load_cell/timestamps',
 'session002/cal001/ins001/spectrometer1/derived/absorbances',
 'session002/cal001/ins001/spectrometer1/spectra',
 'session002/cal001/ins001/spectrometer1/timestamps',
 'session002/cal001/ins002/derived/absorbance_depth',
 'session002/cal001/ins002/derived/absorbance_depth_timestamps',
 'session002/cal001/ins002/derived/depth_increment_data',
 'session002/cal001/ins002/derived/force_depth',
 'session002/cal001/ins002/derived/force_depth_timestamps',
 'session002/cal001/ins002/encoder/depths',
 'session002/cal001/ins002/e

In [4]:
# Show the attribute names stored on the group 'session006/cal001'.
h5_file['session006/cal001'].attrs.keys()

<KeysViewHDF5 ['calibration_start_time', 'dark_spectrum', 'dark_spectrum2', 'ftir_amplitude', 'ftir_calibration_cycles', 'ftir_calibration_gain', 'ftir_calibration_measure_mode', 'ftir_cycles', 'ftir_direction', 'ftir_frequency', 'ftir_gain', 'ftir_integrate_mode', 'ftir_measure_mode', 'ftir_temperature', 'ftir_wavenums_count', 'ftir_zero_fill', 'spec1_wavelengths_vector', 'spec2_wavelengths_vector', 'vis_capture_mode', 'vis_cycle_time', 'vis_data_count', 'vis_data_transmit', 'vis_exposure_seconds', 'vis_exposure_time', 'vis_gain_mode', 'vis_sensor_gain_mode', 'white_spectrum', 'white_spectrum2']>

In [5]:
# Inspect the 'dark_spectrum2' attribute of 'session006/cal001'
# (the part of the output that is visible below is all zeros).
h5_file['session006/cal001'].attrs['dark_spectrum2'][:]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [7]:
# Reach the 'dark_spectrum' attribute through an insertion group's .parent,
# the same access pattern get_wavelengths_vector() relies on.
h5_file['session006/cal001/ins001'].parent.attrs['dark_spectrum']

array([3959.5 , 4018.25, 3984.25, 3974.25, 4001.75, 4002.25, 4022.5 ,
       4042.25, 3926.75, 3977.75, 3933.25, 4041.25, 3960.  , 4062.75,
       3954.25, 4028.5 , 3938.  , 4006.75, 3921.5 , 3968.5 , 3936.  ,
       4053.25, 3954.75, 3952.75, 3976.  , 3999.75, 4062.  , 4011.5 ,
       3952.  , 4020.25, 3959.25, 4004.25, 4031.75, 3981.  , 3915.5 ,
       3956.75, 3936.25, 3976.5 , 3989.  , 3993.75, 3918.  , 4035.5 ,
       4047.75, 4018.25, 3963.  , 3980.25, 3955.75, 4063.5 , 3946.75,
       4019.5 , 4027.  , 4031.75, 4058.75, 4085.25, 4091.  , 4047.25,
       3998.75, 3981.5 , 3945.75, 3994.75, 3977.75, 4000.5 , 3919.5 ,
       3981.  , 3970.25, 3969.25, 3984.  , 4021.  , 4054.5 , 4011.5 ,
       4050.75, 3967.  , 4060.  , 4086.  , 4003.  , 4097.25, 4044.75,
       4057.75, 4085.75, 4160.25, 3973.25, 4063.5 , 4036.  , 4146.5 ,
       4059.5 , 4090.25, 4008.  , 4090.5 , 4012.25, 4072.25, 3959.75,
       4029.  , 3964.75, 4082.5 , 3969.25, 4003.  , 3917.75, 4024.75,
       3986.5 , 4013

In [10]:
# Scratch check: np.concatenate joins two 1-D arrays end to end.
a1 = np.array([1, 2, 3])
a2 = np.array([4, 5, 6])
np.concatenate([a1, a2])

array([1, 2, 3, 4, 5, 6])

In [14]:
# List the attribute names stored on the group 'session001'.
list(h5_file['session001'].attrs)

['encoder',
 'encoder_host',
 'encoder_tags',
 'field_workers',
 'load_cell',
 'load_cell_port',
 'load_cell_repetitions',
 'session_date',
 'session_location',
 'session_name',
 'session_notes',
 'session_number',
 'spectrometer1',
 'spectrometer2']

In [15]:
# Read the 'spectrometer2' attribute of 'session001'; per the output below it
# holds the string 'None' rather than a Python None.
h5_file['session001'].attrs['spectrometer2']

'None'

In [16]:
# Scratch check of ndarray.max(); a1 is defined in the np.concatenate cell.
a1.max()

3