In [1]:
import pandas as pd
import numpy as np
import h5py
import datetime
import math
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import seaborn as sns
from scipy.signal import savgol_filter
plt.style.use('seaborn')
%matplotlib widget
from mpl_toolkits.mplot3d import Axes3D

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn import model_selection
from sklearn import linear_model

In [10]:
def get_calibration_temperature(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        cal_temp = h5_file[calibration_path].attrs['ftir_temperature']
        return cal_temp
    
def get_insertion_temperature(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        ins_temp = h5_file[insertion_path].attrs['ftir_temp']
        return ins_temp
    
def get_nirone_insertion_temperature(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        ins_temp = h5_file[insertion_path].attrs['nirone_temp']
        return ins_temp
    
def get_visible_white_calibration_curve(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        cal = h5_file[calibration_path].attrs['white_spectrum'][:]
        return cal
    
def get_ftir_white_calibration_curve(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        cal = h5_file[calibration_path].attrs['white_spectrum2'][:]
        return cal
    
def get_visible_wavelength_vector(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        waves = h5_file[calibration_path].attrs['spec1_wavelengths_vector'][:]
        return waves
    
def get_ftir_wavelength_vector(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        waves = h5_file[calibration_path].attrs['spec2_wavelengths_vector'][:]
        return waves
    
def get_ftir_insertion_absorbances(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        ins_abs = h5_file[f'{insertion_path}/spectrometer2/derived/absorbances'][:]
        return ins_abs
    
def get_ftir_insertion_raw_spectra(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        ins_raw = h5_file[f'{insertion_path}/spectrometer2/spectra'][:]
        return ins_raw
    
def get_ftir_insertion_timestamps(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        timestamps = h5_file[f'{insertion_path}/spectrometer2/timestamps'][:]
        return timestamps
    
def get_visible_insertion_timestamps(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        timestamps = h5_file[f'{insertion_path}/spectrometer1/timestamps'][:]
        return timestamps
    
def get_visible_insertion_absorbances(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        ins_abs = h5_file[f'{insertion_path}/spectrometer1/derived/absorbances'][:]
        return ins_abs
    
def get_visible_insertion_raw_spectra(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        ins_raw = h5_file[f'{insertion_path}/spectrometer1/spectra'][:]
        return ins_raw
    
def create_list_of_items_in_node(item_type, file, node):
    with h5py.File(file, 'r') as h5_file:
        keys = []
        if item_type == "group":
            my_type = h5py._hl.group.Group
        if item_type == "dataset":
            my_type = h5py._hl.dataset.Dataset
        h5_file[node].visit(lambda key: keys.append(key) if type(h5_file[node][key]) is my_type else None)
        return keys

def create_list_of_calibrations_in_node(file, node):
    calibrations = []
    all_groups = create_list_of_items_in_node("group", file, node)
    for group in all_groups:
        if group[-6:-3] == 'cal':
            calibrations.append(node + '/' + group)
    return calibrations

def create_list_of_insertions_in_calibration(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        insertion_keys = list(h5_file[calibration_path].keys())
        insertions = [f'{calibration_path}/{key}' for key in insertion_keys]            
        return insertions
    
def select_by_depth_range(df, range_start, range_end):
    df_out = df.loc[(df['depth'] > range_start) & (df['depth'] < range_end)]
    return df_out

def calculate_absorbance_from_raw(raw_spectrum, white_spectrum, dark_spectrum):
    reflectance = ((raw_spectrum - dark_spectrum) / (white_spectrum - dark_spectrum))
    inverse_reflectance = 1/reflectance
    absorbance = np.log10(inverse_reflectance.astype(np.float64))
    return absorbance

def calculate_absorbance_for_2D_array(array, white_spectrum, dark_spectrum):
    absorbance_array = np.empty_like(array, dtype=np.float64)
    for i in range(array.shape[0]):
        absorbance_spectrum = calculate_absorbance_from_raw(array[i, :], white_spectrum, dark_spectrum)
        absorbance_array[i, :] = absorbance_spectrum
    return absorbance_array

def construct_full_file_path(data_path, file_name):
    file_path = data_path + file_name
    return file_path

def create_list_of_sessions_in_file(file_name):
    sessions = []
    all_groups = create_list_of_items_in_node("group", file_name, "/")
    for group in all_groups:
        if (group[0:3] == 'ses') and (len(group) == 10):
            sessions.append(group)
    return sessions

def create_list_of_insertions_in_file(file_name):
    insertions = []
    sessions = create_list_of_sessions_in_file(file_name)
    for session in sessions:
        calibrations = create_list_of_calibrations_in_node(file_name, session)
        for calibration in calibrations:
            cal_insertions = create_list_of_insertions_in_calibration(file_name, calibration)
            for insertion in cal_insertions:
                insertions.append(insertion)
    return insertions
            
def get_insertion_timestamp(file, insertion_path):
    with h5py.File(file, 'r') as h5_file:
        ins_time = h5_file[insertion_path].attrs['start_time']
        ins_timestamp = pd.Timestamp(ins_time, unit='us')
        return ins_timestamp
    
def get_calibration_timestamp(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        cal_time = h5_file[calibration_path].attrs['calibration_start_time']
        cal_timestamp = pd.Timestamp(cal_time, unit='us')
        return cal_timestamp  
    
def find_position_in_wavelength_vector(wavelength_vector, integer):
    position = np.where(np.isclose(wavelength_vector, integer, 1e-3))[0][0]
    return position

def normalize(value, max_value, min_value):
    normalized_value = (value - min_value)/(max_value - min_value)
    return normalized_value

def get_ftir_dark_calibration_curve(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        cal = h5_file[calibration_path].attrs['dark_spectrum2'][:]
        return cal
    
def get_visible_dark_calibration_curve(file, calibration_path):
    with h5py.File(file, 'r') as h5_file:
        cal = h5_file[calibration_path].attrs['dark_spectrum'][:]
        return cal
        
def get_ftir_spectrum_timestamp(file, insertion_path, index):
    with h5py.File(file, 'r') as h5_file:
        time = h5_file[f'{insertion_path}/spectrometer2/timestamps'][index]
        timestamp = pd.Timestamp(time, unit='us')
        return timestamp
    
def get_visible_spectrum_timestamp(file, insertion_path, index):
    with h5py.File(file, 'r') as h5_file:
        time = h5_file[f'{insertion_path}/spectrometer1/timestamps'][index]
        timestamp = pd.Timestamp(time, unit='us')
        return timestamp
    
def compute_3D_distance(x1, y1, z1, x2, y2, z2):
    distance = math.sqrt((x1 - x2)**2 + (y1 - y2)**2 + (z1 - z2)**2)
    return distance

In [3]:
path_name = "/Users/linda/OneDrive/Documents/S4_mine_p/Projects/Data_collected/"
df = pd.read_csv('data/white_insertions_nirone_210502.csv')

In [5]:
df_columns = list(df.columns.values)
df_columns.append('cal_time_0')
df_columns.append('temperature')
info_df = pd.DataFrame(columns=df_columns)
timestamps_df = pd.DataFrame(columns=['timestamp'])

In [9]:
path_name = "/Users/linda/OneDrive/Documents/S4_mine_p/Projects/Data_collected/"
file_name = "210502_nirone_tests/nirone_testing_210502.h5"
file = path_name + '/' + file_name
with h5py.File(file, 'r') as h5_file:
    print(h5_file['session001/cal002/ins001'].attrs.keys())

<KeysViewHDF5 ['end_time', 'nirone_temp', 'start_time']>


In [22]:
path_name = "/Users/linda/OneDrive/Documents/S4_mine_p/Projects/Data_collected/"
df = pd.read_csv('data/white_insertions_nirone_210502.csv')
df_columns = list(df.columns.values)
df_columns.append('cal_time_0')
df_columns.append('temperature')
info_df = pd.DataFrame(columns=df_columns)
timestamps_df = pd.DataFrame(columns=['timestamp'])
# columns is calculated from the length of the wavelength vector
columns = np.arange(0, 401, 1)
spectra = pd.DataFrame(columns=columns)
# each i represents an insertion
for i in range(df.shape[0]):
    row_file = df['file_name'][i]
    file = construct_full_file_path(path_name, row_file)    
    calibration_path = df['session'][i] + "/" + df['calibration'][i]
    calibration_insertions = create_list_of_insertions_in_calibration(file, calibration_path)    
    calibration_first_timestamp = get_visible_spectrum_timestamp(file, calibration_insertions[0], 0)
    insertion_path = calibration_path + "/" + df['insertion'][i]
    insertion_temperature = get_nirone_insertion_temperature(file, insertion_path)
    # raw_spectra and timestamps have many spectra and timestamps per insertion
    raw_spectra = pd.DataFrame(get_visible_insertion_raw_spectra(file, insertion_path))    
    spectra = pd.concat([spectra, raw_spectra], axis=0, ignore_index=True)
    ts_array = get_visible_insertion_timestamps(file, insertion_path)
    timestamps = pd.DataFrame(pd.to_datetime(ts_array, unit='us'), columns=['timestamp'])
    timestamps_df = pd.concat([timestamps_df, timestamps], axis=0, ignore_index=True)
    # info and temperature will be the same for every spectrum in insertion
    info_row = df.iloc[i:i+1, :].copy()
    info_row['cal_time_0'] = calibration_first_timestamp
    info_row['temperature'] = insertion_temperature
    # each j represents a spectrum; the info is duplicated for each spectrum
    for j in range(raw_spectra.shape[0]):
        info_df = pd.concat([info_df, info_row], axis=0, ignore_index=True)
        
print(info_df.shape)
print(timestamps_df.shape)
print(spectra.shape)
spectra_df = pd.concat([info_df, timestamps_df, spectra], axis=1)
print(spectra_df.shape)


(2210, 11)
(2210, 1)
(2210, 401)
(2210, 413)


In [15]:
print(file)

/Users/linda/OneDrive/Documents/S4_mine_p/Projects/Data_collected/210410_nirone_vibration/nirone_vibration_210410.h5


In [None]:
with h5py.File(file, 'r') as h5_file:
    print()

In [31]:

unique_cals = info_df['c_unique'].unique()
for i in range(unique_cals.shape[0]):
    row_file = info_df['file_name'][i]
    file = construct_full_file_path(path_name, row_file)
    calibration_path = info_df['session'][i] + "/" + info_df['calibration'][i]
    calibration_insertions = create_list_of_insertions_in_calibration(file, calibration_path)
    
    for insertion in calibration_insertions[0:1]:
        insertion_path = calibration_path + "/" + insertion
        print(insertion_path)
    
    

session001/cal001/session001/cal001/ins001
session001/cal001/session001/cal001/ins001
session001/cal001/session001/cal001/ins001
session001/cal001/session001/cal001/ins001
session001/cal001/session001/cal001/ins001


In [23]:
spectra_df.iloc[0, 12:].shape

(401,)

In [26]:
info_df.head()

Unnamed: 0,file_name,date,session,calibration,insertion,c_unique,i_in_c,i_unique,conditions,cal_time_0,temperature
0,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
1,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
2,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
3,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
4,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275


In [27]:
unique_cals = info_df['c_unique'].unique()

In [28]:
unique_cals

array(['c001', 'c002', 'c003', 'c004', 'c005'], dtype=object)

In [45]:
unique_cals = info_df['c_unique'].unique()
for i in range(unique_cals.shape[0]):
    cal_df = info_df.loc[info_df['c_unique'] == unique_cals[i,]]
    cal_ins_unique

    for j in range()
    print(cal_df['i_in_c'].unique())
    # for insertion in cal_df['i_in_c']:
        # print(insertion)
                
    

['i01' 'i02']
['i01' 'i02' 'i03' 'i04' 'i05' 'i06' 'i07' 'i08']
['i01' 'i02' 'i03' 'i04' 'i05' 'i06' 'i07' 'i08']
['i01' 'i02' 'i03' 'i04' 'i05' 'i06' 'i07']
['i01' 'i02' 'i03' 'i04' 'i05' 'i06' 'i07' 'i08' 'i09' 'i10' 'i11' 'i12'
 'i13' 'i14' 'i15']


In [38]:
cals_df = info_df.groupby('c_unique').first()

In [39]:
cals_df

Unnamed: 0_level_0,file_name,date,session,calibration,insertion,i_in_c,i_unique,conditions,cal_time_0,temperature
c_unique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
c001,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
c002,210331_nirone_tests/nirone_test4.h5,3/30/21,session003,cal002,ins001,i01,c002i01,general,2021-03-31 11:18:27.591060,18.142696
c003,210410_nirone_vibration/nirone_vibration_21041...,4/10/21,session001,cal001,ins001,i01,c003i01,moving,2021-04-10 11:37:28.153518,25.865803
c004,210410_nirone_vibration/nirone_vibration_21041...,4/11/21,session001,cal002,ins009,i01,c004i01,general,2021-04-10 11:53:46.870994,28.518883
c005,210502_nirone_tests/nirone_testing_210502.h5,5/2/21,session001,cal002,ins001,i01,c005i01,sunshine,2021-05-02 15:37:04.314647,32.933575


In [40]:
unique_cals[i,]

'c005'

In [44]:
info_df.shape

(2210, 11)

In [46]:
info_df.head(10)

Unnamed: 0,file_name,date,session,calibration,insertion,c_unique,i_in_c,i_unique,conditions,cal_time_0,temperature
0,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
1,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
2,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
3,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
4,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
5,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
6,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
7,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
8,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275
9,210330_nirone_tests/nirone_test1.h5,3/30/21,session001,cal001,ins001,c001,i01,c001i01,general,2021-03-30 14:28:57.692793,22.340275


In [60]:
path_name = "/Users/linda/OneDrive/Documents/S4_mine_p/Projects/Data_collected/"
unique_cals = info_df['c_unique'].unique()
print(unique_cals)
for i in range(unique_cals[:2].shape[0]):
    cal_df = info_df.loc[info_df['c_unique'] == unique_cals[i,]].copy()
    cal_ins = cal_df['i_unique'].unique()
    file_name = cal_df.iloc[i, 0]
    session = cal_df.iloc[i, 2]
    cal = cal_df.iloc[i, 3]
    file = path_name + "/" + file_name
    calibration_path = session + "/" + cal    
    waves = get_visible_wavelength_vector(file, calibration_path)
    for j in range(cal_ins[:2].shape[0]):
        ins_df = cal_df.loc[cal_df['i_unique'] == cal_ins[j,]].copy()
        ins = ins_df.iloc[i, 4]
        insertion_path = calibration_path + "/" + ins
        print(insertion_path)
        absorbances = get_visible_insertion_absorbances(file, insertion_path)
        fig, ax = plt.subplots()
        for 
        
        
    
    

['c001' 'c002' 'c003' 'c004' 'c005']
session001/cal001/ins001
session001/cal001/ins002
session003/cal002/ins001
session003/cal002/ins002


In [58]:
unique_cals[:2]

array(['c001', 'c002'], dtype=object)