In [1]:
import pandas as pd
import numpy as np
import h5py
import datetime
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import seaborn as sns
from scipy.signal import savgol_filter
plt.style.use('seaborn')
%matplotlib widget
from mpl_toolkits.mplot3d import Axes3D

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn import model_selection
from sklearn import linear_model

In [2]:
def get_calibration_temperature(file, calibration_path):
    """Return the ``ftir_temperature`` attribute of a calibration node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the node that carries the attribute.

    Returns:
        The attribute value exactly as h5py delivers it.
    """
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[calibration_path].attrs
        temperature = node_attrs['ftir_temperature']
    return temperature
    
def get_insertion_temperature(file, insertion_path):
    """Return the ``ftir_temp`` attribute of an insertion node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the node that carries the attribute.

    Returns:
        The attribute value exactly as h5py delivers it.
    """
    # NOTE(review): get_calibration_temperature reads 'ftir_temperature' while
    # this reads 'ftir_temp' - confirm both names against the file schema.
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[insertion_path].attrs
        temperature = node_attrs['ftir_temp']
    return temperature
    
def get_visible_white_calibration_curve(file, calibration_path):
    """Return the ``white_spectrum`` attribute of a calibration node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration node.

    Returns:
        The attribute value sliced with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[calibration_path].attrs
        white_curve = node_attrs['white_spectrum'][:]
    return white_curve
    
def get_ftir_white_calibration_curve(file, calibration_path):
    """Return the ``white_spectrum2`` attribute of a calibration node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration node.

    Returns:
        The attribute value sliced with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[calibration_path].attrs
        white_curve = node_attrs['white_spectrum2'][:]
    return white_curve
    
def get_visible_wavelength_vector(file, calibration_path):
    """Return the ``spec1_wavelengths_vector`` attribute of a calibration node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration node.

    Returns:
        The attribute value sliced with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[calibration_path].attrs
        wavelength_axis = node_attrs['spec1_wavelengths_vector'][:]
    return wavelength_axis
    
def get_ftir_wavelength_vector(file, calibration_path):
    """Return the ``spec2_wavelengths_vector`` attribute of a calibration node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration node.

    Returns:
        The attribute value sliced with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[calibration_path].attrs
        wavelength_axis = node_attrs['spec2_wavelengths_vector'][:]
    return wavelength_axis
    
def get_ftir_insertion_absorbances(file, insertion_path):
    """Read the ``spectrometer2/derived/absorbances`` dataset of an insertion.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion group.

    Returns:
        The full dataset contents, read into memory with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        dataset = handle[f'{insertion_path}/spectrometer2/derived/absorbances']
        # Slice while the file is still open so the data is copied out.
        absorbances = dataset[:]
    return absorbances
    
def get_ftir_insertion_raw_spectra(file, insertion_path):
    """Read the ``spectrometer2/spectra`` dataset of an insertion.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion group.

    Returns:
        The full dataset contents, read into memory with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        dataset = handle[f'{insertion_path}/spectrometer2/spectra']
        # Slice while the file is still open so the data is copied out.
        raw_spectra = dataset[:]
    return raw_spectra
    
def get_ftir_insertion_timestamps(file, insertion_path):
    """Read the ``spectrometer2/timestamps`` dataset of an insertion.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion group.

    Returns:
        The full dataset contents, read into memory with ``[:]``. The values
        are returned as stored; no conversion to datetimes happens here.
    """
    with h5py.File(file, 'r') as handle:
        dataset = handle[f'{insertion_path}/spectrometer2/timestamps']
        # Slice while the file is still open so the data is copied out.
        stamps = dataset[:]
    return stamps
    
def get_visible_insertion_absorbances(file, insertion_path):
    """Read the ``spectrometer1/derived/absorbances`` dataset of an insertion.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion group.

    Returns:
        The full dataset contents, read into memory with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        dataset = handle[f'{insertion_path}/spectrometer1/derived/absorbances']
        # Slice while the file is still open so the data is copied out.
        absorbances = dataset[:]
    return absorbances
    
def get_visible_insertion_raw_spectra(file, insertion_path):
    """Read the ``spectrometer1/spectra`` dataset of an insertion.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion group.

    Returns:
        The full dataset contents, read into memory with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        dataset = handle[f'{insertion_path}/spectrometer1/spectra']
        # Slice while the file is still open so the data is copied out.
        raw_spectra = dataset[:]
    return raw_spectra
    
def create_list_of_items_in_node(item_type, file, node):
    """List every group or every dataset found beneath an HDF5 node.

    The node is walked recursively; names are reported relative to ``node``
    in the order h5py visits them.

    Args:
        item_type: ``"group"`` to collect groups, ``"dataset"`` to collect
            datasets.
        file: Path of the HDF5 file; it is opened read-only.
        node: HDF5 path of the group whose descendants are inspected.

    Returns:
        A list of names (relative to ``node``) of the matching members.

    Raises:
        ValueError: If ``item_type`` is neither ``"group"`` nor ``"dataset"``.
    """
    # Reject an unknown selector up front; previously this surfaced as an
    # unbound-variable error from inside the visitor callback.
    if item_type not in ("group", "dataset"):
        raise ValueError(
            f'item_type must be "group" or "dataset", got {item_type!r}'
        )

    with h5py.File(file, 'r') as h5_file:
        # Public aliases of the classes formerly reached through h5py._hl.
        wanted_type = h5py.Group if item_type == "group" else h5py.Dataset
        keys = []

        def collect(name, obj):
            # Must return None: any other return value stops the traversal.
            if isinstance(obj, wanted_type):
                keys.append(name)

        # visititems hands over the object itself, avoiding a second lookup
        # of every visited name.
        h5_file[node].visititems(collect)
        return keys

def create_list_of_calibrations_in_node(file, node):
    """Collect the paths of calibration groups found beneath ``node``.

    A group counts as a calibration when the three characters at positions
    ``[-6:-3]`` of its name are ``'cal'``.

    Args:
        file: Path of the HDF5 file.
        node: HDF5 path of the group to search.

    Returns:
        A list of ``node + '/' + group_name`` strings for the matching groups.
    """
    group_names = create_list_of_items_in_node("group", file, node)
    return [
        node + '/' + name
        for name in group_names
        if name[-6:-3] == 'cal'
    ]

def create_list_of_insertions_in_calibration(file, calibration_path):
    """List the full paths of the direct children of a calibration group.

    Every key of the group is returned; no filtering by name is applied.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration group.

    Returns:
        A list of ``'<calibration_path>/<key>'`` strings, one per child.
    """
    with h5py.File(file, 'r') as handle:
        child_keys = list(handle[calibration_path].keys())
    return [f'{calibration_path}/{key}' for key in child_keys]
    
def select_by_depth_range(df, range_start, range_end):
    """Keep the rows whose ``depth`` lies strictly inside an interval.

    Args:
        df: DataFrame with a ``depth`` column.
        range_start: Exclusive lower bound.
        range_end: Exclusive upper bound.

    Returns:
        The rows of ``df`` with ``range_start < depth < range_end``; the
        original index labels are kept.
    """
    depth = df['depth']
    above_start = depth > range_start
    below_end = depth < range_end
    return df.loc[above_start & below_end]

def calculate_absorbance_from_raw(raw_spectrum, white_spectrum, dark_spectrum):
    """Convert a raw spectrum to absorbance using white and dark references.

    Computes ``log10(1 / reflectance)`` where
    ``reflectance = (raw - dark) / (white - dark)``.

    All inputs are promoted to float64 *before* any subtraction. Previously
    the arithmetic ran in the input dtype, so unsigned-integer spectra wrapped
    around whenever ``raw < dark`` (or ``white < dark``) and produced a
    plausible-looking but wrong absorbance. Such points now yield NaN, and a
    zero reflectance yields ``inf``; NumPy emits its usual runtime warnings.

    Args:
        raw_spectrum: Measured spectrum (array-like or scalar).
        white_spectrum: White reference, broadcastable against ``raw_spectrum``.
        dark_spectrum: Dark reference, broadcastable against ``raw_spectrum``.

    Returns:
        The absorbance as float64, with the broadcast shape of the inputs.
    """
    def _as_float64(values):
        # Objects with .astype (ndarray, numpy scalars, pandas Series) keep
        # their container type; lists and Python numbers become ndarrays.
        if hasattr(values, 'astype'):
            return values.astype(np.float64)
        return np.asarray(values, dtype=np.float64)

    raw = _as_float64(raw_spectrum)
    white = _as_float64(white_spectrum)
    dark = _as_float64(dark_spectrum)

    reflectance = (raw - dark) / (white - dark)
    inverse_reflectance = 1 / reflectance
    absorbance = np.log10(inverse_reflectance)
    return absorbance

def calculate_absorbance_for_2D_array(array, white_spectrum, dark_spectrum):
    """Apply ``calculate_absorbance_from_raw`` to each row of an array.

    Args:
        array: Array of raw spectra; the first axis is iterated row by row.
        white_spectrum: White reference passed unchanged for every row.
        dark_spectrum: Dark reference passed unchanged for every row.

    Returns:
        A float64 array with the same shape as ``array`` holding the
        per-row absorbances.
    """
    n_rows = array.shape[0]
    result = np.empty_like(array, dtype=np.float64)
    for row in range(n_rows):
        result[row, :] = calculate_absorbance_from_raw(
            array[row, :], white_spectrum, dark_spectrum
        )
    return result

def construct_full_file_path(data_path, file_name):
    """Concatenate a directory prefix and a file name.

    No separator is inserted, so ``data_path`` has to end with one already.

    Args:
        data_path: Directory prefix.
        file_name: Name appended directly to ``data_path``.

    Returns:
        ``data_path + file_name``.
    """
    return data_path + file_name

def create_list_of_sessions_in_file(file_name):
    """Collect the session groups of an HDF5 file.

    Every group below the root is examined; a group counts as a session when
    its name starts with ``'ses'`` and is exactly 10 characters long.

    Args:
        file_name: Path of the HDF5 file.

    Returns:
        A list of the matching group names.
    """
    group_names = create_list_of_items_in_node("group", file_name, "/")
    return [
        name
        for name in group_names
        if name.startswith('ses') and len(name) == 10
    ]

def create_list_of_insertions_in_file(file_name):
    """Collect every insertion path in a file, session by session.

    Walks sessions, then the calibrations inside each session, then the
    insertions inside each calibration, and preserves that order.

    Args:
        file_name: Path of the HDF5 file.

    Returns:
        A flat list of insertion paths.
    """
    collected = []
    for session in create_list_of_sessions_in_file(file_name):
        for calibration in create_list_of_calibrations_in_node(file_name, session):
            collected.extend(
                create_list_of_insertions_in_calibration(file_name, calibration)
            )
    return collected
            
def get_insertion_timestamp(file, insertion_path):
    """Return the ``start_time`` attribute of an insertion as a Timestamp.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion node.

    Returns:
        ``pd.Timestamp`` built from the attribute, interpreted as microseconds.
    """
    with h5py.File(file, 'r') as handle:
        start_us = handle[insertion_path].attrs['start_time']
        return pd.Timestamp(start_us, unit='us')
    
def get_calibration_timestamp(file, calibration_path):
    """Return the ``calibration_start_time`` attribute as a Timestamp.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration node.

    Returns:
        ``pd.Timestamp`` built from the attribute, interpreted as microseconds.
    """
    with h5py.File(file, 'r') as handle:
        start_us = handle[calibration_path].attrs['calibration_start_time']
        return pd.Timestamp(start_us, unit='us')
    
def find_position_in_wavelength_vector(wavelength_vector, integer):
    """Find the first index whose wavelength is close to a target value.

    Closeness is decided by ``np.isclose`` with a relative tolerance of 1e-3
    (relative to ``integer``) and NumPy's default absolute tolerance.

    Args:
        wavelength_vector: Array of wavelengths to search.
        integer: Target wavelength.

    Returns:
        Index (along the first axis) of the first matching element.

    Raises:
        IndexError: If no element is close to ``integer``.
    """
    is_match = np.isclose(wavelength_vector, integer, rtol=1e-3)
    first_axis_hits = np.nonzero(is_match)[0]
    return first_axis_hits[0]

def normalize(value, max_value, min_value):
    """Min-max scale ``value`` relative to ``min_value`` and ``max_value``.

    Args:
        value: Value (or array) to scale.
        max_value: Value that maps to 1.
        min_value: Value that maps to 0.

    Returns:
        ``(value - min_value) / (max_value - min_value)``.
    """
    offset = value - min_value
    span = max_value - min_value
    return offset / span

def get_ftir_dark_calibration_curve(file, calibration_path):
    """Return the ``dark_spectrum2`` attribute of a calibration node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration node.

    Returns:
        The attribute value sliced with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[calibration_path].attrs
        dark_curve = node_attrs['dark_spectrum2'][:]
    return dark_curve
    
def get_visible_dark_calibration_curve(file, calibration_path):
    """Return the ``dark_spectrum`` attribute of a calibration node.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        calibration_path: HDF5 path of the calibration node.

    Returns:
        The attribute value sliced with ``[:]``.
    """
    with h5py.File(file, 'r') as handle:
        node_attrs = handle[calibration_path].attrs
        dark_curve = node_attrs['dark_spectrum'][:]
    return dark_curve
        
def get_ftir_spectrum_timestamp(file, insertion_path, index):
    """Return one entry of ``spectrometer2/timestamps`` as a Timestamp.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion group.
        index: Position of the wanted entry within the timestamps dataset.

    Returns:
        ``pd.Timestamp`` built from the entry, interpreted as microseconds.
    """
    with h5py.File(file, 'r') as handle:
        stamps = handle[f'{insertion_path}/spectrometer2/timestamps']
        raw_us = stamps[index]
        return pd.Timestamp(raw_us, unit='us')
    
def get_visible_spectrum_timestamp(file, insertion_path, index):
    """Return one entry of ``spectrometer1/timestamps`` as a Timestamp.

    Args:
        file: Path of the HDF5 file; it is opened read-only.
        insertion_path: HDF5 path of the insertion group.
        index: Position of the wanted entry within the timestamps dataset.

    Returns:
        ``pd.Timestamp`` built from the entry, interpreted as microseconds.
    """
    with h5py.File(file, 'r') as handle:
        stamps = handle[f'{insertion_path}/spectrometer1/timestamps']
        raw_us = stamps[index]
        return pd.Timestamp(raw_us, unit='us')

In [3]:
import os

# Directory prefix of the HDF5 data files. The default is the original
# hard-coded location; set the S4_DATA_PATH environment variable to run the
# notebook on another machine without editing this cell.
path_name = os.environ.get(
    "S4_DATA_PATH",
    "/Users/linda/OneDrive/Documents/S4_mine_p/Projects/Data_collected/",
)
# construct_full_file_path joins by plain string concatenation, so the prefix
# must already end with a path separator.
if not path_name.endswith(("/", os.sep)):
    path_name += "/"

# Table of insertions; later cells read its 'file_name', 'session',
# 'calibration' and 'insertion' columns.
df = pd.read_csv('data/white_insertions.csv')

In [16]:
# Empty accumulator for the raw integer timestamps. The column is given an
# explicit int64 dtype so that concatenating integer timestamp frames onto it
# keeps int64 instead of falling back to object dtype.
timestamps_df = pd.DataFrame({'timestamp': pd.Series(dtype='int64')})

In [17]:
# Display the accumulator frame right after its creation (header only).
timestamps_df

Unnamed: 0,timestamp


In [18]:
# Select one row of the insertion table and derive the HDF5 paths for it.
i = 0
row_file = df.loc[i, 'file_name']
file = construct_full_file_path(path_name, row_file)

# Calibration node: '<session>/<calibration>'.
calibration_path = "/".join([df.loc[i, 'session'], df.loc[i, 'calibration']])
calibration_insertions = create_list_of_insertions_in_calibration(file, calibration_path)

# Timestamp of the first spectrum of the first insertion in this calibration.
calibration_first_timestamp = get_ftir_spectrum_timestamp(file, calibration_insertions[0], 0)

# Insertion node for the selected row: '<calibration_path>/<insertion>'.
insertion_path = "/".join([calibration_path, df.loc[i, 'insertion']])

In [25]:
# Wrap the raw timestamp values of the selected insertion in a one-column frame.
# NOTE(review): the TypeError recorded beneath this cell mentions a conversion
# to Timestamp, which this statement does not perform - presumably output left
# over from an earlier version of the cell; re-run to refresh it.
timestamps = pd.DataFrame(get_ftir_insertion_timestamps(file, insertion_path), columns=['timestamp'])

TypeError: Cannot convert input [[1618921760206730 1618921762335510 1618921764467705 1618921766584302
 1618921768703186 1618921770829741 1618921772943790 1618921775061336
 1618921777186547 1618921779302353 1618921781420535 1618921783534326
 1618921785659300 1618921787776428 1618921789907696 1618921792038503
 1618921794154220 1618921796304965 1618921798432951 1618921800598611
 1618921802718548 1618921804836555 1618921806967161 1618921809090336
 1618921811221596 1618921813342917 1618921815471829 1618921817591226
 1618921819705206 1618921821822417 1618921823941820 1618921826074290
 1618921828191982 1618921830323635 1618921832437453 1618921834557226
 1618921836684329 1618921838818380 1618921840984223 1618921843102297
 1618921845249348 1618921847368573 1618921849498588 1618921851636234
 1618921853760198 1618921855889676 1618921858017350 1618921860146460
 1618921862261267 1618921864397335 1618921866531546 1618921868648056
 1618921870775934 1618921872909472 1618921875044331 1618921877174021
 1618921879308481 1618921881437229 1618921883569475 1618921885705375]] of type <class 'numpy.ndarray'> to Timestamp

In [20]:
# Display the per-spectrum timestamp frame.
# NOTE(review): this cell is In [20] but the cell that assigns `timestamps`
# is In [25] - execution order differs from document order; verify with
# Restart & Run All.
timestamps

Unnamed: 0,timestamp
0,1618921760206730
1,1618921762335510
2,1618921764467705
3,1618921766584302
4,1618921768703186
5,1618921770829741
6,1618921772943790
7,1618921775061336
8,1618921777186547
9,1618921779302353


In [21]:
# Append this insertion's timestamps to the accumulator, renumbering the index.
# NOTE(review): the cell reassigns its own input, so every re-run appends the
# same rows again; re-create timestamps_df before re-running.
timestamps_df = pd.concat([timestamps_df, timestamps], axis=0, ignore_index=True)

In [22]:
# Display the accumulator after the concatenation.
timestamps_df

Unnamed: 0,timestamp
0,1618921760206730
1,1618921762335510
2,1618921764467705
3,1618921766584302
4,1618921768703186
5,1618921770829741
6,1618921772943790
7,1618921775061336
8,1618921777186547
9,1618921779302353


In [23]:
# Timestamp of the first spectrum of the calibration's first insertion.
# NOTE(review): identical to the assignment in the row-selection cell above,
# so this recomputes the same value.
calibration_first_timestamp = get_ftir_spectrum_timestamp(file, calibration_insertions[0], 0)

In [24]:
# Display the converted pd.Timestamp.
calibration_first_timestamp

Timestamp('2021-04-20 12:29:20.206730')

In [26]:
# Raw timestamp array of the selected insertion, as stored in the file.
ts = get_ftir_insertion_timestamps(file, insertion_path)
# Echo the whole array for inspection.
ts

array([1618921760206730, 1618921762335510, 1618921764467705,
       1618921766584302, 1618921768703186, 1618921770829741,
       1618921772943790, 1618921775061336, 1618921777186547,
       1618921779302353, 1618921781420535, 1618921783534326,
       1618921785659300, 1618921787776428, 1618921789907696,
       1618921792038503, 1618921794154220, 1618921796304965,
       1618921798432951, 1618921800598611, 1618921802718548,
       1618921804836555, 1618921806967161, 1618921809090336,
       1618921811221596, 1618921813342917, 1618921815471829,
       1618921817591226, 1618921819705206, 1618921821822417,
       1618921823941820, 1618921826074290, 1618921828191982,
       1618921830323635, 1618921832437453, 1618921834557226,
       1618921836684329, 1618921838818380, 1618921840984223,
       1618921843102297, 1618921845249348, 1618921847368573,
       1618921849498588, 1618921851636234, 1618921853760198,
       1618921855889676, 1618921858017350, 1618921860146460,
       1618921862261267,

In [33]:
# Convert the whole array at once: pd.to_datetime accepts an array, with the
# integers interpreted as microseconds (unit='us').
tspd = pd.DataFrame(pd.to_datetime(ts, unit='us'), columns=['timestamp'])

In [35]:
# Display the converted datetime column.
tspd['timestamp']

0    2021-04-20 12:29:20.206730
1    2021-04-20 12:29:22.335510
2    2021-04-20 12:29:24.467705
3    2021-04-20 12:29:26.584302
4    2021-04-20 12:29:28.703186
5    2021-04-20 12:29:30.829741
6    2021-04-20 12:29:32.943790
7    2021-04-20 12:29:35.061336
8    2021-04-20 12:29:37.186547
9    2021-04-20 12:29:39.302353
10   2021-04-20 12:29:41.420535
11   2021-04-20 12:29:43.534326
12   2021-04-20 12:29:45.659300
13   2021-04-20 12:29:47.776428
14   2021-04-20 12:29:49.907696
15   2021-04-20 12:29:52.038503
16   2021-04-20 12:29:54.154220
17   2021-04-20 12:29:56.304965
18   2021-04-20 12:29:58.432951
19   2021-04-20 12:30:00.598611
20   2021-04-20 12:30:02.718548
21   2021-04-20 12:30:04.836555
22   2021-04-20 12:30:06.967161
23   2021-04-20 12:30:09.090336
24   2021-04-20 12:30:11.221596
25   2021-04-20 12:30:13.342917
26   2021-04-20 12:30:15.471829
27   2021-04-20 12:30:17.591226
28   2021-04-20 12:30:19.705206
29   2021-04-20 12:30:21.822417
30   2021-04-20 12:30:23.941820
31   202

In [36]:
# Display the accumulator column for comparison; it still holds the raw
# integer values rather than datetimes.
timestamps_df['timestamp']

0     1618921760206730
1     1618921762335510
2     1618921764467705
3     1618921766584302
4     1618921768703186
5     1618921770829741
6     1618921772943790
7     1618921775061336
8     1618921777186547
9     1618921779302353
10    1618921781420535
11    1618921783534326
12    1618921785659300
13    1618921787776428
14    1618921789907696
15    1618921792038503
16    1618921794154220
17    1618921796304965
18    1618921798432951
19    1618921800598611
20    1618921802718548
21    1618921804836555
22    1618921806967161
23    1618921809090336
24    1618921811221596
25    1618921813342917
26    1618921815471829
27    1618921817591226
28    1618921819705206
29    1618921821822417
30    1618921823941820
31    1618921826074290
32    1618921828191982
33    1618921830323635
34    1618921832437453
35    1618921834557226
36    1618921836684329
37    1618921838818380
38    1618921840984223
39    1618921843102297
40    1618921845249348
41    1618921847368573
42    1618921849498588
43    16189