In [None]:
# Banner message printed when this cell starts executing.
print("What if you can achieve a better result!")

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os 

from scipy.io import loadmat
import numpy as np
# Packages 



def read_hb_from_file(example_path, n_channels=52):
    """Read the per-channel measurement columns from an exported recording.

    The file is expected to begin with free-form metadata, followed by a line
    containing the marker ``'Data'``. The CSV table (header row first) starts
    on the line right after that marker.

    Parameters
    ----------
    example_path : str
        Path of the file to read.
    n_channels : int, optional
        Number of channels to keep. Two columns are kept per channel,
        starting at column 1. Defaults to 52.

    Returns
    -------
    numpy.ndarray
        Columns ``1 .. 2 * n_channels`` of the table, one row per CSV row.

    Raises
    ------
    ValueError
        If no line containing ``'Data'`` exists in the file.
    """
    data_start_line = None
    # Scan lazily and stop at the marker instead of loading the whole file.
    with open(example_path, 'r') as file:
        for i, line in enumerate(file):
            if 'Data' in line:  # unique identifier of the data section
                data_start_line = i + 1
                break

    if data_start_line is None:
        # The previous version only printed a message here and then crashed
        # with an UnboundLocalError on the never-assigned `data` variable.
        raise ValueError(f"Data section not found in {example_path!r}.")

    # Skip the metadata and the marker line; the next line is the CSV header.
    data = pd.read_csv(example_path, skiprows=data_start_line)

    np_data = data.to_numpy()
    # Column 0 is skipped (presumably an index/probe column; confirm against
    # the export format).
    return np_data[:, 1:1 + n_channels * 2]



def compute_delta(data):
    """Split interleaved two-wavelength readings into two arrays.

    NaNs are replaced with 0, the whole array is shifted up by the absolute
    value of its global minimum, and the columns are de-interleaved.
    Despite the name, no optical-density delta is computed: the
    log-ratio step is disabled and the shifted readings are returned as is.

    Parameters
    ----------
    data : array-like, 2-D
        Readings with the two wavelengths interleaved along the columns
        (even-indexed columns = first wavelength, odd-indexed = second).
        The input is never modified.

    Returns
    -------
    tuple of numpy.ndarray
        ``(light_700, light_830)``: float arrays built from the even- and
        odd-indexed columns respectively.
    """
    # Work on a private float copy. The original call `np.nan_to_num(data, 0)`
    # passed 0 as the positional `copy` argument, so the caller's array was
    # overwritten in place (and again by the `+=` below).
    shifted = np.nan_to_num(np.array(data, dtype=float), copy=False)
    shifted += np.abs(np.min(shifted))

    # Independent copies, so the two results do not share memory.
    light_700 = shifted[:, 0::2].copy()
    light_830 = shifted[:, 1::2].copy()
    return light_700, light_830


def get_closest_index_of_wavelength(wavelength, ref_ec):
    """Return the row index of ``ref_ec`` whose first column is nearest to ``wavelength``.

    ``ref_ec`` is indexed as a 2-D array; only column 0 is inspected. When
    several rows are equally close, the lowest index is returned.
    """
    tabulated = ref_ec[:, 0]
    distance = np.absolute(tabulated - wavelength)
    return np.argmin(distance)
    



def extinction_coefficient_of_Hb(ref_file='extinction_coefficient.mat', low_wavelength=695, high_wavelength=830):
    """Build the inverse 2x2 extinction-coefficient matrix for two wavelengths.

    The reference table is read from the ``'data'`` entry of the MATLAB file
    ``ref_file``. For each requested wavelength the closest tabulated row is
    selected (column 0); column 1 of that row is read as the HbO coefficient
    and column 2 as the HbR coefficient. The selected values are printed.

    Returns
    -------
    numpy.ndarray
        Inverse of ``[[low_HbO, low_HbR], [high_HbO, high_HbR]]``.
        ``numpy.linalg.inv`` raises ``LinAlgError`` if that matrix is singular.
    """
    table = loadmat(ref_file)['data']

    row_low = table[get_closest_index_of_wavelength(low_wavelength, table)]
    row_high = table[get_closest_index_of_wavelength(high_wavelength, table)]

    low_e_hbo, low_e_hbr = row_low[1], row_low[2]
    high_e_hbo, high_e_hbr = row_high[1], row_high[2]

    print(f'low_e_hbo: {low_e_hbo}, low_e_hbr: {low_e_hbr}')
    print(f'high_e_hbo: {high_e_hbo}, high_e_hbr: {high_e_hbr}')

    # Hard-coded values previously noted here for reference:
    #   HbO @ 704 = 298,      HbR @ 704 = 1687.76
    #   HbO @ 827.9 = 965.2,  HbR @ 827.9 = 693.2
    coefficients = np.array([
        [low_e_hbo, low_e_hbr],
        [high_e_hbo, high_e_hbr],
    ])
    return np.linalg.inv(coefficients)

def calculate_hb(delta_700, delta_830, E_inv, TOTAL_CHANNEL):
    """Apply ``E_inv`` channel by channel to a pair of two-wavelength signals.

    Parameters
    ----------
    delta_700, delta_830 : numpy.ndarray
        Arrays indexed as ``[sample, channel]`` for the two wavelengths.
    E_inv : numpy.ndarray
        Matrix multiplied with the stacked ``[delta_700; delta_830]`` rows of
        each channel; rows 0 and 1 of the product are used.
    TOTAL_CHANNEL : int
        Number of channels (leading columns) to process.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(TOTAL_CHANNEL, n_samples, 3)`` whose last axis holds
        HbO, HbR and their sum.
    """
    n_samples = delta_700.shape[0]
    result = np.empty((TOTAL_CHANNEL, n_samples, 3))

    for idx in range(TOTAL_CHANNEL):
        # Rows: first wavelength, second wavelength; columns: samples.
        stacked = np.array([delta_700[:, idx], delta_830[:, idx]])
        concentration = np.dot(E_inv, stacked)

        oxy = concentration[0, :]
        deoxy = concentration[1, :]

        result[idx, :, 0] = oxy
        result[idx, :, 1] = deoxy
        result[idx, :, 2] = oxy + deoxy

    return result




def check_if_there_is_replicated_file(files_HCS, files_MDD):
    """Report repeated file keys per group and keep only the ``.csv`` files.

    For each list, the 3-character key at positions 2-4 of every file name
    (presumably a subject ID) is tracked; a key seen before within the same
    list is printed. The check runs on the unfiltered lists.

    Returns
    -------
    tuple of list
        ``(files_HCS, files_MDD)`` restricted to names ending in ``'.csv'``,
        in their original order.
    """
    for group in (files_HCS, files_MDD):
        seen_keys = []
        for filename in group:
            key = filename[2:5]
            if key not in seen_keys:
                seen_keys.append(key)
            else:
                print(key)

    csv_hcs = [filename for filename in files_HCS if filename.endswith('.csv')]
    csv_mdd = [filename for filename in files_MDD if filename.endswith('.csv')]
    return csv_hcs, csv_mdd


            
            
def _load_light_group(folder, files, n_channels=52, n_samples=1701):
    """Load every recording of one group into a ``(n_files, n_channels, n_samples, 2)`` array."""
    group = np.empty((len(files), n_channels, n_samples, 2))

    for sub, file in enumerate(files):
        path = os.path.join(folder, file)
        light_low, light_high = compute_delta(read_hb_from_file(path))

        # (samples, channels) per wavelength -> (channels, samples, wavelength)
        two_lights = np.concatenate(
            (light_low[..., np.newaxis], light_high[..., np.newaxis]), axis=-1
        )
        two_lights = np.transpose(two_lights, (1, 0, 2))

        # Fail with the offending file name instead of an anonymous
        # broadcasting error from the assignment below.
        if two_lights.shape != group.shape[1:]:
            raise ValueError(
                f"{path}: expected data of shape {group.shape[1:]}, got {two_lights.shape}"
            )
        group[sub] = two_lights

    return group


def convert_to_hb_using_MBLL(input_fold, output_fold, flag='cyrus'):
    """Load the two-wavelength light signals of all HC and MDD recordings.

    Despite its name, this function returns the shifted raw readings produced
    by ``compute_delta``. No extinction-coefficient conversion is applied,
    so the unused per-file ``extinction_coefficient_of_Hb()`` call (which
    re-read the .mat file and printed for every recording) has been removed.

    Parameters
    ----------
    input_fold : str
        Root folder of the dataset.
    output_fold : str
        Unused; kept so existing callers keep working.
    flag : str, optional
        ``'cyrus'``: controls are read from ``<input_fold>/Controls/MES`` and
        patients from ``<input_fold>/Patients/MES``, filtered through
        ``check_if_there_is_replicated_file``. Any other value: all files are
        read from ``input_fold`` and split by the name prefixes ``'HC'`` and
        ``'DEP'``.

    Returns
    -------
    LABEL : numpy.ndarray
        0 for every HC recording followed by 1 for every MDD recording.
    DATA : numpy.ndarray
        Array of shape ``(n_recordings, 52, 1701, 2)``, HC first, then MDD.

    Raises
    ------
    ValueError
        If a recording does not yield 52 channels x 1701 samples.
    """
    if flag == 'cyrus':
        HCS_fold = os.path.join(input_fold, 'Controls', 'MES')
        MDD_fold = os.path.join(input_fold, 'Patients', 'MES')
        # NOTE(review): os.listdir order is filesystem-dependent, so the
        # subject order inside each group is not guaranteed to be stable.
        files_HCS = os.listdir(HCS_fold)
        files_MDD = os.listdir(MDD_fold)
        files_HCS, files_MDD = check_if_there_is_replicated_file(files_HCS, files_MDD)
    else:
        HCS_fold = MDD_fold = input_fold
        files = os.listdir(input_fold)
        files_HCS = [file for file in files if file.startswith('HC')]
        files_MDD = [file for file in files if file.startswith('DEP')]

    print(f"Total number of HCs: {len(files_HCS)}")
    print(f"Total number of MDDs: {len(files_MDD)}")

    HCS_LABEL = np.zeros(len(files_HCS))
    MDD_LABEL = np.ones(len(files_MDD))

    HCS_DATA = _load_light_group(HCS_fold, files_HCS)
    MDD_DATA = _load_light_group(MDD_fold, files_MDD)

    LABEL = np.concatenate((HCS_LABEL, MDD_LABEL), axis=0)
    DATA = np.concatenate((HCS_DATA, MDD_DATA), axis=0)
    print('Final - Data - shape', DATA.shape)
    return LABEL, DATA

# Raw-data locations of the two sources processed below.
# NOTE(review): absolute, machine-specific paths; consider a configurable base directory.
input_fold_cyrus = '/home/jy/Documents/fnirs/treatment_response/fnirs-depression-deeplearning/Prerequisite/RawData/Baseline_fnirs'
input_fold_fabeha = '/home/jy/Documents/fnirs/treatment_response/fnirs-depression-deeplearning/Prerequisite/RawData/Febeha_374_data'

# Defined here so this cell runs on a fresh kernel: the loop below passes it to
# convert_to_hb_using_MBLL, and it was previously only assigned in a later cell.
output_fold = '/home/jy/Documents/fnirs/treatment_response/fnirs-depression-deeplearning/allData/diagnosis514_light'

# Source name -> input folder; the name doubles as the `flag` argument.
input_dict_name_data = {
    'cyrus': input_fold_cyrus,
    'fabeha': input_fold_fabeha
}

ALL_DATA, ALL_LABEL = [], []
for type_name, input_fold in input_dict_name_data.items():
    print(f"Processing {type_name}")

    LABEL, DATA = convert_to_hb_using_MBLL(input_fold, output_fold, type_name)
    ALL_DATA.append(DATA)
    ALL_LABEL.append(LABEL)

# Stack both sources along the recording axis.
ALL_DATA, ALL_LABEL = np.concatenate(ALL_DATA, axis=0), np.concatenate(ALL_LABEL, axis=0)
# Put the two wavelength series end to end along the sample axis:
# (N, channels, samples, 2) -> (N, channels, 2 * samples).
ALL_DATA = np.concatenate((ALL_DATA[..., 0], ALL_DATA[..., 1]), axis=2)


In [43]:
# Sanity check: display the shapes of the combined data and label arrays.
ALL_DATA.shape, ALL_LABEL.shape


((514, 52, 3402), (514,))

In [None]:



# Compare the grand-average signal of the two groups (label 0 = HC, 1 = MDD).
HC = ALL_DATA[ALL_LABEL == 0]
MDD = ALL_DATA[ALL_LABEL == 1]

fig, ax = plt.subplots()
# Average over recordings (axis 0) and channels (axis 1); the remaining axis is
# the sample axis with the two wavelength series placed end to end.
ax.plot(HC.mean(axis=(0, 1)), label='HC')
ax.plot(MDD.mean(axis=(0, 1)), label='MDD')
ax.set_xlabel('Sample index (first wavelength block, then second)')
ax.set_ylabel('Mean light signal')
ax.set_title('Group-average light signal across recordings and channels')
ax.legend();

In [45]:
# Destination directory for the exported arrays.
# NOTE(review): this name is also passed to convert_to_hb_using_MBLL in the
# first cell, so it has to exist before that cell runs on a fresh kernel.
output_fold = '/home/jy/Documents/fnirs/treatment_response/fnirs-depression-deeplearning/allData/diagnosis514_light'


In [46]:
# Create the destination first: np.save does not create missing directories.
os.makedirs(output_fold, exist_ok=True)
np.save(os.path.join(output_fold, 'light_data.npy'), ALL_DATA)
np.save(os.path.join(output_fold, 'label.npy'), ALL_LABEL)

