In [None]:
import pandas as pd
import numpy as np
import psutil
import ray

In [None]:
import neurokit2 as nk
import matplotlib.pyplot as plt
import pywt
import scipy
from tqdm import tqdm
from scipy import signal
import scipy.signal
from scipy.signal import butter, iirnotch, lfilter

In [None]:
# Load the test-set table; its 'unique_id' column is used later to locate
# the per-record ECG files.
df_test = pd.read_csv(
    '/smc_work/code/SMC_shea/csv_files/df_test.csv',
    encoding='utf-8-sig',
)

In [None]:
# QT version: one column per (feature, statistic) pair. Features are the
# Q/T peak amplitudes and the Q-Q, T-T and Q-T intervals; statistics are
# mean, median, std, min and max.
columns = [
    feature + '_' + stat
    for feature in ('Q_amplitude', 'T_amplitude', 'QQ_interval', 'TT_interval', 'QT_interval')
    for stat in ('mean', 'median', 'std', 'min', 'max')
]

# Empty frame that carries only the feature column names.
df_test_wave = pd.DataFrame(columns=columns)

In [None]:
# Module-level container, initially empty, intended to map a record's
# unique_id to its dict of extracted features.
ecg_array = dict()

In [None]:
def bandpass(lowcut, highcut, order=5, fs=500.0):
    """Design a Butterworth band-pass filter.

    Parameters
    ----------
    lowcut : float
        Lower cut-off frequency, in the same unit as ``fs``.
    highcut : float
        Upper cut-off frequency, in the same unit as ``fs``.
    order : int, optional
        Order passed to ``scipy.signal.butter`` (default 5).
    fs : float, optional
        Sampling frequency of the signal the filter will be applied to.
        Defaults to 500, the value that used to be hard-coded here.

    Returns
    -------
    b, a : ndarray
        Numerator and denominator polynomial coefficients of the filter.

    Raises
    ------
    ValueError
        If the band does not satisfy ``0 < lowcut < highcut < fs / 2``.
    """
    nyq = 0.5 * fs
    # Fail with a readable message instead of butter's generic one (or, on
    # older SciPy, a silently meaningless filter when the edges are swapped).
    if not 0 < lowcut < highcut < nyq:
        raise ValueError(
            "bandpass requires 0 < lowcut < highcut < fs/2; got "
            "lowcut=%r, highcut=%r, fs=%r" % (lowcut, highcut, fs)
        )
    # butter expects the band edges normalised by the Nyquist frequency.
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    return b, a

In [None]:
# Number of worker processes to use. psutil.cpu_count(logical=False) returns
# None when the physical core count cannot be determined, which would break
# the later range(num_cpus) loops, so fall back to the logical count and
# finally to a single worker.
num_cpus = psutil.cpu_count(logical=False) or psutil.cpu_count(logical=True) or 1

In [None]:
# Start Ray with one CPU slot per worker. ignore_reinit_error makes this cell
# safe to re-run in a live kernel: a second ray.init would otherwise raise
# because Ray is already initialised.
ray.init(num_cpus=num_cpus, ignore_reinit_error=True)

In [None]:
# Split ALL unique_ids into num_cpus contiguous chunks (one per worker).
# A fixed chunk size of 500 silently ignored every row past 500 * num_cpus,
# so the amount of data processed depended on the machine's core count.
all_unique_ids = df_test['unique_id'].values.tolist()
batch = -(-len(all_unique_ids) // num_cpus)  # ceiling division
unique_id_dict = {}
start = 0
for i in range(num_cpus):
    # Trailing chunks may be shorter (or empty) when the ids do not divide evenly.
    unique_id_dict[i] = all_unique_ids[start:start + batch]
    start += batch

In [None]:
def _summary_stats(prefix, values):
    """Return mean/median/std/min/max of the non-NaN entries of ``values``.

    Keys are ``<prefix>_mean``, ``<prefix>_median``, ``<prefix>_std``,
    ``<prefix>_min`` and ``<prefix>_max``. When no non-NaN value is available
    every statistic is NaN, so each record always has the same set of keys
    and np.nanmin/np.nanmax are never called on an empty array (which raises).
    """
    arr = np.array(values, dtype=float)
    arr = arr[~np.isnan(arr)]
    if arr.size == 0:
        return {prefix + '_' + stat: np.nan for stat in ('mean', 'median', 'std', 'min', 'max')}
    return {
        prefix + '_mean': np.nanmean(arr),
        prefix + '_median': np.nanmedian(arr),
        prefix + '_std': np.nanstd(arr),
        prefix + '_min': np.nanmin(arr),
        prefix + '_max': np.nanmax(arr),
    }


@ray.remote
def find_peak(df_unique_id):
    """Extract Q/T amplitude and QQ/TT/QT interval statistics per ECG record.

    For each id, the waveform is loaded from
    ``/smc_work/data/smc_numpy_data/II/<unique_id>.npy``, band-pass filtered
    (0.5-45) with zero-phase filtering, and delineated with neurokit2 at a
    sampling rate of 500 (``cwt`` method, falling back to ``dwt`` if it fails).

    Parameters
    ----------
    df_unique_id : iterable of str
        Record ids; each is concatenated into the ``.npy`` file path.

    Returns
    -------
    dict
        ``{unique_id: {feature_name: value}}`` with the keys
        ``Q_amplitude_*``, ``T_amplitude_*``, ``QQ_interval_*``,
        ``TT_interval_*`` and ``QT_interval_*`` (``*`` in mean, median, std,
        min, max). Amplitudes are read from the unfiltered waveform at the
        detected peak positions. Intervals are differences of peak sample
        indices (not converted to seconds). A statistic is NaN when there is
        no data to compute it from.

    Notes
    -----
    Results are collected in a dict local to the call rather than in a
    module-level dict, so a worker process that executes several tasks does
    not leak records from one task into the next task's return value.
    """
    # The filter design does not depend on the record; compute it once.
    b, a = bandpass(0.5, 45)

    features = {}
    for i, unique_id in enumerate(tqdm(df_unique_id)):
        record = {}
        features[unique_id] = record

        ecg_wave = np.load('/smc_work/data/smc_numpy_data/II/'+unique_id+'.npy')
        filtered_ecg = signal.filtfilt(b, a, ecg_wave)

        _, rpeaks = nk.ecg_peaks(filtered_ecg, sampling_rate=500)
        try:
            _, waves_cwt = nk.ecg_delineate(filtered_ecg, rpeaks, sampling_rate=500, method="cwt", show=False, show_type='peaks')
        except Exception:
            # Best-effort fallback to the discrete-wavelet delineator. Only
            # ordinary errors are caught so KeyboardInterrupt/SystemExit still
            # propagate.
            _, waves_cwt = nk.ecg_delineate(filtered_ecg, rpeaks, sampling_rate=500, method="dwt", show=False, show_type='peaks')

        # Peak positions as float arrays so undetected peaks can stay NaN and
        # element-wise arithmetic works (plain lists cannot be subtracted).
        q_peaks = np.array(waves_cwt['ECG_Q_Peaks'], dtype=float)
        t_peaks = np.array(waves_cwt['ECG_T_Peaks'], dtype=float)

        qq_interval = np.diff(q_peaks)  ## Q-Q interval
        tt_interval = np.diff(t_peaks)  ## T-T interval

        # Q-T interval: the T peaks come from the delineation result (the
        # R-peak dict has no 'ECG_T_Peaks' entry). Q and T are only paired
        # when the lengths match, or when there is exactly one extra T peak,
        # in which case the first T peak is dropped.
        if len(t_peaks) == len(q_peaks):
            qt_interval = t_peaks - q_peaks
        elif len(t_peaks) == len(q_peaks) + 1:
            qt_interval = t_peaks[1:] - q_peaks
        else:
            qt_interval = np.array([], dtype=float)

        # Amplitudes are sampled from the raw waveform at the valid peak indices.
        q_index = q_peaks[~np.isnan(q_peaks)].astype(int)
        t_index = t_peaks[~np.isnan(t_peaks)].astype(int)

        record.update(_summary_stats('Q_amplitude', ecg_wave[q_index]))
        record.update(_summary_stats('T_amplitude', ecg_wave[t_index]))
        record.update(_summary_stats('QQ_interval', qq_interval))
        record.update(_summary_stats('TT_interval', tt_interval))
        record.update(_summary_stats('QT_interval', qt_interval))

        if i % 19 == 0:
            # Periodically release any matplotlib figures left open in this worker.
            plt.close('all')

    return features

# Submit one remote find_peak task per chunk of ids.
futures = [
    find_peak.remote(unique_id_dict[chunk_idx])
    for chunk_idx in range(num_cpus)
]

# Block until every task has finished and collect the return values in order.
results = ray.get(futures)

In [None]:
# Display the raw ray.get output: a list with one entry per submitted find_peak task.
results

In [None]:
# Merge the per-task dicts into one; on a repeated key the later task's entry wins.
res = {}
for partial_result in results:
    res.update(partial_result)

In [None]:
# Build a frame from the merged dict (outer keys become columns), then
# display it transposed so that each outer key is a row.
df_test_wave = pd.DataFrame(res)
df_test_wave.transpose()

In [None]:
# Write the transposed feature table to CSV, encoded as UTF-8 with a BOM.
df_test_wave.transpose().to_csv(
    './csv_files/test_qt_interval.csv',
    encoding='utf-8-sig',
)