# Extraindo batimentos cardíacos
# Normais e com crises

In [1]:
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
import mne
import neurokit2 as nk

In [2]:
# Run the dotenv loader before the environment is queried
# (presumably it reads a local .env file - confirm against python-dotenv).
load_dotenv()

# Dataset root directory taken from the PATH_DATASET environment variable;
# this is None when the variable is not defined.
path_data = os.environ.get('PATH_DATASET')

# Absolute form of '../../', resolved against the current working directory.
PATH = os.path.abspath('../../')

## Carregamento de dados

In [3]:
# Both tables are read from the data/ folder under PATH. Later cells rely on
# a `freq` and a `path` column in each, plus `inicio`/`fim` in the seizure one.
df1 = pd.read_csv(PATH+'/data/Generalized_seizures_dataset.csv') # Seizure recordings

df2 = pd.read_csv(PATH + '/data/data_train.csv') # Normal recordings

In [4]:
# Sampling rate, in samples per second, that a recording must have to be used.
fs = 256

# Seizure table: keep rows recorded at `fs`, only the path and the start/end
# columns (cast to int), renumbered from 0.
df1 = (
    df1.loc[df1.freq == fs, ['path', 'inicio', 'fim']]
    .astype({'inicio': int, 'fim': int})
    .reset_index(drop=True)
)

# Normal table: paths of rows recorded at `fs`, capped at the number of
# seizure rows kept above.
df2 = df2.loc[df2.freq == fs, 'path'].iloc[:len(df1)]

## Funções de processamento

In [5]:
# Loading .edf recordings

def load_edf(file):
    """Open the EDF recording that corresponds to a dataset path.

    Args:
        file: Recording path relative to ``path_data``. A trailing ``.tse``
            extension is swapped for ``.edf``; any other path is used as is.

    Returns:
        dict with ``"info"`` (the ``info`` attribute of the opened recording)
        and ``"raw"`` (the object returned by ``mne.io.read_raw_edf``).
    """
    # Swap only the extension: a blanket str.replace('tse', 'edf') would also
    # rewrite any directory or file name that happens to contain "tse".
    if file.endswith('.tse'):
        file = file[:-len('.tse')] + '.edf'
    data = mne.io.read_raw_edf(path_data + '/' + file)
    return {"info": data.info, "raw": data}

# Raw ECG channel(s)
def raw_to_dataframe(signal, channels):
    """Return the chosen channels, trimmed at both ends and sign-inverted.

    Args:
        signal: Object exposing ``to_data_frame()``; ``pipeline`` passes the
            raw recording produced by ``load_edf``.
        channels: Column selector applied to that data frame.

    Returns:
        The selected columns without the first and last ``fs * 10`` rows
        (10 seconds at the module-level rate ``fs``), multiplied by -1.
    """
    frame = signal.to_data_frame()
    margin = fs * 10  # number of samples in 10 seconds
    selected = frame[channels]
    trimmed = selected.iloc[margin:len(frame) - margin]
    return trimmed * (-1)

# Identify the ECG channels
def get_ecg_channels(info):
    """Return, in their original order, the channel names that contain 'EKG'.

    Args:
        info: Object whose ``ch_names`` attribute is an iterable of channel
            name strings.

    Returns:
        list of the matching names; empty when none match.
    """
    return list(filter(lambda name: 'EKG' in name, info.ch_names))

# Filter the ECG
# and recover the individual heartbeats
def ecg_process(signal, fs):
    """Clean an ECG signal and segment it with NeuroKit2.

    Args:
        signal: ECG samples handed to ``nk.ecg_clean``.
        fs: Sampling rate forwarded to both NeuroKit2 calls (this parameter
            shadows the module-level ``fs``).

    Returns:
        Whatever ``nk.ecg_segment`` returns; downstream code indexes it by
        key and reads a 'Signal' column from each entry.
    """
    cleaned = nk.ecg_clean(signal, sampling_rate=fs, method="hamilton2002")
    # No precomputed R-peaks are supplied (rpeaks=None); presumably NeuroKit2
    # detects them itself - confirm against its documentation.
    beats = nk.ecg_segment(cleaned, rpeaks=None, sampling_rate=fs, show=False)
    return beats

In [6]:
def pipeline(path, start, end):
    """Extract the heartbeats found in one time window of one recording.

    Args:
        path: Recording path, passed to ``load_edf``.
        start: Window start in seconds, multiplied by ``fs`` to get a row index.
        end: Window end in seconds (exclusive), converted the same way.

    Returns:
        ``{'file': path, 'data': <ecg_process output>}`` on success, or an
        empty list when processing raises ValueError, IndexError or
        ZeroDivisionError. Callers drop failures with ``len(result) > 0``,
        so the empty-list sentinel is kept.
    """
    import logging

    try:
        infos = load_edf(path)
        ecg_channels = get_ecg_channels(infos['info'])
        signal = raw_to_dataframe(infos['raw'], ecg_channels)
        # NOTE(review): raw_to_dataframe has already dropped the first 10 s,
        # so this window is counted from the trimmed signal rather than from
        # the start of the recording - confirm that is what the seizure
        # start/end annotations expect.
        signal = signal.reset_index(drop=True).iloc[start*fs:end*fs]
        result = ecg_process(signal, fs)
    except (ValueError, IndexError, ZeroDivisionError) as error:
        # Still best-effort, but leave a trace of which recording was skipped
        # and why instead of discarding the error silently.
        logging.getLogger("ecg_pipeline").warning(
            "Skipping %s: %s: %s", path, type(error).__name__, error
        )
        return []
    return {'file': path, 'data': result}

## Executando pipeline

In [None]:
# ECG from seizure recordings

from joblib import Parallel, delayed

# One pipeline job per seizure row; n_jobs=-1 asks joblib for all available cores.
result1 = Parallel(n_jobs=-1)(
    delayed(pipeline)(row['path'], row['inicio'], row['fim'])
    for _, row in df1.iterrows()
)

# Drop the empty placeholders that pipeline returns for failed recordings.
result1 = [item for item in result1 if len(item) > 0]

In [22]:
# Grouping each file with its heartbeats

def grouping_heart_beats(result):
    """Flatten one pipeline result into plain lists of beat samples.

    Args:
        result: Mapping with 'file' (recording path) and 'data' (mapping of
            beat key to a table that has a 'Signal' column).

    Returns:
        ``{'file': <path>, 'heart_beats': [<list of samples per beat>, ...]}``
        with the beats in the iteration order of ``result['data']``.
    """
    source_file = result['file']
    segments = result['data']
    beats = []
    for label in segments.keys():
        beats.append(segments[label]['Signal'].to_list())
    return {'file': source_file, 'heart_beats': beats}

# Flatten every successful seizure result into {'file', 'heart_beats'} form.
all_results1 = list(map(grouping_heart_beats, result1))

In [None]:
# Normal ECG: one pipeline job per path, each over the 0-10 s window.
# (df2 is a Series here, so its transpose is the Series itself.)
result2 = Parallel(n_jobs=-1)(
    delayed(pipeline)(normal_path, 0, 10) for normal_path in df2.to_list()
)

In [35]:
# Drop the empty placeholders that pipeline returns for failed recordings.
result2 = [item for item in result2 if len(item) > 0]

In [36]:
# Flatten every successful normal result into {'file', 'heart_beats'} form.
all_results2 = list(map(grouping_heart_beats, result2))

In [39]:
# Saving the results: each beat collection goes to its own pickle file,
# written relative to the current working directory.

import pickle

with open('ecg_generated_seizure.pkl', mode='wb') as seizure_file:
    pickle.dump(all_results1, seizure_file, pickle.HIGHEST_PROTOCOL)

with open('ecg_normal.pkl', mode='wb') as normal_file:
    pickle.dump(all_results2, normal_file, pickle.HIGHEST_PROTOCOL)
