# Post-Ictal Heart Rate Oscillations in Partial Epilepsy

In [1]:
import os
import wfdb
import numpy as np
import pandas as pd
from collections import Counter
from matplotlib import pyplot as plt
from hrvanalysis import get_time_domain_features,get_frequency_domain_features, plot_psd

import warnings
warnings.filterwarnings("ignore")

In [2]:
if True:
    # baixando os arquivos de HRO (heart rate oscillations).
    wfdb.dl_database("szdb", os.path.join(os.getcwd(), "szdb"))

Generating record list for: sz01
Generating record list for: sz02
Generating record list for: sz03
Generating record list for: sz04
Generating record list for: sz05
Generating record list for: sz06
Generating record list for: sz07
Generating list of all files for: sz01
Generating list of all files for: sz02
Generating list of all files for: sz03
Generating list of all files for: sz04
Generating list of all files for: sz05
Generating list of all files for: sz06
Generating list of all files for: sz07
Created local base download directory: c:\Users\Vinc\Music\2022.2\EEL7307-08235 (20222) - Introdução a Informática Médica\git\informatica_medica\szdb
Downloading files...
Finished downloading files


# Calculo dos tempos onde há convulsão

In [3]:
a = ['sz01 00:14:36 00:16:12', # 1
    'sz02 01:02:43 01:03:43', # 2
    'sz02 02:55:51 02:56:16', # 2
    'sz03 01:24:34 01:26:22', # 3
    'sz03 02:34:27 02:36:17', # 3
    'sz04 00:20:10 00:21:55', # 4
    'sz05 00:24:07 00:25:30', # 5
    'sz06 00:51:25 00:52:19', # 6
    'sz06 02:04:45 02:06:10', # 6
    'sz07 01:08:02 01:09:31'] # 7

def _(string):

    start = string.split()[1].split(':')
    end = string.split()[2].split(':')
    
    start_s = int(start[0])*60*60 + int(start[1])*60 + int(start[2])
    end_s = int(end[0])*60*60 + int(end[1])*60 + int(end[2])
    
    #print(start_s, end_s)
    return [start_s, end_s]
    
tempos = []
for string in a:
    tempos.append(_(string))
                  
tempos = [[[876, 972]],  #1
 [[3763, 3823],[10551, 10576]], # 2
 [[5074, 5182],[9267, 9377]], # 3
 [[1210, 1315]], # 4
 [[1447, 1530]], # 5
 [[3085, 3139],[7485, 7570]], # 6
 [[4082, 4171]]] # 7

tempos

[[[876, 972]],
 [[3763, 3823], [10551, 10576]],
 [[5074, 5182], [9267, 9377]],
 [[1210, 1315]],
 [[1447, 1530]],
 [[3085, 3139], [7485, 7570]],
 [[4082, 4171]]]

# Obtenção da VFC (variabilidade da frequência cardíaca) a partir do arquivo de anotações

In [4]:
# 140 media de amostras 'C' (1).
def _process_vfc(pandas, tempos):
    
    """
    faz as anotações em cada pandas dataset em função dos tempos em segundos
    onde há convulsão.
    """
    
    indexes_list = []
    #vfc[(vfc.tempo >= tempo[0]) & (vfc.tempo <= tempo[1])]
    # pegando indexes onde há convulsão.
    for tempo in tempos:
        indexes = pandas.index[(pandas['tempo'] >= tempo[0]) & (pandas['tempo'] <= tempo[1])].tolist()
        print('indexes "C" (1): ',np.min(indexes), np.max(indexes))
        print('dif:',np.max(indexes) - np.min(indexes))
        indexes_list.extend(indexes)
        
    #print(indexes_list)
   
    print('tamanho amostras',len(pandas))
    print('--------------------------')
    # criando uma coluna auxiliar para usar apply.
    pandas['index1'] = pandas.index
    # criando a coluna labels, recebe 'C' se o index da linha esta na lista, se não 'N'.
    #pandas['labels'] = pandas.apply(lambda row: 'C' if pandas.index(row) in indexes else 'N', axis=1)
    pandas['labels'] = pandas.apply(lambda row: 1 if row.index1 in indexes_list else 0, axis=1)
    pandas = pandas.drop(columns = ['index1'])
    # retorna o dataset anotado.
    return pandas


In [5]:
def most_frequent(List):
    """
    obtem a label com maior numero de ocorrencias em uma lista ('N' (0), 'C' (1)).
    """
    occurence_count = Counter(List)
    return occurence_count.most_common(1)[0][0]

In [6]:
df_vfc = pd.DataFrame()
for i in range(1,8):
    
    """
    para cada dataset, carrega as anotações, converte para pandas dataframe e cria a coluna das labels ('N' (0): normal, 'C' (1): convulsão).
    
    retorna um dataframe com todos os datasets juntos anotados (df_vfc).
    """
    
    path = "szdb/sz0"+str(i)
    
    record = wfdb.rdrecord(path) # Efetuamos a leitura do arquivo apenas para obter o número total de pontos e fs.
    fs=record.fs
    nr_sampl=record.sig_len
    del record # Liberamos memória
    print('Número de amostras=',nr_sampl,', taxa de amostragem=',fs)
    
    # Obtem as anotações do arquivo sz01.ari
    sampfrom=0
    sampto=nr_sampl  # pega todo o sinal.
    annotation = wfdb.rdann(path, 'ari', sampfrom=sampfrom, sampto=sampto) 
    annotation.fs = fs # Ajustamos o tempo das anotações para a mesma taxa de amostragem do sinal
    
    # Efetuamos novamente a leitura de todo o sinal
    annotation = wfdb.rdann(path, 'ari')
    r_times=annotation.sample #criamos um array com os tempos (em número de amostras)
    vfc = pd.DataFrame(r_times,columns = ['sample'])
    vfc['tempo']= vfc['sample']/fs # convertemos o número da amostra em segundos
    vfc['vfc'] =  vfc['tempo'].shift(-1)-vfc['tempo']
    vfc = vfc.fillna(0) # remove NaN substituindo por zeros.
                            
    tempo_t = tempos[i-1]                       
    vfc = _process_vfc(pandas = vfc, tempos = tempo_t)
    df_vfc = pd.concat([df_vfc, vfc])

Número de amostras= 1079998 , taxa de amostragem= 200
indexes "C" (1):  1088 1273
dif: 185
tamanho amostras 8385
--------------------------
Número de amostras= 2519998 , taxa de amostragem= 200
indexes "C" (1):  3875 3959
dif: 84
indexes "C" (1):  10842 10877
dif: 35
tamanho amostras 13195
--------------------------
Número de amostras= 2711998 , taxa de amostragem= 200
indexes "C" (1):  5790 5957
dif: 167
indexes "C" (1):  10954 11172
dif: 218
tamanho amostras 16384
--------------------------
Número de amostras= 1079998 , taxa de amostragem= 200
indexes "C" (1):  1226 1408
dif: 182
tamanho amostras 6229
--------------------------
Número de amostras= 1080006 , taxa de amostragem= 200
indexes "C" (1):  2164 2324
dif: 160
tamanho amostras 8076
--------------------------
Número de amostras= 2159998 , taxa de amostragem= 200
indexes "C" (1):  3729 3796
dif: 67
indexes "C" (1):  8894 8991
dif: 97
tamanho amostras 12758
--------------------------
Número de amostras= 1439998 , taxa de amostrag

In [7]:
np.unique(df_vfc.labels.to_list(), return_counts = True)

(array([0, 1]), array([72507,  1412], dtype=int64))

In [63]:
# Extrai atributos no domínio tempo

"""
Em uma janela de 140 amostras (media de amostras onde há convulsão),
obtem as features no tempo e na frequencia e cria um pandas dataframe.
"""

amostragem = 17

df_final = pd.DataFrame()
#for i in range(0:len(df_vfc):140):
for index ,i in enumerate(range(0,len(df_vfc),amostragem)):    
    
    #df_aux = pd.DataFrame()
    label = most_frequent(List = df_vfc['labels'][i:i+amostragem].to_list())

    RR_interval = df_vfc['vfc'][i:i+amostragem] # pegando 140 em 140 amostras.
    td_features = get_time_domain_features(RR_interval);
    #display(td_features)
    # Extrai atributos no domínio da frequência
    fd_features = get_frequency_domain_features(RR_interval, sampling_frequency = 80);
    
    td_features.update(fd_features) 
        
    df_aux = pd.DataFrame(td_features,  index=[index])
    #df_aux['labels'].loc[index] = label
    df_aux['labels'] = label
    
    df_final = pd.concat([df_final,df_aux])
    
df_final.head()

Unnamed: 0,mean_nni,sdnn,sdsd,nni_50,pnni_50,nni_20,pnni_20,rmssd,median_nni,range_nni,...,min_hr,std_hr,lf,hf,lf_hf_ratio,lfnu,hfnu,total_power,vlf,labels
0,0.815588,0.050587,0.043192,0,0.0,0,0.0,0.043283,0.82,0.18,...,65934.065934,4413.90217,4.714183e-06,1.414255e-05,0.333333,25.0,75.0,2.003528e-05,1.178546e-06,0
1,0.844412,0.053265,0.049745,0,0.0,0,0.0,0.050621,0.86,0.165,...,65934.065934,4523.832246,6.025408e-06,1.807622e-05,0.333333,25.0,75.0,2.560798e-05,1.506352e-06,0
2,0.853824,0.036251,0.037118,0,0.0,0,0.0,0.037375,0.85,0.125,...,65217.391304,2880.969284,4.296333e-06,1.2889e-05,0.333333,25.0,75.0,1.825941e-05,1.074083e-06,0
3,0.841471,0.026325,0.034673,0,0.0,0,0.0,0.034709,0.835,0.095,...,67039.106145,2148.57031,3.997984e-07,1.199395e-06,0.333333,25.0,75.0,1.699143e-06,9.994961e-08,0
4,0.860882,0.03406,0.037779,0,0.0,0,0.0,0.037811,0.86,0.115,...,64864.864865,2655.071218,1.004159e-08,3.012476e-08,0.333333,25.0,75.0,4.267675e-08,2.510397e-09,0


np.unique(df_final.labels.to_list(), return_counts = True)

In [64]:
df_final['labels'].value_counts()


0    4266
1      83
Name: labels, dtype: int64

In [65]:
print(len(df_final))
df_final = df_final.dropna()
print(len(df_final))

4349
3040


df_final.fillna(0, inplace=True)
df_final.isnull().values.any()

In [66]:
df_final.to_csv('vfc_dataset_janela-17_fs-80.csv', index=False); 

Atributos:

https://aura-healthcare.github.io/hrv-analysis/hrvanalysis.html#module-hrvanalysis.extract_features

Notebook base:
https://anaconda.org/cesar_ufsc/vfc_do_ecg_c_wfdb/notebook