# Prueba de la extración de feature vectors

In [1]:
#Importación de librerías
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from mne import io
import random
import scipy.signal as signal
from datetime import datetime, timedelta
from FVfunctions import getMeData, pot4signals, BANDAS
from features_stats import stats_features
from dataset_reader import get_seizure_events, get_seizure_array

#### 1. Lectura

In [2]:
DATA_DIR = r"../eeg_dataset/"
path01 = rf"{DATA_DIR}PN05/PN05-2.edf"
raw01 = io.read_raw_edf(path01)
paciente = "PN05"
realizacion = "2"

Extracting EDF parameters from C:\Users\gonza\Documents\ITBA\2024 1C\PSIB\TP FINAL PSIB\eeg_dataset\PN05\PN05-2.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw01 = io.read_raw_edf(path01)
  raw01 = io.read_raw_edf(path01)
  raw01 = io.read_raw_edf(path01)


#### 2. Info del archivo

In [11]:
info = raw01.info
print(info)
print(info['ch_names'])

<Info | 8 non-empty values
 bads: []
 ch_names: EEG T3, EEG T5, EEG F7, EEG F3, EEG C3, EEG P3
 chs: 6 EEG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 256.0 Hz
 meas_date: 2016-01-01 06:46:02 UTC
 nchan: 6
 projs: []
 sfreq: 512.0 Hz
 subject_info: 1 item (dict)
>
['EEG T3', 'EEG T5', 'EEG F7', 'EEG F3', 'EEG C3', 'EEG P3']


#### 3. Obtención de las mediciones a utilizar y otros datos importantes

In [12]:
# Se obtiene el nombre de todos los canales
ch_nms = info["ch_names"]

# Se obtienen los canales seleccionados del lazo izquierdo
filt_ch_nms = ['EEG T3','EEG T5','EEG F7','EEG F3','EEG C3','EEG P3']

# Seleccionar los datos de los canales filtrados
raw01_filt = raw01.pick(filt_ch_nms)

# Obtener los datos de los canales filtrados por nombre
data_namefilt = raw01_filt.get_data()

# Se convierten las mediciones a microvoltios
data_namefilt = data_namefilt * 1e6

# Dimensiones de 'data_filt'
dim_data_filt = np.shape(data_namefilt)

# Verifición
print("Canales filtrados:", filt_ch_nms)
print(f"Cantidad de canales resultantes: {dim_data_filt[0]}")
print(f"Cantidad de datos en cada canal: {dim_data_filt[1]}")

Canales filtrados: ['EEG T3', 'EEG T5', 'EEG F7', 'EEG F3', 'EEG C3', 'EEG P3']
Cantidad de canales resultantes: 6
Cantidad de datos en cada canal: 4733440


In [13]:
# Frecuencia de muestreo y cantidad de muestras tomadas
fs = info["sfreq"]
len_data = dim_data_filt[1]

# Array de instantes
start = 0
stop = (1 / fs) * len_data
arr_t = np.arange(start=start, stop=stop, step=(1 / fs))

# Verificación de datos
print("Frecuencia de muestreo:", fs)
print("Instantes [s]:", arr_t[0], "...", arr_t[-1])

Frecuencia de muestreo: 512.0
Instantes [s]: 0.0 ... 9244.998046875


#### 4. getMeData

In [14]:
# Encontramos las seizures del archivo
path_seiz = rf"{DATA_DIR}PN05/Seizures-list-PN05.txt"
seizures = get_seizure_events(path_seiz)
seizures_in_02 = [seiz for seiz in seizures if seiz["file_name"] == "PN05-2.edf"]
arr_mtx_t_epi = get_seizure_array(seizures_in_02)
seizures_in_02

[{'seizure_number': 2,
  'file_name': 'PN05-2.edf',
  'registration_start_time': '06.46.02',
  'registration_end_time': '09.19.47',
  'seizure_start_time': '08.45.25',
  'seizure_end_time': '08.46.00'}]

In [15]:
# Matriz de inicio de registro y final de registro
seiz = seizures_in_02[0] # una sola !!
mtx_t_reg = np.array([seiz['registration_start_time'], seiz['registration_end_time']])

# Utilización de la función para una única señal
arr_seg, arr_labels = getMeData(sig=data_namefilt[0], mtx_t_reg=mtx_t_reg, arr_mtx_t_epi=arr_mtx_t_epi)
arr_seg

array([[ -6.44861197, -14.41007862, -21.80983335, ...,  -4.48831135,
         -4.21901134,  -2.8510888 ],
       [ -5.88095295,  -3.27032485,  -3.43263646, ...,  12.59162582,
          7.36264985,   1.6076962 ],
       [ -0.83301453,  -1.25101932,  -2.04141079, ..., -23.45783771,
        -24.17600637, -25.67897428],
       ...,
       [  3.58760999,   6.20900517,   8.79049448, ...,  -1.13346295,
         -0.1678562 ,   1.88573375],
       [ -2.09849208,  -4.00966745,  -7.20468465, ...,   6.28703538,
          1.87245697,  -3.05232032],
       [ 15.88263508,   5.90186138, -14.44083264, ...,  64.91341421,
         43.74291694,  13.72439515]])

In [16]:
# Potencia
pot_seg = pot4signals(arr_seg, fs, divisor=1)
pot_names = [f"potAbs{band.capitalize()}" for band in BANDAS.keys()]

# Estadistica
stats_data = stats_features(arr_seg)
stats_names = list(stats_data.values())[:-1]
stats_seg = stats_data["matriz de features stat"]

# Feature fector
arr_fv = np.hstack((pot_seg, stats_seg))
columnas = pot_names + stats_names
df_fv = pd.DataFrame(data=arr_fv, columns=columnas)
df_fv

Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar
0,217.644236,49.269005,89.263621,95.580341,55.646721,0.585607,21.551527,0.285625,6.390122,20.582387
1,178.283112,68.107614,68.265979,55.249263,21.184491,0.05875,20.155965,-0.071286,-1.154695,20.122863
2,148.265309,81.879942,29.160342,32.561074,17.223644,0.168532,20.059845,-0.216411,-4.901466,19.451813
3,131.956727,91.34394,18.468434,41.782452,16.610201,-0.005117,18.680859,0.482669,-0.225125,18.679502
4,171.847087,80.592865,21.834394,24.324345,9.639249,-0.66914,17.785635,-0.05615,1.242607,17.742174
5,49.95144,266.194512,118.48891,33.96866,10.059972,-0.137855,22.604525,-0.39736,2.536449,22.461767
6,60.522752,694.580842,171.390084,42.324166,8.917114,-0.446337,30.976213,-0.094013,-0.918058,30.962606
7,46.869211,908.317318,155.500322,71.472523,11.021553,-0.840184,33.378961,-0.038565,0.574652,33.374014
8,76.57021,566.605445,24.871353,73.751879,16.436683,-0.485127,26.430783,0.213435,2.710957,26.291386
9,80.895259,92.963097,126.074663,172.348406,22.912454,0.375545,24.919111,0.079699,-3.971633,24.600574


In [21]:
# Agregamos datos de otro archivo
path03 = rf"{DATA_DIR}PN05/PN05-3.edf"
raw03 = io.read_raw_edf(path03)
raw03_filt = raw03.pick(filt_ch_nms)
data_namefilt03 = raw03_filt.get_data()
data_namefilt03 = data_namefilt03 * 1e6

# Buscamos la seizures
seizures_in_03 = [seiz for seiz in seizures if seiz["file_name"] == "PN05-3.edf"]
arr_mtx_t_epi = get_seizure_array(seizures_in_03)
seiz03 = seizures_in_03[0] # agarro una en particular para ver sus registration
mtx_t_reg = np.array([seiz03['registration_start_time'], seiz03['registration_end_time']])

# Mas segmentos!!
more_seg, more_labels = getMeData(sig=data_namefilt[0], mtx_t_reg=mtx_t_reg, arr_mtx_t_epi=arr_mtx_t_epi)

Extracting EDF parameters from C:\Users\gonza\Documents\ITBA\2024 1C\PSIB\TP FINAL PSIB\eeg_dataset\PN05\PN05-3.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw03 = io.read_raw_edf(path03)
  raw03 = io.read_raw_edf(path03)
  raw03 = io.read_raw_edf(path03)


array([[ 11.36461276,   9.73249322,   8.90681429, ...,   7.57488971,
          5.15430558,   3.30573716],
       [  2.28886905,   2.58590525,   2.58870395, ...,  93.06489976,
         80.14400456,  35.45502607],
       [ -4.82471127, 112.54538089, 199.1416388 , ..., 274.12982677,
        246.91960145, 220.26158401],
       ...,
       [  9.44553849,   0.7100948 ,  -3.86634223, ...,  11.10267467,
         10.07497096,   6.89727064],
       [ -9.76609101, -22.85620336, -30.24289177, ...,   4.16171784,
         -3.85329999, -18.98555966],
       [ -1.14742916, -17.02214374, -26.70167237, ..., -18.45459858,
        -16.15936454,  -5.19403582]])

In [33]:
# Potencia
more_pot = pot4signals(more_seg, fs, divisor=1)

# Estadistica
more_stat_data = stats_features(more_seg)
more_stat = more_stat_data["matriz de features stat"]

# Feature fector
more_fv = np.hstack((more_pot, more_stat))
print('hola ivo')
new_fv = np.concatenate((arr_fv, more_fv))
new_labels = np.concatenate((arr_labels, more_labels))
new_df_fv = pd.DataFrame(new_fv, columns=columnas)
new_df_fv

hola ivo


Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar
0,217.644236,49.269005,89.263621,95.580341,55.646721,0.585607,21.551527,0.285625,6.390122,20.582387
1,178.283112,68.107614,68.265979,55.249263,21.184491,0.058750,20.155965,-0.071286,-1.154695,20.122863
2,148.265309,81.879942,29.160342,32.561074,17.223644,0.168532,20.059845,-0.216411,-4.901466,19.451813
3,131.956727,91.343940,18.468434,41.782452,16.610201,-0.005117,18.680859,0.482669,-0.225125,18.679502
4,171.847087,80.592865,21.834394,24.324345,9.639249,-0.669140,17.785635,-0.056150,1.242607,17.742174
...,...,...,...,...,...,...,...,...,...,...
76,122.300133,56.377653,80.224473,67.992885,34.666015,1.104256,24.447418,0.099206,0.159798,24.446895
77,47.972869,71.167041,65.417250,218.081616,150.463341,0.180981,29.972813,-0.018467,-1.760497,29.921065
78,10.683147,14.711758,5.697948,13.439185,3.419257,0.243305,7.876359,0.185646,0.476258,7.861947
79,82.284653,37.969012,51.101159,62.933390,2.754750,-0.014141,16.279322,0.044644,-1.191299,16.235675


In [37]:
new_df_fv['label'] = new_labels
new_df_fv.to_csv('first_features.csv', sep=',', encoding='utf-8')
new_df_fv

Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar,label
0,217.644236,49.269005,89.263621,95.580341,55.646721,0.585607,21.551527,0.285625,6.390122,20.582387,0.0
1,178.283112,68.107614,68.265979,55.249263,21.184491,0.058750,20.155965,-0.071286,-1.154695,20.122863,0.0
2,148.265309,81.879942,29.160342,32.561074,17.223644,0.168532,20.059845,-0.216411,-4.901466,19.451813,0.0
3,131.956727,91.343940,18.468434,41.782452,16.610201,-0.005117,18.680859,0.482669,-0.225125,18.679502,0.0
4,171.847087,80.592865,21.834394,24.324345,9.639249,-0.669140,17.785635,-0.056150,1.242607,17.742174,0.0
...,...,...,...,...,...,...,...,...,...,...,...
76,122.300133,56.377653,80.224473,67.992885,34.666015,1.104256,24.447418,0.099206,0.159798,24.446895,1.0
77,47.972869,71.167041,65.417250,218.081616,150.463341,0.180981,29.972813,-0.018467,-1.760497,29.921065,1.0
78,10.683147,14.711758,5.697948,13.439185,3.419257,0.243305,7.876359,0.185646,0.476258,7.861947,1.0
79,82.284653,37.969012,51.101159,62.933390,2.754750,-0.014141,16.279322,0.044644,-1.191299,16.235675,1.0


In [40]:
df_from_csv = pd.read_csv("first_features.csv", index_col=0)
df_from_csv

Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar,label
0,217.644236,49.269005,89.263621,95.580341,55.646721,0.585607,21.551527,0.285625,6.390122,20.582387,0.0
1,178.283112,68.107614,68.265979,55.249263,21.184491,0.058750,20.155965,-0.071286,-1.154695,20.122863,0.0
2,148.265309,81.879942,29.160342,32.561074,17.223644,0.168532,20.059845,-0.216411,-4.901466,19.451813,0.0
3,131.956727,91.343940,18.468434,41.782452,16.610201,-0.005117,18.680859,0.482669,-0.225125,18.679502,0.0
4,171.847087,80.592865,21.834394,24.324345,9.639249,-0.669140,17.785635,-0.056150,1.242607,17.742174,0.0
...,...,...,...,...,...,...,...,...,...,...,...
76,122.300133,56.377653,80.224473,67.992885,34.666015,1.104256,24.447418,0.099206,0.159798,24.446895,1.0
77,47.972869,71.167041,65.417250,218.081616,150.463341,0.180981,29.972813,-0.018467,-1.760497,29.921065,1.0
78,10.683147,14.711758,5.697948,13.439185,3.419257,0.243305,7.876359,0.185646,0.476258,7.861947,1.0
79,82.284653,37.969012,51.101159,62.933390,2.754750,-0.014141,16.279322,0.044644,-1.191299,16.235675,1.0
