# Prueba de la extracción de feature vectors

In [1]:
#Importación de librerías
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from mne import io
import random
import scipy.signal as signal
from datetime import datetime, timedelta
from FVfunctions import getMeData, pot4signals, BANDAS
from features_stats import stats_features
from dataset_reader import get_seizure_events, get_seizure_array

#### 1. Lectura

In [2]:
# Recording under analysis. The EDF path is built from the patient and
# recording identifiers so these labels cannot drift from the file loaded.
DATA_DIR = r"../eeg_dataset/"
paciente = "PN05"
realizacion = "2"
path01 = rf"{DATA_DIR}{paciente}/{paciente}-{realizacion}.edf"
raw01 = io.read_raw_edf(path01)

Extracting EDF parameters from C:\Users\gonza\Documents\ITBA\2024 1C\PSIB\TP FINAL PSIB\eeg_dataset\PN05\PN05-2.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw01 = io.read_raw_edf(path01)
  raw01 = io.read_raw_edf(path01)
  raw01 = io.read_raw_edf(path01)


#### 2. Info del archivo

In [3]:
# Show the recording metadata summary, then the complete list of channel names.
info = raw01.info
print(info, info['ch_names'], sep="\n")

<Info | 8 non-empty values
 bads: []
 ch_names: EEG Fp1, EEG F3, EEG C3, EEG P3, EEG O1, EEG F7, EEG T3, EEG T5, ...
 chs: 37 EEG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 256.0 Hz
 meas_date: 2016-01-01 06:46:02 UTC
 nchan: 37
 projs: []
 sfreq: 512.0 Hz
 subject_info: 1 item (dict)
>
['EEG Fp1', 'EEG F3', 'EEG C3', 'EEG P3', 'EEG O1', 'EEG F7', 'EEG T3', 'EEG T5', 'EEG Fc1', 'EEG Fc5', 'EEG Cp1', 'EEG Cp5', 'EEG F9', 'EEG Fz', 'EEG Cz', 'EEG Pz', 'EEG F4', 'EEG C4', 'EEG P4', 'EEG O2', 'EEG F8', 'EEG T4', 'EEG T6', 'EEG Fc2', 'EEG Fc6', 'EEG Cp2', 'EEG Cp6', 'EEG F10', 'EKG EKG', 'SPO2', 'HR', '1', '2', 'EEG FP2', 'EEG P9', 'EEG P10', 'MK']


#### 3. Obtención de las mediciones a utilizar y otros datos importantes

In [4]:
# Names of every channel present in the recording
ch_nms = info["ch_names"]

# Channels selected from the left side
filt_ch_nms = ['EEG T3','EEG T5','EEG F7','EEG F3','EEG C3','EEG P3']

# Keep only the selected channels. Raw.pick() works in place, so it is applied
# to a copy: raw01 keeps all of its channels and this cell does not silently
# alter the object created by the reading cell.
raw01_filt = raw01.copy().pick(filt_ch_nms)

# Samples of the selected channels (one row per channel)
# NOTE(review): row order follows raw01_filt.ch_names; it is assumed (not
# checked here) to match the order of filt_ch_nms — confirm before relying on
# data_namefilt[0] being 'EEG T3'.
data_namefilt = raw01_filt.get_data()

# Convert the measurements to microvolts
data_namefilt = data_namefilt * 1e6

# Dimensions of the selected data: (channels, samples)
dim_data_filt = np.shape(data_namefilt)

# Verification
print("Canales filtrados:", filt_ch_nms)
print(f"Cantidad de canales resultantes: {dim_data_filt[0]}")
print(f"Cantidad de datos en cada canal: {dim_data_filt[1]}")

Canales filtrados: ['EEG T3', 'EEG T5', 'EEG F7', 'EEG F3', 'EEG C3', 'EEG P3']
Cantidad de canales resultantes: 6
Cantidad de datos en cada canal: 4733440


In [5]:
# Sampling frequency and number of samples per channel
fs = info["sfreq"]
len_data = dim_data_filt[1]

# Array of time instants, in seconds.
# Built from integer sample indices instead of np.arange(..., step=1/fs):
# with a non-integer step, rounding can make arange return one element more
# (or fewer) than len_data, which would desynchronise arr_t from the signal.
start = 0
stop = len_data / fs  # exclusive end of the time axis
arr_t = start + np.arange(len_data) / fs

# Data verification
print("Frecuencia de muestreo:", fs)
print("Instantes [s]:", arr_t[0], "...", arr_t[-1])

Frecuencia de muestreo: 512.0
Instantes [s]: 0.0 ... 9244.998046875


#### 4. getMeData

In [6]:
# Load the seizure list of the patient and keep the events that belong to
# the recording file being analysed.
path_seiz = rf"{DATA_DIR}PN05/Seizures-list-PN05.txt"
seizures = get_seizure_events(path_seiz)
seizures_in_02 = list(
    filter(lambda event: event["file_name"] == "PN05-2.edf", seizures)
)

# Seizure information in the array form passed to getMeData below
arr_mtx_t_epi = get_seizure_array(seizures_in_02)
seizures_in_02

[{'seizure_number': 2,
  'file_name': 'PN05-2.edf',
  'registration_start_time': '06.46.02',
  'registration_end_time': '09.19.47',
  'seizure_start_time': '08.45.25',
  'seizure_end_time': '08.46.00'}]

In [13]:
# Registration start/end times of the recording, taken from its seizure entry
# (this file has a single one).
seiz = seizures_in_02[0]
mtx_t_reg = np.array(
    [seiz[key] for key in ('registration_start_time', 'registration_end_time')]
)

# Run the function on a single signal (first row of the selected channels)
arr_seg, arr_labels = getMeData(
    sig=data_namefilt[0],
    mtx_t_reg=mtx_t_reg,
    arr_mtx_t_epi=arr_mtx_t_epi,
    winlen=5,
    proportion=0.3,
)
arr_seg

array([[-23.48834369, -20.00143077, -12.24644944, ...,   8.66168329,
          8.28245089,   8.17361767],
       [ 11.44634766,  13.15459522,  12.44651119, ...,  -6.25272673,
         -3.49090718,  -0.06684019],
       [  3.41432557,  -4.61866502, -10.673142  , ...,  14.40121867,
         12.14284157,   7.56411843],
       ...,
       [ -1.26404534,   0.75047677,   1.60762505, ..., -21.02902802,
        -17.60421034,  -6.22424478],
       [ -4.46730181,   5.93943617,  13.35069097, ...,  -4.07302243,
         -2.54854822,  -2.42114141],
       [  5.39174753,   0.33348861,  -5.14761914, ..., -15.1769505 ,
        -13.0471606 ,  -8.51781653]])

In [14]:
# Power features: one column per band defined in BANDAS
pot_seg = pot4signals(arr_seg, fs, divisor=1)
pot_names = ["potAbs" + band.capitalize() for band in BANDAS]

# Statistical features: every value of the returned dict except the last is
# used as a column name; the matrix is read from its own key.
stats_data = stats_features(arr_seg)
stats_seg = stats_data["matriz de features stat"]
stats_names = list(stats_data.values())[:-1]

# Feature vector: power and statistical features side by side
columnas = pot_names + stats_names
arr_fv = np.concatenate((pot_seg, stats_seg), axis=1)
df_fv = pd.DataFrame(arr_fv, columns=columnas)
df_fv

Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar
0,33.483541,14.177523,81.956903,141.717006,101.96739,1.2409,24.95507,0.395203,1.26007,24.923237
1,53.79931,50.624691,136.330796,139.602735,84.929012,4.371765,20.73513,0.107059,0.365863,20.731902
2,98.979875,46.794092,105.509042,211.057883,212.346049,2.660658,27.036281,0.701389,1.428164,26.998534
3,54.98367,64.382793,77.332336,267.651009,288.748001,2.069546,29.48413,0.219422,1.032189,29.466057
4,111.710939,244.347058,306.51797,435.29188,429.416725,1.304487,37.12169,0.355178,1.674864,37.083887
5,62.007023,44.427133,48.921803,72.366165,44.052993,1.795948,20.205173,0.144526,0.216827,20.20401
6,4.856088,9.323213,18.488792,18.013655,2.550403,0.448921,8.344513,-0.086968,1.650615,8.179632
7,12.402086,12.738799,17.837006,27.509635,2.217386,0.188764,9.079593,-0.24746,1.778408,8.903723
8,11.99074,23.121522,30.417384,60.359779,3.18766,-0.023238,11.24722,-0.151257,0.222435,11.245021
9,9.293412,18.29683,21.309082,29.266513,3.330018,0.638245,9.009898,-0.088502,0.420381,9.000086


In [17]:
# Add data from another recording file
path03 = rf"{DATA_DIR}PN05/PN05-3.edf"
raw03 = io.read_raw_edf(path03)
raw03_filt = raw03.pick(filt_ch_nms)
data_namefilt03 = raw03_filt.get_data()
data_namefilt03 = data_namefilt03 * 1e6  # same scaling as the first file

# Look up the seizures of this file
# NOTE: arr_mtx_t_epi and mtx_t_reg are overwritten here with the values of
# PN05-3; re-run the earlier cells before reusing them for PN05-2.
seizures_in_03 = [seiz for seiz in seizures if seiz["file_name"] == "PN05-3.edf"]
arr_mtx_t_epi = get_seizure_array(seizures_in_03)
seiz03 = seizures_in_03[0]  # take one entry to read the registration times
mtx_t_reg = np.array([seiz03['registration_start_time'], seiz03['registration_end_time']])

# More segments. The signal must come from this file (data_namefilt03): the
# registration and seizure times above refer to PN05-3, so pairing them with
# the PN05-2 signal (data_namefilt) would segment and label the wrong data.
more_seg, more_labels = getMeData(sig=data_namefilt03[0], mtx_t_reg=mtx_t_reg, arr_mtx_t_epi=arr_mtx_t_epi, winlen=5, proportion=0.3)

Extracting EDF parameters from C:\Users\gonza\Documents\ITBA\2024 1C\PSIB\TP FINAL PSIB\eeg_dataset\PN05\PN05-3.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


  raw03 = io.read_raw_edf(path03)
  raw03 = io.read_raw_edf(path03)
  raw03 = io.read_raw_edf(path03)


In [18]:
# Power features of the new segments
more_pot = pot4signals(more_seg, fs, divisor=1)

# Statistical features of the new segments
more_stat_data = stats_features(more_seg)
more_stat = more_stat_data["matriz de features stat"]

# Feature vectors of the new segments, stacked below the previous ones
# (a leftover debug print was removed from this cell)
more_fv = np.hstack((more_pot, more_stat))
new_fv = np.concatenate((arr_fv, more_fv))
new_labels = np.concatenate((arr_labels, more_labels))
new_df_fv = pd.DataFrame(new_fv, columns=columnas)
new_df_fv

hola ivo


Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar
0,33.483541,14.177523,81.956903,141.717006,101.96739,1.2409,24.95507,0.395203,1.26007,24.923237
1,53.79931,50.624691,136.330796,139.602735,84.929012,4.371765,20.73513,0.107059,0.365863,20.731902
2,98.979875,46.794092,105.509042,211.057883,212.346049,2.660658,27.036281,0.701389,1.428164,26.998534
3,54.98367,64.382793,77.332336,267.651009,288.748001,2.069546,29.48413,0.219422,1.032189,29.466057
4,111.710939,244.347058,306.51797,435.29188,429.416725,1.304487,37.12169,0.355178,1.674864,37.083887
5,62.007023,44.427133,48.921803,72.366165,44.052993,1.795948,20.205173,0.144526,0.216827,20.20401
6,4.856088,9.323213,18.488792,18.013655,2.550403,0.448921,8.344513,-0.086968,1.650615,8.179632
7,12.402086,12.738799,17.837006,27.509635,2.217386,0.188764,9.079593,-0.24746,1.778408,8.903723
8,11.99074,23.121522,30.417384,60.359779,3.18766,-0.023238,11.24722,-0.151257,0.222435,11.245021
9,9.293412,18.29683,21.309082,29.266513,3.330018,0.638245,9.009898,-0.088502,0.420381,9.000086


In [19]:
# Attach the labels as the last column and save the table to CSV.
new_df_fv = new_df_fv.assign(label=new_labels)
new_df_fv.to_csv('first_features.csv', sep=',', encoding='utf-8')
new_df_fv

Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar,label
0,33.483541,14.177523,81.956903,141.717006,101.96739,1.2409,24.95507,0.395203,1.26007,24.923237,0.0
1,53.79931,50.624691,136.330796,139.602735,84.929012,4.371765,20.73513,0.107059,0.365863,20.731902,0.0
2,98.979875,46.794092,105.509042,211.057883,212.346049,2.660658,27.036281,0.701389,1.428164,26.998534,0.0
3,54.98367,64.382793,77.332336,267.651009,288.748001,2.069546,29.48413,0.219422,1.032189,29.466057,0.0
4,111.710939,244.347058,306.51797,435.29188,429.416725,1.304487,37.12169,0.355178,1.674864,37.083887,0.0
5,62.007023,44.427133,48.921803,72.366165,44.052993,1.795948,20.205173,0.144526,0.216827,20.20401,0.0
6,4.856088,9.323213,18.488792,18.013655,2.550403,0.448921,8.344513,-0.086968,1.650615,8.179632,0.0
7,12.402086,12.738799,17.837006,27.509635,2.217386,0.188764,9.079593,-0.24746,1.778408,8.903723,0.0
8,11.99074,23.121522,30.417384,60.359779,3.18766,-0.023238,11.24722,-0.151257,0.222435,11.245021,0.0
9,9.293412,18.29683,21.309082,29.266513,3.330018,0.638245,9.009898,-0.088502,0.420381,9.000086,0.0


In [20]:
# Read the exported CSV back, using its first column as the index.
df_from_csv = pd.read_csv(
    filepath_or_buffer="first_features.csv",
    index_col=0,
)
df_from_csv

Unnamed: 0,potAbsDelta,potAbsTheta,potAbsAlpha,potAbsBeta,potAbsGamma,kurtosis,RMS,skewness,media,desvio estandar,label
0,33.483541,14.177523,81.956903,141.717006,101.96739,1.2409,24.95507,0.395203,1.26007,24.923237,0.0
1,53.79931,50.624691,136.330796,139.602735,84.929012,4.371765,20.73513,0.107059,0.365863,20.731902,0.0
2,98.979875,46.794092,105.509042,211.057883,212.346049,2.660658,27.036281,0.701389,1.428164,26.998534,0.0
3,54.98367,64.382793,77.332336,267.651009,288.748001,2.069546,29.48413,0.219422,1.032189,29.466057,0.0
4,111.710939,244.347058,306.51797,435.29188,429.416725,1.304487,37.12169,0.355178,1.674864,37.083887,0.0
5,62.007023,44.427133,48.921803,72.366165,44.052993,1.795948,20.205173,0.144526,0.216827,20.20401,0.0
6,4.856088,9.323213,18.488792,18.013655,2.550403,0.448921,8.344513,-0.086968,1.650615,8.179632,0.0
7,12.402086,12.738799,17.837006,27.509635,2.217386,0.188764,9.079593,-0.24746,1.778408,8.903723,0.0
8,11.99074,23.121522,30.417384,60.359779,3.18766,-0.023238,11.24722,-0.151257,0.222435,11.245021,0.0
9,9.293412,18.29683,21.309082,29.266513,3.330018,0.638245,9.009898,-0.088502,0.420381,9.000086,0.0
