# Convertendo EDF para Parquet

In [1]:
import mne 
import pandas as pd
import numpy as np

In [2]:
# Variáveis de ambiente
import os
from os.path import join, dirname
from dotenv import load_dotenv

# Notebooks do not define __file__, so dirname() is given the literal string
# '__file__' and returns ''. The path therefore resolves to '.env' relative to
# the current working directory.
dotenv_path = join(dirname('__file__'), '.env')

load_dotenv(dotenv_path)

RAW_DATA = os.environ.get("RAW_DATA")
ROOT_PATH = os.environ.get("ROOT_PATH")

# Fail fast: a missing variable would otherwise be formatted as the text
# "None" inside every path built from these two prefixes.
_missing_vars = [
    var_name
    for var_name, var_value in (("RAW_DATA", RAW_DATA), ("ROOT_PATH", ROOT_PATH))
    if var_value is None
]
if _missing_vars:
    raise RuntimeError(
        f"Missing environment variable(s): {', '.join(_missing_vars)} "
        f"(expected in the environment or in {dotenv_path!r})"
    )

In [3]:
# Read the RECORDS index as a headerless one-column table and keep that column
# as a plain list (presumably one relative EDF path per row -- see the listing
# printed further down).
files = (
    pd.read_csv(f"{ROOT_PATH}{RAW_DATA}siena/database/RECORDS", header=None)
    .iloc[:, 0]
    .tolist()
)

In [4]:
# The patient ID is whatever precedes the first '/' of each record path;
# np.unique returns the distinct IDs in sorted order.
pacientes = np.unique([path.partition('/')[0] for path in files])
pacientes

array(['PN00', 'PN01', 'PN03', 'PN05', 'PN06', 'PN07', 'PN09', 'PN10',
       'PN11', 'PN12', 'PN13', 'PN14', 'PN16', 'PN17'], dtype='<U4')

In [20]:
# Patient converted in this run, chosen by position in `pacientes`.
paciente = pacientes[7]

# Compare the folder component exactly (the same key `pacientes` was built
# from) instead of a substring test over the whole path, which would also pick
# up any record whose path merely contains the ID somewhere.
used = [file for file in files if file.split('/')[0] == paciente]

used

['PN10/PN10-10.edf',
 'PN10/PN10-1.edf',
 'PN10/PN10-2.edf',
 'PN10/PN10-3.edf',
 'PN10/PN10-4.5.6.edf',
 'PN10/PN10-7.8.9.edf']

In [21]:
def save_eeg(df, name):
    """Write the EEG channels of ``df`` plus its ``window`` column to Parquet.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the recorded signals. Every column whose name contains
        ``'EEG'`` is exported, together with the ``'window'`` column.
    name : str
        File name of the output, created under ``{ROOT_PATH}{RAW_DATA}EEG/``.

    Raises
    ------
    KeyError
        If ``df`` has no ``'window'`` column.
    """
    eeg_columns = [col for col in df.columns if 'EEG' in col]

    out = f"{ROOT_PATH}{RAW_DATA}EEG/{name}"

    # Make sure the target folder exists so a fresh checkout can run the
    # notebook without preparing the directory tree by hand.
    os.makedirs(os.path.dirname(out), exist_ok=True)

    df.loc[:, eeg_columns + ['window']].to_parquet(out)


def save_ekg(df, name):
    """Write the EKG channels of ``df`` plus its ``window`` column to Parquet.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the recorded signals. Every column whose name contains
        ``'EKG'`` is exported, together with the ``'window'`` column.
    name : str
        File name of the output, created under ``{ROOT_PATH}{RAW_DATA}EKG/``.

    Raises
    ------
    KeyError
        If ``df`` has no ``'window'`` column.
    """
    ekg_columns = [col for col in df.columns if 'EKG' in col]

    out = f"{ROOT_PATH}{RAW_DATA}EKG/{name}"

    # Make sure the target folder exists so a fresh checkout can run the
    # notebook without preparing the directory tree by hand.
    os.makedirs(os.path.dirname(out), exist_ok=True)

    df.loc[:, ekg_columns + ['window']].to_parquet(out)

In [22]:
def pipeline(file):
    """Convert one EDF recording into an EEG and an EKG Parquet file.

    The recording is read with MNE and turned into a DataFrame. Every sample
    is tagged with the index of the window it belongs to, where a window holds
    ``int(sfreq)`` consecutive samples. Spaces in column names are replaced by
    underscores, and the frame is handed to ``save_eeg`` and ``save_ekg``.

    Parameters
    ----------
    file : str
        Recording path relative to ``{ROOT_PATH}{RAW_DATA}siena/database/``,
        using ``/`` as separator (e.g. ``'PN10/PN10-1.edf'``).

    Raises
    ------
    ValueError
        If the sampling frequency reported by the file truncates to less
        than 1, which would make the window size meaningless.
    """
    raw = mne.io.read_raw_edf(f"{ROOT_PATH}{RAW_DATA}siena/database/{file}")

    df = raw.to_data_frame()

    sample_index = np.arange(df.shape[0])

    # Running sample counter. The save helpers in this notebook export only
    # the EEG/EKG columns and 'window', so this column is not persisted by them.
    df['id'] = sample_index

    # Samples per window: the sampling frequency truncated to an integer.
    fs = int(raw.info['sfreq'])
    if fs < 1:
        raise ValueError(f"Invalid sampling frequency {raw.info['sfreq']!r} in {file!r}")

    # Integer division labels samples 0..fs-1 as window 0, fs..2*fs-1 as
    # window 1, and so on. Unlike a list of whole windows, this also labels a
    # trailing partial window, so the column always matches the frame length.
    df['window'] = sample_index // fs

    df.columns = [col.replace(' ', '_') for col in df.columns]

    # Swap only the file extension; a plain str.replace('edf', ...) would also
    # rewrite any other "edf" occurring inside the base name.
    stem = os.path.splitext(file.split('/')[-1])[0]
    name = f"{stem}.parquet"

    save_eeg(df, name)

    save_ekg(df, name)
    

In [23]:

# Convert every recording selected for the current patient; each pipeline()
# call writes one EEG and one EKG Parquet file.
for file in used:
    pipeline(file)

Extracting EDF parameters from /mnt/c/Users/davi.nascimento/Documents/siena_eeg_ecg/data/raw/siena/database/PN10/PN10-10.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /mnt/c/Users/davi.nascimento/Documents/siena_eeg_ecg/data/raw/siena/database/PN10/PN10-1.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /mnt/c/Users/davi.nascimento/Documents/siena_eeg_ecg/data/raw/siena/database/PN10/PN10-2.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /mnt/c/Users/davi.nascimento/Documents/siena_eeg_ecg/data/raw/siena/database/PN10/PN10-3.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /mnt/c/Users/davi.nascimento/Documents/siena_eeg_ecg/data/raw/siena/database/PN10/PN10-4.5.6.edf...
EDF file detected
Setting ch