# Preprocess raw EDF data to filtered FIF format

Define event names in a dictionary

In [1]:
import sys
import os
import numpy as np
import pandas as pd
import mne
import matplotlib.pyplot as plt

from mne.preprocessing import ICA
from pyprep.find_noisy_channels import NoisyChannels

from IPython import display

import eeglabio

Find EDF file based on subject number and load using MNE

In [2]:
parent_data_path = f"../../../George Kenefati/Dan Friedman sEEG Data/"
save_path = f"../../../George Kenefati/Dan Friedman sEEG Data/Processed FIF Data/" 
roi_data_path = f"../../../George Kenefati/Dan Friedman sEEG Data/Processed Anatomical Regions Data/"

In [24]:
def state_and_subfile_creator(day_id):
    day_and_state = []
    for state in os.listdir(os.path.join(parent_data_path, sub_id, day_id)):
        if state.startswith(sub_state):
            state_id = sub_state;
        
    for subfile in os.listdir(os.path.join(parent_data_path,sub_id, day_id, state_id)):
        if subfile.endswith(".EDF"):
            acq_id = subfile;
            day_and_state.append(acq_id)
    day_and_state.append(state_id)
    print(day_and_state)
    return(day_and_state)



def preprocessing_function(state_id, file_name, picks):
    print(f"{sub_id}\nreading raw file...")
    raw_fname = os.path.join(parent_data_path,sub_id, day_id, state_id, file_name)

    raw = mne.io.read_raw_edf(raw_fname, preload=True, include=picks)


    # high level inspection
    print(raw.ch_names)
    print(len(raw.ch_names))

    # apply notch filter
    print(f"{sub_id}\napplying notch filter...")
    raw = raw.notch_filter(60., notch_widths=6, picks=picks)
    display.clear_output(wait=True)

    # apply bandpass filter
    print(f"{sub_id}\napplying bandpass filter...")
    if file_name.endswith("512.EDF"):
        h_freq_value = 100.
    else:
        h_freq_value = 500.
    raw = raw.filter(l_freq=1., h_freq = h_freq_value)
    display.clear_output(wait=True)
    
    # ICA analysis
    num_components = len(raw.info['ch_names'])
    ica = ICA(n_components=int(np.floor(num_components/2)), random_state=42, max_iter='auto')
    ica.fit(raw)

    # find bad channels automatically
    # print(f"{sub_id}\nremoving bad channels...")
    # raw_pyprep = NoisyChannels(raw, random_state=42)
    # raw_pyprep.find_all_bads(ransac=False, channel_wise=False, max_chunk_size=None)
    # raw.info['bads'] = raw_pyprep.get_bads()
    # display.clear_output(wait=True)

    # save copy of data
    save_fname_fif = f"{sub_num}_{day_list[i]}_{file_name[:-4]}-raw.fif"
    print(f"Saving processed data as '{save_fname_fif}'...")
    raw.save(save_path+save_fname_fif,
             verbose=True, overwrite=True)
    
    display.clear_output(wait=True)

    # high level inspection
    print(raw.ch_names)

In [4]:
# Functions are divided into surface and depth generators, surface generator allows users to select chosen nodes given image data
# depth generator used for when images can offer no better info on channel location

def ROI_Surface_Generator(ROI_Data_Path, ROI_sub_id, surface_side): # surface side must be 'lh' (lefthand) or 'rh' (righthand)
    for file in os.listdir(ROI_Data_Path):
        if str(ROI_sub_id) in file and not str('depth') in file and str(surface_side) in file:
            processed_roi_csv = pd.read_csv(os.path.join(ROI_Data_Path, file))

    ROI_channels = processed_roi_csv['ID'].to_list()
    Chosen_ROI_Channels = [] # reset
    channel_ids = 0
    
    while channel_ids != len(ROI_channels):
        verification_status = input(f"Proposed Channel in Region of Interest: {ROI_channels[channel_ids]} - Verify from Image, Keep y/n ?")
        if verification_status == 'y':
            Chosen_ROI_Channels.append(ROI_channels[channel_ids])
            channel_ids = channel_ids + 1
            display.clear_output(wait=True)
        elif verification_status == 'n':
            channel_ids = channel_ids + 1
            display.clear_output(wait=True)
        else:
            print("Invalid Reponse, please enter 'y' or 'n'")
            channel_ids = channel_ids + 0
            display.clear_output(wait=True)
            
    return(Chosen_ROI_Channels) # returns chosen channels as list of channel IDs



def ROI_Depth_Generator(ROI_Data_Path, ROI_sub_id): # use for files containing 'depth'
    for file in os.listdir(ROI_Data_Path):
        if str(ROI_sub_id) in file and str('depth') in file:
            processed_roi_csv = pd.read_csv(os.path.join(ROI_Data_Path, file))
    
    ROI_channels = processed_roi_csv['ID'].to_list()
    return(ROI_channels)

In [9]:
channels_id_dict = {}
for i in range(0, len(sub_ids_lst)):
    selected_channels = ROI_Depth_Generator(roi_data_path, sub_ids_lst[i])
    for j in range(0, len(selected_channels)):
        if selected_channels[j][-2:][:-1] == '0':
            selected_channels[j] = selected_channels[j][:-2] + selected_channels[j][-1:]
    channels_id_dict[sub_ids_lst[i]] = selected_channels


In [20]:
channels_id_dict['865']

['DLCAI3',
 'DLCAI4',
 'DLCAI5',
 'DLCAI6',
 'DLCPI3',
 'DLCPI4',
 'DLCPI5',
 'DLCPI6',
 'DRCAI1',
 'DRCAI2',
 'DRCAI6',
 'DRCPI1',
 'DRCPI2',
 'DRCPI3',
 'DRCPI4',
 'DROFO1',
 'DROFO2',
 'DROFO3',
 'DROFO4']

In [142]:
raw = mne.io.read_raw_edf('../../../George Kenefati/Dan Friedman sEEG Data/NY872/Day 1/Awake1/Awake1.EDF')
raw2 = mne.io.read_raw_edf('../../../George Kenefati/Dan Friedman sEEG Data/NY872/Day 21/Awake1/Awake1.EDF')

Extracting EDF parameters from /media/sb10flpc002/08e63286-43ce-4f61-9491-1ed048c96f20/George Kenefati/Dan Friedman sEEG Data/NY872/Day 1/Awake1/Awake1.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /media/sb10flpc002/08e63286-43ce-4f61-9491-1ed048c96f20/George Kenefati/Dan Friedman sEEG Data/NY872/Day 21/Awake1/Awake1.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...


In [144]:
len(raw.info['ch_names'])

125

In [62]:
# Subjects: [NY - 846, 853, 865, 870, 871, 872, 878, 884, 888, 893]
sub_ids_lst = ['846', '865', '870', '871', '872', '878', '884', '888', '893']

In [63]:
for i in range(0, len(sub_ids_lst)):
    for folder in os.listdir(parent_data_path):
        if sub_ids_lst[i] in folder and not folder.endswith('elec'):
            for sub_folder in os.listdir(os.path.join(parent_data_path, folder)):
                channel_names = []
                if sub_folder.startswith('Day'):
                    for state_folder in os.listdir(os.path.join(parent_data_path, folder, sub_folder)):
                        for file in os.listdir(os.path.join(parent_data_path, folder, sub_folder, state_folder)):
                            if file.endswith('.EDF'):
                                raw = mne.io.read_raw_edf(os.path.join(parent_data_path, folder, sub_folder, state_folder, file))
                                channel_names.append(raw.info['ch_names'])
                    all_channels_equal = channel_names.count(channel_names[0]) == len(channel_names)
                    if all_channels_equal == True:
                        print(f'All channels are the same for subject {sub_ids_lst[i]}')
                    else:
                        print(f'All channels are not the same for subject {sub_ids_lst[i]}')


Extracting EDF parameters from /media/sb10flpc002/08e63286-43ce-4f61-9491-1ed048c96f20/George Kenefati/Dan Friedman sEEG Data/NY846/Day 1/Awake2/Awake2.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /media/sb10flpc002/08e63286-43ce-4f61-9491-1ed048c96f20/George Kenefati/Dan Friedman sEEG Data/NY846/Day 1/Awake2/Awake2_512.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /media/sb10flpc002/08e63286-43ce-4f61-9491-1ed048c96f20/George Kenefati/Dan Friedman sEEG Data/NY846/Day 1/Awake1/Awake1_512.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameters from /media/sb10flpc002/08e63286-43ce-4f61-9491-1ed048c96f20/George Kenefati/Dan Friedman sEEG Data/NY846/Day 1/Awake1/Awake1.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Extracting EDF parameter

In [8]:
sub_num = 'NY853' # Set this at the start for each subject name
sub_state = 'Asleep1'  #Set this to Asleep1 / Asleep2 / Awake1 / Awake2

In [25]:
sub_id=''; acq_id=''
for i in range(0, len(sub_ids_lst)):
    for folder in os.listdir(parent_data_path):   
        if folder.endswith(sub_ids_lst[i]):
            sub_id = folder
            print(sub_id)

    day_list = []        
    for days in os.listdir(os.path.join(parent_data_path,sub_id)):
        if days.startswith("Day"):
            day_list.append(str(days))


    for j in range(0, len(day_list)):
        complete_info = state_and_subfile_creator(day_list[j])
        day_id = day_list[j]
        for z in range(0, 2):
            preprocessing_function(complete_info[-1], complete_info[z], channels_id_dict[sub_ids_lst[i]])

['DLCAI3', 'DLCAI4', 'DLCAI5', 'DLCAI6', 'DLCPI3', 'DLCPI4', 'DLCPI5', 'DLCPI6', 'DROFO1', 'DROFO2', 'DROFO3', 'DROFO4', 'DRCAI1', 'DRCAI2', 'DRCAI6', 'DRCPI1', 'DRCPI2', 'DRCPI3', 'DRCPI4']
['Asleep1.EDF', 'Asleep1_512.EDF', 'Asleep1']
NY865
reading raw file...
Extracting EDF parameters from /media/sb10flpc002/08e63286-43ce-4f61-9491-1ed048c96f20/George Kenefati/Dan Friedman sEEG Data/NY865/Day 6/Asleep1/Asleep1.EDF...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 616063  =      0.000 ...   300.812 secs...
[]
0
NY865
applying notch filter...


ValueError: picks (['DLCAI3', 'DLCAI4', 'DLCAI5', 'DLCAI6', 'DLCPI3', 'DLCPI4', 'DLCPI5', 'DLCPI6', 'DRCAI1', 'DRCAI2', 'DRCAI6', 'DRCPI1', 'DRCPI2', 'DRCPI3', 'DRCPI4', 'DROFO1', 'DROFO2', 'DROFO3', 'DROFO4']) could not be interpreted as channel names (no channel "['DLCAI3', 'DLCAI4', 'DLCAI5', 'DLCAI6', 'DLCPI3', 'DLCPI4', 'DLCPI5', 'DLCPI6', 'DRCAI1', 'DRCAI2', 'DRCAI6', 'DRCPI1', 'DRCPI2', 'DRCPI3', 'DRCPI4', 'DROFO1', 'DROFO2', 'DROFO3', 'DROFO4']"), channel types (no type "DLCAI3" present), or a generic type (just "all" or "data")