# Preprocess raw EDF data to filtered FIF format

Import the libraries used for preprocessing

In [6]:
import sys
import os
import numpy as np
import mne
import matplotlib.pyplot as plt
from mne.preprocessing import ICA
from pyprep.find_noisy_channels import NoisyChannels
from IPython import display
import eeglabio

%matplotlib inline

Find EDF file based on subject number and load using MNE

In [7]:
# Subject number to process. The next cell uses it to find the subject's folder
# (folder name must start with this string) and to name the output FIF file.
sub_num = "007"

In [8]:
# Input and output locations, relative to the notebook's working directory
data_dir = "../../../Data/EEG DATA/"
save_dir = "../../../Data/Processed Data/"

# The output file name depends only on the subject number, so build it once
# rather than on every pass through the folder loop
save_fname_fif = sub_num[:3] + '_preprocessed-raw.fif'

# Reset every per-subject decision made in this cell. Without this, a branch
# that does not assign one of these names either raises NameError on a fresh
# kernel or silently reuses the value left over from a previous subject.
sub_id = ''
acq_id = ''
Fp1_eog_flag = 0        # 1 when Fp1 stands in as the EOG channel
wrong_64_mtg_flag = 0   # 1 when the recording uses the alternate 64-channel cap layout
non_eeg_chs = []        # auxiliary channels to drop before setting the montage
eog_chs = None          # channel names passed to read_raw_edf(eog=...)

# find the subject folder: its name starts with the subject number
# (if several folders match, the last one listed is used)
for folder in os.listdir(data_dir):
    if folder.startswith(sub_num):  # and save_fname_fif not in os.listdir(save_dir):
        sub_id = folder
        print(sub_id)
        print(save_fname_fif)
if not sub_id:
    raise FileNotFoundError(
        f"No folder starting with '{sub_num}' found in '{data_dir}'")

# find the recording: if the folder holds several .edf files, the last one listed is used
for subfile in os.listdir(os.path.join(data_dir, sub_id)):
    if subfile.endswith(".edf"):
        acq_id = subfile
if not acq_id:
    raise FileNotFoundError(
        f"No .edf file found in '{os.path.join(data_dir, sub_id)}'")

# read data, set EOG channel, and drop unused channels
print(f"{sub_id}\nreading raw file...")
eeg_data_raw_file = os.path.join(data_dir, sub_id, acq_id)

# First pass: open without preloading, only to inspect the channel names.
# The file is then re-read with the EOG channel typed and the data preloaded.
raw = mne.io.read_raw_edf(eeg_data_raw_file)

# 32 channels
if 32 < len(raw.ch_names) < 64:
    if "X" in raw.ch_names:
        Fp1_eog_flag = 1
        eog_chs = ["Fp1"]
    # always preload: the filtering steps below need the data in memory
    raw = mne.io.read_raw_edf(eeg_data_raw_file, eog=eog_chs, preload=True)

    if "X" in raw.ch_names:
        non_eeg_chs = ['X', 'Y', 'Z']
        if "Oth4" in raw.ch_names:
            non_eeg_chs.append('Oth4')
    if non_eeg_chs:
        raw.drop_channels(non_eeg_chs)
    custom_montage = mne.channels.read_custom_montage('../../Misc/Montage/Hydro_Neo_Net_32_xyz_cms_No_FID.sfp')
    # custom_montage = mne.channels.read_custom_montage('../Misc/Montage/Hydro_Neo_Net_32_xyz_cms.sfp')

# 64 channels
else:
    if "FT7" in raw.ch_names:
        wrong_64_mtg_flag = 1
        eog_adj = 4
    else:
        eog_adj = 5
    # NOTE(review): eog_adj is not read anywhere in the cells shown here;
    # kept in case another cell depends on it -- confirm before removing.

    # choose the EOG channel: a dedicated vertical EOG channel if present,
    # otherwise Fp1 for recordings that carry the X/Y/Z auxiliary channels
    if "VEO" in raw.ch_names:
        eog_chs = ["VEO"]
    elif "VEOG" in raw.ch_names:
        eog_chs = ["VEOG"]
    elif "X" in raw.ch_names:
        Fp1_eog_flag = 1
        eog_chs = ["Fp1"]
    # always preload, including when no EOG channel was identified
    raw = mne.io.read_raw_edf(eeg_data_raw_file, eog=eog_chs, preload=True)

    # display.clear_output(wait=True)

    # high level inspection
    print(raw.ch_names)
    print(len(raw.ch_names))

    # drop non EEG channels
    if "HEOG" in raw.ch_names:
        non_eeg_chs = ['HEOG', 'EKG', 'EMG', 'Trigger']
    elif "HEO" in raw.ch_names:
        non_eeg_chs = ['HEO', 'EKG', 'EMG', 'Trigger']
    elif "X" in raw.ch_names and "Oth4" in raw.ch_names:
        non_eeg_chs = ['X', 'Y', 'Z', 'Oth4']
    elif "X" in raw.ch_names:
        non_eeg_chs = ['X', 'Y', 'Z']
    elif "EEG66" in raw.ch_names:
        non_eeg_chs = ['EEG66', 'EEG67', 'EEG68', 'EEG69']
    if non_eeg_chs:
        raw.drop_channels(non_eeg_chs)

    # make adjustment for wrong montage subjects
    if wrong_64_mtg_flag:
        raw.drop_channels(['FT7', 'FT8', 'PO5', 'PO6'])  # for C24, 055, 056, 047

    print(raw.ch_names)
    print(len(raw.ch_names))

    # load electrode locations
    print(f"{sub_id}\nsetting custom montage...")
    if wrong_64_mtg_flag:
        custom_montage = mne.channels.read_custom_montage('../../Misc/Montage/Hydro_Neo_Net_64_xyz_cms_No_FID_Caps.sfp')
    else:
        custom_montage = mne.channels.read_custom_montage('../../Misc/Montage/Hydro_Neo_Net_64_xyz_cms_No_FID.sfp')

raw.set_montage(custom_montage)
display.clear_output(wait=True)

# apply notch filter
print(f"{sub_id}\napplying notch filter...")
raw = raw.notch_filter(60., notch_widths=3)
display.clear_output(wait=True)

007.EEGDATA
applying notch filter...
Filtering raw data in 1 contiguous segment
Setting up band-stop filter from 58 - 62 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 58.00
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 57.75 Hz)
- Upper passband edge: 62.00 Hz
- Upper transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 62.25 Hz)
- Filter length: 3301 samples (6.602 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    0.6s


#### Crop noisy ends of data!

In [9]:
# Manual, per-subject cropping of the end of the recording. Nothing in this cell
# runs by default: uncomment the group for the subject being processed.
# Each group plots one second at the proposed cut point, crops the recording
# with raw.crop(tmax=...), then plots one second shortly before the cut.

# subject 007
# raw.plot(start=1483,duration=1,n_channels=10,scalings=0.0005)
# raw.crop(tmax=1483) 
# raw.plot(start=1480,duration=1,n_channels=10,scalings=0.0005)

# subject 010
# raw.plot(start=1990,duration=1,n_channels=10,scalings=0.0005)
# raw.crop(tmax=1997.8)
# raw.plot(start=1988,duration=1,n_channels=10,scalings=0.0005)

2023-09-25 15:02:08,428 - qdarkstyle - INFO - QSS file successfully loaded.
2023-09-25 15:02:08,429 - qdarkstyle - INFO - Found version patches to be applied.
2023-09-25 15:02:08,429 - qdarkstyle - INFO - Found application patches to be applied.


0,1
Measurement date,"February 13, 2020 11:09:39 GMT"
Experimenter,Unknown
Digitized points,34 points
Good channels,"31 EEG, 1 EOG"
Bad channels,
EOG channels,Fp1
ECG channels,Not available
Sampling frequency,500.00 Hz
Highpass,0.00 Hz
Lowpass,250.00 Hz


In [10]:
# apply bandpass filter
print(f"{sub_id}\napplying bandpass filter...")
raw = raw.filter(l_freq=1., h_freq=100.)
display.clear_output(wait=True)

# resample data to decrease file size
resample_freq = 400
print(f"{sub_id}\nresampling data from {raw.info['sfreq']} Hz to {resample_freq} Hz...")
raw.resample(resample_freq, npad='auto')
display.clear_output(wait=True)

# find bad channels automatically
print(f"{sub_id}\nremoving bad channels...")
raw_pyprep = NoisyChannels(raw, random_state=42)
raw_pyprep.find_all_bads(ransac=False, channel_wise=False, max_chunk_size=None)
# Keep a separate copy of the detected channels: interpolate_bads(reset_bads=True)
# empties raw.info['bads'], so the final report cannot read them from there.
interpolated_chs = list(raw_pyprep.get_bads())
raw.info['bads'] = list(interpolated_chs)
raw.interpolate_bads(reset_bads=True)
display.clear_output(wait=True)

# re-reference channels
print(f"{sub_id}\nre-referencing channels to average...")
raw, _ = mne.set_eeg_reference(raw, ref_channels='average', copy=True)
display.clear_output(wait=True)

# fit ICA
print(f"{sub_id}\nfitting ICA...")
# Count the good EEG channels directly instead of assuming exactly one EOG
# channel is present; gives the same number when there is exactly one EOG channel.
num_goods = len(mne.pick_types(raw.info, eeg=True, exclude='bads'))
ica = ICA(n_components=int(np.floor(num_goods/2)), random_state=42, max_iter='auto')
ica.fit(raw)
display.clear_output(wait=True)

# find EOG artifacts
print(f"{sub_id}\nfinding EOG artifacts...")

# Components correlated with the EOG channel are detected automatically.
# An earlier note here flagged ica.find_bads_eog as broken; the manual
# fallback is to exclude the first two components (commented line below).
eog_indices, eog_scores = ica.find_bads_eog(raw,
                                            threshold='auto')
ica.exclude = eog_indices
# ica.exclude = [0,1]

display.clear_output(wait=True)

# apply ICA
print(f"{sub_id}\napplying ICA...")
ica.apply(raw)
display.clear_output(wait=True)

# save copy of data
print(f"Saving processed data as '{save_fname_fif}'...")

# Remove the dedicated EOG channel, or turn Fp1 back into a regular EEG channel
if "VEO" in raw.ch_names:
    raw.drop_channels(["VEO"])
elif "VEOG" in raw.ch_names:
    raw.drop_channels(["VEOG"])
elif Fp1_eog_flag:
    # NOTE(review): while typed as EOG, Fp1 was presumably skipped by the steps
    # above that default to EEG/data channels (filtering, re-referencing, ICA);
    # confirm that saving it alongside the processed EEG channels is intended.
    raw.set_channel_types({'Fp1': 'eeg'})
    raw.set_montage(custom_montage)

# set_dir = '../../SET Data/'
os.makedirs(save_dir, exist_ok=True)  # make sure the output folder exists
raw.save(os.path.join(save_dir, save_fname_fif),
         verbose=True, overwrite=True)
# mne.export.export_raw(set_dir+save_fname_fif[:-4]+'.set',
#                       raw, overwrite=True, verbose=True)
display.clear_output(wait=True)

# high level inspection
print(raw.ch_names)
print('\nNumber of remaining channels: ', len(raw.ch_names) - len(raw.info['bads']))
print('\nInterpolated bad channels: ', interpolated_chs)

print("Raw data preprocessing complete.")
# No clear_output here: the summary above is the cell's final output and
# should stay visible.

Channels marked as bad:
none
