In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import mne, re, os
import torch
import pickle

from utils import *

In [2]:
# --- Configuration -----------------------------------------------------------
# SNR is forwarded unchanged to load_mmidb_data_dict below.
SNR = 100.0
# Parcellation name; also used further down to fetch the matching labels.
# Alternative parcellation (disabled):
#PARCELLATION = 'aparc.a2009s'
PARCELLATION = 'HCPMMP1_combined'
# Root folder handed to load_mmidb_data_dict. The default is the original
# machine-specific absolute path; set the MMIDB_DATA_PATH environment variable
# to run the notebook elsewhere without editing this cell.
DATA_PATH = os.environ.get(
    'MMIDB_DATA_PATH',
    '/home/williamtheodor/Documents/DL for EEG Classification/data/',
)

# The notebook indexes the result as data_dict[band][patient][patient + run][event_key].
data_dict = load_mmidb_data_dict(DATA_PATH, PARCELLATION, SNR, chop=False)

In [3]:
# Raw EDF recordings (read per patient/run) and the output root for the
# per-concept pickle files.
DATA_PATH_RAW = '../../data/eegmmidb (raw)/files/'
DATA_PATH_CONCEPTS = '../../data/sanity check concepts MMIDB/'

# Frequency bands in a fixed, explicit order. This list is defined once here;
# the earlier `bands = data_dict.keys()` was dead code because it was
# overwritten by this same list before ever being used.
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']

# Fail fast if the loaded data lacks one of the bands, instead of hitting a
# KeyError deep inside the long-running export loop.
missing_bands = [band for band in bands if band not in data_dict]
if missing_bands:
    raise KeyError(f'data_dict has no entries for bands: {missing_bands}')

# Patients skipped for the whole analysis.
patients_to_exclude = ['S088', 'S089', 'S090', 'S092', 'S104', 'S106']
patients = [key for key in data_dict['Alpha'].keys() if key not in patients_to_exclude]

# Run identifiers are the last three characters of the per-patient keys
# (keys are built as patient + run elsewhere in this notebook). R01 and R02 are
# left out; R01 is used separately as the baseline run.
# NOTE(review): assumes every patient has the same runs as patients[0] - confirm.
runs = [key[-3:] for key in data_dict['Alpha'][patients[0]].keys() if key[-3:] not in ['R01', 'R02']]

# Only `subjects_dir` is used in this cell; the other values are kept in scope
# because other cells may rely on them.
subjects_dir, subject, trans, src_path, bem_path = get_fsaverage()
labels = get_labels(subjects_dir, parcellation_name=PARCELLATION)
# Flatten whatever nesting get_labels returns into a flat list of label names.
label_names = [label.name for label in np.array(labels).flatten()]

In [4]:
# Create one output directory per (band, label) concept, named
# '<band>_<label name>' directly under DATA_PATH_CONCEPTS.
for label_name in label_names:
    for band in bands:
        # exist_ok=True makes the cell safe to re-run and removes the
        # check-then-create race of os.path.exists() followed by os.makedirs().
        os.makedirs(f'{DATA_PATH_CONCEPTS}{band}_{label_name}', exist_ok=True)

In [5]:
# --- Dimensions --------------------------------------------------------------
NUMBER_PATIENTS = len(patients)
# Misspelled legacy name kept as an alias so any cell still referring to it works.
NUMNER_PATIENTS = NUMBER_PATIENTS
NUMBER_RUNS = len(runs)
NUMBER_WINDOWS = 15
NUMBER_CHANNELS = 20

NUMBER_BANDS = len(bands)
NUMBER_LABELS = len(label_names)

WINDOW_LENGTH = 4 # seconds
SAMPLING_FREQ = 256 # Hz
NUMBER_SAMPLES = int(WINDOW_LENGTH*SAMPLING_FREQ)

baseline_run = 'R01' # baseline open eyes

# NOTE(review): X and Y are allocated here but never written in this cell;
# they are kept because later cells may use them - confirm or remove.
X = np.zeros((NUMBER_PATIENTS, NUMBER_RUNS, NUMBER_WINDOWS, NUMBER_CHANNELS, NUMBER_SAMPLES))
Y = np.ones((NUMBER_PATIENTS, NUMBER_RUNS, NUMBER_WINDOWS)) * -1

# --- Export ------------------------------------------------------------------
# For every T1/T2 window of every patient and run:
#   1. subtract the patient's baseline ('T0' of baseline_run) from the
#      per-band, per-label activity,
#   2. pick the band with the highest mean baseline-corrected activity and,
#      within that band, the label with the highest activity,
#   3. pickle the window's signal as a float tensor into the directory of the
#      resulting '<band>_<label>' concept.
for patient in tqdm(patients):
    baseline_activity = np.array(
        [data_dict[band][patient][patient + baseline_run]['T0'] for band in bands]
    ).reshape(NUMBER_BANDS, NUMBER_LABELS)

    for run in runs:

        FILE = DATA_PATH_RAW + f'{patient}/{patient}{run}.edf'

        raw = get_raw(FILE)
        annotations = get_annotations(FILE)

        annotation_dict = get_window_dict(raw, annotations)

        for key in ['T1', 'T2']:
            # Separate loop variable: the original reused `raw` here, shadowing
            # the run-level recording loaded above.
            for window_idx, window in enumerate(annotation_dict[key]):

                window = pick_and_rename_MMIDB_channels(window)

                # One row per band; in-place subtraction keeps the strict
                # requirement that the shapes match the baseline.
                activity = np.array([data_dict[band][patient][patient + run][key][window_idx] for band in bands])
                activity -= baseline_activity

                most_active_band_idx = np.argmax(activity.mean(axis=1))
                most_active_band = bands[most_active_band_idx]

                brain_region_idx = activity[most_active_band_idx].argmax()
                brain_region = label_names[brain_region_idx]

                concept = most_active_band + '_' + brain_region

                # Signal goes in the first NUMBER_CHANNELS - 1 channels; the
                # last channel is filled with -1. The reshape raises if the
                # window does not hold exactly that many channels/samples.
                # The window is only read, so no deep copy is made first.
                signal = window.get_data()[:, :NUMBER_SAMPLES].reshape(1, NUMBER_CHANNELS - 1, NUMBER_SAMPLES)
                x = np.zeros((1, NUMBER_CHANNELS, NUMBER_SAMPLES))
                x[:, :NUMBER_CHANNELS - 1, :] = signal
                x[:, NUMBER_CHANNELS - 1, :] = -1
                x = torch.from_numpy(x).float()

                # The window index is part of the file name. Without it, two
                # windows of the same patient/run/event key that map to the
                # same concept were written to the same path and the earlier
                # one was silently overwritten.
                pickle_path = (
                    DATA_PATH_CONCEPTS + concept + '/'
                    + f'{patient}{run}_{key}_{window_idx}_{concept}.pkl'
                )
                with open(pickle_path, 'wb') as handle:
                    pickle.dump(x, handle, protocol=pickle.HIGHEST_PROTOCOL)

  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
100%|██████████| 103/103 [08:47<00:00,  5.12s/it]


In [6]:
# Display the parcellation label names; together with the band names they form
# the '<band>_<label>' concept directory names created earlier in the notebook.
label_names

['???-lh',
 'Anterior Cingulate and Medial Prefrontal Cortex-lh',
 'Auditory Association Cortex-lh',
 'Dorsal Stream Visual Cortex-lh',
 'DorsoLateral Prefrontal Cortex-lh',
 'Early Auditory Cortex-lh',
 'Early Visual Cortex-lh',
 'Inferior Frontal Cortex-lh',
 'Inferior Parietal Cortex-lh',
 'Insular and Frontal Opercular Cortex-lh',
 'Lateral Temporal Cortex-lh',
 'MT+ Complex and Neighboring Visual Areas-lh',
 'Medial Temporal Cortex-lh',
 'Orbital and Polar Frontal Cortex-lh',
 'Paracentral Lobular and Mid Cingulate Cortex-lh',
 'Posterior Cingulate Cortex-lh',
 'Posterior Opercular Cortex-lh',
 'Premotor Cortex-lh',
 'Primary Visual Cortex (V1)-lh',
 'Somatosensory and Motor Cortex-lh',
 'Superior Parietal Cortex-lh',
 'Temporo-Parieto-Occipital Junction-lh',
 'Ventral Stream Visual Cortex-lh',
 '???-rh',
 'Anterior Cingulate and Medial Prefrontal Cortex-rh',
 'Auditory Association Cortex-rh',
 'Dorsal Stream Visual Cortex-rh',
 'DorsoLateral Prefrontal Cortex-rh',
 'Early Auditor