In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import mne, re, os
import torch
import pickle

from utils import *

In [2]:
SNR = 100.0
#PARCELLATION = 'aparc.a2009s'
PARCELLATION = 'HCPMMP1_combined'
DATA_PATH = '/home/williamtheodor/Documents/DL for EEG Classification/data/'

data_dict = load_mmidb_data_dict(DATA_PATH, PARCELLATION, SNR, chop=False)

In [4]:
DATA_PATH_RAW = '../../data/eegmmidb (raw)/files/'
DATA_PATH_CONCEPTS = '../../data/sanity check concepts MMIDB desync/'

bands = data_dict.keys()
patients_to_exclude = ['S088', 'S089', 'S090', 'S092', 'S104', 'S106']
patients = [key for key in data_dict['Alpha'].keys() if key not in patients_to_exclude]
runs = ['R03', 'R04', 'R07', 'R08', 'R11', 'R12']

subjects_dir, subject, trans, src_path, bem_path = get_fsaverage()
labels = get_labels(subjects_dir, parcellation_name=PARCELLATION)
label_names = [label.name for label in np.array(labels).flatten()]

#bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']
bands = ['Alpha']

In [5]:
for label_idx in range(len(label_names)):
    for band in bands:
    # make directory if it doesn't exist
        if not os.path.exists(f'{DATA_PATH_CONCEPTS}{band}_{label_names[label_idx]}'):
            os.makedirs(f'{DATA_PATH_CONCEPTS}{band}_{label_names[label_idx]}')

In [6]:
NUMNER_PATIENTS = len(patients)
NUMBER_RUNS = len(runs)
NUMBER_WINDOWS = 15
NUMBER_CHANNELS = 20

NUMBER_BANDS = len(bands)
NUMBER_LABELS = len(label_names)

WINDOW_LENGTH = 4 # seconds
SAMPLING_FREQ = 256 # Hz
NUMBER_SAMPLES = int(WINDOW_LENGTH*SAMPLING_FREQ)


baseline_run = 'R01' # baseline open eyes

X = np.zeros((NUMNER_PATIENTS, NUMBER_RUNS, NUMBER_WINDOWS, NUMBER_CHANNELS, NUMBER_SAMPLES))
Y = np.ones((NUMNER_PATIENTS, NUMBER_RUNS, NUMBER_WINDOWS)) * -1

for patient in tqdm(patients):
    #baseline_activity = np.array([data_dict[band][patient][patient+baseline_run]['T0'] for band in bands]).reshape(NUMBER_BANDS, NUMBER_LABELS)
    baseline_activity = data_dict['Alpha'][patient][patient+baseline_run]['T0'].reshape(NUMBER_LABELS)

    for run in runs:

        FILE = DATA_PATH_RAW+f'{patient}/{patient}{run}.edf'

        raw = get_raw(FILE)
        annotations = get_annotations(FILE)

        annotation_dict = get_window_dict(raw, annotations)

        for key in ['T1', 'T2']:
            for raw_idx, raw in enumerate(annotation_dict[key]):

                raw = pick_and_rename_MMIDB_channels(raw)

                #activity = np.array([data_dict[band][patient][patient+run][key][raw_idx] for band in bands])
                activity = data_dict['Alpha'][patient][patient+run][key][raw_idx]
                activity -= baseline_activity
                activity = np.abs(activity)

                #most_active_band_idx = np.argmax(activity.mean(axis=1))
                #most_active_band = bands[most_active_band_idx]

                #brain_region_idx = activity[most_active_band_idx].argmax()
                brain_region_idx = activity.argmax()
                brain_region = label_names[brain_region_idx]

                #concept = most_active_band + '_' + brain_region 
                concept = 'Alpha_' + brain_region 

                
                x = np.zeros((1, NUMBER_CHANNELS, NUMBER_SAMPLES))
                x[:,:19,:] = raw.copy().get_data()[:,:NUMBER_SAMPLES].reshape(1,NUMBER_CHANNELS-1,NUMBER_SAMPLES)
                x[:,19,:] = np.ones((1, NUMBER_SAMPLES)) * -1  
                x = torch.from_numpy(x).float()
                
                picklePath = DATA_PATH_CONCEPTS + concept + '/' + patient + run + '_' + key + '_' + concept + '.pkl'
                with open(picklePath, 'wb') as handle:
                    pickle.dump(x, handle, protocol=pickle.HIGHEST_PROTOCOL)

  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
  raw = mne.io.read_raw_edf(edf_file_path, verbose=False, preload=True)
100%|██████████| 103/103 [04:25<00:00,  2.58s/it]


In [7]:
# for all concepts, save the number of examples with T1 and T2
for label in label_names:
    for band in bands:
        concept = band + '_' + label
        path = DATA_PATH_CONCEPTS + concept + '/'
        files = os.listdir(path)
        T1 = 0
        T2 = 0
        for file in files:
            if 'T1' in file:
                T1 += 1
            elif 'T2' in file:
                T2 += 1
        print(f'{concept}: T1 = {T1}, T2 = {T2}')

Alpha_???-lh: T1 = 0, T2 = 0
Alpha_Anterior Cingulate and Medial Prefrontal Cortex-lh: T1 = 2, T2 = 0
Alpha_Auditory Association Cortex-lh: T1 = 41, T2 = 43
Alpha_Dorsal Stream Visual Cortex-lh: T1 = 18, T2 = 16
Alpha_DorsoLateral Prefrontal Cortex-lh: T1 = 126, T2 = 128
Alpha_Early Auditory Cortex-lh: T1 = 0, T2 = 0
Alpha_Early Visual Cortex-lh: T1 = 19, T2 = 18
Alpha_Inferior Frontal Cortex-lh: T1 = 190, T2 = 199
Alpha_Inferior Parietal Cortex-lh: T1 = 7, T2 = 7
Alpha_Insular and Frontal Opercular Cortex-lh: T1 = 0, T2 = 0
Alpha_Lateral Temporal Cortex-lh: T1 = 5, T2 = 4
Alpha_MT+ Complex and Neighboring Visual Areas-lh: T1 = 44, T2 = 42
Alpha_Medial Temporal Cortex-lh: T1 = 0, T2 = 0
Alpha_Orbital and Polar Frontal Cortex-lh: T1 = 300, T2 = 312
Alpha_Paracentral Lobular and Mid Cingulate Cortex-lh: T1 = 7, T2 = 8
Alpha_Posterior Cingulate Cortex-lh: T1 = 0, T2 = 0
Alpha_Posterior Opercular Cortex-lh: T1 = 11, T2 = 7
Alpha_Premotor Cortex-lh: T1 = 10, T2 = 18
Alpha_Primary Visual Cor