In [None]:
!pip install mne



In [None]:
import mne
import numpy as np
import os
import h5py

In [None]:
# Make the project folder the working directory so that relative paths used
# later in the notebook (e.g. "./project/raw_EEG.h5") resolve against it.
# NOTE(review): absolute path; presumably Google Drive mounted at
# /content/drive on Colab - adjust when running elsewhere.
os.chdir('/content/drive/MyDrive/ECE-GY_9123_DL')
# One-off recursive download of the dataset from PhysioNet, kept commented out
# as a record of where the data came from.
# !wget -r -N -c -np https://physionet.org/files/eegmmidb/1.0.0/

# Helper functions

In [None]:
def loadEEG(subject):
  """Load the task runs of one subject and cut them into fixed-length trials.

  Every ``*.edf`` file in the subject's folder is visited in run order. The
  runs ``R01`` and ``R02`` are skipped. From each remaining run the first 7
  ``T1`` and the first 7 ``T2`` annotations are kept, in recording order
  (14 trials per run); ``T0`` annotations are ignored. For every kept trial
  the 640 samples (4 s at the assumed 160 Hz) following the annotation onset
  are copied.

  Labels are derived from the position ``i_f`` of the file in the sorted
  listing, which assumes that R01-R14 are all present (so R03 has
  ``i_f == 2``):

  * task: 1 for R03/R07/R11, 2 for R04/R08/R12, 3 for R05/R09/R13,
    4 for R06/R10/R14;
  * repetition: 1 for R03-R06, 2 for R07-R10, 3 for R11-R14.

  Args:
    subject: integer subject number; ``1`` maps to the folder ``S001``.

  Returns:
    Tuple ``(raw_eeg_s, tasks_s, reps_s, trails_s, trailReps_s)``:

    * ``raw_eeg_s``   - array (168, 640, 64): trial x sample x channel.
    * ``tasks_s``     - array (168,): task label 1-4 of each trial.
    * ``reps_s``      - array (168,): repetition 1-3 of the run.
    * ``trails_s``    - array (168,): 1 for a ``T1`` trial, 2 for ``T2``.
    * ``trailReps_s`` - array (168,): occurrence number (1-7) of the trial
      within its class inside the run.

  Raises:
    ValueError: if a run does not provide 7 ``T1`` and 7 ``T2`` trials, or
      (from numpy) if a trial window does not fit the (640, 64) slot, e.g.
      because it runs past the end of the recording.
  """
  subject_str = 'S{:03d}'.format(subject)
  dataPath = '/content/drive/MyDrive/ECE-GY_9123_DL/project/data'
  print('\n Subject {} loading ... \n'.format(subject))

  # load files from subject folder
  dataFolder_s = os.path.join(dataPath, subject_str)
  # Sort on the whole file name: run numbers are zero-padded, so lexicographic
  # order is run order. (A key of x[-2:] only ever compares the 'df' suffix
  # and leaves the files in arbitrary os.listdir order, which breaks the
  # position-based task/repetition labels below.)
  fileNames = sorted(f for f in os.listdir(dataFolder_s) if f.endswith('edf'))
  skipped_runs = ('{}R01.edf'.format(subject_str),
                  '{}R02.edf'.format(subject_str))

  sr = 160                               # sampling rate (Hz) assumed for all files
  n_samples = 640                        # 4 s * 160 Hz kept per trial
  max_per_class = 7                      # trials kept per class and run
  n_per_file = 2 * max_per_class         # 14 output rows per run
  class_lbl = {'T1': 1, 'T2': 2}         # annotation description -> trial label

  raw_eeg_s = np.empty((168, n_samples, 64))   # allocate memories for raw eeg
  tasks_s = np.empty((168,))
  reps_s = np.empty((168,))
  trails_s = np.empty((168,))
  trailReps_s = np.empty((168,))

  for i_f, f in enumerate(fileNames):
    if f in skipped_runs:
      continue

    print(f)
    data = mne.io.read_raw_edf(os.path.join(dataFolder_s, f))
    raw_eeg = data.get_data().T          # (n_times, n_channels)

    # define task labels
    if i_f % 4 == 2:
      task = 1
    elif i_f % 4 == 3:
      task = 2
    elif i_f % 4 == 0:
      task = 3
    else:
      task = 4

    # define repetition labels
    if i_f <= 5:
      rep = 1
    elif i_f <= 9:
      rep = 2
    else:
      rep = 3

    first_slot = (i_f - 2) * n_per_file  # first output row of this run
    seen = {'T1': 0, 'T2': 0}            # per-class trial counters
    n_kept = 0                           # trials stored so far for this run
    for trial in data.annotations:
      description = trial['description']
      if description not in class_lbl:
        continue                         # rest period (T0)

      seen[description] += 1
      # The cap is applied per class: an 8th T1 must not discard later T2
      # trials (and vice versa).
      if seen[description] > max_per_class:
        continue

      # round() instead of truncation so float round-off in onset * sr
      # cannot shift the window by one sample
      onset_ind = int(round(trial['onset'] * sr))

      # Rows advance only for stored trials, so all five outputs stay
      # aligned and no uninitialised row is left between trials.
      slot = first_slot + n_kept
      raw_eeg_s[slot] = raw_eeg[onset_ind:onset_ind + n_samples, :]
      tasks_s[slot] = task
      reps_s[slot] = rep
      trails_s[slot] = class_lbl[description]
      trailReps_s[slot] = seen[description]
      n_kept += 1

    if n_kept != n_per_file:
      raise ValueError(
          '{}: expected {} T1/T2 trials ({} per class), found {}'.format(
              f, n_per_file, max_per_class, n_kept))

  return raw_eeg_s, tasks_s, reps_s, trails_s, trailReps_s

# Load and concatenate data

In [None]:
# Output buffers for the whole data set: 106 loaded subjects x 168 trials each
# = 17808 rows. Every row is overwritten by the loop below.
raw_eeg = np.empty((17808, 640, 64))
tasks, reps, trials, trial_reps, subs = (np.empty((17808,)) for _ in range(5))

start_ind = list(range(0, 17809, 168))        # start index of each subject
n_subject = 0                                 # number of subjects stored so far

subjects = list(range(1, 110))
for subject in subjects:

  # subjects 100, 104 and 106 are problematic and left out
  if subject not in (100, 104, 106):
    raw_eeg_s, tasks_s, reps_s, trails_s, trailReps_s = loadEEG(subject)

    # the 168 consecutive rows that belong to this subject
    rows = slice(start_ind[n_subject], start_ind[n_subject] + 168)
    raw_eeg[rows] = raw_eeg_s
    tasks[rows] = tasks_s
    reps[rows] = reps_s
    trials[rows] = trails_s
    trial_reps[rows] = trailReps_s
    subs[rows] = subject                      # same id for all rows of the subject

    n_subject += 1

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
EDF file detected
Setting channel info structure...
Creating raw.info structure...
S027R11.edf
Extracting EDF parameters from /content/drive/MyDrive/ECE-GY_9123_DL/project/data/S027/S027R11.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
S027R12.edf
Extracting EDF parameters from /content/drive/MyDrive/ECE-GY_9123_DL/project/data/S027/S027R12.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
S027R13.edf
Extracting EDF parameters from /content/drive/MyDrive/ECE-GY_9123_DL/project/data/S027/S027R13.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
S027R14.edf
Extracting EDF parameters from /content/drive/MyDrive/ECE-GY_9123_DL/project/data/S027/S027R14.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...

 Subject 28 loading ... 

S028R03.edf
Extracting EDF parameters

In [None]:
# Write the trial array and its per-trial label vectors to a single HDF5 file
# (path is relative to the working directory set at the top of the notebook).
# The context manager closes the file even if one of the writes fails, so no
# half-written, still-open handle is left behind.
with h5py.File("./project/raw_EEG.h5", "w") as f_data:
  f_data.create_dataset("data", data=raw_eeg)              # (17808, 640, 64) trials
  f_data.create_dataset("tasks", data=tasks)               # task label per trial
  f_data.create_dataset("reps", data=reps)                 # run repetition per trial
  f_data.create_dataset("trials", data=trials)             # T1/T2 label per trial
  f_data.create_dataset("trial_reps", data=trial_reps)     # occurrence within class
  f_data.create_dataset("subjects", data=subs)             # subject id per trial