# DS 6011: Read in data

- Andrew J. Graves, Cory Clayton, Joon Soh, Gabe Yohe
- 10/17/20

Here we have developed a dataset class that specifies parameters unique to each data set

In [1]:
# Dataset class that specifies file structure for different datasets
class Dataset:
    
    # Constructor assigns name, location, and size
    def __init__(self, name):
        self.name = name
        
        # Data dictionary for all datasets: directory, subjects, tasks
        data_dict = {'motormovement_imagine': 
                         {'base_dir': 'https://github.com/VIXSoh/SRM/raw/master/data/',
                          'n_subj': 109,
                          'n_task': 14
                         }
                     # Add datasets here
                    }
        # Assign attributes to dataset instance
        self.base_dir = data_dict[self.name]['base_dir']
        self.n_subj = data_dict[self.name]['n_subj']
        self.n_task = data_dict[self.name]['n_task']
    
    # Generates appropriate file paths
    def get_file_format(self, subj, task):
        
        # Checks if name follows this file structure
        if self.name in ['motormovement_imagine']:
            
            # Get all file paths
            subj_num = f"S{str(subj).rjust(3, '0')}"
            task_num = f"R{str(task).rjust(2, '0')}"
            self.file_path = f'{self.base_dir}{subj_num}/{subj_num}{task_num}.edf'
            
# Write congig for different datasets. Class ingests config

The following are helper functions for reading the data from various locations

In [2]:
# Get data from internet using wget
def wget_raw_edf(f):
    
    from wget import download
    from os import remove
    from mne.io import read_raw_edf
    
    wg = download(f) # download the data locally (temporarily)
    raw = read_raw_edf(wg, preload = True) # read data as an MNE object
    remove(wg) # delete the file locally
    return raw

# Specifies which files to iterate through
def gen_iter(param, n_params):
    
    if param != 'all': # for subset of the data
        param_iter = param
    else: # for all of the data
        param_iter = range(1, n_params+1)
    return param_iter

# Reads multiple EEG files with a single call
def read_data(dataset, subjects='all', tasks='all'):
    
    # Instantiate the data set (raw locations, number of subjects / tasks)
    data = Dataset(dataset)
    
    missing = [] # initialize missing file list
    subj_iter = gen_iter(subjects, data.n_subj) # get subject iterator
    task_iter = gen_iter(tasks, data.n_task) # get task iterator

    # Iterate through subjects
    for subj in subj_iter:
        
        # Iterate through tasks
        for task in task_iter:
            # Specify the file format
            data.get_file_format(subj, task)
            
            try: # Handles missing files
                return wget_raw_edf(data.file_path) # change back later
                # Send to preproc function next
            except:
                print(f'---\nThis file does not exist: {data.file_path}\n---')
                # Need to write the missing file list out
                missing.append(data.file_path)

Read in data

In [18]:
dat1= read_data('motormovement_imagine', subjects=[1], tasks=[4])

Extracting EDF parameters from C:\Users\joony\CAPSTONE\S001R04 (1).edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...


In [19]:
dic_var = {
"Fp1":[-29.4367, 83.9171, -6.9900],
"Fpz":[0.1123, 88.2470, -1.7130],
"Fp2":[29.8723, 84.8959, -7.0800],
"AF7":[-54.8397, 68.5722, -10.5900],
"AF3":[-33.7007, 76.8371, 21.2270],
"AFz":[0.2313, 80.7710, 35.4170],
"AF4":[35.7123, 77.7259, 21.9560],
"AF8":[55.7433, 69.6568, -10.7550],
"F7":[-70.2629, 42.4743, -11.4200],
"F5":[-64.4658, 48.0353, 16.9210],
"F3":[-50.2438, 53.1112, 42.1920],
"F1":[-27.4958, 56.9311, 60.3420],
"Fz":[0.3122, 58.5120, 66.4620],
"F2":[29.5142, 57.6019, 59.5400],
"F4":[51.8362, 54.3048, 40.8140],
"F6":[67.9142, 49.8297, 16.3670],
"F8":[73.0431, 44.4217, -12.0000],
"FT7":[-80.7750, 14.1203, -11.1350],
"FC5":[-77.2149, 18.6433, 24.4600],
"FC3":[-60.1819, 22.7162, 55.5440],
"FC1":[-34.0619, 26.0111, 79.9870],
"FCz":[0.3761, 27.3900, 88.6680],
"FC2":[34.7841, 26.4379, 78.8080],
"FC4":[62.2931, 23.7228, 55.6300],
"FC6":[79.5341, 19.9357, 24.4380],
"FT8":[81.8151, 15.4167, -11.3300],
"T9":[-85.8941, -15.8287, -48.283],
"T7":[-84.1611, -16.0187, -9.346],
"C5":[-80.2801, -13.7597, 29.1600],
"C3":[-65.3581, -11.6317, 64.3580],
"C1":[-36.1580, -9.9839, 89.7520],
"Cz":[0.4009, -9.1670, 100.2440],
"C2":[37.6720, -9.6241, 88.4120],
"C4":[67.1179, -10.9003, 63.5800],
"C6":[83.4559, -12.7763, 29.2080],
"T8":[85.0799, -15.0203, -9.4900],
"T10":[85.5599, -16.3613, -48.2710],
"TP7":[-84.8302, -46.0217, -7.056],
"CP5":[-79.5922, -46.5507, 30.9490],
"CP3":[-63.5562, -47.0088, 65.6240],
"CP1":[-35.5131, -47.2919, 91.3150],
"CPz":[0.3858, -47.3180, 99.4320],
"CP2":[38.3838, -47.0731, 90.6950],
"CP4":[66.6118, -46.6372, 65.5800],
"CP6":[83.3218, -46.1013, 31.2060],
"TP8":[85.5488, -45.5453, -7.1300],
"P7":[-72.4343, -73.4527, -2.487],
"P5":[-67.2723, -76.2907, 28.3820],
"P3":[-53.0073, -78.7878, 55.9400],
"P1":[-28.6203, -80.5249, 75.4360],
"Pz":[0.3247, -81.1150, 82.6150],
"P2":[31.9197, -80.4871, 76.7160],
"P4":[55.6667, -78.5602, 56.5610],
"P6":[67.8877, -75.9043, 28.0910],
"P8":[73.0557, -73.0683, -2.5400],
"PO7":[-54.8404, -97.5279, 2.7920],
"PO3":[-36.5114, -100.8529, 37.1670],
"POz":[0.2156, -102.1780, 50.6080],
"PO4":[36.7816, -100.8491, 36.3970],
"PO8":[55.6666, -97.6251, 2.7300],
"O1":[-29.4134, -112.4490, 8.8390],
"Oz":[0.1076, -114.8920, 14.6570],
"O2":[29.8426, -112.1560, 8.8000],
"Iz":[0.0045, -118.5650, -23.0780],
}


In [20]:
dic_var

{'Fp1': [-29.4367, 83.9171, -6.99],
 'Fpz': [0.1123, 88.247, -1.713],
 'Fp2': [29.8723, 84.8959, -7.08],
 'AF7': [-54.8397, 68.5722, -10.59],
 'AF3': [-33.7007, 76.8371, 21.227],
 'AFz': [0.2313, 80.771, 35.417],
 'AF4': [35.7123, 77.7259, 21.956],
 'AF8': [55.7433, 69.6568, -10.755],
 'F7': [-70.2629, 42.4743, -11.42],
 'F5': [-64.4658, 48.0353, 16.921],
 'F3': [-50.2438, 53.1112, 42.192],
 'F1': [-27.4958, 56.9311, 60.342],
 'Fz': [0.3122, 58.512, 66.462],
 'F2': [29.5142, 57.6019, 59.54],
 'F4': [51.8362, 54.3048, 40.814],
 'F6': [67.9142, 49.8297, 16.367],
 'F8': [73.0431, 44.4217, -12.0],
 'FT7': [-80.775, 14.1203, -11.135],
 'FC5': [-77.2149, 18.6433, 24.46],
 'FC3': [-60.1819, 22.7162, 55.544],
 'FC1': [-34.0619, 26.0111, 79.987],
 'FCz': [0.3761, 27.39, 88.668],
 'FC2': [34.7841, 26.4379, 78.808],
 'FC4': [62.2931, 23.7228, 55.63],
 'FC6': [79.5341, 19.9357, 24.438],
 'FT8': [81.8151, 15.4167, -11.33],
 'T9': [-85.8941, -15.8287, -48.283],
 'T7': [-84.1611, -16.0187, -9.346],
 

In [21]:
mont = mne.channels.make_dig_montage(dic_var)
mont

<DigMontage | 0 extras (headshape), 0 HPIs, 0 fiducials, 64 channels>

In [22]:
dat1.set_montage(montage=mont)

  dat1.set_montage(montage=mont)


ValueError: DigMontage is only a subset of info. There are 64 channel positions not present in the DigMontage. The required channels are:

['Fc5.', 'Fc3.', 'Fc1.', 'Fcz.', 'Fc2.', 'Fc4.', 'Fc6.', 'C5..', 'C3..', 'C1..', 'Cz..', 'C2..', 'C4..', 'C6..', 'Cp5.', 'Cp3.', 'Cp1.', 'Cpz.', 'Cp2.', 'Cp4.', 'Cp6.', 'Fp1.', 'Fpz.', 'Fp2.', 'Af7.', 'Af3.', 'Afz.', 'Af4.', 'Af8.', 'F7..', 'F5..', 'F3..', 'F1..', 'Fz..', 'F2..', 'F4..', 'F6..', 'F8..', 'Ft7.', 'Ft8.', 'T7..', 'T8..', 'T9..', 'T10.', 'Tp7.', 'Tp8.', 'P7..', 'P5..', 'P3..', 'P1..', 'Pz..', 'P2..', 'P4..', 'P6..', 'P8..', 'Po7.', 'Po3.', 'Poz.', 'Po4.', 'Po8.', 'O1..', 'Oz..', 'O2..', 'Iz..'].

Consider using inst.set_channel_types if these are not EEG channels, or use the on_missing parameter if the channel positions are allowed to be unknown in your analyses.

## Manipulates an example subject (dated)

In [4]:
%matplotlib inline

# Import modules
import numpy as np
import mne

# Example file from example subject
raw = read_data('motormovement_imagine', subjects=[1], tasks=[4])

# Print the information
raw.info['nchan']

Extracting EDF parameters from C:\Users\joony\CAPSTONE\S001R04 (1).edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 19999  =      0.000 ...   124.994 secs...


64

In [8]:
raw['nchan']

ValueError: picks ('nchan') could not be interpreted as channel names (no channel "nchan"), channel types (no type "nchan"), or a generic type (just "all" or "data")

In [7]:
# Create the montage
mont = mne.channels.make_standard_montage('standard_1020')
mont.ch_names = [i.upper() for i in mont.ch_names]
n_chan = len(mont.ch_names)

In [7]:
#raw.set_channel_types({'EXG1': 'misc', 'EXG2': 'misc',
#                       'EXG3': 'eog', 'EXG4': 'eog', 
#                       'EXG5': 'misc', 'EXG6': 'misc',
#                       'EXG7': 'misc', 'EXG8': 'misc',
#                       'Status': 'stim'})

raw.info['ch_names'] =  [i.replace('.', '').upper() for i in raw.info['ch_names'] if i not in ['T9', 'T10']]

#raw.pick_types(eeg=True)

# Set the montage
raw.set_montage(montage=mont)

RuntimeError: info channel name inconsistency detected, please notify mne-python developers

In [6]:
len(raw.info['ch_names'])

64

In [7]:
# Print the EEG data shape
shape = raw.get_data().shape
print(shape)

(64, 20000)


In [8]:
# Sampling and Nyquist frequency
samp_freq = raw.info['sfreq']
nyq = samp_freq / 2
print(nyq)

80.0


In [9]:
# Task time
shape[1] / (samp_freq)

125.0

In [10]:
# T0 is baseline, T1 is left fist / both, T2 if right fist / both
# Create an event dictionary
event_dict = {'baseline': 1, 'left/both': 2, 'right/both': 3}

ev = mne.events_from_annotations(raw)
print(ev[1])
np.delete(ev[0], 1, axis=1)

Used Annotations descriptions: ['T0', 'T1', 'T2']
{'T0': 1, 'T1': 2, 'T2': 3}


array([[    0,     1],
       [  672,     3],
       [ 1328,     1],
       [ 2000,     2],
       [ 2656,     1],
       [ 3328,     2],
       [ 3984,     1],
       [ 4656,     3],
       [ 5312,     1],
       [ 5984,     3],
       [ 6640,     1],
       [ 7312,     2],
       [ 7968,     1],
       [ 8640,     3],
       [ 9296,     1],
       [ 9968,     2],
       [10624,     1],
       [11296,     3],
       [11952,     1],
       [12624,     2],
       [13280,     1],
       [13952,     2],
       [14608,     1],
       [15280,     3],
       [15936,     1],
       [16608,     2],
       [17264,     1],
       [17936,     3],
       [18592,     1],
       [19264,     2]])

In [11]:
unfilt = raw.plot()

RuntimeError: info channel name inconsistency detected, please notify mne-python developers

In [14]:
filt = raw.plot(highpass=.5, lowpass=nyq-1)

Setting up band-pass filter from 0.5 - 79 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 16 (effective, after forward-backward)
- Cutoffs at 0.50, 79.00 Hz: -6.02, -6.02 dB



RuntimeError: info channel name inconsistency detected, please notify mne-python developers

In [13]:
nyq-1

79.0