This script (Version B) is designed to pre-process a folder of `.mat` v7.3 files that each contain a single channel. This is optimal for very large recordings that would otherwise max out RAM if processed as a single file. Please note that comments have largely been removed from this version; see Version A for full commentary.

### Configuration

In [1]:
import os
import gc
import numpy as np
import pandas as pd

import mne

from tqdm import tqdm
import datetime
import hdf5storage
import collections as cl

import utils__config

In [2]:
# Run from the project root configured in utils__config so that the
# relative paths used in the rest of the notebook resolve.
os.chdir(utils__config.working_directory)
# Display the resulting working directory as the cell output.
os.getcwd()

'g:\\My Drive\\Residency\\Research\\Lab - Damisah\\Project - Sleep'

### Parameters

In [3]:
# Input/output locations for the active session.
# dir_path        : folder of per-channel MAT files that is iterated over
# dictionary_path : Excel sheet read into `ch_dictionary`
# legui_path      : CSV read for the LeGUI electrode selection
# save_path       : destination of the exported FIF file
dir_path = 'Cache/Subject02/Apr26/Macro_Apr26'
dictionary_path = 'Data/Subject02/S02_dictionary.xlsx'
legui_path = 'Cache/Subject02/S02_electrodes.csv'
save_path = 'Cache/Subject02/Apr26/S02_Apr26_256hz.fif'

# Alternative session (Apr27); swap with the block above to process it.
# dir_path = 'Cache/Subject02/Apr27/Macro_Apr27'
# dictionary_path = 'Data/Subject02/S02_dictionary.xlsx'
# legui_path = 'Cache/Subject02/S02_electrodes.csv'
# save_path = 'Cache/Subject02/Apr27/S02_Apr27_256hz.fif'

In [4]:
# Target sampling rate (Hz): every channel is resampled to this rate, and
# the notch-filter frequencies are derived from it.
sampling_freq = 256

### Convert MAT to MNE Object

In [5]:
# Channel dictionary spreadsheet; its 'number', 'name' and 'type' columns
# are the ones looked up later in this notebook.
ch_dictionary = pd.read_excel(dictionary_path)

# Translation from the dictionary's channel types to MNE channel types.
channel_map = {
    'macro': 'seeg',
    'scalp': 'eeg',
    'ecg': 'ecg',
    'emg': 'emg',
    'eog': 'eog',
    'micro': 'misc',
    'ttl': 'stim',
    'vitals': 'bio',
    'empty': 'misc',
}

In [6]:
# Build one multi-channel Raw object from a folder of single-channel MAT
# files. Each file is loaded, wrapped in a one-channel RawArray, optionally
# cropped, resampled to `sampling_freq`, and appended to `raw`.

# Start from a clean slate: channels are accumulated into `raw`, and reusing
# a leftover `raw` from an earlier run of this cell would append onto stale
# data.
raw = None

# Only MAT files are channels (skips stray files such as desktop.ini);
# sorting makes the channel order reproducible across platforms.
channel_files = sorted(f for f in os.listdir(dir_path)
                       if f.lower().endswith('.mat'))

for channel in tqdm(channel_files):

    # Load Data
    file_path = os.path.join(dir_path, channel)
    data = hdf5storage.loadmat(file_path)

    # Scale by 1e-6, i.e. uV -> V (MNE expects volts).
    # Assumes the MAT data are stored in microvolts -- TODO confirm.
    time_series = data['time_series'] * 1e-6

    # Channel Name (file names look like '<prefix>_Channel<N>.mat')
    ch_num = int(channel.split('_')[1].split('.')[0].split('Channel')[1])
    ch_row = ch_dictionary.loc[ch_dictionary['number'] == ch_num]
    if ch_row.empty:
        raise ValueError(f'Channel {ch_num} ({channel}) is not listed in '
                         f'{dictionary_path}')
    ch_name = ch_row['name'].tolist()

    # Channel Type
    ch_type = channel_map[ch_row['type'].tolist()[0]]

    # Sampling Frequency
    sfreq = data['meta_data']['sampling_rate'].astype(np.int64)[0][0][0]

    # Start Time
    # NOTE(review): index 2 is skipped and index 7 is passed as
    # `microsecond`. If time_stamp follows the Windows SYSTEMTIME layout
    # (year, month, day-of-week, day, hour, minute, second, millisecond),
    # the last field should be multiplied by 1000 -- confirm against the
    # export script before changing.
    raw_time = data['meta_data']['time_stamp'][0][0][0][0].astype(np.int64)
    time_start = datetime.datetime(raw_time[0], raw_time[1], raw_time[3],
                                   raw_time[4], raw_time[5], raw_time[6],
                                   raw_time[7], tzinfo = datetime.timezone.utc)

    # The loaded file is no longer needed; drop it so the original and the
    # scaled copy are not both held in RAM.
    del data

    # Create Raw object
    info = mne.create_info(ch_names = ch_name,
                           sfreq = sfreq,
                           ch_types = ch_type)

    single_raw = mne.io.RawArray(data = time_series,
                                 info = info)
    del time_series

    single_raw.set_meas_date(time_start)

    # Crop start and stop times (optional)
    # `tmin` / `tmax` are absolute datetimes defined by the user (they must
    # be timezone-aware to be subtracted from `time_start`). The offsets are
    # stored under separate names so `tmin` / `tmax` are NOT overwritten;
    # overwriting them with floats broke every iteration after the first.
    if ('tmin' in locals() or 'tmin' in globals()):
        crop_start = (tmin - time_start).total_seconds()
        crop_stop = (tmax - time_start).total_seconds()
        single_raw.crop(tmin = crop_start, tmax = crop_stop)

    # Decimate (automatic bandpass to Nyquist frequency)
    single_raw.resample(sfreq = sampling_freq)

    # Concatenate channels (the first channel becomes the container; no copy
    # is needed because `single_raw` is rebound on the next iteration)
    if raw is None:
        raw = single_raw
    else:
        raw.add_channels([single_raw])

    # Reclaim the full-rate buffers before the next file is loaded
    gc.collect()

if raw is None:
    raise FileNotFoundError(f'No .mat channel files found in {dir_path}')

Creating RawArray with float64 data, n_channels=1, n_times=86400001
    Range : 0 ... 86400000 =      0.000 ... 43200.000 secs
Ready.
Creating RawArray with float64 data, n_channels=1, n_times=86400001
    Range : 0 ... 86400000 =      0.000 ... 43200.000 secs
Ready.
Creating RawArray with float64 data, n_channels=1, n_times=86400001
    Range : 0 ... 86400000 =      0.000 ... 43200.000 secs
Ready.
Creating RawArray with float64 data, n_channels=1, n_times=86400001
    Range : 0 ... 86400000 =      0.000 ... 43200.000 secs
Ready.
Creating RawArray with float64 data, n_channels=1, n_times=86400001
    Range : 0 ... 86400000 =      0.000 ... 43200.000 secs
Ready.


### Filter

In [None]:
# Optional low-pass filter at 60 Hz, left disabled.
# (Note that the .NS3 files already had an 0.3 - 500 Hz filter applied at
#  the hardware level)
# raw.filter(l_freq = None, h_freq = 60, n_jobs = -1)

# Notch out 60 Hz line noise and every harmonic below the Nyquist frequency
nyquist = sampling_freq / 2
line_noise_freqs = np.arange(60, nyquist, 60)
raw.notch_filter(freqs = line_noise_freqs)

### Re-reference

In [None]:
# Macro electrodes: reference the sEEG channels to the average of all
# channels the dictionary labels as 'macro' (macro-CAR)
is_macro = ch_dictionary['type'] == 'macro'
macro_ref = ch_dictionary.loc[is_macro, 'name'].to_list()
raw = raw.set_eeg_reference(ref_channels = macro_ref,
                            ch_type = 'seeg')

# Scalp electrodes: same approach, using the channels the dictionary labels
# as 'scalp' and applied to the EEG channels (scalp-CAR)
is_scalp = ch_dictionary['type'] == 'scalp'
scalp_ref = ch_dictionary.loc[is_scalp, 'name'].to_list()
raw = raw.set_eeg_reference(ref_channels = scalp_ref,
                            ch_type = 'eeg')

### Subset electrodes

In [None]:
# Keep only the macro contacts accepted in LeGUI, plus every scalp, ECG and
# EOG channel listed in the channel dictionary.
print('Original channel count:', len(raw.info.ch_names))

# Accepted macro contacts from the LeGUI export
legui_df = pd.read_csv(legui_path)
accepted_macro = (legui_df.status == 'accept') & (legui_df.type == 'macro')
legui_df = legui_df.loc[accepted_macro]
legui_channels = legui_df.elec_label.to_numpy()

# Non-macro channels that are always retained
retained_types = ['scalp', 'ecg', 'eog']
is_retained = ch_dictionary['type'].isin(retained_types)
other_channels = ch_dictionary.loc[is_retained, 'name']

keep_channels_2 = legui_channels.tolist() + other_channels.tolist()
raw = raw.pick_channels(keep_channels_2)

print('Channels after LeGUI selection:', len(raw.ch_names))

### Export

In [None]:
# Write the processed recording to `save_path`, replacing any existing file
raw.save(fname = save_path,
         overwrite = True)