In [188]:
import scipy.signal
import numpy as np
from pymatreader import read_mat
import os


In [189]:
# Create a list with all the dataset files
dataset_files = []
dataset_folder = 'dataset' # dataset folder to load data
for file in os.listdir(dataset_folder):
    dataset_files.append(os.path.join(dataset_folder, file)) # append relative path

In [190]:
def preprocess(file):
    mat = read_mat(file)
    bci_exp = mat['bciexp'] # reference to the bci exp data which are the only relevant data
    labels = bci_exp['label'] # all channels
    print(bci_exp.items())

    sampling_rate = bci_exp['srate']
    downsampling_fact = 5 # downsampling factor, 250 / 5 = 50 Hz
    bandpass = np.array([1, 12.5]) # cutoff frequencies for the bandpass filter
    interval_len = .75 # seconds

    # calculate bandpass filter coefficients
    butter_a, butter_b = scipy.signal.butter(N=2, Wn=bandpass / (sampling_rate / 2), btype='bandpass')

    channels_of_interest = ['O9', 'CP1', 'CP2', 'O10', 'P7', 'P3', 'Pz',
            'P4', 'P8', 'PO7', 'PO3', 'Oz', 'PO4', 'PO8']

    # get the index of each channel in labels array
    channel_indexes = np.array([labels.index(ch) for ch in channels_of_interest])
    lmast_channel_idx = np.char.equal('LMAST', labels)

    # number of samples per analysis window
    num_samples_per_window = int(interval_len * sampling_rate / downsampling_fact) - 1

    stimuli = np.array(bci_exp['stim'], dtype=np.double)
    num_stimuli = np.sum(np.diff( np.sum(stimuli[:, :, 0], axis=0)) > 0)

    eeg_data = bci_exp['data']
    num_trials = eeg_data.shape[2]
    num_channels = len(channel_indexes)
    data = np.zeros(shape=(num_channels, num_samples_per_window, num_stimuli, num_trials))
    model = np.zeros(shape=(num_samples_per_window, num_samples_per_window, num_stimuli, num_trials))

    for i in range(1):
        right_data = eeg_data[channel_indexes, :, i] - (eeg_data[lmast_channel_idx, :, i] / 2)
        right_data_t = right_data.T
        x = scipy.signal.filtfilt(butter_a, butter_b, right_data_t)
        x = scipy.signal.lfilter()


In [191]:
preprocess(dataset_files[0])

dict_items([('srate', 250), ('data', array([[[ 2.450e+01, -9.320e+01,  4.130e+01, ..., -2.240e+01,
         -1.010e+01,  1.350e+01],
        [ 1.940e+01, -9.840e+01,  4.740e+01, ..., -3.140e+01,
         -2.190e+01,  1.080e+01],
        [ 2.010e+01, -1.048e+02,  4.860e+01, ..., -3.390e+01,
         -2.100e+01,  2.300e+00],
        ...,
        [-1.292e+02,  1.670e+01,  1.340e+01, ..., -1.710e+01,
          8.100e+00,  1.963e+02],
        [-1.339e+02,  1.200e+01,  8.600e+00, ..., -1.870e+01,
          3.200e+00,  1.774e+02],
        [-1.253e+02,  1.460e+01,  1.100e+01, ..., -4.100e+00,
         -1.700e+00,  1.665e+02]],

       [[ 3.220e+01, -9.070e+01,  5.030e+01, ..., -3.260e+01,
         -2.700e+01,  1.700e+00],
        [ 3.050e+01, -9.820e+01,  5.880e+01, ..., -2.970e+01,
         -1.280e+01, -3.000e-01],
        [ 2.850e+01, -1.077e+02,  5.600e+01, ..., -3.380e+01,
         -1.620e+01, -5.100e+00],
        ...,
        [-1.342e+02,  2.630e+01, -7.000e-01, ..., -7.500e+00,
         

ValueError: The length of the input vector x must be greater than padlen, which is 15.