In [1]:
import scipy.io as sio
import glob
import numpy
import time

In [2]:
def load_patient_train_data(paths):
    """Load every .mat file matching a glob pattern, together with its label.

    Files are visited in the order produced by sorting the matched paths
    with ``numericalSort`` as the key.

    Parameters
    ----------
    paths : str
        Glob pattern handed to ``glob.glob``.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is a list holding the ``scipy.io.loadmat``
        result for each file, and ``Y`` is a numpy array holding, for each
        file, the integer parsed from the fifth-from-last character of its
        path (the character just before a 4-character suffix such as
        ``.mat``).
    """
    print('...loading train data')
    start = time.time()

    samples = []
    labels = []

    mat_files = glob.glob(paths)
    mat_files.sort(key=numericalSort)

    for mat_file in mat_files:
        samples.append(sio.loadmat(mat_file))
        # The label is the single character at position -5 of the path.
        labels.append(int(mat_file[-5]))

    labels = numpy.array(labels)
    print('time elapsed: %s sec' %(time.time() - start))

    return (samples, labels)

In [3]:
# numericalSort is a sort key: it cuts a string into alternating text and
# digit runs and converts each digit run to an int, so that e.g. "a2" sorts
# before "a10" instead of after it.

import re
numbers = re.compile(r'(\d+)')
def numericalSort(value):
    """Return ``value`` split into text pieces and integer pieces.

    Because the pattern has a capturing group, ``numbers.split`` keeps the
    digit runs in its result: even positions are the text between digit runs
    (possibly empty strings) and odd positions are the digit runs themselves.
    """
    key = []
    for position, piece in enumerate(numbers.split(value)):
        # Odd positions hold the captured digit runs; turn those into ints.
        key.append(int(piece) if position % 2 else piece)
    return key

In [4]:
# Find samples of training set X that contain no data, or consist entirely of zeros
def find_zero_index(X):
    """Return the positions of the samples in ``X`` whose data is all zero.

    Parameters
    ----------
    X : sequence
        Each element must support ``element['dataStruct']['data'][0][0]``
        and yield a numeric array there.

    Returns
    -------
    list of int
        Indices into ``X``, in ascending order, of the samples whose data
        array is empty or contains only zeros.
    """
    print('...locating zero-data')

    zero_index = []
    # enumerate works on both Python 2 and 3 (xrange exists only on Python 2).
    for i, sample in enumerate(X):
        data = sample['dataStruct']['data'][0][0]
        # numpy.any is False exactly when no element is non-zero (including
        # the empty array), without building a temporary |data| copy.
        if not numpy.any(data):
            zero_index.append(i)

    return zero_index

In [5]:
# remove all-zero data
# X must be list, Y can be numpy array which is cast to a list, and cast back to numpy array upon return
def remove_zero_data(X, Y):
    """Delete the all-zero samples from ``X`` and the matching labels from ``Y``.

    The samples to drop are the ones reported by ``find_zero_index(X)``.

    Parameters
    ----------
    X : list
        Samples. Modified in place: the zero-data entries are deleted from
        this very list.
    Y : sequence
        One label per sample. It is copied into a new list first, so the
        object passed in is left untouched.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is the same list object that was passed in
        (now shortened) and ``Y`` is a new numpy array of the kept labels.
    """
    zero_index = find_zero_index(X)
    Y = list(Y)
    print('...removing zero-data')

    # Delete from the highest index down: removing an element only shifts the
    # elements after it, so the remaining (smaller) indices stay valid and no
    # running offset is needed.
    for idx in sorted(zero_index, reverse=True):
        del X[idx]
        del Y[idx]

    Y = numpy.array(Y)
    return (X, Y)

In [6]:
# Load the training data: every file matching the glob pattern below.
# X_train1 receives the list of loaded .mat contents and Y_train1 the numpy
# array of per-file integer labels returned by load_patient_train_data.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook can be re-run elsewhere.
X_train1, Y_train1 = load_patient_train_data('F:/Kaggle/Seizure Prediction/train_1/*.mat')

...loading train data
time elapsed: 107.023999929 sec


In [None]:
# Drop the all-zero samples and their labels.
# NOTE: remove_zero_data deletes entries from the list it is given, so
# X_train_clean is the same list object as X_train1 (X_train1 is shortened
# too). Y_train1 is copied inside the function and is left unchanged.
X_train_clean, Y_train1_clean = remove_zero_data(X_train1, Y_train1)

...locating zero-data
