## reference link (PYEEG)
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3070217/

In [1]:
import scipy.io as sio
import glob
import numpy
import time
import os

In [2]:
def load_patient_data(paths):
    """Load every file matching a glob pattern, in natural numeric order.

    Parameters
    ----------
    paths : str
        Glob pattern for the files to read. If the substring "train" occurs
        anywhere in the pattern the files are treated as training data,
        otherwise as test data.

    Returns
    -------
    tuple
        ``(X, Y)`` for training data, where ``X`` is a list of the dicts
        returned by ``scipy.io.loadmat`` and ``Y`` is a numpy array holding
        ``int(path[-5])`` for each file (for a path ending in ".mat" that is
        the single character right before the extension).
        ``(X, file_array)`` for test data, where ``file_array`` is the list of
        file names without their directory part.
    """
    is_training_set = "train" in paths

    records = []
    # Labels for a training set, bare file names for a test set.
    companions = []

    if is_training_set:
        print('...loading train data')
    else:
        print('...loading test data')
    started = time.time()

    for mat_path in sorted(glob.glob(paths), key=numericalSort):
        records.append(sio.loadmat(mat_path))
        if is_training_set:
            companions.append(int(mat_path[-5]))
        else:
            companions.append(os.path.split(mat_path)[1])

    if is_training_set:
        companions = numpy.array(companions)

    print('time elapsed: %s sec' %(time.time() - started))

    return (records, companions)

In [3]:
# The numericalSort function splits out any digits in a filename, 
# turns it into an actual number, and returns the result for sorting

import re
numbers = re.compile(r'(\d+)')


def numericalSort(value):
    """Build a sort key that orders embedded digit runs by numeric value.

    The string is split on runs of digits with the runs kept (the pattern has
    a capturing group), so the pieces alternate text, digits, text, ...
    Every digit run, i.e. every odd position, is converted to ``int``.

    Parameters
    ----------
    value : str
        Typically a file path.

    Returns
    -------
    list
        Alternating ``str`` and ``int`` pieces, e.g. ``'a12b'`` becomes
        ``['a', 12, 'b']``.
    """
    pieces = numbers.split(value)
    return [int(piece) if position % 2 else piece
            for position, piece in enumerate(pieces)]

In [4]:
# Find samples of training set X that contain no data, or consist entirely of zeros
def find_zero_index(X):
    """Return the positions of records whose data matrix holds only zeros.

    Parameters
    ----------
    X : list
        Records indexable as ``record['dataStruct']['data'][0][0]``, which
        must yield the numeric data array of that record.

    Returns
    -------
    list of int
        Ascending indices into ``X`` of the records whose data array has no
        non-zero element (an empty array counts as all-zero).
    """
    print('...locating zero-data')

    # numpy.any stops short of building an absolute-value copy of each
    # recording, which the sum-of-absolutes test would need.
    zero_index = [
        position
        for position, record in enumerate(X)
        if not numpy.any(record['dataStruct']['data'][0][0])
    ]

    print('length of zero-data: ' + str(len(zero_index)))
    return zero_index

In [5]:
# remove all-zero data
# X must be list, Y can be numpy array which is cast to a list, and cast back to numpy array upon return
def remove_zero_data(X, Y):
    """Drop every all-zero record from ``X`` together with its entry in ``Y``.

    Parameters
    ----------
    X : list
        Records accepted by ``find_zero_index``. The list is modified in
        place.
    Y : sequence
        One entry per record of ``X``; may be a numpy array. It is copied to
        a list before entries are removed.

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` is the same (now shortened) list object and
        ``Y`` is a new numpy array built from the remaining entries.
    """
    zero_index = find_zero_index(X)
    Y = list(Y)
    print('...removing zero-data')

    # Deleting from the highest position downwards keeps the remaining
    # positions valid, so no running index correction is needed.
    for position in sorted(zero_index, reverse=True):
        del X[position]
        del Y[position]

    Y = numpy.array(Y)
    return (X, Y)

In [6]:
def n_sample_gradient(X):
    """Differentiate every channel of every sample along axis 1.

    Parameters
    ----------
    X : numpy.ndarray
        3-D array indexed as ``[sample, point, channel]``.

    Returns
    -------
    numpy.ndarray
        float64 array of the same shape as ``X`` where
        ``result[s, :, c] == numpy.gradient(X[s, :, c])``.

    Raises
    ------
    ValueError
        Raised by ``numpy.gradient`` when a non-empty input has fewer than
        two points along axis 1.
    """
    # Nothing to differentiate; also avoids numpy.gradient rejecting an
    # empty input whose axis 1 is shorter than two points.
    if X.size == 0:
        return numpy.zeros(X.shape)

    # One vectorised call replaces the per-sample / per-channel Python loop.
    # The cast keeps the float64 result regardless of the input dtype.
    return numpy.gradient(X, axis=1).astype(numpy.float64)

In [7]:
def hjorth_mobility(X):
    """Compute the Hjorth mobility of every channel of every sample.

    Mobility is ``sqrt(var(dX) / var(X))`` where ``dX`` is the gradient of
    the signal along axis 1 and the variances are taken along that axis.

    Parameters
    ----------
    X : numpy.ndarray
        3-D array indexed as ``[sample, point, channel]``.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(samples, channels)``. A channel whose
        variance is zero divides by zero, so its entry is NaN or inf and
        NumPy emits a runtime warning.
    """
    grad_X = n_sample_gradient(X)

    # Variance over the point axis for all samples at once; the cast keeps
    # the float64 result the loop-filled arrays used to have.
    var_X = numpy.var(X, axis=1).astype(numpy.float64)
    grad_var_X = numpy.var(grad_X, axis=1).astype(numpy.float64)

    mobility = numpy.sqrt(grad_var_X / var_X)

    return mobility

In [8]:
def hjorth_complexity(X):
    """Compute the Hjorth complexity of every channel of every sample.

    Complexity is the mobility of the signal's gradient divided by the
    mobility of the signal itself.

    Parameters
    ----------
    X : numpy.ndarray
        Array accepted by ``n_sample_gradient`` and ``hjorth_mobility``.

    Returns
    -------
    numpy.ndarray
        Element-wise ratio of the two mobility arrays.
    """
    first_derivative = n_sample_gradient(X)

    signal_mobility = hjorth_mobility(X)
    derivative_mobility = hjorth_mobility(first_derivative)

    return derivative_mobility / signal_mobility

In [9]:
def create_X_batch(X, samples, index):
    """Stack ``samples`` consecutive records of ``X`` into one 3-D array.

    Parameters
    ----------
    X : list
        Records indexable as ``record['dataStruct']['data'][0][0]``, each
        yielding a 2-D data array. All records are expected to share the
        shape of the first one.
    samples : int
        Number of records to copy.
    index : int
        Position in ``X`` of the first record of the batch.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(samples, rows, columns)`` whose entry ``k`` is the
        data of ``X[index + k]``.

    Raises
    ------
    IndexError
        If ``X`` is empty or ``index + samples`` runs past the end of ``X``.
    """
    samples = int(samples)
    index = int(index)
    template = X[0]['dataStruct']['data'][0][0]

    X_batch = numpy.zeros([samples, template.shape[0], template.shape[1]])

    for offset in range(samples):
        # The batch starts at `index`; reading X[offset] instead would make
        # every batch a copy of the first `samples` records.
        X_batch[offset, :, :] = X[index + offset]['dataStruct']['data'][0][0]

    return X_batch

In [11]:
def hjorth_activity(X):
    """Compute the Hjorth activity (per-column variance) of every record.

    Parameters
    ----------
    X : list
        Non-empty list of records indexable as
        ``record['dataStruct']['data'][0][0]``, each yielding a 2-D data
        array. The column count is taken from the first record.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X), columns)``; row ``i`` holds the variance of
        each column of record ``i``, taken along axis 0.

    Raises
    ------
    IndexError
        If ``X`` is empty.
    """
    n_columns = X[0]['dataStruct']['data'][0][0].shape[1]
    activity = numpy.zeros([len(X), n_columns])

    for row, record in enumerate(X):
        activity[row, :] = numpy.var(record['dataStruct']['data'][0][0], axis=0)

    return activity

In [19]:
#save feature
# feature must be numpy array

def save_feature(save_path, data_name, feature_name, feature):
    """Write one feature array to disk with ``numpy.save``.

    Parameters
    ----------
    save_path : str
        %-format template with two placeholders, filled with
        ``(data_name, feature_name)`` to build the target file name.
    data_name : str
        Name of the data set the feature belongs to.
    feature_name : str
        Name of the feature.
    feature : numpy.ndarray
        Array to store.

    Returns
    -------
    None
        A confirmation line is printed once the file has been written.
    """
    destination = save_path % (data_name, feature_name)
    numpy.save(destination, feature)

    # The two names are printed back to back, with nothing between them.
    saved_label = ''.join([str(data_name), str(feature_name)])
    print('saved file: ' + saved_label)
        

In [13]:
# Glob patterns of the data sets to process, one per folder. A pattern that
# contains the substring "train" is loaded as training data (with labels and
# zero-data removal); any other pattern is loaded as test data.
path_array = ['F:/Kaggle/Seizure Prediction/train_1/*.mat',
             'F:/Kaggle/Seizure Prediction/train_2/*.mat',
             'F:/Kaggle/Seizure Prediction/train_3/*.mat',
             'F:/Kaggle/Seizure Prediction/test_1/*.mat',
             'F:/Kaggle/Seizure Prediction/test_2/*.mat',
             'F:/Kaggle/Seizure Prediction/test_3/*.mat']

# Names of the features written to disk for every data set. The processing
# loop resolves each name to the variable of the same name, so every entry
# must match a variable that loop defines.
feature_array = ['activity', 'mobility', 'complexity']

In [None]:
# Compute and save the three Hjorth features for every data set in path_array.
for path in path_array:

    # For a training pattern Y holds the labels; for a test pattern it holds
    # the file names. Neither is used by the feature computation below.
    X, Y = load_patient_data(path)

    # remove zero-data from training set
    if "train" in path:
        X, Y = remove_zero_data(X, Y)

    # compute Hjorth activity
    activity = hjorth_activity(X)

    # Compute Hjorth mobility and complexity in batches, so that only one
    # batch at a time is held as a dense 3-D array.
    bins = 10
    X_len = len(X)
    n_channels = X[0]['dataStruct']['data'][0][0].shape[1]

    mobility = numpy.zeros([X_len, n_channels])
    complexity = numpy.zeros([X_len, n_channels])

    # Integer batch boundaries that cover every record exactly once:
    # batch k spans [batch_edges[k], batch_edges[k + 1]).
    batch_edges = [(k * X_len) // bins for k in range(bins + 1)]

    for batch_number in range(bins):
        batch_start = batch_edges[batch_number]
        batch_stop = batch_edges[batch_number + 1]

        # Fewer records than bins leaves some batches empty.
        if batch_stop == batch_start:
            continue

        start = time.time()
        print('batch number: %s' %(batch_number))

        # Pass the batch's own records with offset 0, so the right records
        # are used whether or not create_X_batch applies its index argument.
        X_batch = create_X_batch(X[batch_start:batch_stop],
                                 batch_stop - batch_start,
                                 0)

        mobility[batch_start:batch_stop, :] = hjorth_mobility(X_batch)
        complexity[batch_start:batch_stop, :] = hjorth_complexity(X_batch)

        print('time elapsed: %s sec' %(time.time() - start))

    # save features
    # data_name is the folder that holds the files, e.g. "train_1"
    data_name = os.path.split(path)[0]
    data_name = os.path.split(data_name)[1]

    # Explicit name -> array mapping instead of eval() on the feature name.
    computed_features = {'activity': activity,
                         'mobility': mobility,
                         'complexity': complexity}

    for feature_name in feature_array:
        save_feature(save_path = 'F:/Kaggle/Seizure Prediction/features/FEATURE_%s_%s',
                    data_name = data_name,
                    feature_name = feature_name,
                    feature = computed_features[feature_name])

    # delete data set to clear room for next data set
    del X[:]

    print('')
    print('done')