## Reference (PyEEG)
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3070217/

In [1]:
import glob
import os
import time

import numpy
import scipy.io as sio

In [2]:
def load_patient_train_data(paths):
    """Load every training recording matched by a glob pattern.

    Matching files are visited in the order produced by sorting with
    ``numericalSort`` and each one is read with ``scipy.io.loadmat``.
    The label of a file is the fifth character from the end of its path
    (the character just before a 4-character suffix such as ``.mat``),
    converted to ``int``.

    Parameters
    ----------
    paths : str
        Glob pattern selecting the files to load.

    Returns
    -------
    tuple
        ``(X, Y)``: ``X`` is a list with one ``loadmat`` result per file and
        ``Y`` is a numpy array with one integer label per file.
    """
    print('...loading train data')
    started_at = time.time()

    ordered_files = sorted(glob.glob(paths), key=numericalSort)
    # Read the file and parse its label together so that both happen
    # file by file, in sorted order.
    loaded = [(sio.loadmat(mat_file), int(mat_file[-5]))
              for mat_file in ordered_files]
    X = [record for record, _ in loaded]
    Y = numpy.array([label for _, label in loaded])

    print('time elapsed: %s sec' %(time.time() - started_at))

    return X, Y

In [3]:
def load_patient_test_data(paths):
    """Load every test recording matched by a glob pattern.

    Matching files are visited in the order produced by sorting with
    ``numericalSort`` and each one is read with ``scipy.io.loadmat``.
    Test files carry no label; the bare file name of each recording is
    returned instead so results can be matched back to their files.

    Parameters
    ----------
    paths : str
        Glob pattern selecting the files to load.

    Returns
    -------
    tuple
        ``(X, file_array)``: ``X`` is a list with one ``loadmat`` result per
        file and ``file_array`` is the list of corresponding file names
        (directory part removed), in the same order.
    """
    X = []
    file_array = []

    print('...loading test data')
    start = time.time()

    for path in sorted(glob.glob(paths), key=numericalSort):
        X.append(sio.loadmat(path))
        # Requires the module-level ``import os``.
        file_array.append(os.path.basename(path))

    print('time elapsed: %s sec' %(time.time() - start))

    return X, file_array

In [4]:
def get_channel1_data(X, n_samples=240000):
    """Collect the first channel of every recording into one 2-D array.

    Parameters
    ----------
    X : sequence
        Recordings; each element must support
        ``record['dataStruct']['data'][0][0]`` and yield a 2-D array whose
        column 0 holds the first channel.
    n_samples : int, optional
        Number of rows (time samples) in every recording.  Defaults to
        240000, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, len(X))``; column ``i`` is column 0 of
        recording ``i``.
    """
    data_channel1 = numpy.zeros([n_samples, len(X)])

    # enumerate() replaces the Python-2-only xrange(len(X)) index loop.
    for i, record in enumerate(X):
        data_channel1[:, i] = record['dataStruct']['data'][0][0][:, 0]

    return data_channel1

In [5]:
# Sort key for file names: the name is cut at every run of digits and each
# run is converted to an int, so "f2" orders before "f10".

import re
numbers = re.compile(r'(\d+)')
def numericalSort(value):
    """Return ``value`` split into alternating text and integer pieces.

    Splitting on the capturing pattern ``numbers`` leaves the text pieces at
    even positions and the digit runs at odd positions; only the latter are
    converted to ``int``.
    """
    return [int(piece) if position % 2 else piece
            for position, piece in enumerate(numbers.split(value))]

In [6]:
def n_sample_fft(X, samples, index):
    """Return FFT magnitudes for a contiguous batch of recordings.

    Parameters
    ----------
    X : sequence
        Recordings; each element must support
        ``record['dataStruct']['data'][0][0]`` and yield a 2-D array
        (rows are transformed, columns are kept as channels).
    samples : int
        Number of consecutive recordings to transform.
    index : int
        Position in ``X`` of the first recording of the batch.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(samples, rows, columns)`` holding the absolute
        value of the FFT taken along axis 0 of each recording.  The trailing
        two dimensions are read from the first recording of the batch; for an
        empty batch the historical shape ``(0, 240000, 16)`` is returned.
    """
    if samples > 0:
        # Size the output from the data instead of assuming 240000 x 16.
        num_freq, channels = numpy.shape(X[index]['dataStruct']['data'][0][0])
    else:
        num_freq, channels = 240000, 16
    X_fft = numpy.zeros([samples, num_freq, channels])

    # range() works on Python 2 and 3; xrange() exists only on Python 2.
    for i in range(samples):
        signal = X[i + index]['dataStruct']['data'][0][0]
        X_fft[i, :, :] = numpy.absolute(numpy.fft.fft(signal, axis=0))

    return X_fft

In [7]:
def n_sample_half_power_freq(X_fft, samples, freqs=None, n_fft=None):
    """Find, per sample and channel, the frequency where half the power is reached.

    Power of a bin is ``X_fft ** 2 / X_fft.shape[1]``.  The threshold for a
    channel is half of the power summed over the first ``n_fft // 2`` bins.
    Bins are then accumulated from bin 0 upward, and the result is
    ``freqs[k]`` for the first ``k`` at which the power of bins ``0 .. k-1``
    has reached the threshold.

    Parameters
    ----------
    X_fft : numpy.ndarray
        FFT magnitudes indexed as ``[sample, bin, channel]``.
    samples : int
        Number of leading samples of ``X_fft`` to process.
    freqs : sequence of float, optional
        Frequency associated with each bin index.  Defaults to the
        module-level ``xf``.
    n_fft : int, optional
        FFT length; only the first ``n_fft // 2`` bins contribute to the
        threshold.  Defaults to the module-level ``N``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(samples, X_fft.shape[2])``.  (The channel count
        used to be fixed at 16; it now follows the input.)
    """
    # Fall back to the notebook-level globals the original code relied on.
    if freqs is None:
        freqs = xf
    if n_fft is None:
        n_fft = N

    power_percentage = 0.5
    channels = X_fft.shape[2]
    # Floor division: a float slice bound is an error on Python 3.
    half = n_fft // 2
    last_freq = len(freqs) - 1
    half_pwr_freq = numpy.zeros([samples, channels])

    for i in range(samples):
        X_sample_pwr = X_fft[i, :, :] * X_fft[i, :, :] / float(X_fft.shape[1])
        thresholds = numpy.sum(X_sample_pwr[0:half], axis=0) * power_percentage

        for j in range(channels):
            S = 0
            for k in range(X_sample_pwr.shape[0]):
                if S >= thresholds[j]:
                    # When the threshold is only reached by the last of the
                    # first-half bins, k is one past the end of freqs; clamp
                    # to the highest frequency instead of raising IndexError.
                    half_pwr_freq[i, j] = freqs[min(k, last_freq)]
                    break
                S = S + X_sample_pwr[k, j]

    return half_pwr_freq

In [8]:
# Read every .mat recording in the train_1 folder: X_train1 receives the
# loaded recordings, Y_train1 the label parsed from each file name.
X_train1, Y_train1 = load_patient_train_data(
    'F:/Kaggle/Seizure Prediction/train_1/*.mat'
)

...loading train data
time elapsed: 106.374000072 sec


In [9]:
# Edges of the frequency bands of interest; each consecutive pair of edges
# delimits one band, in the order delta, theta, alpha, beta, gamma.
frequency_bins = numpy.array((0.5, 4.0, 7.0, 12.0, 30.0, 100.0))

In [11]:
# For each of the first X_train1_len training samples and each channel,
# compute PSI (FFT magnitude summed over every frequency band) and RIR
# (each band's PSI divided by the sum of PSI over all bands).
# NOTE(review): PSI / RIR presumably follow PyEEG's "power spectral
# intensity" and "relative intensity ratio" -- confirm against the reference.
bins = 10
X_train1_len = 1300
channels = 16

N = 240000
# sample spacing
T = 1.0 / 400.0
# Frequency assigned to each of the first N // 2 FFT bins.  Floor division
# keeps the point count an integer on both Python 2 and Python 3.
xf = numpy.linspace(0.0, 1.0/(2.0*T), N // 2)

# One index per sample: 0 .. X_train1_len - 1.  (The previous linspace call
# produced only X_train1_len - 1 values, so one sample was never processed.)
X_train1_index = numpy.arange(X_train1_len)
# Split the indices into `bins` batches; the per-batch counts in data_bins[0]
# set how many recordings are transformed at once, which bounds the size of
# the FFT array held in memory.  Calculation is far from optimized.
data_bins = numpy.histogram(X_train1_index, bins=bins)
index = 0
PSI = numpy.zeros([X_train1_len, len(frequency_bins)-1, channels])
RIR = numpy.zeros([X_train1_len, len(frequency_bins)-1, channels])


for batch in range(bins):
    batch_size = data_bins[0][batch]
    print('batch number: %s' %(batch))
    # fft_batch already holds magnitudes (n_sample_fft applies numpy.absolute).
    fft_batch = n_sample_fft(X_train1, batch_size, index)

    for i in range(len(frequency_bins)-1):
        # Indices of the FFT bins whose frequency lies inside band i
        # (both edges inclusive).
        in_band = numpy.where((frequency_bins[i+1] >= xf) & (xf >= frequency_bins[i]))[0]
        bin_start = in_band[0]
        bin_end = in_band[-1]
        # bin_end is itself inside the band, so the slice must run to
        # bin_end + 1 to include it.
        PSI[index:index + batch_size, i, :] = numpy.sum(
            fft_batch[:, bin_start:bin_end + 1, :], axis=1)

    # Total over all bands, per sample and channel.
    PSI_batch_sum = numpy.sum(PSI[index:index + batch_size, :, :], axis=1)

    # Broadcast the per-sample/channel total across the band axis.
    # NOTE(review): a sample/channel whose band sums are all zero gives
    # 0 / 0 = NaN here -- confirm whether such recordings need masking.
    RIR[index:index + batch_size, :, :] = (
        PSI[index:index + batch_size, :, :] / PSI_batch_sum[:, numpy.newaxis, :])

    # Advance by the size of the batch just processed (the old code indexed
    # data_bins[0] with the inner loop variable `i` instead of the batch).
    index = index + batch_size

batch number: 0




batch number: 1
batch number: 2
batch number: 3
batch number: 4
batch number: 5
batch number: 6
batch number: 7
batch number: 8
batch number: 9
