In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pysa.emd as emddev
import pysa.eemd as eemddev
import pysa.visualization as plotter
import pysa.utils as utils
import pysa.nhht as nhht
import copy
from multiprocessing import Pool
from timeit import default_timer as timer

from scipy import signal
import scipy
import os
from scipy import fft
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
%matplotlib inline
test = 1
plt.rcParams['figure.figsize'] = (12,8)



In [3]:
def IMF_filter(signal):
    max_modes = 15
    ensembles = 100
    ensembles_per_process = 10
    max_siftings = 200
    end_time = 10
    sample_freq = 250

    max_data = max(signal)
    min_data = min(signal)
    
    imfs = emddev.emd(signal, min_data, max_data, max_modes, max_siftings)
    
    imf1 = utils.reverse_normalization(imfs[0], min_data, max_data, len(signal))
    imf2 = utils.reverse_normalization(imfs[1], min_data, max_data, len(signal))
    residue = utils.reverse_normalization(imfs[-1], min_data, max_data, len(signal))
    return signal - imf1 - imf2 - residue

In [4]:
# Output imf x data
def get_IMF(signal, max_modes = 15):
    ensembles = 100
    ensembles_per_process = 10
    max_siftings = 200
    end_time = 10
    sample_freq = 250

    max_data = max(signal)
    min_data = min(signal)
    
    return emddev.emd(signal, min_data, max_data, max_modes, max_siftings)

In [5]:
def window_average(x, window = 5):
    avg_x = np.zeros(len(x) // window)
    for i in range(0, (len(x) // window)):
        w_step = i * window
        avg_x[i] = np.average(x[w_step:(w_step + 4)])
    return avg_x

In [6]:
def calculate_IMFs(data, num_imfs=4):
    # Calculate all IMFs for all sensors
    list_imfs = np.zeros((data.shape[0], data.shape[2], num_imfs, data.shape[1]))
    for m, measurement in enumerate(data):
        for s, sensor in enumerate(measurement.T):
            imfs = get_IMF(sensor)
            num_cols = 4
            if imfs.shape[0] < 4:
                num_cols = imfs.shape[0]
            list_imfs[m, s, :num_cols, :] = imfs[0:num_cols] 
    return list_imfs

In [7]:
def calculate_IMFs_alt(data, num_imfs=4):
    # Calculate all IMFs for all sensors
    list_imfs = np.zeros((data.shape[0], data.shape[2], num_imfs, data.shape[1]))
    for m, measurement in enumerate(data):
        for s, sensor in enumerate(measurement.T):
            imfs = get_IMF(sensor)
            imfs = imfs[2:-1,:]
            num_cols = 4
            if imfs.shape[0] < 4:
                num_cols = imfs.shape[0]
            list_imfs[m, s, :num_cols, :] = imfs[0:num_cols] 
    return list_imfs

In [8]:
# Takes in an array of form measurement x data x sensor
# Return array of form measurement x data x sensor
def filter_signals(data):
    f = np.zeros(data.shape)
    # Calculate all IMFs for all sensors
    for m, measurement in enumerate(data):
        for s, sensor in enumerate(measurement.T):
            f[m, :,s] = IMF_filter(sensor)
    return f

In [9]:
def ERP(amplitudes, divider=3, initial=1, cutoff=1, fs=250):
    
    a = np.square(amplitudes)
    # Calculate mean before event
    r_i = a[:, (initial * fs):(divider * fs)].mean(axis=1)
    a_j = np.zeros(a[:, (divider * fs):-(cutoff * fs)].shape)
    for r, row in enumerate(a[:, (divider * fs):-(cutoff * fs)]):
        a_j[r, :] = [100 * ((a_j_t - r_i[r]) / r_i[r]) for a_j_t in row]
    # Calculate mean after event
    #a_j = a[:, (divider * fs):-(cutoff * fs)].mean(axis=1)
    # Calculate event related potential for given amplitudes
    #erp =  np.mean(100 * ((a_j - r_i) / r_i))
    erp = a_j.mean(axis=1)
    
    return erp

In [10]:
def load_all_signals(folder):
    path = 'Data/' + folder
    files = []
    count = 0
    for i in os.listdir(path):
        files.append(i)
    data = []
    for file in files:
        df = pd.read_csv(os.path.join(path, file), index_col=0)
        data.append(df.as_matrix())
    return np.asarray(data)

In [11]:
def signal_to_erp(signal):
    err_idx = np.array([])
    #print("Calculating fd")
    #fd = filter_signals(signal)
    #fd = fd[:,100:-100,:]
    print("Calculating imfs")
    imfs = calculate_IMFs_alt(signal)
    imfs = imfs[:,:,:,100:-100]
    print("Calculating erps")
    erps = np.zeros((imfs.shape[0] - 3, imfs.shape[1], imfs.shape[2]))
    for m, measurement in enumerate(imfs[2:-1,:,:]):
        for s, sensor in enumerate(measurement):
                try:
                    _, a = nhht.nhht(sensor, 250)
                except:
                    err_idx = np.append(err_idx, m)
                erps[m,s,:] = ERP(a)
    return imfs, erps, err_idx

In [12]:
def prepare_erps(erps, err_idx):
    new_erps = []
    for i, erp in enumerate(erps):
        new_erps += [prepare_erp(erp, i, err_idx)]
    return new_erps

In [13]:
def prepare_erp(erp, i, err_idx):
    new_erp = copy.deepcopy(erp)
    new_erp = np.delete(new_erp, err_idx, 0)
    new_erp = new_erp.reshape((new_erp.shape[0], new_erp.shape[1] * new_erp.shape[2]))
    new_erp = np.hstack((new_erp, np.full((new_erp.shape[0], 1), i)))
    return new_erp

In [14]:
def classify_SVM(erps):
    train = np.vstack(tuple(erps))
    X, Y = train[:, 0:-1], train[:, -1].astype(int)
    clf = make_pipeline(preprocessing.StandardScaler(), svm.SVC(kernel='rbf'))
    scores = cross_val_score(clf, X, Y, cv=3, scoring='accuracy')
    print("Accuracy: ", scores.mean())

In [15]:
from pprint import pprint
def grid_search_SVM(erps):
    train = np.vstack(tuple(erps))
    np.random.shuffle(train)
    X_train, Y_train = train[:, 0:-1], train[:, -1].astype(int)
    
    tuned_parameters2 = [{'kernel': ['rbf'], 'gamma': [0.0001,0.001,0.01,1.0,2],
                         'C': [1,2,4,6,8,10]},
                        {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    
    tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5],
                         'C': [0.1, 1, 10, 100, 1000, 10000]},
                        {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
    scaler = StandardScaler()
    scaler.fit(X_train)

    grid = GridSearchCV(estimator=svm.SVC(C=1), param_grid=tuned_parameters, cv=10, scoring='accuracy', n_jobs=-1) 
    grid.fit(scaler.transform(X_train), Y_train)

    # View the accuracy score
    print('Best score for data1:', grid.best_score_)
    # View the best parameters for the model found using grid search
    print('Best C:',grid.best_estimator_.C) 
    print('Best Kernel:',grid.best_estimator_.kernel)
    print('Best Gamma:',grid.best_estimator_.gamma)
    pprint(grid.grid_scores_)

In [16]:
right = load_all_signals('j_r_o_5')
neutral = load_all_signals('j_n_o_5')

In [17]:
right[:,250:-250,:].shape

(70, 1000, 8)

In [18]:
elapse_time = timer()
p = Pool(2)
t = p.map(signal_to_erp, [neutral, right])
print("Time elapsed is ", timer() - elapse_time)

Calculating imfs
Calculating imfs
Calculating erps
Calculating erps
Time elapsed is  213.49888394100003


In [19]:
all_imfs_1, all_erp_1, ei1 = t[0]
all_imfs_2, all_erp_2, ei2 = t[1]

In [20]:
ei = np.unique(np.append(ei1, ei2))

In [21]:
ei.shape

(22,)

In [22]:
erps = prepare_erps([all_erp_1, all_erp_2], ei)

  This is separate from the ipykernel package so we can avoid doing imports until


In [23]:
erps

[array([[ -2.85854732e+01,  -1.30468500e+01,   6.39540969e+01, ...,
           5.40350838e+02,   1.03275229e+02,   0.00000000e+00],
        [  1.53022333e+01,   3.28183275e+01,   1.61052724e+02, ...,
          -5.07848355e+01,   3.37631614e+00,   0.00000000e+00],
        [ -8.78583866e+00,  -1.37793893e+00,  -2.92955635e+01, ...,
          -5.48940442e+01,   1.93982569e+02,   0.00000000e+00],
        ..., 
        [  1.92610099e+01,  -7.30003860e+01,  -7.01661100e+01, ...,
          -6.70732438e+01,   3.23681666e+00,   0.00000000e+00],
        [ -4.14251524e+01,   6.91624902e+01,  -6.19116967e+00, ...,
          -9.14337715e+01,  -6.34293684e-06,   0.00000000e+00],
        [ -3.45157009e+01,   4.94937146e+01,   7.30684409e+01, ...,
          -5.29478487e+01,   1.33489115e+02,   0.00000000e+00]]),
 array([[  1.30671189e+02,   8.90640766e+02,   2.13357328e+02, ...,
           2.44373999e+02,   4.85629377e+01,   1.00000000e+00],
        [  6.81752331e+01,  -3.59763785e+00,   2.78764436e+0

In [24]:
grid_search_SVM(erps)

Best score for data1: 0.6
Best C: 0.1
Best Kernel: rbf
Best Gamma: 0.1
[mean: 0.54444, std: 0.05250, params: {'kernel': 'rbf', 'C': 0.1, 'gamma': 1},
 mean: 0.60000, std: 0.12916, params: {'kernel': 'rbf', 'C': 0.1, 'gamma': 0.1},
 mean: 0.55556, std: 0.10515, params: {'kernel': 'rbf', 'C': 0.1, 'gamma': 0.01},
 mean: 0.52222, std: 0.12600, params: {'kernel': 'rbf', 'C': 0.1, 'gamma': 0.001},
 mean: 0.48889, std: 0.06344, params: {'kernel': 'rbf', 'C': 0.1, 'gamma': 0.0001},
 mean: 0.48889, std: 0.03000, params: {'kernel': 'rbf', 'C': 0.1, 'gamma': 1e-05},
 mean: 0.50000, std: 0.09839, params: {'kernel': 'rbf', 'C': 1, 'gamma': 1},
 mean: 0.51111, std: 0.09290, params: {'kernel': 'rbf', 'C': 1, 'gamma': 0.1},
 mean: 0.57778, std: 0.06708, params: {'kernel': 'rbf', 'C': 1, 'gamma': 0.01},
 mean: 0.52222, std: 0.12600, params: {'kernel': 'rbf', 'C': 1, 'gamma': 0.001},
 mean: 0.48889, std: 0.06344, params: {'kernel': 'rbf', 'C': 1, 'gamma': 0.0001},
 mean: 0.48889, std: 0.03000, params: 

