In [1]:
import numpy as np
import pandas as pd
from biosppy.signals import *
import matplotlib.pyplot as plt
import keras
from keras import backend as K
from keras.models import Model, Sequential
from sklearn.preprocessing import StandardScaler
from keras.layers import *
from keras.callbacks import *
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.svm import SVC
from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import RidgeCV, LassoCV, Ridge, Lasso
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

Using TensorFlow backend.
This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
def standardization(x):
    """Return a zero-mean, unit-variance copy of ``x``.

    The mean and standard deviation are taken over every element of ``x``
    (a single global statistic, not per axis).

    Parameters
    ----------
    x : array_like
        Numeric data of any shape.

    Returns
    -------
    numpy.ndarray
        New float64 array with the same shape as ``x``. The input is left
        untouched, so re-running a cell that calls this function gives the
        same result every time.
    """
    # Work on a float copy: the in-place ``-=`` / ``/=`` form overwrote the
    # caller's array and could not be applied to integer input.
    values = np.array(x, dtype=float)
    values -= values.mean()
    spread = values.std()
    # A constant signal has zero spread; leave it centred at 0 instead of
    # dividing by zero and filling the result with NaN.
    if spread != 0:
        values /= spread
    return values
    
    

In [3]:
# Training labels: second column of train_labels.csv (first file line is the header).
y_traindata = pd.read_csv("train_labels.csv", header=0)   
y_train = y_traindata.iloc[:,1].values

# Only the first column of the test file is kept; it is written back out as
# the "Id" column of output.csv at the end of the notebook.
testdata = pd.read_csv("test_eeg1.csv", header=0)   
y_testid = testdata.iloc[:,0].values

In [4]:

def _read_signal_rows(path, first_row, row_count):
    """Parse ``row_count`` data rows of the CSV at ``path`` into a float array.

    ``first_row`` is a 0-based index into the data rows (the header line is
    not counted). The first column of every row is dropped and the remaining
    cells are converted with ``float``.
    """
    from itertools import islice

    with open(path) as handle:
        # +1 skips the header line. islice stops reading once the requested
        # window is consumed instead of loading the whole file into memory.
        window = islice(handle, first_row + 1, first_row + 1 + row_count)
        rows = [[float(cell) for cell in line.split(',')[1:]] for line in window]
    return np.array(rows)


def read_data(subject, file, epochs_per_subject=21600):
    """Load one subject's rows from the ``<file>_eeg1/_eeg2/_emg.csv`` files.

    Parameters
    ----------
    subject : int
        1-based subject number; subject ``k`` owns data rows
        ``(k - 1) * epochs_per_subject`` up to ``k * epochs_per_subject``.
    file : str
        File name prefix, e.g. ``'train'`` reads ``train_eeg1.csv``,
        ``train_eeg2.csv`` and ``train_emg.csv``.
    epochs_per_subject : int, optional
        Number of data rows belonging to each subject (default 21600).

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_eeg1, x_eeg2, x_emg)``, one 2-D float array per file. Fewer rows
        are returned if a file ends before the requested window does.

    Raises
    ------
    ValueError
        If ``subject`` is smaller than 1, or a cell cannot be parsed as float.
    """
    if subject < 1:
        raise ValueError("subject must be >= 1, got {}".format(subject))
    first_row = epochs_per_subject * (subject - 1)
    return tuple(
        _read_signal_rows("{}_{}.csv".format(file, channel), first_row, epochs_per_subject)
        for channel in ("eeg1", "eeg2", "emg")
    )


In [5]:
# Load the three training subjects. Each read_data call returns that
# subject's rows as (eeg1, eeg2, emg) arrays.
x_train_1_eeg1,  x_train_1_eeg2, x_train_1_emg  = read_data(1,'train')
x_train_2_eeg1,  x_train_2_eeg2, x_train_2_emg  = read_data(2,'train')
x_train_3_eeg1,  x_train_3_eeg2, x_train_3_emg  = read_data(3, 'train')

In [None]:
# Load the two test subjects with the same (eeg1, eeg2, emg) layout.
# NOTE(review): no later cell in the visible notebook consumes these arrays.
x_test_1_eeg1,  x_test_1_eeg2, x_test_1_emg  = read_data(1, 'test')
x_test_2_eeg1,  x_test_2_eeg2, x_test_2_emg  = read_data(2, 'test')


In [76]:
# One label slice per training subject (21600 rows each, the same window
# size read_data uses). Subtracting 1 shifts every label down by one, so the
# classes counted and one-hot encoded later are 0, 1 and 2.
label_1 = y_train[0:21600]-1
label_2 = y_train[21600:43200]-1
label_3 = y_train[43200:64800]-1

In [83]:

def eeg_feature_extraction(x):
    """Return the filtered signal that ``eeg.eeg`` computes for one epoch.

    ``eeg`` is expected to be the module brought in by the notebook's
    ``from biosppy.signals import *``.

    Parameters
    ----------
    x : numpy.ndarray
        One epoch; callers in this notebook pass arrays shaped ``(512, 1)``.

    Returns
    -------
    numpy.ndarray
        The filtered signal flattened to shape ``(512,)``. ``reshape`` raises
        ``ValueError`` if the filtered signal does not hold exactly 512 values.
    """
    # Only the second field of the result (the filtered signal) is needed;
    # the other fields the original unpacked were never used.
    filtered_sig = eeg.eeg(signal=x, sampling_rate=128.0, show=False)[1]
    return filtered_sig.reshape(512)



def apply_fun(x):
    """Run ``eeg_feature_extraction`` on every epoch along the first axis of ``x``.

    Returns the per-epoch results gathered into a single NumPy array, in the
    same order as the input epochs.
    """
    extracted = [eeg_feature_extraction(x[row]) for row in range(x.shape[0])]
    return np.array(extracted)

def concatenate_eeg(x1, x2):
    """Stack two 2-D arrays as the two channels of one 3-D array.

    Parameters
    ----------
    x1, x2 : numpy.ndarray
        Arrays of shape ``(n, m)``; ``x1`` fills channel 0, ``x2`` channel 1.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n, m, 2)``.

    Raises
    ------
    ValueError
        If ``x1`` or ``x2`` cannot be assigned into an ``(n, m)`` slice.
    """
    # Named ``stacked`` rather than ``eeg`` so the local does not hide the
    # ``eeg`` module that eeg_feature_extraction relies on.
    stacked = np.zeros((x1.shape[0], x1.shape[1], 2))
    # Whole-array assignment replaces the per-row Python loop.
    stacked[:, :, 0] = x1
    stacked[:, :, 1] = x2
    return stacked

def concatenate_total(eeg, emg):
    """Append an EMG channel to a two-channel EEG array.

    Parameters
    ----------
    eeg : numpy.ndarray
        Array of shape ``(n, m, 2)``; copied into channels 0 and 1.
        (Inside this function the parameter name hides the ``eeg`` module.)
    emg : numpy.ndarray
        Array of shape ``(n, m)``; copied into channel 2.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n, m, 3)``.

    Raises
    ------
    ValueError
        If ``eeg`` or ``emg`` cannot be assigned into the matching slice.
    """
    data = np.zeros((eeg.shape[0], eeg.shape[1], 3))
    # Whole-array assignment replaces the per-row Python loop.
    data[:, :, :2] = eeg
    data[:, :, 2] = emg
    return data

In [38]:

# Subject 1: filter both EEG channels epoch by epoch (each epoch is passed as
# a (512, 1) array), stack them as two channels, then standardize the stack
# with a single global mean/std.
x_train_1_eeg1_extracted = apply_fun(x_train_1_eeg1.reshape(21600, 512, 1))
x_train_1_eeg2_extracted = apply_fun(x_train_1_eeg2.reshape(21600, 512, 1))
x_train_1_eeg = concatenate_eeg(x_train_1_eeg1_extracted , x_train_1_eeg2_extracted)
x_train_1_eeg_stand = standardization(x_train_1_eeg)

In [75]:


# Subject 2: same steps as subject 1 (filter each EEG channel, stack, standardize).
x_train_2_eeg1_extracted = apply_fun(x_train_2_eeg1.reshape(21600, 512, 1))
x_train_2_eeg2_extracted = apply_fun(x_train_2_eeg2.reshape(21600, 512, 1))
x_train_2_eeg = concatenate_eeg(x_train_2_eeg1_extracted , x_train_2_eeg2_extracted)
x_train_2_eeg_stand = standardization(x_train_2_eeg)

# Subject 3: same steps again. Each subject is standardized with its own statistics.
x_train_3_eeg1_extracted = apply_fun(x_train_3_eeg1.reshape(21600, 512, 1))
x_train_3_eeg2_extracted = apply_fun(x_train_3_eeg2.reshape(21600, 512, 1))
x_train_3_eeg = concatenate_eeg(x_train_3_eeg1_extracted , x_train_3_eeg2_extracted)
x_train_3_eeg_stand = standardization(x_train_3_eeg)



In [84]:
# Subject 1: the raw EMG (no filtering step) is standardized and appended as
# the third channel next to the two standardized EEG channels.
x_train_1_emg_stand = standardization(x_train_1_emg)
x_train_1 = concatenate_total(x_train_1_eeg_stand, x_train_1_emg_stand)


In [92]:
# Subjects 2 and 3: same EMG standardization and three-channel assembly as subject 1.
x_train_2_emg_stand = standardization(x_train_2_emg)
x_train_2 = concatenate_total(x_train_2_eeg_stand, x_train_2_emg_stand)
x_train_3_emg_stand = standardization(x_train_3_emg)
x_train_3 = concatenate_total(x_train_3_eeg_stand, x_train_3_emg_stand)

In [None]:
##################################################################

In [95]:


# Training set = subjects 1 and 3. Subject 2 is left out here and used as the
# evaluation set further down (nn.predict(x_train_2)).
# Despite its name, nn_train_emg holds all three channels (EEG1, EEG2, EMG).
nn_train_emg = np.concatenate((x_train_1,x_train_3),axis = 0)
label_total = np.concatenate((label_1, label_3), axis = 0)



In [96]:
# Number of training epochs in each of the three classes, one count per line.
for class_label in (0, 1, 2):
    print(sum(label_total == class_label))

22265
18486
2449


In [107]:
# Balance the classes by random undersampling: classes 0 and 1 are each cut
# down to the size of class 2, the smallest class in the counts printed above.
np.random.seed(1)
minority_count = np.count_nonzero(label_total == 2)
class0_idx = np.flatnonzero(label_total == 0)
class1_idx = np.flatnonzero(label_total == 1)
# Draw, without replacement, the surplus rows of each majority class.
del_class0_idx = np.random.choice(class0_idx, size=len(class0_idx) - minority_count, replace=False)
del_class1_idx = np.random.choice(class1_idx, size=len(class1_idx) - minority_count, replace=False)
print('del_0_idx:', del_class0_idx[:3])
print('del_1_idx:', del_class1_idx[:3])
# Remove the same rows from the data and from the labels so they stay aligned.
drop_idx = np.hstack((del_class0_idx, del_class1_idx))
nn_train_deleted = np.delete(nn_train_emg, drop_idx, axis=0)
label_total_deleted = np.delete(label_total, drop_idx, axis=0)

del_0_idx: [8667  279 7146]
del_1_idx: [ 7856   561 18067]


In [102]:
# One-hot encode the balanced labels into 3 columns to match the 3-unit softmax output.
label_total_trans = keras.utils.to_categorical(label_total_deleted, 3)


In [108]:
# Sanity check: labels and data must have the same number of rows.
print(label_total_trans.shape)
print(nn_train_deleted.shape)

(7347, 3)
(7347, 512, 3)


In [138]:

# Model: Conv1D -> Conv1D -> MaxPooling1D -> LSTM -> Flatten -> three Dense
# layers -> 3-way softmax. Each input is one epoch of 512 samples x 3 channels.
# The commented-out layers are earlier variants that are currently disabled.
nn = Sequential()
nn.add(InputLayer((512,3)))
# nn.add(Conv1D(100, 30, strides=1, activation='relu'))
# nn.add(Dropout(0.3))
nn.add(Conv1D(10, 5, strides=1, activation='relu'))
nn.add(Dropout(0.3))
nn.add(Conv1D(50, 10, strides=1, activation='relu'))
nn.add(Dropout(0.3))
# nn.add(Conv1D(100, 20, strides=1, activation='relu'))
# nn.add(Dropout(0.3))
nn.add(MaxPooling1D(2))
# nn.add(Conv1D(50, 30, strides=1, activation='relu'))
# nn.add(Dropout(0.3))
# nn.add(Conv1D(10, 30, strides=1, activation='relu'))
# nn.add(Dropout(0.3))
# return_sequences=True keeps the LSTM output for every time step; Flatten
# below turns that sequence into one vector for the Dense layers.
nn.add(LSTM(20, activation ='relu', return_sequences= True, return_state= False))
nn.add(Dropout(0.1))
nn.add(Flatten())
nn.add(Dense(1000,activation = 'relu'))
nn.add(Dropout(0.3))
# nn.add(Dense(200,activation = 'relu'))
# nn.add(Dropout(0.3))
nn.add(Dense(500,activation = 'relu'))
nn.add(Dropout(0.3))
nn.add(Dense(100,activation = 'relu'))
nn.add(Dropout(0.3))
nn.add(Dense(3,activation = 'softmax'))
# Adadelta with its default settings; the loss expects one-hot labels.
optim = keras.optimizers.Adadelta()
nn.compile(optimizer=optim,
          loss='categorical_crossentropy',
          metrics=['categorical_accuracy'])

nn.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_50 (Conv1D)           (None, 508, 10)           160       
_________________________________________________________________
dropout_116 (Dropout)        (None, 508, 10)           0         
_________________________________________________________________
conv1d_51 (Conv1D)           (None, 499, 50)           5050      
_________________________________________________________________
dropout_117 (Dropout)        (None, 499, 50)           0         
_________________________________________________________________
max_pooling1d_23 (MaxPooling (None, 249, 50)           0         
_________________________________________________________________
lstm_23 (LSTM)               (None, 249, 20)           5680      
_________________________________________________________________
dropout_118 (Dropout)        (None, 249, 20)           0         
__________

In [139]:
# Train on the balanced set. No validation data is passed, so EarlyStopping
# watches the *training* loss and stops after 8 epochs without improvement.
nn.fit(nn_train_deleted, label_total_trans, epochs=200, verbose=2, batch_size = 200,callbacks=[EarlyStopping(monitor='loss', patience=8)])


Epoch 1/200
 - 73s - loss: 0.9681 - categorical_accuracy: 0.4846
Epoch 2/200
 - 64s - loss: 0.6839 - categorical_accuracy: 0.6746
Epoch 3/200
 - 64s - loss: 0.6361 - categorical_accuracy: 0.7264
Epoch 4/200
 - 66s - loss: 0.5396 - categorical_accuracy: 0.7735
Epoch 5/200
 - 66s - loss: 0.4716 - categorical_accuracy: 0.8138
Epoch 6/200
 - 65s - loss: 0.4183 - categorical_accuracy: 0.8312
Epoch 7/200
 - 64s - loss: 0.3768 - categorical_accuracy: 0.8578
Epoch 8/200
 - 64s - loss: 0.3160 - categorical_accuracy: 0.8857
Epoch 9/200
 - 65s - loss: 0.2865 - categorical_accuracy: 0.8986
Epoch 10/200
 - 66s - loss: 0.2845 - categorical_accuracy: 0.8976
Epoch 11/200
 - 67s - loss: 0.2726 - categorical_accuracy: 0.8998
Epoch 12/200
 - 64s - loss: 0.2524 - categorical_accuracy: 0.9077
Epoch 13/200
 - 64s - loss: 0.2638 - categorical_accuracy: 0.9039
Epoch 14/200
 - 65s - loss: 0.2558 - categorical_accuracy: 0.9058
Epoch 15/200
 - 66s - loss: 0.2459 - categorical_accuracy: 0.9098
Epoch 16/200
 - 64s

<keras.callbacks.History at 0x1d3a0ce390>

In [140]:
# Predict the held-out subject 2; argmax turns the softmax rows into class ids 0/1/2.
y_pred_ifweak = np.argmax(nn.predict(x_train_2), axis=1)

In [141]:
# Balanced accuracy on subject 2, which was not part of the training set.
balanced_accuracy_score(label_2, y_pred_ifweak)

0.7281816412116401

In [None]:
# One prediction slot per test id. np.zeros yields a float array, and 0 acts
# as the "not yet assigned" marker that a later cell checks for.
results = np.zeros(y_testid.shape[0])

In [None]:
# Write the final class labels (1, 2, 3) into the submission vector.
# NOTE(review): test_nrem_idx, test_weak_idx_original and test_rem_idx_original
# are not defined anywhere in the visible notebook, so this cell raises
# NameError on a fresh kernel. Confirm where these index arrays come from.
results[test_nrem_idx[0]]=2
results[test_weak_idx_original]=1
results[test_rem_idx_original]=3

In [None]:
# Every slot must have been overwritten: a remaining 0 means a test row got no label.
# Nothing is printed when unassigned rows remain.
if sum(results == 0) == 0:
    print('ok')
#    print(balanced_accuracy_score(label_2, results))

In [None]:
##################### write to file ######################################

In [None]:
# Write the submission file: a header row followed by one "Id,y" line per test row.
# NOTE(review): results is a float array (created with np.zeros), so labels
# are written as e.g. "2.0" rather than "2". Confirm the consumer accepts that.
with open('output.csv', 'w') as f:
    f.write("{},{}\n".format("Id", "y"))
    for i in range(len(y_testid)):
        f.write("{},{}\n".format(y_testid[i], results[i]))

In [None]:

#######################  visualization feature  ######################################################

In [None]:
# Plot one EMG epoch per class side by side.
# NOTE(review): x_train_emg, class2_idx and class3_idx are not defined in the
# visible notebook. class1_idx is built earlier as np.where(...)[0], a 1-D
# index array, so class1_idx[0][1] does not match it. This cell appears to
# rely on variables from an earlier kernel session; confirm the intended
# arrays before running it.
class1_emg = x_train_emg[class1_idx[0][1]]
class2_emg = x_train_emg[class2_idx[0][1]]
class3_emg = x_train_emg[class3_idx[0][1]]


plt.figure(figsize=(10, 3))
plt.subplot(131)
plt.plot(class1_emg)
plt.subplot(132)
plt.plot(class2_emg)
plt.subplot(133)
plt.plot(class3_emg)
plt.suptitle('emg')
plt.show()


In [None]:
# Pick one epoch per class and pair its two EEG channels as a (512, 2) array.
# NOTE(review): x_train_eeg1, x_train_eeg2, class2_idx and class3_idx are not
# defined in the visible notebook, and class1_idx[0][2] does not match the
# 1-D class1_idx built earlier. Confirm the intended source arrays.
class1_eeg1 = x_train_eeg1[class1_idx[0][2],:].reshape((512,1))
class1_eeg2 = x_train_eeg2[class1_idx[0][2],:].reshape((512,1))
class1_eeg = np.hstack((class1_eeg1, class1_eeg2))
class2_eeg1 = x_train_eeg1[class2_idx[0][2],:].reshape((512,1))
class2_eeg2 = x_train_eeg2[class2_idx[0][2],:].reshape((512,1))
class2_eeg = np.hstack((class2_eeg1, class2_eeg2))
class3_eeg1 = x_train_eeg1[class3_idx[0][2],:].reshape((512,1))
class3_eeg2 = x_train_eeg2[class3_idx[0][2],:].reshape((512,1))
class3_eeg = np.hstack((class3_eeg1, class3_eeg2))

def feature_egg(x):
    """Run ``eeg.eeg`` on ``x`` at 128 Hz and return its ten outputs as a tuple.

    The tuple order is: ts, filtered signal, features_ts, theta, alpha_low,
    alpha_high, beta, gamma, plf_pairs, plf.
    """
    (timestamps, filtered, feat_ts, theta_band, alpha_lo, alpha_hi,
     beta_band, gamma_band, pair_index, plf_values) = eeg.eeg(
        signal=x, sampling_rate=128.0, show=False)
    return (timestamps, filtered, feat_ts, theta_band, alpha_lo, alpha_hi,
            beta_band, gamma_band, pair_index, plf_values)
    
def plot_class(x1, x2, x3):
    """Plot the ``plf`` output of ``feature_egg`` for three signals side by side.

    Each signal gets one panel of a 1x3 figure, with ``features_ts`` on the
    x-axis and ``plf`` on the y-axis. The figure is shown, nothing is returned.
    """
    # Extract features for all three signals before any figure is created.
    extracted = [feature_egg(signal) for signal in (x1, x2, x3)]

    plt.figure(figsize=(10, 3))
    for panel, fields in zip((131, 132, 133), extracted):
        feature_times, plf_values = fields[2], fields[9]
        plt.subplot(panel)
        plt.plot(feature_times, plf_values)
    plt.suptitle('plf')
    plt.show()