In [22]:
import numpy as np
from scipy import linalg

import pandas as pd

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt

import gc

from keras.utils import plot_model

%matplotlib inline

In [2]:
sample = 128 #hz
trial_time = 3 #s

origin_channel = 5 #5 channel eeg


In [3]:
def cov_mat(X):
    return np.matmul(X , X.T)/np.trace(np.matmul(X , X.T))

#计算每种样本的平均协方差矩阵
def average_norm_cov_mat(data):
    count = data.shape[0]
    sum_mat = np.zeros(shape=(data[0].shape[0] , data[0].shape[0]))
    
    for i in range(count):
        sum_mat += cov_mat(data[i])
    
    return sum_mat/count

def load_data(file_name):
    #pink and white
    
    temp = pd.read_csv(file_name)
    
    #删除前3秒和后2秒数据
    temp = temp.iloc[ : temp.shape[0] - 2*128] #后2秒 2s sample:128hz
    temp = temp.iloc[3*128 : ] #前3秒 3s sample:128hz
    
    for column in temp.columns:
        temp[column] = (temp[column] - temp[column].mean())/temp[column].std() #norm
    
    #5 channels data
    return temp[['AF3' , 'T7','Pz' , 'T8' , 'AF4']]

def sep(one_data , label):
    train_data = []
    train_labels = []
    
    size = sample*trial_time #384
    
    for i in range(one_data.shape[0] - size):
        train_data.append(one_data.iloc[i : i+size].values) #add one train sample
        train_labels.append(label) #corresponding label
    
    return train_data , train_labels

In [4]:
def concat_eeg_csv(file_names):
    #concat a big csv file
    first_file = load_data(file_name = file_names[0])
    
    file_names.remove(file_names[0])
    
    for file_name in file_names:
        first_file = first_file.append(load_data(file_name = file_name) , ignore_index = True)
    
    return first_file

In [5]:
#low pass filter
#50Hz

def low_pass(data):
    point = 50 #highest freq = 50hz
    length = sample * trial_time #256
    
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            a = np.fft.fft(data[i][j]).real
            b = np.fft.fft(data[i][j]).imag
            a[point : length-point] = 0
            b[point : length-point] = 0
            #重建频谱
            new_freq = [np.complex(a[i] , b[i]) for i in range(length)]
            new_freq = np.array(new_freq)
            
            data[i][j] = np.fft.ifft(new_freq)
        
    return data


In [6]:
#使用白色
data_1 = concat_eeg_csv(['data/train_1/fei_white_1.csv' , 'data/train_1/fei_white_2.csv'])
data_2 = concat_eeg_csv(['data/train_1/sen_white_1.csv' , 'data/train_1/sen_white_2.csv'])

#使用粉色
#data_1 = concat_eeg_csv(['data/train_1/fei_pink_1.csv' , 'data/train_1/fei_pink_2.csv'])
#data_2 = concat_eeg_csv(['data/train_1/sen_pink_1.csv' , 'data/train_1/sen_pink_2.csv'])

train_data_1 , train_labels_1 = sep(data_1 , 0)
train_data_2 , train_labels_2 = sep(data_2 , 1)

train_data_1 = np.array(train_data_1)
train_data_2 = np.array(train_data_2)

train_labels_1 = np.array(train_labels_1)
train_labels_2 = np.array(train_labels_2)

train_data_1 = np.transpose(train_data_1 , axes=(0 , 2 , 1))
train_data_2 = np.transpose(train_data_2 , axes=(0 , 2 , 1))

In [7]:
print(train_data_1.shape , train_data_2.shape)

(7752, 5, 384) (18980, 5, 384)


In [38]:
#=============
#滤波阶段 此时一个小样本为3秒的数据量  此时大致认为信号为平稳的
#=============

#train_data_1 = low_pass(train_data_1)
#train_data_2 = low_pass(train_data_2)



In [32]:
#train_data_features = np.transpose(train_data_features , axes=(0 , 2 , 1))

In [8]:
train_data = np.concatenate((train_data_1 , train_data_2))

train_labels = np.concatenate((train_labels_1 , train_labels_2))

In [9]:
print(train_data.shape , train_labels.shape)

(26732, 5, 384) (26732,)


In [10]:
#============
#============
#构建autoencoder

from keras.models import Sequential , Model
from keras.layers import Dense , Dropout , Conv2D , MaxPooling2D , Reshape , BatchNormalization , Flatten
from keras.layers import Input

Using TensorFlow backend.


In [11]:
flatten_train_data = np.reshape(train_data , newshape=(train_data.shape[0] , -1))

In [12]:
flatten_train_data.shape

(26732, 1920)

In [13]:
input_eeg = Input(shape = (sample*trial_time*origin_channel,))

#encoder
encoder = Dense(units=1024 , activation='elu')(input_eeg)
encoder = Dense(units=512 , activation='elu')(encoder)
encoder = Dense(units=256 , activation='elu')(encoder)
encoder = Dense(units=128 , activation='elu')(encoder)
encoder = Dense(units=64 , activation='elu')(encoder)
encoder = Dense(units=32 , activation='elu')(encoder)
encoder = Dense(units=16 , activation='elu')(encoder)
encoder = Dense(units=8 , activation='elu')(encoder)
encoder_output = Dense(units=4 , activation='elu')(encoder)

#decoder
decoder = Dense(units=8 , activation='elu')(encoder_output)
decoder = Dense(units=16 , activation='elu')(decoder)
decoder = Dense(units=32 , activation='elu')(decoder)
decoder = Dense(units=64 , activation='elu')(decoder)
decoder = Dense(units=128 , activation='elu')(decoder)
decoder = Dense(units=256 , activation='elu')(decoder)
decoder = Dense(units=512 , activation='elu')(decoder)
decoder = Dense(units=1024 , activation='elu')(decoder)
decoder_output = Dense(units=sample*trial_time*origin_channel , activation='elu')(decoder)

In [14]:
autoencoder = Model(inputs=input_eeg , outputs=decoder_output)

encoder = Model(inputs=input_eeg , outputs=encoder_output)

In [24]:
plot_model(autoencoder , to_file='csp-3.3-autoencoder.png' , show_shapes=True , rankdir='LR')

In [22]:
#encoder.summary()

In [15]:
autoencoder.compile(optimizer='adam' , loss = 'mse')

In [16]:
#训练AE
autoencoder.fit(x=flatten_train_data[0 , np.newaxis] , y=flatten_train_data[0 , np.newaxis] , batch_size=1 , epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x1f5f52e76d8>

In [18]:
#autoencoder降维后的新训练集 4维特征
new_train_data = encoder.predict(x = flatten_train_data)

In [None]:
#============
#============
#使用降维后的训练集进行训练

In [46]:
svc = SVC(verbose = True)
svc.fit(new_train_data , train_labels)

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=True)

In [47]:
svc.score(new_train_data , train_labels)

0.9988959837679776

In [48]:
rf = RandomForestClassifier(verbose=False)

rf.fit(new_train_data , train_labels)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=False, warm_start=False)

In [49]:
rf.score(new_train_data , train_labels)

1.0

In [50]:
adaboost = AdaBoostClassifier()
adaboost.fit(new_train_data , train_labels)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)

In [51]:
adaboost.score(new_train_data , train_labels)

1.0

In [None]:
#============
#============

In [None]:
#============
#验证集阶段

In [19]:
#==============
#==============
#val step
#白色
data_1_val = load_data('data/val_1/fei_white_3.csv')
data_2_val = load_data('data/val_1/sen_white_3.csv')

#粉色
#data_1_val = load_data('data/val_1/fei_pink_3.csv')
#data_2_val = load_data('data/val_1/sen_pink_3.csv')

val_data_1 , val_labels_1 = sep(data_1_val , 0)
val_data_2 , val_labels_2 = sep(data_2_val , 1)

val_data_1 = np.array(val_data_1)
val_data_2 = np.array(val_data_2)

val_labels_1 = np.array(val_labels_1)
val_labels_2 = np.array(val_labels_2)

val_data_1 = np.transpose(val_data_1 , axes=(0 , 2 , 1))
val_data_2 = np.transpose(val_data_2 , axes=(0 , 2 , 1))

In [53]:
#================
#验证集滤波

#val_data_1 = low_pass(val_data_1)
#val_data_2 = low_pass(val_data_2)



In [20]:
#===================
#concat

val_data = np.concatenate((val_data_1 , val_data_2))
val_labels = np.concatenate((val_labels_1 , val_labels_2))

In [21]:
print(val_data.shape , val_labels.shape)

(12877, 5, 384) (12877,)


In [None]:
#展平以满足autoencoder
flatten_val_data = np.reshape(val_data , newshape=(val_data.shape[0] , -1))

In [None]:
#使用降维后的验证集进行模型测试
new_val_data = encoder.predict(x=flatten_val_data)

In [None]:
#==============

In [None]:
#==============
#==============
#验证集在模型上测试

In [56]:
#SVM
svc.score(new_val_data , val_labels)

0.9207003089598352

In [57]:
#RandomForest
rf.score(new_val_data , val_labels)

0.9216695947173926

In [58]:
adaboost.score(new_val_data , val_labels)

0.9050705761192221

In [59]:
#使用CNN进行处理
#最后的输出设置为1个输出单元 激活为sigmoid
#========
#========


from keras.models import Sequential
from keras.layers import Dense , Dropout , Conv2D , MaxPooling2D , Reshape , BatchNormalization , Flatten
from keras.regularizers import l2

Using TensorFlow backend.


In [60]:
keep_prob = 0.5

model = Sequential()
#sample*trial_time=384
#网络输入为384维

model.add(Dense(units=30 , input_shape=(sample*trial_time , 4) , activation='elu' , kernel_regularizer=l2()))
model.add(Reshape((30 , sample*trial_time , 1)))

#conv pool 1
model.add(Conv2D(60 , kernel_size=(1,15) , strides=(1,3) , padding='valid' , activation='elu' , kernel_regularizer=l2()))
model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid'))

#dropout
model.add(Dropout(1 - keep_prob))
#batch norm
model.add(BatchNormalization())

#conv pool 2
model.add(Conv2D(60 , kernel_size=(1,4) , strides=(1,3) , padding='valid' , activation='elu' , kernel_regularizer=l2()))
model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid'))
#dropout
model.add(Dropout(1 - keep_prob))
#batch norm
model.add(BatchNormalization())

#conv 3
model.add(Conv2D(60 , kernel_size=(30,1) , strides=(1,3) , padding='valid' , activation='elu' , kernel_regularizer=l2()))
#dropout
model.add(Dropout(1 - keep_prob))
#batch norm
model.add(BatchNormalization())

#conv pool 4
model.add(Conv2D(90 , kernel_size=(1,3) , strides=(1,1) , padding='same' , activation='elu' , kernel_regularizer=l2()))
model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid'))
#dropout
model.add(Dropout(1 - keep_prob))
#batch norm
model.add(BatchNormalization())

#conv pool 5
model.add(Conv2D(120 , kernel_size=(1,3) , strides=(1,1) , padding='same' , activation='elu' , kernel_regularizer=l2()))
model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid'))
#dropout
model.add(Dropout(1 - keep_prob))
#batch norm
model.add(BatchNormalization())

#flatten
model.add(Flatten())

#fc
model.add(Dense(units=24 , activation='elu' , kernel_regularizer=l2()))

#extra for cluster layer
#model.add(Dense(units=12 , activation='elu' , kernel_regularizer=l2() , name = 'feature1'))
#model.add(Dense(units=4 , activation='elu' , kernel_regularizer=l2() , name = 'feature2'))
#model.add(Dense(units=2 ,  activation='elu' , kernel_regularizer=l2() , name = 'feature3'))

#fc last layer
model.add(Dense(units=2 , activation='softmax'))

In [45]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 384, 30)           150       
_________________________________________________________________
reshape_3 (Reshape)          (None, 30, 384, 1)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 30, 124, 60)       960       
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 30, 62, 60)        0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 30, 62, 60)        0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 30, 62, 60)        240       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 30, 20, 60)        14460     
__________

In [None]:
model.compile(optimizer='adam' , loss='binary_crossentropy' , metrics=['accuracy'])

In [None]:
model.fit(train_data_features , train_labels , batch_size=16 , epochs=100 , shuffle=True , validation_data=(val_data_features , val_labels))

In [None]:
val_data_features.shape