In [7]:
import numpy as np
from scipy import linalg

import pandas as pd


from sklearn.cluster import KMeans

import matplotlib.pyplot as plt

import gc

from keras.utils import plot_model

%matplotlib inline

In [2]:
sample = 128 #hz
trial_time = 3 #s

origin_channel = 5 #5 channel eeg


In [3]:
def cov_mat(X):
    return np.matmul(X , X.T)/np.trace(np.matmul(X , X.T))

#计算每种样本的平均协方差矩阵
def average_norm_cov_mat(data):
    count = data.shape[0]
    sum_mat = np.zeros(shape=(data[0].shape[0] , data[0].shape[0]))
    
    for i in range(count):
        sum_mat += cov_mat(data[i])
    
    return sum_mat/count

def load_data(file_name):
    #pink and white
    
    temp = pd.read_csv(file_name)
    
    #删除前3秒和后2秒数据
    temp = temp.iloc[ : temp.shape[0] - 2*128] #后2秒 2s sample:128hz
    temp = temp.iloc[3*128 : ] #前3秒 3s sample:128hz
    
    for column in temp.columns:
        temp[column] = (temp[column] - temp[column].mean())/temp[column].std() #norm
    
    #5 channels data
    return temp[['AF3' , 'T7','Pz' , 'T8' , 'AF4']]

def sep(one_data , label):
    train_data = []
    train_labels = []
    
    size = sample*trial_time #384
    
    for i in range(one_data.shape[0] - size):
        train_data.append(one_data.iloc[i : i+size].values) #add one train sample
        train_labels.append(label) #corresponding label
    
    return train_data , train_labels

In [4]:
def concat_eeg_csv(file_names):
    #concat a big csv file
    first_file = load_data(file_name = file_names[0])
    
    file_names.remove(file_names[0])
    
    for file_name in file_names:
        first_file = first_file.append(load_data(file_name = file_name) , ignore_index = True)
    
    return first_file

In [8]:
#low pass filter
#50Hz

def low_pass(data):
    point = 50 #highest freq = 50hz
    length = sample * trial_time #256
    
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            a = np.fft.fft(data[i][j]).real
            b = np.fft.fft(data[i][j]).imag
            a[point : length-point] = 0
            b[point : length-point] = 0
            #重建频谱
            new_freq = [np.complex(a[i] , b[i]) for i in range(length)]
            new_freq = np.array(new_freq)
            
            data[i][j] = np.fft.ifft(new_freq)
        
    return data


In [46]:
#使用白色
data_1 = concat_eeg_csv(['data/train_1/fei_white_1.csv' , 'data/train_1/fei_white_2.csv'])
data_2 = concat_eeg_csv(['data/train_1/sen_white_1.csv' , 'data/train_1/sen_white_2.csv'])

#使用粉色
#data_1 = concat_eeg_csv(['data/train_1/fei_pink_1.csv' , 'data/train_1/fei_pink_2.csv'])
#data_2 = concat_eeg_csv(['data/train_1/sen_pink_1.csv' , 'data/train_1/sen_pink_2.csv'])

#为了使用sigmoid激活函数 需要将标签调整为0和1
train_data_1 , train_labels_1 = sep(data_1 , 0)
train_data_2 , train_labels_2 = sep(data_2 , 1)

train_data_1 = np.array(train_data_1)
train_data_2 = np.array(train_data_2)

train_labels_1 = np.array(train_labels_1)
train_labels_2 = np.array(train_labels_2)

train_data_1 = np.transpose(train_data_1 , axes=(0 , 2 , 1))
train_data_2 = np.transpose(train_data_2 , axes=(0 , 2 , 1))

In [47]:
print(train_data_1.shape , train_data_2.shape)

(7752, 5, 384) (18980, 5, 384)


In [38]:
#=============
#滤波阶段 此时一个小样本为3秒的数据量  此时大致认为信号为平稳的
#train_data_1 = low_pass(train_data_1)
#train_data_2 = low_pass(train_data_2)



In [48]:
train_data = np.concatenate((train_data_1 , train_data_2))

train_labels = np.concatenate((train_labels_1 , train_labels_2))

In [49]:
print(train_data.shape , train_labels.shape)

(26732, 5, 384) (26732,)


In [51]:
#使用CNN进行处理
#最后的输出设置为1个输出单元 激活为sigmoid
#========
#========


from keras.models import Sequential
from keras.layers import Dense , Dropout , Conv2D , MaxPooling2D , Reshape , BatchNormalization , Flatten
from keras.regularizers import l2


In [52]:
#增加一维 作为last channel 满足卷积要求
train_data = train_data[:,:,:, np.newaxis] # [None,origin channel,EEG length,artificial channel]


In [64]:
print(train_data.shape , train_labels.shape)

(26732, 5, 384, 1) (26732,)


In [36]:
#train_data = np.transpose(train_data , axes=(0 , 2 , 1 , 3)) #[None,EEG length,origin_channel,artificial channel]

In [37]:
#print(train_data.shape , train_labels.shape)

(26732, 384, 5, 1) (26732,)


In [66]:
keep_prob = 0.5

model = Sequential()
#sample*trial_time=384
#网络输入为384维

#CNNs需要的输入的样本集 形式为（样本数量，5,384，1）5为原始特征数量 1为人为增加的通道

#conv pool 1
model.add(Conv2D(filters=60 , kernel_size=(1,4) , strides=(1,3) , padding='valid' , activation='elu' , input_shape=(origin_channel , sample*trial_time , 1) , name='conv1'))
model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid' , name='pool1'))
#dropout
model.add(Dropout(1 - keep_prob , name='dropout1'))
#batch norm
model.add(BatchNormalization(name='batch-norm1'))

#conv pool 2
model.add(Conv2D(60 , kernel_size=(1,4) , strides=(1,3) , padding='valid' , activation='elu' , kernel_regularizer=l2() , name='conv2'))
model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid' , name='pool2'))
#dropout
model.add(Dropout(1 - keep_prob , name='dropout2'))
#batch norm
model.add(BatchNormalization(name='batch-norm2'))

#conv 3
model.add(Conv2D(60 , kernel_size=(1,4) , strides=(1,3) , padding='valid' , activation='elu' , kernel_regularizer=l2() , name='conv3'))
#no pooling
#dropout
model.add(Dropout(1 - keep_prob , name='dropout3'))
#batch norm
model.add(BatchNormalization(name='batch-norm3'))

#conv pool 4
model.add(Conv2D(90 , kernel_size=(1,3) , strides=(1,1) , padding='same' , activation='elu' , kernel_regularizer=l2() , name='conv4'))
model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid' , name='pool4'))
#dropout
model.add(Dropout(1 - keep_prob , name='dropout4'))
#batch norm
model.add(BatchNormalization(name='batch-norm4'))

#conv pool 5
model.add(Conv2D(120 , kernel_size=(1,3) , strides=(1,1) , padding='same' , activation='elu' , kernel_regularizer=l2() , name='conv5'))
#model.add(MaxPooling2D(pool_size=(1,2) , strides=(1,2) , padding='valid' , name='pool5'))
#dropout
model.add(Dropout(1 - keep_prob , name='dropout5'))
#batch norm
model.add(BatchNormalization(name='batch-norm5'))

#flatten
model.add(Flatten(name='flatten'))

#fc
model.add(Dense(units=256 , activation='elu' , name='fc1'))
model.add(Dense(units=128 , activation='elu' , name='fc2'))
model.add(Dense(units=64 , activation='elu' , name='fc3'))
model.add(Dense(units=32 , activation='elu' , name='f4'))
model.add(Dense(units=8 , activation='elu' , name='fc5'))

#fc last layer
model.add(Dense(units=1 , activation='sigmoid' , name='fc6'))

In [67]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 5, 127, 60)        300       
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 5, 63, 60)         0         
_________________________________________________________________
dropout1 (Dropout)           (None, 5, 63, 60)         0         
_________________________________________________________________
batch-norm1 (BatchNormalizat (None, 5, 63, 60)         240       
_________________________________________________________________
conv2 (Conv2D)               (None, 5, 20, 60)         14460     
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 5, 10, 60)         0         
_________________________________________________________________
dropout2 (Dropout)           (None, 5, 10, 60)         0         
__________

In [68]:
model.compile(optimizer='adam' , loss='binary_crossentropy' , metrics=['accuracy'])

In [72]:
model.fit(train_data[0,np.newaxis] , train_labels[0 , np.newaxis] , batch_size=1 , epochs=1)# , validation_data=(val_data_features , val_labels))

Epoch 1/1


<keras.callbacks.History at 0x1a7949502b0>

In [73]:
#==============
#==============
#val step
#使用验证集进行测试
#白色
data_1_val = load_data('data/val_1/fei_white_3.csv')
data_2_val = load_data('data/val_1/sen_white_3.csv')

#粉色
#data_1_val = load_data('data/val_1/fei_pink_3.csv')
#data_2_val = load_data('data/val_1/sen_pink_3.csv')

val_data_1 , val_labels_1 = sep(data_1_val , 0)
val_data_2 , val_labels_2 = sep(data_2_val , 1)

val_data_1 = np.array(val_data_1)
val_data_2 = np.array(val_data_2)

val_labels_1 = np.array(val_labels_1)
val_labels_2 = np.array(val_labels_2)

val_data_1 = np.transpose(val_data_1 , axes=(0 , 2 , 1))
val_data_2 = np.transpose(val_data_2 , axes=(0 , 2 , 1))

In [None]:
#================
#验证集滤波 依次进行
#val_data_1 = low_pass(val_data_1)
#val_data_2 = low_pass(val_data_2)

In [74]:
#===================
#concat
val_data = np.concatenate((val_data_1 , val_data_2))

val_labels = np.concatenate((val_labels_1 , val_labels_2))

In [75]:
print(val_data.shape , val_labels.shape)

(12877, 5, 384) (12877,)


In [76]:
val_data = val_data[:,:,:, np.newaxis] # [None,origin channel,EEG length,artificial channel]

In [77]:
print(val_data.shape , val_labels.shape)

(12877, 5, 384, 1) (12877,)


In [78]:
#loss accu
model.evaluate(val_data[0 , np.newaxis] , val_labels[0 , np.newaxis] , batch_size=1)



[3.5816268920898438, 1.0]

In [79]:
#model.predict(val_data[0 , np.newaxis])

array([[0.48345894]], dtype=float32)

In [80]:
val_labels[0]

0

In [82]:
model.evaluate(train_data[0 , np.newaxis] , train_labels[0 , np.newaxis])



[3.5959973335266113, 1.0]

In [83]:
train_labels[0]

0