In [2]:
#use GRU to classify EEG filter and no filter

In [3]:
import numpy as np
from scipy import linalg

import pandas as pd

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.cluster import KMeans

import matplotlib.pyplot as plt

import gc

from keras.utils import np_utils

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [4]:
sample = 128 #hz
trial_time = 3 #s

origin_channel = 5 #5 channel eeg


In [5]:
def cov_mat(X):
    return np.matmul(X , X.T)/np.trace(np.matmul(X , X.T))

#计算每种样本的平均协方差矩阵
def average_norm_cov_mat(data):
    count = data.shape[0]
    sum_mat = np.zeros(shape=(data[0].shape[0] , data[0].shape[0]))
    
    for i in range(count):
        sum_mat += cov_mat(data[i])
    
    return sum_mat/count

def load_data(file_name):
    #pink and white
    
    temp = pd.read_csv(file_name)
    
    #删除前3秒和后2秒数据
    temp = temp.iloc[ : temp.shape[0] - 2*128] #后2秒 2s sample:128hz
    temp = temp.iloc[3*128 : ] #前3秒 3s sample:128hz
    
    for column in temp.columns:
        temp[column] = (temp[column] - temp[column].mean())/temp[column].std() #norm
    
    #5 channels data
    return temp[['AF3' , 'T7','Pz' , 'T8' , 'AF4']]

def sep(one_data , label):
    train_data = []
    train_labels = []
    
    size = sample*trial_time #384
    
    for i in range(one_data.shape[0] - size):
        train_data.append(one_data.iloc[i : i+size].values) #add one train sample
        train_labels.append(label) #corresponding label
    
    return train_data , train_labels

In [6]:
def concat_eeg_csv(file_names):
    #concat a big csv file
    first_file = load_data(file_name = file_names[0])
    
    file_names.remove(file_names[0])
    
    for file_name in file_names:
        first_file = first_file.append(load_data(file_name = file_name) , ignore_index = True)
    
    return first_file

In [7]:
#low pass filter
#50Hz

def low_pass(data):
    point = 50 #highest freq = 50hz
    length = sample * trial_time #256
    
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            a = np.fft.fft(data[i][j]).real
            b = np.fft.fft(data[i][j]).imag
            a[point : length-point] = 0
            b[point : length-point] = 0
            #重建频谱
            new_freq = [np.complex(a[i] , b[i]) for i in range(length)]
            new_freq = np.array(new_freq)
            
            data[i][j] = np.fft.ifft(new_freq)
        
    return data


In [8]:
#使用白色
data_1 = concat_eeg_csv(['data/train_1/fei_white_1.csv' , 'data/train_1/fei_white_2.csv'])
data_2 = concat_eeg_csv(['data/train_1/sen_white_1.csv' , 'data/train_1/sen_white_2.csv'])

#使用粉色
#data_1 = concat_eeg_csv(['data/train_1/fei_pink_1.csv' , 'data/train_1/fei_pink_2.csv'])
#data_2 = concat_eeg_csv(['data/train_1/sen_pink_1.csv' , 'data/train_1/sen_pink_2.csv'])

train_data_1 , train_labels_1 = sep(data_1 , 0)
train_data_2 , train_labels_2 = sep(data_2 , 1)

train_data_1 = np.array(train_data_1)
train_data_2 = np.array(train_data_2)

train_labels_1 = np.array(train_labels_1)
train_labels_2 = np.array(train_labels_2)

train_data_1 = np.transpose(train_data_1 , axes=(0 , 2 , 1))
train_data_2 = np.transpose(train_data_2 , axes=(0 , 2 , 1))

In [9]:
#=============
#滤波阶段 此时一个小样本为3秒的数据量  此时大致认为信号为平稳的
#train_data_1 = low_pass(train_data_1)
#train_data_2 = low_pass(train_data_2)

In [10]:
train_data = np.concatenate((train_data_1 , train_data_2))

train_labels = np.concatenate((train_labels_1 , train_labels_2))

In [11]:
train_data = np.transpose(train_data , axes = (0 , 2 , 1))
train_labels = np_utils.to_categorical(train_labels , num_classes=2)

In [12]:
from keras.models import Sequential
from keras.layers import GRU , Dense
from keras.optimizers import Adam



In [13]:
model = Sequential()
model.add(GRU(units=32 , input_shape=(sample*trial_time , origin_channel) , return_sequences=False))
model.add(Dense(units=8 , activation='tanh'))
model.add(Dense(units=2 , activation='sigmoid'))


In [14]:
model.compile(optimizer=Adam(lr=0.002) , loss='categorical_crossentropy' , metrics=['accuracy'])

In [15]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 32)                3648      
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 264       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 18        
Total params: 3,930
Trainable params: 3,930
Non-trainable params: 0
_________________________________________________________________


In [16]:
#==============
#==============
#val step
#白色
data_1_val = load_data('data/val_1/fei_white_3.csv')
data_2_val = load_data('data/val_1/sen_white_3.csv')

#粉色
#data_1_val = load_data('data/val_1/fei_pink_3.csv')
#data_2_val = load_data('data/val_1/sen_pink_3.csv')

val_data_1 , val_labels_1 = sep(data_1_val , 0)
val_data_2 , val_labels_2 = sep(data_2_val , 1)

val_data_1 = np.array(val_data_1)
val_data_2 = np.array(val_data_2)

val_labels_1 = np.array(val_labels_1)
val_labels_2 = np.array(val_labels_2)

val_data_1 = np.transpose(val_data_1 , axes=(0 , 2 , 1))
val_data_2 = np.transpose(val_data_2 , axes=(0 , 2 , 1))
#===============
#===============

In [17]:
#================
#验证集滤波
#val_data_1 = low_pass(val_data_1)
#val_data_2 = low_pass(val_data_2)
#================

In [18]:
#================
val_data = np.concatenate((val_data_1 , val_data_2))
val_labels = np.concatenate((val_labels_1 , val_labels_2))
#================

In [19]:
#================
val_data = np.transpose(val_data , axes = (0 , 2 , 1))
val_labels = np_utils.to_categorical(val_labels , num_classes=2)
#================

In [20]:
model.fit(train_data , train_labels , batch_size=32 , epochs=10 , validation_data=(val_data , val_labels) , shuffle=True)

Train on 26732 samples, validate on 12877 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1db5ed73438>

In [21]:
model.evaluate(val_data , val_labels , batch_size=32)



[0.8853930871239919, 0.9237400015531568]

In [22]:
#GRU no filter white