In [1]:
import numpy as np
from scipy import linalg

import pandas as pd

from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture

import matplotlib.pyplot as plt

import gc

from keras.utils import plot_model

%matplotlib inline

Using TensorFlow backend.


In [2]:
# Recording / windowing configuration shared by the cells below.
sample = 128 #sampling rate in Hz
trial_time = 3 #length of one analysis window in seconds

origin_channel = 5 #number of EEG channels kept per window


In [3]:
def cov_mat(X):
    """Return the trace-normalised product ``X @ X.T``.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array; its rows are treated as the variables, so the result is
        ``(rows, rows)``.  In this notebook the rows are presumably the EEG
        channels and the columns the time samples -- confirm against callers.

    Returns
    -------
    numpy.ndarray
        ``X @ X.T`` divided by its own trace, so the result has unit trace.
        If the trace is zero (e.g. an all-zero ``X``) the division yields
        NaN/inf values, as NumPy does for any division by zero.
    """
    # Compute the product once; it is needed for both the numerator and the trace.
    product = np.matmul(X, X.T)
    return product / np.trace(product)

#average normalised covariance matrix over all trials of one class
def average_norm_cov_mat(data):
    """Average ``cov_mat(trial)`` over every trial in ``data``.

    Parameters
    ----------
    data : numpy.ndarray
        Stack of trials indexed along the first axis; each ``data[i]`` is
        passed to ``cov_mat`` unchanged.

    Returns
    -------
    numpy.ndarray
        Square matrix whose side equals ``data[0].shape[0]``: the arithmetic
        mean of the per-trial normalised covariance matrices.
    """
    n_trials = data.shape[0]
    side = data[0].shape[0]
    accumulator = np.zeros(shape=(side, side))

    # Accumulate in trial order, then divide once at the end.
    for trial_index in range(n_trials):
        accumulator += cov_mat(data[trial_index])

    return accumulator / n_trials

def load_data(file_name, sample_rate=128, head_seconds=3, tail_seconds=2,
              channels=('AF3', 'T7', 'Pz', 'T8', 'AF4')):
    """Load one EEG recording, trim both ends and z-score the kept channels.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to read with ``pandas.read_csv``.
    sample_rate : int, optional
        Samples per second of the recording (default 128).
    head_seconds : int, optional
        Seconds dropped from the start of the recording (default 3).
    tail_seconds : int, optional
        Seconds dropped from the end of the recording (default 2).
    channels : sequence of str, optional
        Column names to keep, in output order.

    Returns
    -------
    pandas.DataFrame
        The trimmed recording restricted to ``channels``.  Each column is
        standardised with the mean and (sample) standard deviation of the
        trimmed data.  The original row index is kept (it is not reset).

    Raises
    ------
    KeyError
        If one of ``channels`` is missing from the file.
    """
    recording = pd.read_csv(file_name)

    # Drop the trailing samples first, then the leading ones.
    recording = recording.iloc[: recording.shape[0] - tail_seconds * sample_rate]
    recording = recording.iloc[head_seconds * sample_rate:]

    # Select the channels *before* normalising: other columns in the file
    # (possibly non-numeric) are never touched, and the explicit copy avoids
    # writing into a slice of the original frame.
    eeg = recording[list(channels)].copy()

    # Column-wise z-score; a constant column (std == 0) yields NaN/inf.
    return (eeg - eeg.mean()) / eeg.std()

def sep(one_data , label, size=None):
    """Cut a recording into overlapping fixed-length windows (stride: one row).

    Parameters
    ----------
    one_data : pandas.DataFrame
        Recording with one row per time sample.
    label : object
        Class label attached to every window cut from ``one_data``.
    size : int, optional
        Window length in rows.  Defaults to ``sample * trial_time`` taken from
        the notebook configuration (384 with the values set there).

    Returns
    -------
    (list of numpy.ndarray, list)
        The windows, each of shape ``(size, n_columns)``, and a list holding
        ``label`` once per window.  Both lists are empty when the recording
        has no more than ``size`` rows.
    """
    if size is None:
        size = sample * trial_time

    # Convert to numpy once instead of building a DataFrame slice per window.
    values = one_data.values

    # NOTE(review): the start positions stop at n_rows - size - 1, so the last
    # full window (starting at n_rows - size) is not emitted.  Kept as is so
    # that downstream sample counts do not change -- confirm whether intended.
    n_windows = max(one_data.shape[0] - size, 0)

    train_data = [values[start : start + size] for start in range(n_windows)]
    train_labels = [label] * n_windows

    return train_data , train_labels

In [4]:
def concat_eeg_csv(file_names):
    """Load several recordings with ``load_data`` and stack them row-wise.

    Parameters
    ----------
    file_names : sequence of str
        CSV paths, concatenated in the given order.  The sequence itself is
        not modified.

    Returns
    -------
    pandas.DataFrame
        All recordings stacked vertically with a fresh 0..n-1 row index
        (also when only a single file is given).

    Raises
    ------
    ValueError
        If ``file_names`` is empty.
    """
    # Work on a copy so the caller's list is left untouched.
    paths = list(file_names)
    if not paths:
        raise ValueError('concat_eeg_csv needs at least one file name')

    # One concat over all frames: DataFrame.append was removed in pandas 2.0
    # and re-copied the accumulated frame on every iteration.
    frames = [load_data(file_name = path) for path in paths]
    return pd.concat(frames , ignore_index = True)

In [None]:
#low pass filter
#default cut-off: 50 Hz

def low_pass(data, cutoff_hz=50, sample_rate=None):
    """Zero every frequency component above ``cutoff_hz`` along the last axis.

    The filter is an ideal ("brick-wall") low-pass applied in the frequency
    domain to each 1-D signal stored along the last axis of ``data``.

    Parameters
    ----------
    data : numpy.ndarray
        Real-valued array whose last axis is time, e.g.
        ``(windows, channels, samples)``.  It is overwritten in place.
    cutoff_hz : float, optional
        Highest frequency (in Hz) that is kept; components exactly at the
        cut-off are preserved.  Default 50.
    sample_rate : float, optional
        Sampling rate in Hz.  Defaults to the notebook-level ``sample``.

    Returns
    -------
    numpy.ndarray
        The same ``data`` object, now holding the filtered signals.
    """
    if sample_rate is None:
        sample_rate = sample

    n_samples = data.shape[-1]

    # Real-input FFT: only non-negative frequencies are stored, so zeroing a
    # bin removes the matching negative frequency too and the inverse
    # transform is exactly real.
    spectrum = np.fft.rfft(data , axis=-1)

    # Map each bin to its frequency in Hz so the cut-off is expressed in Hz
    # rather than as a raw bin index (the bin width is sample_rate / n_samples).
    bin_freqs = np.fft.rfftfreq(n_samples , d=1.0 / sample_rate)
    spectrum[... , bin_freqs > cutoff_hz] = 0

    # Rebuild the time-domain signal and write it back into the input array.
    data[...] = np.fft.irfft(spectrum , n=n_samples , axis=-1)

    return data


In [6]:
#use the "white" recordings
data_1 = concat_eeg_csv([
    'data/train_1/fei_white_1.csv',
    'data/train_1/fei_white_2.csv',
    'data/val_1/fei_white_3.csv',
])
data_2 = concat_eeg_csv([
    'data/train_1/sen_white_1.csv',
    'data/train_1/sen_white_2.csv',
    'data/val_1/sen_white_3.csv',
])

#use the "pink" recordings instead
#data_1 = concat_eeg_csv(['data/train_1/fei_pink_1.csv' , 'data/train_1/fei_pink_2.csv' , 'data/val_1/fei_pink_3.csv'])
#data_2 = concat_eeg_csv(['data/train_1/sen_pink_1.csv' , 'data/train_1/sen_pink_2.csv' , 'data/val_1/sen_pink_3.csv'])

# Slice each recording into windows: the "fei" files get label 0, the "sen" files label 1.
train_data_1, train_labels_1 = sep(data_1, 0)
train_data_2, train_labels_2 = sep(data_2, 1)

# Stack the windows as (windows, samples, channels), then swap the last two
# axes so every window is laid out as (channels, samples).
train_data_1 = np.transpose(np.array(train_data_1), axes=(0, 2, 1))
train_data_2 = np.transpose(np.array(train_data_2), axes=(0, 2, 1))

train_labels_1 = np.array(train_labels_1)
train_labels_2 = np.array(train_labels_2)

In [7]:
# Sanity check: per-class window arrays, each laid out as (windows, channels, samples).
print(train_data_1.shape , train_data_2.shape)

(7752, 5, 384) (18980, 5, 384)


In [None]:
#=============
#Filtering stage: each window now holds 3 seconds of data, over which the signal is assumed to be roughly stationary
train_data_1 = low_pass(train_data_1)
train_data_2 = low_pass(train_data_2)

In [32]:
#train_data_features = np.transpose(train_data_features , axes=(0 , 2 , 1))

In [8]:
# Stack both classes along the window axis; the labels are stacked in the
# same order so row i of train_labels belongs to window i of train_data.
train_data = np.concatenate([train_data_1, train_data_2], axis=0)
train_labels = np.concatenate([train_labels_1, train_labels_2], axis=0)

In [9]:
# Sanity check: the number of windows must match the number of labels.
print(train_data.shape , train_labels.shape)

(26732, 5, 384) (26732,)


In [10]:
#============
#============
#构建autoencoder

from keras.models import Sequential , Model
from keras.layers import Dense , Dropout , Conv2D , MaxPooling2D , Reshape , BatchNormalization , Flatten
from keras.layers import Input

Using TensorFlow backend.


In [11]:
# Flatten every (channels, samples) window into a single feature vector for
# the dense autoencoder.  The target shape is passed positionally because the
# `newshape` keyword of np.reshape is deprecated in NumPy 2.1 and later.
flatten_train_data = np.reshape(train_data , (train_data.shape[0] , -1))

In [12]:
# Expect (n_windows, channels * samples), i.e. 5 * 384 = 1920 features per window.
flatten_train_data.shape

(26732, 1920)

In [13]:
# Fully connected autoencoder over the flattened window:
# input -> 1024 -> 512 -> ... -> 4 -> 2 (bottleneck) -> 4 -> ... -> 1024 -> input size.
input_dim = sample*trial_time*origin_channel
hidden_units = (1024 , 512 , 256 , 128 , 64 , 32 , 16 , 8 , 4)

input_eeg = Input(shape = (input_dim,))

#encoder: progressively halve the width down to 4 units
encoder = input_eeg
for units in hidden_units:
    encoder = Dense(units=units , activation='elu')(encoder)

encoder_output = Dense(units=2 , activation='elu')(encoder) #2-D features used for clustering

#decoder: mirror image of the encoder
decoder = encoder_output
for units in reversed(hidden_units):
    decoder = Dense(units=units , activation='elu')(decoder)

# The reconstruction target is z-scored EEG, which regularly takes values
# below -1.  ELU cannot output anything below -1 (with its default alpha),
# so the output layer is linear to make every target value reachable.
decoder_output = Dense(units=input_dim , activation='linear')(decoder)

In [14]:
# Full model used for training: maps the flattened window to its reconstruction.
autoencoder = Model(inputs=input_eeg , outputs=decoder_output)

# Encoder half only: maps the flattened window to the 2-D bottleneck.
# Note: this rebinds `encoder` from the last hidden tensor to a Model object.
encoder = Model(inputs=input_eeg , outputs=encoder_output)

In [24]:
#plot_model(autoencoder , to_file='csp-3.3-autoencoder.png' , show_shapes=True , rankdir='LR')

In [22]:
#encoder.summary()

In [15]:
# Reconstruction objective: mean squared error, optimised with Adam.
autoencoder.compile(optimizer='adam' , loss = 'mse')

In [16]:
#train the AE
# NOTE(review): this fits on the first window only (row 0, kept 2-D via
# np.newaxis) for a single epoch, so the encoder used below is essentially
# untrained.  Looks like a smoke-test setting -- confirm whether the full
# flatten_train_data with more epochs was intended.
autoencoder.fit(x=flatten_train_data[0 , np.newaxis] , y=flatten_train_data[0 , np.newaxis] , batch_size=1 , epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x1f5f52e76d8>

In [18]:
new_train_data = encoder.predict(x = flatten_train_data) #2-D embedding of every window, plotted in the x-y plane below

In [5]:
#=========
#=========
#cluster
# Selects the clustering algorithm used in the next cell:
# 1 = KMeans, 2 = DBSCAN, any other value = Gaussian mixture.
flag = 1


In [None]:
# Cluster the 2-D embedding with the algorithm selected by `flag`
# (1 = KMeans, 2 = DBSCAN, any other value = Gaussian mixture) and plot it.
# All three estimators are unsupervised, so the ground-truth labels are not
# passed to fit(); scikit-learn would ignore them anyway.
cluster_seed = 0 #fixed seed so KMeans / GMM give the same partition on every run

if flag == 1:
    kmeans = KMeans(n_clusters=2 , random_state=cluster_seed)
    kmeans.fit(new_train_data)
    cluster_labels = kmeans.labels_
    cluster_method = 'KMeans'
elif flag == 2:
    dbscan = DBSCAN()
    dbscan.fit(new_train_data)
    cluster_labels = dbscan.labels_ #DBSCAN marks noise points with -1
    cluster_method = 'DBSCAN'
else:
    gmm = GaussianMixture(n_components=2 , random_state=cluster_seed)
    gmm.fit(new_train_data)
    cluster_labels = gmm.predict(new_train_data)
    cluster_method = 'GaussianMixture'

# One shared plotting path for all three algorithms.
plt.figure(figsize=(10 , 8))
plt.scatter(new_train_data[: , 0] , new_train_data[: , 1] , c = cluster_labels)
plt.title('cluster eeg(cluster label) - ' + cluster_method)
plt.xlabel('embedding dimension 1')
plt.ylabel('embedding dimension 2');

In [None]:
# Same 2-D embedding, this time coloured by the ground-truth class labels,
# for visual comparison with the cluster assignment plotted above.
plt.figure(figsize=(10 , 8))
plt.scatter(new_train_data[: , 0] , new_train_data[: , 1]  , c = train_labels)
#plt.title('cluster eeg(true label)')