In [55]:
import gumpy as gp
import numpy as np
import pywt

import sklearn
import os

import warnings

warnings.simplefilter('ignore') #忽略警告

In [56]:
import scipy
import scipy.io as sio

from scipy import linalg

import pandas as pd

#分类器
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.lda import LDA
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

import xgboost
import lightgbm

#模型集成
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier

#模型调节
from sklearn.model_selection import GridSearchCV #参数搜索
from mlxtend.feature_selection import SequentialFeatureSelector #特征选择函数 选择合适的feature

#结果可视化
from sklearn.metrics import classification_report , confusion_matrix #混淆矩阵


#二分类其多分类化
#from sklearn.multiclass import OneVsOneClassifier
#from sklearn.multiclass import OneVsRestClassifier

#from sklearn.preprocessing import StandardScaler
#from sklearn.cluster import KMeans

#距离函数 度量向量距离
from sklearn.metrics.pairwise import manhattan_distances
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_distances
from sklearn.metrics.pairwise import cosine_similarity #余弦相似度

#one-hot使用
from keras.utils import to_categorical

#绘图
import matplotlib.pyplot as plt

import scipy.linalg as la

import gc

%matplotlib inline

In [57]:
sample_rate = 256 #hz
origin_channel = 16 #5 channel eeg

#采集的通道
#共16 channel
#未使用的channel使用none代替
#reference:a study on performance increasing in ssvep based bci application
SAMPLE_CHANNEL = ['Pz' , 'PO3' , 'PO4' , 'O1' , 'O2' , 'Oz' , 'O9' , 'FP2' ,
                  'C4' , 'C6' , 'CP3' , 'CP1' ,
                  'CPZ' , 'CP2' , 'CP4' , 'PO8']

LABEL2STR = {0:'sen' , 1:'hong' , 2:'zhao',
             3:'fen' , 4:'xiao' , 5:'yu' , 
             6:'bin' , 7:'wang' , 8:'wei' , 
             9:'fei'}

# 减去前多少秒数据 second
# 减去后多少秒数据 second
CLIP_FORWARD = 2
CLIP_BACKWARD = 1

# 单个小段的实验时长
trial_time = 3 #second

trial_offset = 0 #second
start_trial_time = 0 #真正的实验开始时刻
end_trial_time = 2 #真正的实验结束时刻(<trial_time)

#是否进行归一化
#reference:a study on performance increasing in ssvep based bci application
#IS_NORMALIZE = True

#是否进行滤波
#IS_FILTER = False
#EEG频率范围
#reference:a study on performance increasing in ssvep based bci application
LO_FREQ = 0.5
HI_FREQ = 40

#是否陷波
#IS_NOTCH = False
NOTCH_FREQ = 50 #陷波 工频



# load data step

In [58]:
# defined

#def butter_worth(data , lowcut , highcut , order=6):
#    nyq = 0.5 * sample_rate
#    
#    lo = lowcut / nyq
#    hi = highcut / nyq
#    
#    b,a = scipy.signal.butter(order , [lo , hi] , btype='bandpass')
#
#    return np.array([scipy.signal.filtfilt(b , a , data[: , i]) for i in range(data.shape[1])]).reshape((-1 , origin_channel))

In [59]:
def load_data(filename):
    
    #extra_overlap = 1500
    
    data = sio.loadmat(file_name=filename)['data_received'] #length*16 matrix

    #此通道没有采集 置为0
    #全通道均使用时 不需要
    #for i in range(len(SAMPLE_CHANNEL)):
    #    if SAMPLE_CHANNEL[i] == 'none':
    #        data[: , i] = 0.0

    #删除前x秒和后x秒数据
    
    
    #是否进行裁剪 【如果进行裁剪 由于sen的第一次数据 将extra_overlap调整为1500】
    data = data[CLIP_FORWARD * sample_rate : - CLIP_BACKWARD * sample_rate]
    
    
    #data = np.concatenate((data , data[ -extra_overlap : , :]) , axis=0)
    #
    #data_filter = butter_worth(data , 0.5 , 40 , order=3)
    #
    #return data_filter[extra_overlap : , :] #将边界效应去掉

    return data

In [60]:
def separate(data , label , overlap_length = 128):
    '''
    最长重叠长度为size长 256*3 个数据点
    '''
    train_data = []
    train_labels = []

    size = sample_rate * trial_time #一小段 256*3 个数据点
    data_length = data.shape[0]

    idx = 0

    while idx<data_length-size:
        train_data.append(data[idx : idx+size , :])
        train_labels.append(label)

        idx = idx + (size - overlap_length)

    return np.array(train_data) , np.array(train_labels)

In [67]:
os.listdir('real_data/eeg_final/circle/0/')

['10', '15', '20', '25']

In [None]:
'real_data/eeg_final/circle/0/10/'

In [87]:
def train_val(data , ratio = 0.9):
    '''
    将数据分为 训练集 和 验证集
    '''
    
    seg = int(ratio * data.shape[0])
    
    return data[ : seg] , data[seg : ]


def combine(freq = 10):
    '''
    训练数据与验证数据
    :freq: 指定闪烁的频率
    
    '''
    
    if freq not in [10 , 15 , 20 , 25]:
        print('freq must in 10,15,20,25')
        return 
    
    ratio = 0.9 #训练集的占比
    overlap_length = 2*256 #重叠2秒数据
    
    person_0_filenames = os.listdir('real_data/eeg_final/circle/0/%s/' % freq)
    person_1_filenames = os.listdir('real_data/eeg_final/circle/1/%s/' % freq)
    person_2_filenames = os.listdir('real_data/eeg_final/circle/2/%s/' % freq)
    person_3_filenames = os.listdir('real_data/eeg_final/circle/3/%s/' % freq)
    person_4_filenames = os.listdir('real_data/eeg_final/circle/4/%s/' % freq)
    person_5_filenames = os.listdir('real_data/eeg_final/circle/5/%s/' % freq)
    person_6_filenames = os.listdir('real_data/eeg_final/circle/6/%s/' % freq)
    person_7_filenames = os.listdir('real_data/eeg_final/circle/7/%s/' % freq)
    person_8_filenames = os.listdir('real_data/eeg_final/circle/8/%s/' % freq)
    person_9_filenames = os.listdir('real_data/eeg_final/circle/9/%s/' % freq)
    

    #打开信号文件 并 合并
    person_0 = np.concatenate([load_data('real_data/eeg_final/circle/0/%s/' % freq + filename) for filename in person_0_filenames] , axis = 0)
    person_1 = np.concatenate([load_data('real_data/eeg_final/circle/1/%s/' % freq + filename) for filename in person_1_filenames] , axis = 0)
    person_2 = np.concatenate([load_data('real_data/eeg_final/circle/2/%s/' % freq + filename) for filename in person_2_filenames] , axis = 0)
    person_3 = np.concatenate([load_data('real_data/eeg_final/circle/3/%s/' % freq + filename) for filename in person_3_filenames] , axis = 0)
    person_4 = np.concatenate([load_data('real_data/eeg_final/circle/4/%s/' % freq + filename) for filename in person_4_filenames] , axis = 0)
    person_5 = np.concatenate([load_data('real_data/eeg_final/circle/5/%s/' % freq + filename) for filename in person_5_filenames] , axis = 0)
    person_6 = np.concatenate([load_data('real_data/eeg_final/circle/6/%s/' % freq + filename) for filename in person_6_filenames] , axis = 0)
    person_7 = np.concatenate([load_data('real_data/eeg_final/circle/7/%s/' % freq + filename) for filename in person_7_filenames] , axis = 0)
    person_8 = np.concatenate([load_data('real_data/eeg_final/circle/8/%s/' % freq + filename) for filename in person_8_filenames] , axis = 0)
    person_9 = np.concatenate([load_data('real_data/eeg_final/circle/9/%s/' % freq + filename) for filename in person_9_filenames] , axis = 0)
    
    person_0_train , person_0_val = train_val(person_0)
    person_1_train , person_1_val = train_val(person_1)
    person_2_train , person_2_val = train_val(person_2)
    person_3_train , person_3_val = train_val(person_3)
    person_4_train , person_4_val = train_val(person_4)
    person_5_train , person_5_val = train_val(person_5)
    person_6_train , person_6_val = train_val(person_6)
    person_7_train , person_7_val = train_val(person_7)
    person_8_train , person_8_val = train_val(person_8)
    person_9_train , person_9_val = train_val(person_9)
    
    #数据分段阶段
    
    #============
    #训练数据分段
    train_person_data_0 , train_person_labels_0 = separate(person_0_train , label = 0 , overlap_length=overlap_length)
    train_person_data_1 , train_person_labels_1 = separate(person_1_train , label = 1 , overlap_length=overlap_length)
    train_person_data_2 , train_person_labels_2 = separate(person_2_train , label = 2 , overlap_length=overlap_length)
    train_person_data_3 , train_person_labels_3 = separate(person_3_train , label = 3 , overlap_length=overlap_length)
    train_person_data_4 , train_person_labels_4 = separate(person_4_train , label = 4 , overlap_length=overlap_length)
    train_person_data_5 , train_person_labels_5 = separate(person_5_train , label = 5 , overlap_length=overlap_length)
    train_person_data_6 , train_person_labels_6 = separate(person_6_train , label = 6 , overlap_length=overlap_length)
    train_person_data_7 , train_person_labels_7 = separate(person_7_train , label = 7 , overlap_length=overlap_length)
    train_person_data_8 , train_person_labels_8 = separate(person_8_train , label = 8 , overlap_length=overlap_length)
    train_person_data_9 , train_person_labels_9 = separate(person_9_train , label = 9 , overlap_length=overlap_length)

    #合并数据
    train_data = np.concatenate((train_person_data_0 , train_person_data_1 , train_person_data_2 ,
                                 train_person_data_3 , train_person_data_4 , train_person_data_5 ,
                                 train_person_data_6 , train_person_data_7 , train_person_data_8 ,
                                 train_person_data_9 ))
    
    train_labels = np.concatenate((train_person_labels_0 , train_person_labels_1 , train_person_labels_2 ,
                                   train_person_labels_3 , train_person_labels_4 , train_person_labels_5 ,
                                   train_person_labels_6 , train_person_labels_7 , train_person_labels_8 ,
                                   train_person_labels_9 ))
    
    #产生索引并置乱
    idx_train_data = list(range(train_data.shape[0]))
    np.random.shuffle(idx_train_data)

    #将训练数据置乱
    train_data = train_data[idx_train_data]
    train_labels = train_labels[idx_train_data]
    
    #============
    #验证数据分段
    val_person_data_0 , val_person_labels_0 = separate(person_0_val , label = 0 , overlap_length=0)
    val_person_data_1 , val_person_labels_1 = separate(person_1_val , label = 1 , overlap_length=0)
    val_person_data_2 , val_person_labels_2 = separate(person_2_val , label = 2 , overlap_length=0)
    val_person_data_3 , val_person_labels_3 = separate(person_3_val , label = 3 , overlap_length=0)
    val_person_data_4 , val_person_labels_4 = separate(person_4_val , label = 4 , overlap_length=0)
    val_person_data_5 , val_person_labels_5 = separate(person_5_val , label = 5 , overlap_length=0)
    val_person_data_6 , val_person_labels_6 = separate(person_6_val , label = 6 , overlap_length=0)
    val_person_data_7 , val_person_labels_7 = separate(person_7_val , label = 7 , overlap_length=0)
    val_person_data_8 , val_person_labels_8 = separate(person_8_val , label = 8 , overlap_length=0)
    val_person_data_9 , val_person_labels_9 = separate(person_9_val , label = 9 , overlap_length=0)
    
    #合并数据
    val_data = np.concatenate((val_person_data_0 , val_person_data_1 , val_person_data_2 ,
                               val_person_data_3 , val_person_data_4 , val_person_data_5 ,
                               val_person_data_6 , val_person_data_7 , val_person_data_8 ,
                               val_person_data_9 ))
    
    val_labels = np.concatenate((val_person_labels_0 , val_person_labels_1 , val_person_labels_2 ,
                                 val_person_labels_3 , val_person_labels_4 , val_person_labels_5 ,
                                 val_person_labels_6 , val_person_labels_7 , val_person_labels_8 ,
                                 val_person_labels_9 ))

    #产生索引并置乱
    idx_val_data = list(range(val_data.shape[0]))
    np.random.shuffle(idx_val_data)

    #将训练数据置乱
    val_data = val_data[idx_val_data]
    val_labels = val_labels[idx_val_data]
    
    return train_data , train_labels , val_data , val_labels

In [88]:
def shuffle(train_data , train_labels , val_data , val_labels):
    #置乱一次数据
    idx_train_data = list(range(train_data.shape[0]))
    np.random.shuffle(idx_train_data)
    
    idx_val_data = list(range(val_data.shape[0]))
    np.random.shuffle(idx_val_data)
    
    return train_data[idx_train_data] , train_labels[idx_train_data] , val_data[idx_val_data] , val_labels[idx_val_data]

In [89]:
train_X_ , train_y , val_X_ , val_y = combine(freq = 10) #10 15 20 25 hz

In [90]:
#如果没有进行前后裁剪 则输出数据会变多

print(train_X_.shape , train_y.shape , val_X_.shape , val_y.shape)

(1500, 768, 16) (1500,) (50, 768, 16) (50,)


# extract feature step

## method 1 sub band power

In [91]:
def butter_worth(data , lowcut , highcut , order=6):
    nyq = 0.5 * sample_rate
    
    lo = lowcut / nyq
    hi = highcut / nyq
    
    b,a = scipy.signal.butter(order , [lo , hi] , btype='bandpass')

    return np.array([scipy.signal.filtfilt(b , a , data[: , i]) for i in range(data.shape[1])]).reshape((-1 , origin_channel))

In [92]:
def alpha_subBP_features(data):
    alpha1 = butter_worth(data , 8.5 , 11.5)
    alpha2 = butter_worth(data , 9.0 , 12.5)    
    alpha3 = butter_worth(data , 9.5 , 13.5)   #11.5 后
    alpha4 = butter_worth(data , 8.0 , 10.5)   
    
    return np.array([alpha1 , alpha2 , alpha3 , alpha4])

def beta_subBP_features(data):
    beta1 = butter_worth(data , 15.0 , 30.0) #14.0 前
    beta2 = butter_worth(data , 16.0 , 17.0)    
    beta3 = butter_worth(data , 17.0 , 18.0)    
    beta4 = butter_worth(data , 18.0 , 19.0)    
    
    return np.array([beta1 , beta2 , beta3 , beta4])

def powermean(data):
    #官方demo跳4秒 前4秒为准备阶段
    return np.power(data[ : , 0] , 2).mean(), \
            np.power(data[ : , 1] , 2).mean(), \
            np.power(data[ : , 2] , 2).mean(), \
            np.power(data[ : , 3] , 2).mean(), \
            np.power(data[ : , 4] , 2).mean(), \
            np.power(data[ : , 5] , 2).mean(), \
            np.power(data[ : , 6] , 2).mean(), \
            np.power(data[ : , 7] , 2).mean(), \
            np.power(data[ : , 8] , 2).mean(), \
            np.power(data[ : , 9] , 2).mean(), \
            np.power(data[ : , 10] , 2).mean(), \
            np.power(data[ : , 11] , 2).mean(), \
            np.power(data[ : , 12] , 2).mean(), \
            np.power(data[ : , 13] , 2).mean(), \
            np.power(data[ : , 14] , 2).mean(), \
            np.power(data[ : , 15] , 2).mean()       

In [93]:
def log_subBP_feature_extraction(alpha , beta):
    #alpha
    power_1_a = powermean(alpha[0])
    power_2_a = powermean(alpha[1])
    power_3_a = powermean(alpha[2])
    power_4_a = powermean(alpha[3])
    
    #beta
    power_1_b = powermean(beta[0])
    power_2_b = powermean(beta[1])
    power_3_b = powermean(beta[2])
    power_4_b = powermean(beta[3])
    
    X= np.array(
        [np.log(power_1_a) ,
         np.log(power_2_a) ,
         np.log(power_3_a) ,
         np.log(power_4_a) ,
         np.log(power_1_b) ,
         np.log(power_2_b) ,
         np.log(power_3_b) ,
         np.log(power_4_b)
        ]
        ).flatten()

    return X

In [94]:
def feature_extraction_sub_band_power(data):
    n_features = 128
    X = np.zeros((data.shape[0] , n_features))
    
    for i , datum in enumerate(data):
        alpha = alpha_subBP_features(datum)
        beta = beta_subBP_features(datum)
            
        X[i, :] = log_subBP_feature_extraction(alpha , beta)

    return X

In [95]:
train_X = feature_extraction_sub_band_power(train_X_)
val_X = feature_extraction_sub_band_power(val_X_)

In [97]:
print(train_X.shape , train_y.shape , val_X.shape , val_y.shape)

(1500, 128) (1500,) (50, 128) (50,)


## method 2 DWT

In [18]:
def feature_extraction_dwt_meta(data , n):
    n_features = 48
    
    X = np.zeros((data.shape[0] , n_features))
    
    level = 5
    wavelet = 'db4'
    
    #n=3 or 4
    
    for i , datum in enumerate(data):
        coeffs_Pz  = pywt.wavedec(data = datum[:,0], wavelet=wavelet, level=level)
        coeffs_PO3 = pywt.wavedec(data = datum[:,1], wavelet=wavelet, level=level)
        coeffs_PO4 = pywt.wavedec(data = datum[:,2], wavelet=wavelet, level=level)
        coeffs_O1  = pywt.wavedec(data = datum[:,3], wavelet=wavelet, level=level)
        coeffs_O2  = pywt.wavedec(data = datum[:,4], wavelet=wavelet, level=level)
        coeffs_Oz  = pywt.wavedec(data = datum[:,5], wavelet=wavelet, level=level)
        coeffs_O9  = pywt.wavedec(data = datum[:,6], wavelet=wavelet, level=level)
        coeffs_FP2 = pywt.wavedec(data = datum[:,7], wavelet=wavelet, level=level)
        coeffs_C4  = pywt.wavedec(data = datum[:,8], wavelet=wavelet, level=level)
        coeffs_C6  = pywt.wavedec(data = datum[:,9], wavelet=wavelet, level=level)
        coeffs_CP3 = pywt.wavedec(data = datum[:,10], wavelet=wavelet, level=level)
        coeffs_CP1 = pywt.wavedec(data = datum[:,11], wavelet=wavelet, level=level)
        coeffs_CPZ = pywt.wavedec(data = datum[:,12], wavelet=wavelet, level=level)
        coeffs_CP2 = pywt.wavedec(data = datum[:,13], wavelet=wavelet, level=level)
        coeffs_CP4 = pywt.wavedec(data = datum[:,14], wavelet=wavelet, level=level)
        coeffs_PO8 = pywt.wavedec(data = datum[:,15], wavelet=wavelet, level=level)

        X[i , :] = np.array([
            np.std(coeffs_Pz [n]),   
            np.std(coeffs_PO3[n]),  
            np.std(coeffs_PO4[n]),   
            np.std(coeffs_O1 [n]),  
            np.std(coeffs_O2 [n]),  
            np.std(coeffs_Oz [n]),   
            np.std(coeffs_O9 [n]),  
            np.std(coeffs_FP2[n]),  
            np.std(coeffs_C4 [n]),
            np.std(coeffs_C6 [n]),
            np.std(coeffs_CP3[n]),
            np.std(coeffs_CP1[n]),
            np.std(coeffs_CPZ[n]),
            np.std(coeffs_CP2[n]),
            np.std(coeffs_CP4[n]),
            np.std(coeffs_PO8[n]),
            
            np.mean(coeffs_Pz [n]**2),
            np.mean(coeffs_PO3[n]**2),
            np.mean(coeffs_PO4[n]**2),
            np.mean(coeffs_O1 [n]**2),
            np.mean(coeffs_O2 [n]**2),
            np.mean(coeffs_Oz [n]**2),
            np.mean(coeffs_O9 [n]**2),
            np.mean(coeffs_FP2[n]**2),            
            np.mean(coeffs_C4 [n]**2),
            np.mean(coeffs_C6 [n]**2),
            np.mean(coeffs_CP3[n]**2),
            np.mean(coeffs_CP1[n]**2),
            np.mean(coeffs_CPZ[n]**2),
            np.mean(coeffs_CP2[n]**2),
            np.mean(coeffs_CP4[n]**2),
            np.mean(coeffs_PO8[n]**2),
            
            np.mean(coeffs_Pz [n]),
            np.mean(coeffs_PO3[n]), 
            np.mean(coeffs_PO4[n]),
            np.mean(coeffs_O1 [n]),
            np.mean(coeffs_O2 [n]),
            np.mean(coeffs_Oz [n]),
            np.mean(coeffs_O9 [n]),
            np.mean(coeffs_FP2[n]),        
            np.mean(coeffs_C4 [n]),
            np.mean(coeffs_C6 [n]),
            np.mean(coeffs_CP3[n]),
            np.mean(coeffs_CP1[n]),
            np.mean(coeffs_CPZ[n]),
            np.mean(coeffs_CP2[n]),
            np.mean(coeffs_CP4[n]),
            np.mean(coeffs_PO8[n])]).flatten()
        
    return X

def normalize(data , normalization_type = 'mean_std'):
    
    def _norm_mean_std(data):
        _mean = np.mean(data , axis=0)
        _std = np.std(data , axis=0)
        
        return (data - _mean) / _std
    
    def _norm_min_max(data):
        return (data - np.min(data)) / (np.max(data) - np.min(data))
    
    if normalization_type == 'mean_std':
        return _norm_mean_std(data)
    elif normalization_type == 'min_max':
        return _norm_min_max(data)
    else:
        raise Exception('wrong normalization type')
    
def feature_extraction_dwt(data , is_normalize = True):
    data_3 = feature_extraction_dwt_meta(data , 3) #4
    data_4 = feature_extraction_dwt_meta(data , 4) #5
    
    data_concat = np.concatenate((data_3 , data_4) , axis = -1)
    
    if is_normalize:
        return normalize(data_concat)
    else:
        return data_concat

In [363]:
train_X = feature_extraction_dwt(train_X_)

val_X = feature_extraction_dwt(val_X_)

In [365]:
print(train_X.shape , train_y.shape , val_X.shape , val_y.shape)

(360, 96) (360,) (12, 96) (12,)


In [277]:
#coeff 长度为6
#coeff  = pywt.wavedec(data = train_X_[0 , : , 0] , wavelet='db4', level=5)


# method 3 CSP

In [292]:
def covarianceMatrix(A):
    """
    协方差矩阵
    
    A如果为 通道数*数据点 则 np.dot(A , A.T)
    """
    #Ca = np.cov(A)
    A_dot_A_T = np.dot(A.T , A.T.T)
    
    return A_dot_A_T / np.trace(A_dot_A_T)

In [293]:
def spatialFilter(Ra,Rb):
    """
    获取空间过滤器
    
    :Ra: 平均协方差矩阵
    :Rb: 平均协方差矩阵    
    """

    R = Ra + Rb    
    E,U = la.eig(R) #E:特征值 U:特征向量

    ord = np.argsort(E) #升序索引
    ord = ord[::-1] #降序索引
    E = E[ord]
    U = U[:,ord]

    #白化矩阵
    P = np.dot(np.sqrt(la.inv(np.diag(E))) , np.transpose(U)) #矩阵的逆 ...

    #转换平均协方差矩阵
    Sa = np.dot(P , np.dot(Ra,np.transpose(P)))
    Sb = np.dot(P , np.dot(Rb,np.transpose(P)))

    #求广义特征值和特征向量 并进行排序（降序）
    E1,U1 = la.eig(Sa,Sb)
    ord1 = np.argsort(E1)
    ord1 = ord1[::-1]
    E1 = E1[ord1]
    U1 = U1[:,ord1]

    #计算投影矩阵（空间滤波器）
    SFa = np.dot(np.transpose(U1),P)
    
    return SFa

In [294]:
def mean_covarianceMatrix(A):
    '''
    计算平均协方差矩阵
    '''
    Rx = covarianceMatrix(A[0])
    
    for i in range(1 , len(A)):
        Rx += covarianceMatrix(A[i])

    return Rx / len(A)
    
def CSP(task_a , task_b):
    """
    :task_a: 一个segment信号矩阵
    :task_b: 一个segment信号矩阵
    
    获取CSP特征
    """
    mean_cov_a = mean_covarianceMatrix(task_a)
    mean_cov_b = mean_covarianceMatrix(task_b)

    filters = spatialFilter(mean_cov_a , mean_cov_b)

    return np.array(filters)

In [295]:
def combine_CSP():
    '''
    CSP使用的读取数据函数
    '''
    ratio = 0.9 #训练集的占比
    overlap_length = 2*256 #重叠2秒数据
    
    person_0_filenames = os.listdir('real_data/eeg_11.24/circle/1/10hz/')
    person_1_filenames = os.listdir('real_data/eeg_11.24/circle/2/10hz/')
    person_2_filenames = os.listdir('real_data/eeg_11.24/circle/3/10hz/')

    #打开信号文件 并 合并
    person_0 = np.concatenate([load_data('real_data/eeg_11.24/circle/1/10hz/' + filename) for filename in person_0_filenames] , axis = 0)
    person_1 = np.concatenate([load_data('real_data/eeg_11.24/circle/2/10hz/' + filename) for filename in person_1_filenames] , axis = 0)
    person_2 = np.concatenate([load_data('real_data/eeg_11.24/circle/3/10hz/' + filename) for filename in person_2_filenames] , axis = 0)

    person_0_train , person_0_val = train_val(person_0)
    person_1_train , person_1_val = train_val(person_1)
    person_2_train , person_2_val = train_val(person_2)
    
    '''++++'''
    #数据分段阶段
    
    #============
    #训练数据分段
    train_person_data_0 , train_person_labels_0 = separate(person_0_train , label = 0 , overlap_length=overlap_length)
    train_person_data_1 , train_person_labels_1 = separate(person_1_train , label = 1 , overlap_length=overlap_length)
    train_person_data_2 , train_person_labels_2 = separate(person_2_train , label = 2 , overlap_length=overlap_length)

    #合并数据
    #train_data = np.concatenate((train_person_data_0 , train_person_data_1 , train_person_data_2))
    #train_labels = np.concatenate((train_person_labels_0 , train_person_labels_1 , train_person_labels_2))
    
    #============
    #验证数据分段
    val_person_data_0 , val_person_labels_0 = separate(person_0_val , label = 0 , overlap_length=0)
    val_person_data_1 , val_person_labels_1 = separate(person_1_val , label = 1 , overlap_length=0)
    val_person_data_2 , val_person_labels_2 = separate(person_2_val , label = 2 , overlap_length=0)
    
    #合并数据
    val_data = np.concatenate((val_person_data_0 , val_person_data_1 , val_person_data_2))
    val_labels = np.concatenate((val_person_labels_0 , val_person_labels_1 , val_person_labels_2))

    '''++++'''
    
    return [train_person_data_0 , train_person_data_1 , train_person_data_2] , val_data , val_labels
    

In [296]:
def get_ovo_idx(data):
    '''
    获取不同label的数据组合情况 ovo方式
    '''
    ovo_idx = []

    for i in range(len(data) - 1):
        for j in range(i+1 , len(data)):
            ovo_idx.append( (i , j) )
         
    return ovo_idx

In [297]:
def get_spatial_filter(data):
    '''
    only tasks_train
    只从训练数据中产生spatial filter matrix
    '''
    # ovo方式 需要两两信号的空间滤波器
    ovo_idx = get_ovo_idx(data) #获取两两组合的信号索引对
    sf_s = [] #多少个两两信号对 就有多少的空间滤波器
    
    for i , j in ovo_idx:
        sf = CSP(data[i] , data[j])
        
        sf_s.append(sf)
        
    return sf_s #3*channels*channels

In [341]:
'''
丢弃虚部
'''

def CSP_feature(data , sf , label):
    '''
    训练数据使用
    '''
    m = 2 # or 3
    double_m = 2*m # 4 or 6
    
    sum_var_z_iterator = 0
    sum_var_z = []
    
    z = []
    
    for datum in data:
        var = np.var(np.dot(sf , datum.T).T.real) #768*16 > 1
        
        if double_m > 0:
            sum_var_z_iterator = sum_var_z_iterator + var
            
            sum_var_z.append(sum_var_z_iterator)
            
            double_m = double_m - 1
        
        z.append(var) #只取实部
        
    z = np.array(z)
    
    z_csp = []
    
    for var_z in sum_var_z:
        z_csp.append(z/var_z)
       
    z_csp = np.array(z_csp) #4*mm mm为样本数量
    
    z_csp = z_csp.T #mm*4
    
    #传入机器学习算法
    return np.log(z_csp) , np.array([label] * len(data))


def CSP_feature_v(data , sf):
    '''
    验证数据使用
    '''
    m = 2 # or 3
    double_m = 2*m # 4 or 6
    
    sum_var_z_iterator = 0
    sum_var_z = []
    
    z = []
    
    for datum in data:
        var = np.var(np.dot(sf , datum.T).T.real) #768*16 > 1
        
        if double_m > 0:
            sum_var_z_iterator = sum_var_z_iterator + var
            
            sum_var_z.append(sum_var_z_iterator)
            
            double_m = double_m - 1
        
        z.append(var) #只取实部
    
    z = np.array(z)
    
    z_csp = []
    
    for var_z in sum_var_z:
        z_csp.append(z/var_z)
       
    z_csp = np.array(z_csp) #4*mm mm为样本数量
    
    z_csp = z_csp.T #mm*4
            
    #传入机器学习算法
    return np.log(z_csp)

In [317]:
def feature_extraction_CSP(data , sf_s):
    '''
    训练数据使用
    :data: 数据
    :sf_s: spatial filter列表（ovo形式）
    '''
    ovo_idx = get_ovo_idx(data)
    z_x = {}
      
    for sf_idx , (i , j) in enumerate(ovo_idx):
        #两个脑电信号数据向共同的空间滤波器 投影
        #标签使用各自的索引
        z_i , labels_i = CSP_feature(data[i] , sf_s[sf_idx] , i)
        z_j , labels_j = CSP_feature(data[j] , sf_s[sf_idx] , j)
        
        z = np.concatenate((z_i , z_j))
        labels = np.concatenate((labels_i , labels_j))
        
        #数据置乱
        idx_z = list(range(z.shape[0]))
        np.random.shuffle(idx_z)

        #将训练数据置乱
        z = z[idx_z]
        labels = labels[idx_z]
        
        z_x[(i , j)] = (z , labels)
        
    return z_x

def feature_extraction_CSP_v(data , sf_s):
    '''
    测试数据使用
    :data: 数据
    :sf_s: spatial filter列表（ovo形式）
    '''
    ovo_idx = get_ovo_idx(tasks_train) #勿修改
    
    z_v = {}
      
    for sf_idx , (i , j) in enumerate(ovo_idx):
        z = CSP_feature_v(data , sf_s[sf_idx])
        
        z_v[(i , j)] = z
        
    return z_v

In [318]:
def ovo_train(Classifier_Meta_Class , z_x):
    '''
    :Classifier_Meta_Class: 分类器元类（构造函数）
    :z_x: 向空间滤波器投影后的训练数据
    '''
    # 有几个信号对 就训练多少分类器
    models = {}
    
    for key , _ in z_x.items():
        model = Classifier_Meta_Class() #创建一个分类器
        
        model.fit(z_x[key][0] , z_x[key][1])
        
        models[key] = model
    
    return models

def ovo_predict(models , z_v):
    '''
    :models: 训练好的模型
    :z_v: CSP特征的验证集
    '''
    
    labels_hat = {}
    
    for key , _ in models.items():
        labels_hat[key] = models[key].predict(z_v[key])
            
    labels_mat = np.concatenate( [np.expand_dims(value , axis=0) for _ , value in labels_hat.items()] )
    
    return np.array( [ np.argmax( np.bincount(labels_mat[: , j]) ) for j in range(labels_mat.shape[1]) ] )
    
def con_mat_v(_real_labels , _labels):
    '''
    打印训练结果(CSP edition)
    '''
    print('CSP')
    print('val score:%f' % ( np.sum(np.equal(_real_labels , _labels)) / len(_real_labels) ) )
    print('real')
    
    print(confusion_matrix(_real_labels , _labels))
    print(classification_report(_real_labels , _labels))
    


In [342]:
tasks_train , val_data , val_labels = combine_CSP()

In [343]:
sf_s = get_spatial_filter(tasks_train) #由训练数据获取空间滤波器

In [344]:
#训练数据 投影
z_x = feature_extraction_CSP(tasks_train , sf_s) #训练数据向空间滤波器投影

In [345]:
#验证阶段

#验证数据 投影
z_v = feature_extraction_CSP_v(val_data , sf_s)

In [346]:
classifiers_meta_class = [xgboost.XGBClassifier , lightgbm.LGBMClassifier , GradientBoostingClassifier ,
                          RandomForestClassifier , SVC , DecisionTreeClassifier , LDA , AdaBoostClassifier ,
                          MLPClassifier , GaussianNB , KNeighborsClassifier ]

def ensemble_voting_CSP_train(data):
    '''
    :data: 训练数据 向空间滤波器投影后的数据 z_x
    '''
    models = []
    
    for classifier_meta_class in classifiers_meta_class:
        model = ovo_train(classifier_meta_class , data)
        
        models.append(model)
        
    return models

def ensemble_voting_CSP_predict(data , models):
    '''
    :data: 验证数据 向空间滤波器投影后的数据 z_v
    '''
    
    labels = []
    
    for model in models:
        label = ovo_predict(model , data)
        
        labels.append(label)
    
    labels_mat = np.concatenate( [ np.expand_dims(value , axis=0) for value in labels ] )
    
    return np.array( [ np.argmax( np.bincount(labels_mat[: , j]) ) for j in range(labels_mat.shape[1]) ] )
    

In [347]:
models = ensemble_voting_CSP_train(z_x)

In [348]:
z_v_y = ensemble_voting_CSP_predict(z_v , models)

In [349]:
con_mat_v(val_labels , z_v_y)

CSP
val score:0.416667
real
[[0 0 2]
 [0 0 5]
 [0 0 5]]
             precision    recall  f1-score   support

          0       0.00      0.00      0.00         2
          1       0.00      0.00      0.00         5
          2       0.42      1.00      0.59         5

avg / total       0.17      0.42      0.25        12



# method 4 rms feature

In [286]:
def feature_extraction_RMS(data):
    def rms(datum):
        '''
        :datum: 一段信号 shape : (3*256) * 16
        '''
        return [ np.sqrt(np.mean(np.square( d ))) for d in datum.T ]
    
    feature_rms = []
    
    for datum in data:
        feature_rms.append(rms(datum))
    
    return np.array(feature_rms)

# feature selection （CSP不使用）

# classify step
# ensemble voting

In [98]:
def con_mat(_feature , _labels , model):
    '''
    打印训练结果
    '''
    
    print('val score:%f' % model.score(_feature , _labels))
    print('real')
    
    print(confusion_matrix(_labels , model.predict(_feature)))
    print(classification_report(_labels , model.predict(_feature)))
    


In [99]:
def feature_selection(data , labels , model , num_features , cv=10):
    '''
    :model: classify model
    :num_features: features count you expect(integer or tuple)
    '''
    
    '''[8 20]'''
    
    sfs = SequentialFeatureSelector(model , k_features=num_features , cv=cv , verbose = 2 , n_jobs=-1) #all cpu cores
    
    sfs.fit(data , labels)
    
    #最优秀的特征索引
    return sfs.k_feature_idx_



def choose_common_feature_idx(num_features = (8 , 20) , num_features_threshold = 8):
    '''
    sub_band_power使用该函数 进行筛选特征
    
    :num_features:integer or tuple 期望的特征数量（待选择的数量）
    :min_num_features: 特征数量阈值 小于时 停止选择
    选择适合所有分类器的特征索引值
    集合 与 运算
    '''
    
    feature_idxes = set(list(range(train_X.shape[1]))) #初始化为所有的特征索引值
    
    for classifier in classifiers:
        
        idx = feature_selection(train_X , train_y , classifier , num_features)
        idx = set(idx)
        
        #寻找共同的特征索引
        #寻找之前先测试 如果小于阈值 直接停止
        if len(feature_idxes & idx) < num_features_threshold:
            break
            
        feature_idxes = feature_idxes & idx
        
    return np.array(list(feature_idxes))

In [100]:
#初始化所有分类器
svc = SVC(probability=True)
rf =  RandomForestClassifier()
adaboost = AdaBoostClassifier()
xgb = xgboost.XGBClassifier()
lgbm = lightgbm.LGBMClassifier()
gbc = GradientBoostingClassifier()
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier()
lda = LDA()
nb = GaussianNB()
mlp = MLPClassifier()

classifiers = [xgb , lgbm , gbc , rf , svc , dt , lda , adaboost , mlp , nb , knn ]

In [41]:
chosen_idx = choose_common_feature_idx()

[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   14.3s
[Parallel(n_jobs=-1)]: Done 128 out of 128 | elapsed:   19.1s finished

[2018-12-20 15:03:46] Features: 1/20 -- score: 0.861111111111111[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   13.7s
[Parallel(n_jobs=-1)]: Done 127 out of 127 | elapsed:   19.5s finished

[2018-12-20 15:04:07] Features: 2/20 -- score: 0.9222222222222222[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   14.7s
[Parallel(n_jobs=-1)]: Done 126 out of 126 | elapsed:   21.3s finished

[2018-12-20 15:04:30] Features: 3/20 -- score: 0.9333333333333332[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   14.4s
[Parallel(n_jobs=-1)]: Done 125 out of 125 | elapsed:   21.1s finished

[2018-12-20 15:04:52] Features: 4/20 -- score: 0.9472222222222222[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   14.1s
[Parallel(n_jobs=-1)]: Done 124 out of 124 | elapsed:   21.6s finished

[2018-12-20 15:05:15] Features: 5/20 -- score: 0.9583333333333334[Paral

In [42]:
chosen_idx

array([ 2, 34, 10, 11, 74, 13, 53, 22])

In [101]:
#*****************
#*****************
chosen_idx=np.array(list(range(train_X.shape[1])))
#*****************
#*****************

In [102]:
train_X_chosen = train_X[: , chosen_idx]

In [103]:
train_X_chosen.shape

(1500, 128)

In [104]:
voting = VotingClassifier(estimators=[('xgboost' , xgb) , ('lightgbm' , lgbm) , ('gradientboosting' , gbc) ,
                             ('randomforest' , rf) , ('svm' , svc) , ('decisiontree' , dt) , ('lda' , lda) ,
                             ('adaboost' , adaboost) , ('mlp' , mlp) , ('naive bayes' , nb) , ('kneighbors' , knn)] ,
                voting='soft' , n_jobs=-1)

In [105]:
voting.fit(train_X_chosen , train_y)

VotingClassifier(estimators=[('xgboost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', ra...owski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'))],
         n_jobs=-1, voting='soft', weights=None)

In [107]:
val_X_chosen = val_X[: , chosen_idx]

In [108]:
val_X_chosen.shape

(50, 128)

In [109]:
con_mat(val_X_chosen , val_y , voting)

val score:0.880000
real
[[5 0 0 0 0 0 0 0 0 0]
 [0 5 0 0 0 0 0 0 0 0]
 [0 0 4 0 0 0 1 0 0 0]
 [0 0 0 5 0 0 0 0 0 0]
 [0 0 0 0 3 1 0 1 0 0]
 [0 0 0 0 0 5 0 0 0 0]
 [0 0 0 0 0 0 5 0 0 0]
 [0 0 0 0 1 1 0 3 0 0]
 [0 0 0 0 0 0 0 1 4 0]
 [0 0 0 0 0 0 0 0 0 5]]
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         5
          1       1.00      1.00      1.00         5
          2       1.00      0.80      0.89         5
          3       1.00      1.00      1.00         5
          4       0.75      0.60      0.67         5
          5       0.71      1.00      0.83         5
          6       0.83      1.00      0.91         5
          7       0.60      0.60      0.60         5
          8       1.00      0.80      0.89         5
          9       1.00      1.00      1.00         5

avg / total       0.89      0.88      0.88        50



In [49]:
'''测试阶段 进行分类器的测试 使用测试数据'''

test_data , test_labels = combine_test()

test_X = feature_extraction_sub_band_power(test_data)

#test_X = feature_extraction_dwt(test_data) #效果差


'''测试阶段 进行分类器的测试 使用测试数据'''

'测试阶段 进行分类器的测试 使用测试数据'

In [50]:
test_X_chosen = test_X[: , chosen_idx]

In [51]:
test_X_chosen.shape

(168, 8)

In [52]:
con_mat(test_X_chosen , test_labels , voting)

val score:0.636905
real
[[52  2  2]
 [ 1 55  0]
 [ 0 56  0]]
             precision    recall  f1-score   support

          0       0.98      0.93      0.95        56
          1       0.49      0.98      0.65        56
          2       0.00      0.00      0.00        56

avg / total       0.49      0.64      0.54       168



In [192]:
voting.predict(test_X_chosen)

array([2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2,
       0, 2, 2, 1, 0, 2, 1, 2, 2, 2, 0, 1, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 0, 2, 1, 2, 2, 1, 2, 1, 2, 2, 0,
       2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 0, 2,
       0, 2, 1, 1, 2, 2, 2, 2, 1, 2, 0, 2, 2, 1, 1, 2, 1, 0, 1, 0, 1, 1,
       1, 2, 2, 2, 2, 1, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1])

In [193]:
test_labels

array([0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1,
       0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0])

### rms voting

In [777]:
voting.fit(feature_extraction_RMS(train_X_) , train_y)

VotingClassifier(estimators=[('xgboost', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', ra...owski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'))],
         n_jobs=-1, voting='soft', weights=None)

In [778]:
con_mat(feature_extraction_RMS(val_X_) , val_y , voting)

val score:1.000000
real
[[2 0 0]
 [0 5 0]
 [0 0 5]]
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         2
          1       1.00      1.00      1.00         5
          2       1.00      1.00      1.00         5

avg / total       1.00      1.00      1.00        12



# simple classifier test

## SVM

In [503]:
svc.fit(train_X[ : , idx_good] , train_y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [224]:
con_mat(val_X[: , idx_good] , val_y , svc)

val score:0.916667
real
[[1 0 1]
 [0 5 0]
 [0 0 5]]
             precision    recall  f1-score   support

          0       1.00      0.50      0.67         2
          1       1.00      1.00      1.00         5
          2       0.83      1.00      0.91         5

avg / total       0.93      0.92      0.91        12



## RandomForest

In [226]:
rf.fit(train_X[: , idx_good] , train_y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [228]:
con_mat(val_X[: , idx_good] , val_y , rf)

val score:0.916667
real
[[1 0 1]
 [0 5 0]
 [0 0 5]]
             precision    recall  f1-score   support

          0       1.00      0.50      0.67         2
          1       1.00      1.00      1.00         5
          2       0.83      1.00      0.91         5

avg / total       0.93      0.92      0.91        12



## Adaboost

In [231]:
adaboost.fit(train_X[: , idx_good] , train_y)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None,
          learning_rate=1.0, n_estimators=50, random_state=None)

In [232]:
con_mat(val_X[: , idx_good] , val_y , adaboost)

val score:0.833333
real
[[0 0 2]
 [0 5 0]
 [0 0 5]]
             precision    recall  f1-score   support

          0       0.00      0.00      0.00         2
          1       1.00      1.00      1.00         5
          2       0.71      1.00      0.83         5

avg / total       0.71      0.83      0.76        12



  'precision', 'predicted', average, warn_for)


## XGBoost

In [121]:
xgb.fit(train_X , train_y)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [122]:
con_mat(val_X , val_y , xgb)

val score:0.583333
real
[[2 0 0]
 [0 5 0]
 [5 0 0]]
             precision    recall  f1-score   support

          0       0.29      1.00      0.44         2
          1       1.00      1.00      1.00         5
          2       0.00      0.00      0.00         5

avg / total       0.46      0.58      0.49        12



  if diff:
  if diff:
  if diff:
  'precision', 'predicted', average, warn_for)


## LightGBM

In [124]:
lgbm.fit(train_X , train_y)

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        importance_type='split', learning_rate=0.1, max_depth=-1,
        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
        n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
        subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [125]:
con_mat(val_X , val_y , lgbm)

val score:0.583333
real
[[2 0 0]
 [0 5 0]
 [5 0 0]]
             precision    recall  f1-score   support

          0       0.29      1.00      0.44         2
          1       1.00      1.00      1.00         5
          2       0.00      0.00      0.00         5

avg / total       0.46      0.58      0.49        12



  if diff:
  if diff:
  if diff:
  'precision', 'predicted', average, warn_for)


## GradientBoosting

In [127]:
gbc.fit(train_X , train_y)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_split=1e-07, min_samples_leaf=1,
              min_samples_split=2, min_weight_fraction_leaf=0.0,
              n_estimators=100, presort='auto', random_state=None,
              subsample=1.0, verbose=0, warm_start=False)

In [128]:
con_mat(val_X , val_y , gbc)

val score:0.583333
real
[[2 0 0]
 [0 5 0]
 [5 0 0]]
             precision    recall  f1-score   support

          0       0.29      1.00      0.44         2
          1       1.00      1.00      1.00         5
          2       0.00      0.00      0.00         5

avg / total       0.46      0.58      0.49        12



  'precision', 'predicted', average, warn_for)


## KNeighbors

In [130]:
knn.fit(train_X , train_y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [131]:
con_mat(val_X , val_y , knn)

val score:0.416667
real
[[0 2 0]
 [0 5 0]
 [0 5 0]]
             precision    recall  f1-score   support

          0       0.00      0.00      0.00         2
          1       0.42      1.00      0.59         5
          2       0.00      0.00      0.00         5

avg / total       0.17      0.42      0.25        12



  'precision', 'predicted', average, warn_for)


## DecisionTree

In [133]:
dt.fit(train_X , train_y)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=None, splitter='best')

In [134]:
con_mat(val_X , val_y , dt)

val score:0.583333
real
[[2 0 0]
 [0 5 0]
 [5 0 0]]
             precision    recall  f1-score   support

          0       0.29      1.00      0.44         2
          1       1.00      1.00      1.00         5
          2       0.00      0.00      0.00         5

avg / total       0.46      0.58      0.49        12



  'precision', 'predicted', average, warn_for)


## LDA

In [136]:
lda.fit(train_X , train_y)



LDA(n_components=None, priors=None, shrinkage=None, solver='svd',
  store_covariance=False, tol=0.0001)

In [137]:
con_mat(val_X , val_y , lda)

val score:1.000000
real
[[2 0 0]
 [0 5 0]
 [0 0 5]]
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         2
          1       1.00      1.00      1.00         5
          2       1.00      1.00      1.00         5

avg / total       1.00      1.00      1.00        12



## Naive Bayes

In [139]:
nb.fit(train_X , train_y)

GaussianNB(priors=None)

In [140]:
con_mat(val_X , val_y , nb)

val score:0.416667
real
[[0 2 0]
 [0 5 0]
 [0 5 0]]
             precision    recall  f1-score   support

          0       0.00      0.00      0.00         2
          1       0.42      1.00      0.59         5
          2       0.00      0.00      0.00         5

avg / total       0.17      0.42      0.25        12



  'precision', 'predicted', average, warn_for)


## MLP

In [142]:
mlp.fit(train_X , train_y)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [143]:
con_mat(val_X , val_y , mlp)

val score:0.416667
real
[[0 2 0]
 [0 5 0]
 [0 5 0]]
             precision    recall  f1-score   support

          0       0.00      0.00      0.00         2
          1       0.42      1.00      0.59         5
          2       0.00      0.00      0.00         5

avg / total       0.17      0.42      0.25        12



  'precision', 'predicted', average, warn_for)


# 匹配法
 
## 前向特征选择之后 使用匹配法进行

In [446]:
def train_val(data , ratio = 0.9):
    '''
    将数据分为 训练集 和 验证集
    '''
    
    seg = int(ratio * data.shape[0])
    
    return data[ : seg] , data[seg : ]


def combine_match():
    '''
    匹配法中使用
    训练数据与验证数据
    数据不进行置乱
    '''
    ratio = 0.9 #训练集的占比
    overlap_length = 2*256 #重叠2秒数据
    
    person_0_filenames = os.listdir('real_data/eeg_11.24/circle/1/10hz/')
    person_1_filenames = os.listdir('real_data/eeg_11.24/circle/2/10hz/')
    person_2_filenames = os.listdir('real_data/eeg_11.24/circle/3/10hz/')

    #打开信号文件 并 合并
    person_0 = np.concatenate([load_data('real_data/eeg_11.24/circle/1/10hz/' + filename) for filename in person_0_filenames] , axis = 0)
    person_1 = np.concatenate([load_data('real_data/eeg_11.24/circle/2/10hz/' + filename) for filename in person_1_filenames] , axis = 0)
    person_2 = np.concatenate([load_data('real_data/eeg_11.24/circle/3/10hz/' + filename) for filename in person_2_filenames] , axis = 0)

    person_0_train , person_0_val = train_val(person_0)
    person_1_train , person_1_val = train_val(person_1)
    person_2_train , person_2_val = train_val(person_2)
    
    #数据分段阶段
    
    #============
    #训练数据分段
    train_person_data_0 , train_person_labels_0 = separate(person_0_train , label = 0 , overlap_length=overlap_length)
    train_person_data_1 , train_person_labels_1 = separate(person_1_train , label = 1 , overlap_length=overlap_length)
    train_person_data_2 , train_person_labels_2 = separate(person_2_train , label = 2 , overlap_length=overlap_length)

    #合并数据
    train_data = [train_person_data_0 , train_person_data_1 , train_person_data_2]
    train_labels = [train_person_labels_0 , train_person_labels_1 , train_person_labels_2]
    
    #============
    #验证数据分段
    val_person_data_0 , val_person_labels_0 = separate(person_0_val , label = 0 , overlap_length=0)
    val_person_data_1 , val_person_labels_1 = separate(person_1_val , label = 1 , overlap_length=0)
    val_person_data_2 , val_person_labels_2 = separate(person_2_val , label = 2 , overlap_length=0)
    
    #合并数据
    val_data = [val_person_data_0 , val_person_data_1 , val_person_data_2]
    val_labels = [val_person_labels_0 , val_person_labels_1 , val_person_labels_2]

    return train_data , train_labels , val_data , val_labels


def combine_test_m():
    '''
    匹配使用
    测试模型效果
    不引
    '''
    
    person_0_filenames = os.listdir('real_data/model_validation/circle/1/10hz/')
    person_1_filenames = os.listdir('real_data/model_validation/circle/2/10hz/')

    #打开信号文件 并 合并
    person_0 = np.concatenate([load_data('real_data/model_validation/circle/1/10hz/' + filename) for filename in person_0_filenames] , axis = 0)
    person_1 = np.concatenate([load_data('real_data/model_validation/circle/2/10hz/' + filename) for filename in person_1_filenames] , axis = 0)
    
    #============
    #训练数据分段
    test_person_data_0 , test_person_labels_0 = separate(person_0 , label = 0 , overlap_length=0)
    test_person_data_1 , test_person_labels_1 = separate(person_1 , label = 1 , overlap_length=0)

    #合并数据
    test_data = [test_person_data_0 ,   test_person_data_1] 
    test_labels = [test_person_labels_0 , test_person_labels_1]
    
    return test_data , test_labels


def con_mat_m(_real_labels , _labels):
    '''
    匹配方法使用 可视化
    :_real_labels: 真实label
    :_labels: 预测的label
    '''
    
    print('match')
    print('val score:%f' % ( np.sum( np.equal( np.concatenate(_real_labels) , np.concatenate(_labels) ) ) / len( np.concatenate(_labels) ) ) )
    print('real')
    
    print(confusion_matrix( np.concatenate(_real_labels) , np.concatenate(_labels) ))
    print(classification_report( np.concatenate(_real_labels) , np.concatenate(_labels) ))

In [396]:
database_data , database_labels , oob_data , oob_labels = combine_match()

## method 1 : sub-band power

In [408]:
#3 个元素 每个元素为一个受试者的特征信息
database_data_sbp = [ feature_extraction_sub_band_power(database_datum) for database_datum in database_data ]

oob_data_sbp = [ feature_extraction_sub_band_power(oob_datum) for oob_datum in oob_data ]

In [426]:
#相似度函数 越相似 相似度越小（为了容易使用欧氏距离与曼哈顿距离定义相似度） 直接使用距离来度量相似性

def match(database_data_sbp , oob_data_sbp , distance_type = 'c'):
    '''
    :database_data_sbp: 充当数据库
    :oob_data_sbp: 来进行匹配的
    '''
    
    distances = [] 
    
    if distance_type == 'o':
        #欧氏距离
        distance_method = euclidean_distances
    elif distance_type == 'm':
        #曼哈顿距离
        distance_method = manhattan_distances
    elif distance_type == 'c':
        #余弦距离
        distance_method = cosine_distances
    else:
        pass
    
    labels = [] #数据库中匹配的标记
    
    for oob_datum_sbp in oob_data_sbp: #
        
        labels_one_subject = []
        for oob_datum_sbp_sub in oob_datum_sbp:
            #进行匹配的 逐个计算
            distance = []
            for database_datum_sbp in database_data_sbp:
                #与每一个受试者的特征进行距离计算
                distance.append( np.mean( distance_method(oob_datum_sbp_sub , database_datum_sbp) ) ) #距离的平均值
                
            label = np.argmin(distance)
            
            labels_one_subject.append(label)
        
        labels.append(labels_one_subject)
    
    return labels

In [427]:
oob_labels_hat = match(database_data_sbp , oob_data_sbp)

In [428]:
oob_labels_hat

[[0, 0], [1, 1, 1, 1, 1], [2, 2, 2, 0, 2]]

In [429]:
oob_labels

[array([0, 0]), array([1, 1, 1, 1, 1]), array([2, 2, 2, 2, 2])]

In [445]:
#验证
con_mat_m(oob_labels , oob_labels_hat)

match
val score:0.916667
real
[[2 0 0]
 [0 5 0]
 [1 0 4]]
             precision    recall  f1-score   support

          0       0.67      1.00      0.80         2
          1       1.00      1.00      1.00         5
          2       1.00      0.80      0.89         5

avg / total       0.94      0.92      0.92        12



In [447]:
#测试
oob_test_data , oob_test_labels = combine_test_m()

oob_test_data_sbp = [feature_extraction_sub_band_power(oob_test_datum) for oob_test_datum in oob_test_data]

In [448]:
oob_test_labels_hat = match(database_data_sbp , oob_test_data_sbp)

In [449]:
con_mat_m(oob_test_labels , oob_test_labels_hat)

match
val score:0.937500
real
[[49  3  4]
 [ 0 56  0]
 [ 0  0  0]]
             precision    recall  f1-score   support

          0       1.00      0.88      0.93        56
          1       0.95      1.00      0.97        56
          2       0.00      0.00      0.00         0

avg / total       0.97      0.94      0.95       112



## method 2 : DWT

## method 3 : RMS

In [168]:
class EEG(gp.data.Dataset):
    def __init__(self , base_dir='none' , identifier='none' , **kwargs):
        super(EEG, self).__init__(**kwargs)
        
        self.base_dir = 'none'
        self.data_id = 'none'
        self.data_dir = 'none'
        self.data_type = 'EEG'
        self.data_name = 'EEG'
        
        
        # length of a trial (in seconds)
        self.trial_len = trial_time
        # motor imagery appears in interval (in seconds)
        self.mi_interval = [start_trial_time , end_trial_time]
        # idle perior prior to start of signal (in seconds)
        self.trial_offset = trial_offset
        # total length of a trial (in seconds)
        self.trial_total = self.trial_len
        # sampling frequency (in Hz)
        self.sampling_freq = sample_rate #gtec为256
    
    def _sep(self , data , label):
        train_data = []
        train_labels = []

        size = self.sampling_freq * self.trial_len
        data_length = data.shape[0]

        idx = 0
        
        while idx<data_length-size:
            train_data.append(data[idx : idx+size , :])
            train_labels.append(label)

            #no overlap
            idx = idx + size
        
        return np.array(train_data) , np.array(train_labels)
    
    def _butter_worth(self , data):
        b,a = scipy.signal.butter(4 , [LO_FREQ / sample_rate , HI_FREQ / sample_rate] , btype='bandpass')
    
        return np.array([scipy.signal.filtfilt(b , a , data[: , i]) for i in range(data.shape[1])]).reshape((-1 , origin_channel))
    
    def _notch(self , data):
        cutoff = NOTCH_FREQ #50
        Q = 30

        nyq = 0.5 * sample_rate
        w0 = cutoff / nyq

        b , a = scipy.signal.iirnotch(w0 , Q)

        return np.array([scipy.signal.filtfilt(b , a , data[: , i]) for i in range(data.shape[1])]).reshape((-1 , origin_channel))
    
    def _normalize(self , data):
        mean = np.mean(data, axis=0)
        std_dev = np.std(data, axis=0)
        
        return (data - mean) / std_dev
    
    def _load_data(self , filename):
        data = sio.loadmat(file_name=filename)['data_received'] #length*16 matrix
        
        #此通道没有采集 置为0
        #全通道均使用时 不需要
        #for i in range(len(SAMPLE_CHANNEL)):
        #    if SAMPLE_CHANNEL[i] == 'none':
        #        data[: , i] = 0.0
        
        #删除前x秒和后x秒数据
        data = data[CLIP_FORWARD * self.sampling_freq : - CLIP_BACKWARD * self.sampling_freq]
                
        return data
    
    def _concatenate(self , eeg_matrix):
        return np.concatenate(eeg_matrix , axis=0)
    
    def _combine(self):
        person_0_filenames = os.listdir('real_data/eeg_11.24/circle/1/10hz/')
        person_1_filenames = os.listdir('real_data/eeg_11.24/circle/2/10hz/')
        person_2_filenames = os.listdir('real_data/eeg_11.24/circle/3/10hz/')

        person_0 = [self._load_data('real_data/eeg_11.24/circle/1/10hz/' + filename) for filename in person_0_filenames]
        person_1 = [self._load_data('real_data/eeg_11.24/circle/2/10hz/' + filename) for filename in person_1_filenames]
        person_2 = [self._load_data('real_data/eeg_11.24/circle/3/10hz/' + filename) for filename in person_2_filenames]

        person_0 = self._concatenate(person_0)
        person_1 = self._concatenate(person_1)
        person_2 = self._concatenate(person_2)
       
        #数据分段
        train_person_data_0 , train_person_labels_0 = self._sep(person_0 , label = 0)
        train_person_data_1 , train_person_labels_1 = self._sep(person_1 , label = 1)
        train_person_data_2 , train_person_labels_2 = self._sep(person_2 , label = 2)

        #产生索引并置乱
        idx_train_data = list(range(len(train_person_data_0) + len(train_person_data_1)+ len(train_person_data_2)))
        
        np.random.shuffle(idx_train_data)

        #合并数据
        train_data = np.concatenate((train_person_data_0 , train_person_data_1 , train_person_data_2))
        train_labels = np.concatenate((train_person_labels_0 , train_person_labels_1 , train_person_labels_2))

        #将训练数据打乱
        train_data = train_data[idx_train_data]
        train_labels = train_labels[idx_train_data] ##self.labels
        
        #只使用下面的成员
        self.data = train_data
        self.labels = train_labels
                
        #=================
        #=================
        #合并为sequence
        train_data_sequence = np.reshape(train_data , newshape=(-1 , origin_channel)) #length*channel

        train_data_count = train_data.shape[0]

        #trial索引
        trial_idx = np.array([i*(self.sampling_freq * self.trial_len) for i in range(train_data_count)]) ##self.trial
        
        #构建子类成员
        self.raw_data = train_data_sequence
        self.trials = trial_idx
        
        #=================
        
        
    def load(self, **kwargs):
        self._combine()

#引用方式       
dataset = EEG()
dataset.load()