In [2]:
import gumpy as gp
import numpy as np
import pywt

#绘制confusion matrix heatmap
import seaborn as sns

import sklearn
import os

import warnings

warnings.simplefilter('ignore') #忽略警告

In [3]:
import scipy
import scipy.io as sio

from scipy import linalg

import pandas as pd

#分类器
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.lda import LDA
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

import xgboost
import lightgbm

#模型集成
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from mlxtend.classifier import StackingClassifier

#模型调节
from sklearn.model_selection import GridSearchCV #参数搜索
from mlxtend.feature_selection import SequentialFeatureSelector #特征选择函数 选择合适的feature

#结果可视化
from sklearn.metrics import classification_report , confusion_matrix #混淆矩阵

#相关指标
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score


from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score

from sklearn.preprocessing import label_binarize

#二分类其多分类化
#from sklearn.multiclass import OneVsOneClassifier
#from sklearn.multiclass import OneVsRestClassifier

#from sklearn.preprocessing import StandardScaler
#from sklearn.cluster import KMeans

#距离函数 度量向量距离
from sklearn.metrics.pairwise import manhattan_distances
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_distances
from sklearn.metrics.pairwise import cosine_similarity #余弦相似度

#one-hot使用
#from keras.utils import to_categorical

#绘图
import matplotlib.pyplot as plt

import scipy.linalg as la

import gc

%matplotlib inline

In [4]:
sample_rate = 256 #hz
origin_channel = 16 #5 channel eeg

#采集的通道
#共16 channel
#未使用的channel使用none代替
#reference:a study on performance increasing in ssvep based bci application
SAMPLE_CHANNEL = ['Pz' , 'PO3' , 'PO4' , 'O1' , 'O2' , 'Oz' , 'O9' , 'FP2' ,
                  'C4' , 'C6' , 'CP3' , 'CP1' ,
                  'CPZ' , 'CP2' , 'CP4' , 'PO8']

LABEL2STR = {0:'sen' , 1:'hong' , 2:'zhao',
             3:'fen' , 4:'xiao' , 5:'yu' , 
             6:'bin' , 7:'wang' , 8:'wei' , 
             9:'fei'}

# 减去前多少秒数据 second
# 减去后多少秒数据 second
CLIP_FORWARD = 2
CLIP_BACKWARD = 1

# 单个小段的实验时长
trial_time = 3 #second

trial_offset = 0 #second
start_trial_time = 0 #真正的实验开始时刻
end_trial_time = 2 #真正的实验结束时刻(<trial_time)

#是否进行归一化
#reference:a study on performance increasing in ssvep based bci application
#IS_NORMALIZE = True

#是否进行滤波
#IS_FILTER = False
#EEG频率范围
#reference:a study on performance increasing in ssvep based bci application
LO_FREQ = 0.5
HI_FREQ = 40

#是否陷波
#IS_NOTCH = False
NOTCH_FREQ = 50 #陷波 工频



# load data step

In [5]:
# defined

def butter_worth(data , lowcut=0.5 , highcut=40 , order=6):
    nyq = 0.5 * sample_rate
    
    lo = lowcut / nyq
    hi = highcut / nyq
    
    b,a = scipy.signal.butter(order , [lo , hi] , btype='bandpass')

    return np.array([scipy.signal.filtfilt(b , a , data[: , i]) for i in range(data.shape[1])]).reshape((-1 , origin_channel))

In [6]:
def load_data(filename):
    
    extra_overlap = 2000
    
    data = sio.loadmat(file_name=filename)['data_received'] #length*16 matrix

    #此通道没有采集 置为0
    #全通道均使用时 不需要
    #for i in range(len(SAMPLE_CHANNEL)):
    #    if SAMPLE_CHANNEL[i] == 'none':
    #        data[: , i] = 0.0

    #删除前x秒和后x秒数据
    
    
    #是否进行裁剪 【如果进行裁剪 由于sen的第一次数据 将extra_overlap调整为1500】
    data = data[CLIP_FORWARD * sample_rate : - CLIP_BACKWARD * sample_rate]
        
    #data = np.concatenate((data , data[ -extra_overlap : , :]) , axis=0)
    
    data_filter = butter_worth(data , 0.5 , 40 , 3)
    
    return data_filter[extra_overlap : , :] #将边界效应去掉

    #return butter_worth(data)

In [7]:
def separate(data , label , overlap_length = 128):
    '''
    最长重叠长度为size长 256*3 个数据点
    '''
    train_data = []
    train_labels = []

    size = sample_rate * trial_time #一小段 256*3 个数据点
    data_length = data.shape[0]

    idx = 0

    while idx<data_length-size:
        train_data.append(data[idx : idx+size , :])
        train_labels.append(label)

        idx = idx + (size - overlap_length)

    return np.array(train_data) , np.array(train_labels)

In [8]:
def train_val(data , ratio = 0.9):
    '''
    将数据分为 训练集 和 验证集
    '''
    
    seg = int(ratio * data.shape[0])
    
    return data[ : seg] , data[seg : ]

def shuffle_t_v(filenames):
    np.random.shuffle(filenames)
    
    return filenames

def combine(freq = 10):
    '''
    训练数据与验证数据
    :freq: 指定闪烁的频率
    
    '''
    
    if freq not in [10 , 15 , 20 , 25]:
        print('freq must in 10,15,20,25')
        return 
    
    ratio = 0.9 #训练集的占比
    overlap_length = 2*256 #重叠2秒数据
    
    #保证随机性 进行置乱
    person_0_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/0/%s/' % freq) )
    person_1_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/1/%s/' % freq) )
    person_2_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/2/%s/' % freq) )
    person_3_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/3/%s/' % freq) )
    person_4_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/4/%s/' % freq) )
    person_5_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/5/%s/' % freq) )
    person_6_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/6/%s/' % freq) )
    person_7_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/7/%s/' % freq) )
    person_8_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/8/%s/' % freq) )
    person_9_filenames = shuffle_t_v( os.listdir('real_data/eeg_final/circle/9/%s/' % freq) )
    

    #打开信号文件 并 合并
    person_0 = np.concatenate([load_data('real_data/eeg_final/circle/0/%s/' % freq + filename) for filename in person_0_filenames] , axis = 0)
    person_1 = np.concatenate([load_data('real_data/eeg_final/circle/1/%s/' % freq + filename) for filename in person_1_filenames] , axis = 0)
    person_2 = np.concatenate([load_data('real_data/eeg_final/circle/2/%s/' % freq + filename) for filename in person_2_filenames] , axis = 0)
    person_3 = np.concatenate([load_data('real_data/eeg_final/circle/3/%s/' % freq + filename) for filename in person_3_filenames] , axis = 0)
    person_4 = np.concatenate([load_data('real_data/eeg_final/circle/4/%s/' % freq + filename) for filename in person_4_filenames] , axis = 0)
    person_5 = np.concatenate([load_data('real_data/eeg_final/circle/5/%s/' % freq + filename) for filename in person_5_filenames] , axis = 0)
    person_6 = np.concatenate([load_data('real_data/eeg_final/circle/6/%s/' % freq + filename) for filename in person_6_filenames] , axis = 0)
    person_7 = np.concatenate([load_data('real_data/eeg_final/circle/7/%s/' % freq + filename) for filename in person_7_filenames] , axis = 0)
    person_8 = np.concatenate([load_data('real_data/eeg_final/circle/8/%s/' % freq + filename) for filename in person_8_filenames] , axis = 0)
    person_9 = np.concatenate([load_data('real_data/eeg_final/circle/9/%s/' % freq + filename) for filename in person_9_filenames] , axis = 0)
    
    person_0_train , person_0_val = train_val(person_0)
    person_1_train , person_1_val = train_val(person_1)
    person_2_train , person_2_val = train_val(person_2)
    person_3_train , person_3_val = train_val(person_3)
    person_4_train , person_4_val = train_val(person_4)
    person_5_train , person_5_val = train_val(person_5)
    person_6_train , person_6_val = train_val(person_6)
    person_7_train , person_7_val = train_val(person_7)
    person_8_train , person_8_val = train_val(person_8)
    person_9_train , person_9_val = train_val(person_9)
    
    #数据分段阶段
    
    #============
    #训练数据分段
    train_person_data_0 , train_person_labels_0 = separate(person_0_train , label = 0 , overlap_length=overlap_length)
    train_person_data_1 , train_person_labels_1 = separate(person_1_train , label = 1 , overlap_length=overlap_length)
    train_person_data_2 , train_person_labels_2 = separate(person_2_train , label = 2 , overlap_length=overlap_length)
    train_person_data_3 , train_person_labels_3 = separate(person_3_train , label = 3 , overlap_length=overlap_length)
    train_person_data_4 , train_person_labels_4 = separate(person_4_train , label = 4 , overlap_length=overlap_length)
    train_person_data_5 , train_person_labels_5 = separate(person_5_train , label = 5 , overlap_length=overlap_length)
    train_person_data_6 , train_person_labels_6 = separate(person_6_train , label = 6 , overlap_length=overlap_length)
    train_person_data_7 , train_person_labels_7 = separate(person_7_train , label = 7 , overlap_length=overlap_length)
    train_person_data_8 , train_person_labels_8 = separate(person_8_train , label = 8 , overlap_length=overlap_length)
    train_person_data_9 , train_person_labels_9 = separate(person_9_train , label = 9 , overlap_length=overlap_length)

    #合并数据
    train_data = np.concatenate((train_person_data_0 , train_person_data_1 , train_person_data_2 ,
                                 train_person_data_3 , train_person_data_4 , train_person_data_5 ,
                                 train_person_data_6 , train_person_data_7 , train_person_data_8 ,
                                 train_person_data_9 ))
    
    train_labels = np.concatenate((train_person_labels_0 , train_person_labels_1 , train_person_labels_2 ,
                                   train_person_labels_3 , train_person_labels_4 , train_person_labels_5 ,
                                   train_person_labels_6 , train_person_labels_7 , train_person_labels_8 ,
                                   train_person_labels_9 ))
    
    #产生索引并置乱
    idx_train_data = list(range(train_data.shape[0]))
    np.random.shuffle(idx_train_data)

    #将训练数据置乱
    train_data = train_data[idx_train_data]
    train_labels = train_labels[idx_train_data]
    
    #============
    #验证数据分段
    val_person_data_0 , val_person_labels_0 = separate(person_0_val , label = 0 , overlap_length=0)
    val_person_data_1 , val_person_labels_1 = separate(person_1_val , label = 1 , overlap_length=0)
    val_person_data_2 , val_person_labels_2 = separate(person_2_val , label = 2 , overlap_length=0)
    val_person_data_3 , val_person_labels_3 = separate(person_3_val , label = 3 , overlap_length=0)
    val_person_data_4 , val_person_labels_4 = separate(person_4_val , label = 4 , overlap_length=0)
    val_person_data_5 , val_person_labels_5 = separate(person_5_val , label = 5 , overlap_length=0)
    val_person_data_6 , val_person_labels_6 = separate(person_6_val , label = 6 , overlap_length=0)
    val_person_data_7 , val_person_labels_7 = separate(person_7_val , label = 7 , overlap_length=0)
    val_person_data_8 , val_person_labels_8 = separate(person_8_val , label = 8 , overlap_length=0)
    val_person_data_9 , val_person_labels_9 = separate(person_9_val , label = 9 , overlap_length=0)
    
    #合并数据
    val_data = np.concatenate((val_person_data_0 , val_person_data_1 , val_person_data_2 ,
                               val_person_data_3 , val_person_data_4 , val_person_data_5 ,
                               val_person_data_6 , val_person_data_7 , val_person_data_8 ,
                               val_person_data_9 ))
    
    val_labels = np.concatenate((val_person_labels_0 , val_person_labels_1 , val_person_labels_2 ,
                                 val_person_labels_3 , val_person_labels_4 , val_person_labels_5 ,
                                 val_person_labels_6 , val_person_labels_7 , val_person_labels_8 ,
                                 val_person_labels_9 ))

    #产生索引并置乱
    idx_val_data = list(range(val_data.shape[0]))
    np.random.shuffle(idx_val_data)

    #将训练数据置乱
    val_data = val_data[idx_val_data]
    val_labels = val_labels[idx_val_data]
    
    return train_data , train_labels , val_data , val_labels

In [9]:
def shuffle(train_data , train_labels , val_data , val_labels):
    #置乱一次数据
    idx_train_data = list(range(train_data.shape[0]))
    np.random.shuffle(idx_train_data)
    
    idx_val_data = list(range(val_data.shape[0]))
    np.random.shuffle(idx_val_data)
    
    return train_data[idx_train_data] , train_labels[idx_train_data] , val_data[idx_val_data] , val_labels[idx_val_data]

In [13]:
#train_X_ , train_y , val_X_ , val_y = combine(freq = 10) #10 15 20 25 hz

In [14]:
#如果没有进行前后裁剪 则输出数据会变多

#print(train_X_.shape , train_y.shape , val_X_.shape , val_y.shape)

In [10]:
def feature_extraction_RMS(data):
    def rms(datum):
        '''
        :datum: 一段信号 shape : 768 * 16
        '''
        return [ np.sqrt(np.mean(np.square( d ))) for d in datum.T ]
    
    feature_rms = []
    
    for datum in data: 
        feature_rms.append(rms(datum))
    
    return np.array(feature_rms)

In [11]:
train_X_ , train_y , val_X_ , val_y = combine(freq = 10) #10 15 20 25 hz

In [12]:
train_X_.shape

(800, 768, 16)

In [14]:
feature_extraction_RMS(train_X_).shape

(800, 16)

In [16]:
def con_mat(_feature , _labels , model):
    '''
    打印训练结果
    '''
    
    print('val score:%f' % model.score(_feature , _labels))
    print('real')
    
    print(confusion_matrix(_labels , model.predict(_feature)))
    print(classification_report(_labels , model.predict(_feature)))
    
def con_mat_heatmap(_feature , _labels , model , color , png_path):
    
    _labels_hat = model.predict(_feature)
    
    mat = confusion_matrix( _labels , _labels_hat )
    
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False , cmap=color )#,
            #xticklabels=faces.target_names,
            #yticklabels=faces.target_names)
    
    precision = precision_score(_labels , _labels_hat , average='macro')
    recall = recall_score(_labels , _labels_hat , average='macro')
    f1 = f1_score(_labels , _labels_hat , average='macro')
    
    plt.xlabel('precision:%.2f recall:%.2f f1:%.2f' % (precision , recall , f1) )
    # plt.ylabel('predicted label');
    plt.savefig(png_path) #保存起来
    plt.close()

def feature_selection(data , labels , model , num_features , cv=10):
    '''
    :model: classify model
    :num_features: features count you expect(integer or tuple)
    '''
    
    '''[8 20]'''
    
    sfs = SequentialFeatureSelector(model , k_features=num_features , cv=cv , verbose = 2 , n_jobs=-1) #all cpu cores
    
    sfs.fit(data , labels)
    
    #最优秀的特征索引
    return sfs.k_feature_idx_


def choose_common_feature_idx(data , labels , classifiers , use_ratio = 0.25 , num_features = 10 , num_features_threshold = 8):
    '''
    sub_band_power使用该函数 进行筛选特征
    
    :num_features:integer or tuple 期望的特征数量（待选择的数量）
    :min_num_features: 特征数量阈值 小于时 停止选择
    选择适合所有分类器的特征索引值
    集合 与 运算
    '''
    
    idxes_ratio = np.random.randint(0 , data.shape[0] , size = int(use_ratio * data.shape[0]) )
    
    data_shuffle = data[idxes_ratio]
    labels_shuffle = labels[idxes_ratio]
    
    feature_idxes = set(list(range(data.shape[1]))) #初始化为所有的特征索引值
    
    #===========
    #随机使用原有索引中的 20%
    random_idxes = np.random.randint(0 , data.shape[1] , size = 25 )
    
    data_shuffle = data_shuffle[: , random_idxes]
    labels_shuffle = labels_shuffle #保持完整性
    #===========

    for classifier in classifiers:
        
        idx = feature_selection(data_shuffle , labels_shuffle , classifier , num_features)
        idx = set(idx)
        
        #寻找共同的特征索引
        #寻找之前先测试 如果小于阈值 直接停止
        if len(feature_idxes & idx) < num_features_threshold:
            break
            
        feature_idxes = feature_idxes & idx
        
    return np.array(list(feature_idxes))

In [17]:
def f1_score_(_feature , _labels , model):
    
    _labels_hat = model.predict(_feature)
    
    accu = accuracy_score(_labels , _labels_hat )
    precision = precision_score(_labels , _labels_hat , average='macro')
    recall = recall_score(_labels , _labels_hat , average='macro')
    f1 = f1_score(_labels , _labels_hat , average='macro')
    kappa = cohen_kappa_score(_labels , _labels_hat)
    
    return accu , precision, recall , f1 , kappa

def roc_auc(real_label , score):
    return roc_auc_score( label_binarize(real_label , classes=[0,1,2,3,4,5,6,7,8,9]) , score , average='micro')

def itrr(acc , N=10):
    return np.log2(N) + acc*np.log2(acc) + (1-acc)*np.log2((1-acc)/(N-1))

In [18]:
#初始化所有分类器
lgbm = lightgbm.LGBMClassifier()
xgb = xgboost.XGBClassifier()
gbc = GradientBoostingClassifier()
rf =  RandomForestClassifier()

#svc = SVC(probability=True)
#adaboost = AdaBoostClassifier()
#knn = KNeighborsClassifier()
#dt = DecisionTreeClassifier()
#lda = LDA()
#nb = GaussianNB()
#mlp = MLPClassifier()

#基分类器
classifiers = [lgbm , gbc]

#基分类器
estimators = [('lightgbm' , lgbm) , ('xgboost' , xgb) ,  ('gradientboosting' , gbc) ,
                             ('randomforest' , rf) ]


#下面的顺序按照三篇论文与经验得出
#voting = VotingClassifier(estimators = estimators ,
#                voting='soft' , n_jobs=-1)

In [19]:
for freq in [10 , 15 , 20 , 25]:
    #for iter_classifier in iter_classifiers:
        #estimators.append(iter_classifier)
    
    for (i , classifier) in enumerate(classifiers):
        acc_s = []
        pre_s = []
        rec_s = []
        f1s_s = []
        kap_s = []
        
        auc_s = []
        
        itr_s = []
                

        for t in range(10): #循环20次 取平均值 作为最终结果
            voting = classifier

            train_X_ , train_y , val_X_ , val_y = combine(freq=freq)

            #提取特征
            train_X = feature_extraction_RMS(train_X_)
            val_X = feature_extraction_RMS(val_X_)

            voting.fit(train_X , train_y)

            acc , pre, rec , f1s , kap = f1_score_(val_X , val_y , voting)
            
            auc = roc_auc(val_y , voting.predict_proba(val_X))
            
            itr = itrr(acc)
            
            acc_s.append(acc)
            #pre_s.append(pre)
            #rec_s.append(rec)
            #f1s_s.append(f1s)
            kap_s.append(kap)
            auc_s.append(auc)
            itr_s.append(itr)
            
            print('[%.6f %.6f %.6f %.6f]' % (acc , kap , auc , itr) )
            
        print('[%d %d] %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f' % (freq , i , np.max(acc_s) , np.min(acc_s) , np.mean(acc_s) , np.var(acc_s) , np.max(kap_s) , np.min(kap_s) , np.mean(kap_s) , np.var(kap_s) ,np.max(auc_s) , np.min(auc_s) , np.mean(auc_s) , np.var(auc_s) ,np.max(itr_s) , np.min(itr_s) , np.mean(itr_s) , np.var(itr_s)) )

[0.500000 0.444444 0.910864 0.736966]
[0.500000 0.444444 0.918025 0.736966]
[0.333333 0.259259 0.866049 0.290349]
[0.466667 0.407407 0.892716 0.634510]
[0.433333 0.370370 0.873333 0.538499]
[0.566667 0.518519 0.927160 0.961156]
[0.466667 0.407407 0.888765 0.634510]
[0.566667 0.518519 0.940864 0.961156]
[0.566667 0.518519 0.928148 0.961156]
[0.466667 0.407407 0.883951 0.634510]
[10 0] 0.566667 0.333333 0.486667 0.004711 0.518519 0.259259 0.429630 0.005816 0.940864 0.866049 0.902988 0.000585 0.961156 0.290349 0.708978 0.041330
[0.533333 0.481481 0.910123 0.845838]
[0.533333 0.481481 0.891481 0.845838]
[0.466667 0.407407 0.824321 0.634510]
[0.333333 0.259259 0.876667 0.290349]
[0.500000 0.444444 0.845432 0.736966]
[0.566667 0.518519 0.888519 0.961156]
[0.533333 0.481481 0.908148 0.845838]
[0.333333 0.259259 0.790617 0.290349]
[0.533333 0.481481 0.859259 0.845838]
[0.533333 0.481481 0.918395 0.845838]
[10 1] 0.566667 0.333333 0.486667 0.006489 0.518519 0.259259 0.429630 0.008011 0.918395 0

In [20]:
for freq in [10 , 15 , 20 , 25]:
    #for iter_classifier in iter_classifiers:
        #estimators.append(iter_classifier)
    
    acc_s = []
    pre_s = []
    rec_s = []
    f1s_s = []
    kap_s = []

    auc_s = []

    itr_s = []


    for t in range(10): #循环20次 取平均值 作为最终结果
        voting = VotingClassifier(estimators=estimators , voting='soft' , n_jobs=-1)

        train_X_ , train_y , val_X_ , val_y = combine(freq=freq)

        #提取特征
        train_X = feature_extraction_RMS(train_X_)
        val_X = feature_extraction_RMS(val_X_)

        voting.fit(train_X , train_y)

        acc , pre, rec , f1s , kap = f1_score_(val_X , val_y , voting)

        auc = roc_auc(val_y , voting.predict_proba(val_X))

        itr = itrr(acc)

        acc_s.append(acc)
        #pre_s.append(pre)
        #rec_s.append(rec)
        #f1s_s.append(f1s)
        kap_s.append(kap)
        auc_s.append(auc)
        itr_s.append(itr)

        print('[%.6f %.6f %.6f %.6f]' % (acc , kap , auc , itr) )

    print('%d %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f' % (freq , np.max(acc_s) , np.min(acc_s) , np.mean(acc_s) , np.var(acc_s) , np.max(kap_s) , np.min(kap_s) , np.mean(kap_s) , np.var(kap_s) ,np.max(auc_s) , np.min(auc_s) , np.mean(auc_s) , np.var(auc_s) ,np.max(itr_s) , np.min(itr_s) , np.mean(itr_s) , np.var(itr_s)) )

[0.400000 0.333333 0.893827 0.449022]
[0.366667 0.296296 0.867531 0.366231]
[0.466667 0.407407 0.891481 0.634510]
[0.600000 0.555556 0.897531 1.083007]
[0.300000 0.222222 0.834815 0.221690]
[0.466667 0.407407 0.890988 0.634510]
[0.333333 0.259259 0.867407 0.290349]
[0.533333 0.481481 0.915802 0.845838]
[0.400000 0.333333 0.876049 0.449022]
[0.533333 0.481481 0.927778 0.845838]
10 0.600000 0.300000 0.440000 0.008400 0.555556 0.222222 0.377778 0.010370 0.927778 0.834815 0.886321 0.000629 1.083007 0.221690 0.582002 0.069255
[0.366667 0.296296 0.889136 0.366231]
[0.466667 0.407407 0.885556 0.634510]
[0.500000 0.444444 0.918765 0.736966]
[0.400000 0.333333 0.886667 0.449022]
[0.400000 0.333333 0.918395 0.449022]
[0.500000 0.444444 0.894568 0.736966]
[0.466667 0.407407 0.927037 0.634510]
[0.500000 0.444444 0.892593 0.736966]
[0.533333 0.481481 0.901975 0.845838]
[0.433333 0.370370 0.906420 0.538499]
15 0.533333 0.366667 0.456667 0.002678 0.481481 0.296296 0.396296 0.003306 0.927037 0.885556 

In [21]:
for freq in [10 , 15 , 20 , 25]:
    #for iter_classifier in iter_classifiers:
        #estimators.append(iter_classifier)
    
    acc_s = []
    pre_s = []
    rec_s = []
    f1s_s = []
    kap_s = []

    auc_s = []

    itr_s = []


    for t in range(10): #循环20次 取平均值 作为最终结果
        voting = BaggingClassifier(base_estimator=lgbm)

        train_X_ , train_y , val_X_ , val_y = combine(freq=freq)

        #提取特征
        train_X = feature_extraction_RMS(train_X_)
        val_X = feature_extraction_RMS(val_X_)

        voting.fit(train_X , train_y)

        acc , pre, rec , f1s , kap = f1_score_(val_X , val_y , voting)

        auc = roc_auc(val_y , voting.predict_proba(val_X))

        itr = itrr(acc)

        acc_s.append(acc)
        #pre_s.append(pre)
        #rec_s.append(rec)
        #f1s_s.append(f1s)
        kap_s.append(kap)
        auc_s.append(auc)
        itr_s.append(itr)

        print('[%.6f %.6f %.6f %.6f]' % (acc , kap , auc , itr) )

    print('%d %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f' % (freq , np.max(acc_s) , np.min(acc_s) , np.mean(acc_s) , np.var(acc_s) , np.max(kap_s) , np.min(kap_s) , np.mean(kap_s) , np.var(kap_s) ,np.max(auc_s) , np.min(auc_s) , np.mean(auc_s) , np.var(auc_s) ,np.max(itr_s) , np.min(itr_s) , np.mean(itr_s) , np.var(itr_s)) )

[0.500000 0.444444 0.915926 0.736966]
[0.433333 0.370370 0.906173 0.538499]
[0.400000 0.333333 0.869506 0.449022]
[0.533333 0.481481 0.897778 0.845838]
[0.533333 0.481481 0.927654 0.845838]
[0.466667 0.407407 0.903827 0.634510]
[0.533333 0.481481 0.900988 0.845838]
[0.533333 0.481481 0.889630 0.845838]
[0.433333 0.370370 0.917778 0.538499]
[0.466667 0.407407 0.861481 0.634510]
10 0.533333 0.400000 0.483333 0.002278 0.481481 0.333333 0.425926 0.002812 0.927654 0.861481 0.899074 0.000391 0.845838 0.449022 0.691536 0.020946
[0.400000 0.333333 0.827160 0.449022]
[0.366667 0.296296 0.850494 0.366231]
[0.433333 0.370370 0.879012 0.538499]
[0.366667 0.296296 0.868765 0.366231]
[0.333333 0.259259 0.802593 0.290349]
[0.533333 0.481481 0.870370 0.845838]
[0.533333 0.481481 0.893704 0.845838]
[0.333333 0.259259 0.826296 0.290349]
[0.333333 0.259259 0.873704 0.290349]
[0.400000 0.333333 0.861975 0.449022]
15 0.533333 0.333333 0.403333 0.005211 0.481481 0.259259 0.337037 0.006433 0.893704 0.802593 

In [22]:
for freq in [10 , 15 , 20 , 25]:
    #for iter_classifier in iter_classifiers:
        #estimators.append(iter_classifier)
    
    acc_s = []
    pre_s = []
    rec_s = []
    f1s_s = []
    kap_s = []

    auc_s = []

    itr_s = []


    for t in range(10): #循环20次 取平均值 作为最终结果
        voting = StackingClassifier(classifiers=[xgb , gbc , rf] , meta_classifier=lgbm)

        train_X_ , train_y , val_X_ , val_y = combine(freq=freq)

        #提取特征
        train_X = feature_extraction_RMS(train_X_)
        val_X = feature_extraction_RMS(val_X_)

        voting.fit(train_X , train_y)

        acc , pre, rec , f1s , kap = f1_score_(val_X , val_y , voting)

        auc = roc_auc(val_y , voting.predict_proba(val_X))

        itr = itrr(acc)

        acc_s.append(acc)
        #pre_s.append(pre)
        #rec_s.append(rec)
        #f1s_s.append(f1s)
        kap_s.append(kap)
        auc_s.append(auc)
        itr_s.append(itr)

        print('[%.6f %.6f %.6f %.6f]' % (acc , kap , auc , itr) )

    print('%d %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f %.6f' % (freq , np.max(acc_s) , np.min(acc_s) , np.mean(acc_s) , np.var(acc_s) , np.max(kap_s) , np.min(kap_s) , np.mean(kap_s) , np.var(kap_s) ,np.max(auc_s) , np.min(auc_s) , np.mean(auc_s) , np.var(auc_s) ,np.max(itr_s) , np.min(itr_s) , np.mean(itr_s) , np.var(itr_s)) )

[0.466667 0.407407 0.753580 0.634510]
[0.466667 0.407407 0.690370 0.634510]
[0.333333 0.259259 0.572840 0.290349]
[0.533333 0.481481 0.709630 0.845838]
[0.600000 0.555556 0.741481 1.083007]
[0.400000 0.333333 0.628642 0.449022]
[0.400000 0.333333 0.590988 0.449022]
[0.500000 0.444444 0.730494 0.736966]
[0.433333 0.370370 0.613704 0.538499]
[0.333333 0.259259 0.602901 0.290349]
10 0.600000 0.333333 0.446667 0.006489 0.555556 0.259259 0.385185 0.008011 0.753580 0.572840 0.663463 0.004238 1.083007 0.290349 0.595207 0.055578
[0.266667 0.185185 0.588889 0.160676]
[0.466667 0.407407 0.734198 0.634510]
[0.466667 0.407407 0.680556 0.634510]
[0.466667 0.407407 0.771049 0.634510]
[0.433333 0.370370 0.759815 0.538499]
[0.400000 0.333333 0.721852 0.449022]
[0.466667 0.407407 0.783025 0.634510]
[0.400000 0.333333 0.717531 0.449022]
[0.566667 0.518519 0.783457 0.961156]
[0.300000 0.222222 0.637099 0.221690]
15 0.566667 0.266667 0.423333 0.006900 0.518519 0.185185 0.359259 0.008519 0.783457 0.588889 

In [13]:
#chosen_idx = choose_common_feature_idx()

In [14]:
#*****************
#*****************
#chosen_idx=np.array(list(range(train_X.shape[1])))
#*****************
#*****************

In [15]:
#train_X_chosen = train_X[: , chosen_idx]

In [16]:
#train_X_chosen.shape

In [17]:
#voting = VotingClassifier(estimators=[('xgboost' , xgb) , ('lightgbm' , lgbm) , ('gradientboosting' , gbc) ,
#                             ('randomforest' , rf) , ('svm' , svc) , ('decisiontree' , dt) , ('lda' , lda) ,
#                             ('adaboost' , adaboost) , ('mlp' , mlp) , ('naive bayes' , nb) , ('kneighbors' , knn)] ,
#                voting='soft' , n_jobs=-1)

In [18]:
#voting.fit(train_X_chosen , train_y)

In [19]:
#val_X_chosen = val_X[: , chosen_idx]

In [20]:
#con_mat_heatmap(val_X_chosen , val_y , voting , color=None , png_path='ttt.png')

In [21]:
for freq in [15 , 20 , 25]:
    for t in range(20): #每种频率随机测试 20 次
        #读取数据
        train_X_ , train_y , val_X_ , val_y = combine(freq=freq) #10 15 20 25 hz

        #提取特征
        train_X = feature_extraction_RMS(train_X_)
        val_X = feature_extraction_RMS(val_X_)

        #初始化所有分类器
        svc = SVC(probability=True)
        rf =  RandomForestClassifier()
        adaboost = AdaBoostClassifier()
        xgb = xgboost.XGBClassifier()
        lgbm = lightgbm.LGBMClassifier()
        gbc = GradientBoostingClassifier()
        knn = KNeighborsClassifier()
        dt = DecisionTreeClassifier()
        lda = LDA()
        nb = GaussianNB()
        mlp = MLPClassifier()

        classifiers = [xgb , lgbm , gbc , rf , svc , dt , lda , adaboost , mlp , nb , knn ]

        voting = VotingClassifier(estimators=[('xgboost' , xgb) , ('lightgbm' , lgbm) , ('gradientboosting' , gbc) ,
                                 ('randomforest' , rf) , ('svm' , svc) , ('decisiontree' , dt) , ('lda' , lda) ,
                                 ('adaboost' , adaboost) , ('mlp' , mlp) , ('naive bayes' , nb) , ('kneighbors' , knn)] ,
                    voting='soft' , n_jobs=-1)

        #特征选择
        #chosen_idx = choose_common_feature_idx(classifiers=classifiers)

        chosen_idx=np.array(list(range(train_X.shape[1])))

        train_X_chosen = train_X[: , chosen_idx]    

        voting.fit(train_X_chosen , train_y)    
        
        val_X_chosen = val_X[: , chosen_idx]

        con_mat_heatmap(val_X_chosen , val_y , voting , color=None ,
                        png_path = 'c://Users/qq122/Desktop/v1_paper_pic/RMS/无特征选择/%shz/%s.png' % (freq , t+1) )