In [1]:
import numpy as np
import pywt

import seaborn as sns #绘制confusion matrix heatmap

import os
import scipy.io as sio

from statsmodels.tsa.ar_model import AR

import tqdm
import  time

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import warnings
warnings.simplefilter('ignore') # ignore (suppress) all warnings

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier

import xgboost

In [4]:
# --- Acquisition ---------------------------------------------------------
sample_rate = 256      # samples per second
origin_channel = 16    # number of recorded channels

# Electrode labels (16 entries, matching origin_channel).
SAMPLE_CHANNEL = [
    'Pz', 'PO3', 'PO4', 'O1',
    'O2', 'Oz', 'O9', 'FP2',
    'C4', 'C6', 'CP3', 'CP1',
    'CPZ', 'CP2', 'CP4', 'PO8',
]

# Integer class label -> display string (presumably the subject's name).
LABEL2STR = dict(enumerate([
    'sen', 'hong', 'zhao', 'fen', 'xiao',
    'yu', 'bin', 'wang', 'wei', 'fei',
]))

# --- Segmentation --------------------------------------------------------
CLIP_FORWARD = 1     # time trimmed from the start of each recording (seconds)
CLIP_BACKWARD = 1    # time trimmed from the end of each recording (seconds)

trial_time = 3       # length of one segment, in seconds

# --- Pre-processing options ----------------------------------------------
# Normalisation switch (commented out, not currently defined).
# reference: a study on performance increasing in ssvep based bci application
#IS_NORMALIZE = True

# Filtering switch (commented out, not currently defined).
#IS_FILTER = False
# EEG frequency range.
# reference: a study on performance increasing in ssvep based bci application
LO_FREQ = 0.5
HI_FREQ = 40

# Notch-filter switch (commented out, not currently defined).
#IS_NOTCH = False
NOTCH_FREQ = 50      # notch frequency: mains (power-line) frequency


In [5]:
from keras.utils import to_categorical

Using TensorFlow backend.


In [6]:
def load_data(filename):
    """Load one recording from a .mat file and trim its head and tail.

    Reads the ``'data_received'`` variable of ``filename`` with
    ``scipy.io.loadmat`` and drops ``CLIP_FORWARD`` seconds from the start
    and ``CLIP_BACKWARD`` seconds from the end (both converted to a number
    of rows with ``sample_rate``).

    :param filename: path of the .mat file to read.
    :return: the trimmed array; rows are samples (the original comment
        describes the stored matrix as ``length * 16``).
    """
    data = sio.loadmat(file_name=filename)['data_received']

    head = CLIP_FORWARD * sample_rate
    tail = CLIP_BACKWARD * sample_rate

    # A stop index of ``-0`` is just ``0`` and would select no rows at all,
    # so "clip nothing from the end" must be an open-ended slice instead.
    stop = -tail if tail != 0 else None

    return data[head:stop]

In [7]:
def separate(data , label , overlap_length):
    """Cut a recording into fixed-length, optionally overlapping segments.

    Every segment holds ``sample_rate * trial_time`` consecutive rows of
    ``data``; consecutive segments start ``size - overlap_length`` rows
    apart. Trailing rows that do not fill a whole segment are dropped.

    :param data: 2-D array; rows are samples, columns are channels.
    :param label: class label attached to every segment cut from ``data``.
    :param overlap_length: number of rows shared by consecutive segments;
        must be smaller than the segment size.
    :return: ``(segments, labels)``. ``segments`` has shape
        ``(n_segments, size, n_channels)`` and ``labels`` has shape
        ``(n_segments,)``. When ``data`` is shorter than one segment,
        ``n_segments`` is 0 but the trailing dimensions are kept so the
        result can still be concatenated with other subjects' segments.
    :raises ValueError: if ``overlap_length`` is not smaller than the
        segment size (the window would never advance).
    """
    size = sample_rate * trial_time  # rows per segment
    step = size - overlap_length     # distance between segment starts

    if step <= 0:
        raise ValueError(
            'overlap_length (%s) must be smaller than the segment size (%s)'
            % (overlap_length , size))

    # Start rows of every segment that fits entirely inside the recording.
    starts = range(0 , data.shape[0] - size + 1 , step)

    if len(starts) == 0:
        segments = np.empty((0 , size) + tuple(data.shape[1:]) , dtype=data.dtype)
    else:
        segments = np.array([data[start : start + size , :] for start in starts])

    return segments , np.full(len(starts) , label)

In [8]:
def shuffle_t_v(filenames):
    """Randomly draw 10 entries from ``filenames``.

    Per the original note, each of the 20 accuracy computations picks 10
    random samples for training/testing.

    NOTE(review): ``np.random.choice`` samples with replacement by default,
    so the same name can be drawn more than once -- confirm this is intended.

    :param filenames: sequence of file names to draw from.
    :return: 1-D array holding the 10 drawn names.
    """
    n_drawn = 10
    drawn = np.random.choice(filenames , size=n_drawn)
    return drawn

def combine(freq):
    """Build a shuffled training set from the ``data/base_rf`` recordings.

    For each of the nine subject directories ``data/base_rf/<freq>/0`` ..
    ``data/base_rf/<freq>/8``: draw file names with ``shuffle_t_v``, load
    and concatenate those recordings, and cut the result into segments that
    overlap by two seconds. The directory number is used as the class label.
    All segments are then merged and shuffled together with their labels.

    :param freq: value substituted into the ``data/base_rf/<freq>/`` path.
    :return: ``(train_data, train_labels)`` in matching, shuffled order.
    """
    overlap_length = 2 * sample_rate  # two seconds of overlap between segments
    n_subjects = 9                    # sub-directories '0' .. '8'

    segments_per_subject = []
    labels_per_subject = []

    for label in range(n_subjects):
        subject_dir = 'data/base_rf/%s/%d/' % (freq , label)

        # os.listdir returns names in arbitrary order; sorting first makes a
        # seeded random draw pick the same files on every machine.
        picked = shuffle_t_v(sorted(os.listdir(subject_dir)))

        # Load the drawn recordings and join them along the sample axis.
        recording = np.concatenate([load_data(subject_dir + filename) for filename in picked] , axis = 0)

        subject_segments , subject_labels = separate(recording , label = label , overlap_length=overlap_length)

        segments_per_subject.append(subject_segments)
        labels_per_subject.append(subject_labels)

    # Merge all subjects.
    train_data = np.concatenate(segments_per_subject)
    train_labels = np.concatenate(labels_per_subject)

    # Shuffle segments and labels with one shared permutation.
    order = list(range(train_data.shape[0]))
    np.random.shuffle(order)

    return train_data[order] , train_labels[order]


In [9]:
def session_data_labels(session_id , freq , is_training):
    """Load, segment and shuffle all recordings of one session.

    Every sub-directory of ``data/incremental/<freq>/s<session_id>/`` is
    treated as one subject: its name is parsed with ``int()`` and used as
    the class label, and all files inside it are loaded, concatenated and
    cut into segments.

    :param session_id: integer session number (formatted with ``%d``).
    :param freq: value substituted into the path after ``str()`` conversion.
    :param is_training: if true, consecutive segments overlap by two
        seconds; otherwise they do not overlap.
    :return: ``(data, labels)`` in matching, shuffled order.
    """
    overlap_length = 2 * sample_rate if is_training else 0

    session_dir = 'data/incremental/%s/s%d/' % (str(freq) , session_id)

    data = []
    labels = []

    # os.listdir returns names in arbitrary order; sort both listings so the
    # concatenated recordings (and therefore the segments cut from them) are
    # the same on every run and platform.
    for subject in sorted(os.listdir(session_dir)):  # subject IDs
        subject_dir = '%s%s/' % (session_dir , subject)

        person = np.concatenate([load_data(subject_dir + filename) for filename in sorted(os.listdir(subject_dir))] , axis = 0)

        person_data , person_label = separate( person , label = int(subject) , overlap_length = overlap_length)

        data.append(person_data)
        labels.append(person_label)

    # Merge all subjects.
    data = np.concatenate(data)
    labels = np.concatenate(labels)

    # Shuffle segments and labels with one shared permutation.
    idx_data = list(range(data.shape[0]))
    np.random.shuffle(idx_data)

    return data[idx_data] , labels[idx_data]


In [10]:
def feature_extraction_RMS(data):
    '''
    Root-mean-square of every channel of every segment.

    :data: sequence of 2-D segments, rows = samples, columns = channels
           (the original note gives 768 * 16 per segment)
    :return: array with one row per segment and one RMS value per channel
    '''
    return np.array([
        # Transposing puts channels on the first axis, so each `channel`
        # is the full time series of one electrode within this segment.
        [np.sqrt(np.mean(np.square(channel))) for channel in segment.T]
        for segment in data
    ])

In [14]:
def concat_and_shuffle(orig_X , orig_y , session_id , freq):
    """Append one session's training features to an existing set and shuffle.

    The session is loaded with ``session_data_labels(..., is_training=True)``
    and converted with ``feature_extraction_RMS``; its rows and labels are
    appended to ``orig_X`` / ``orig_y`` and the merged set is shuffled with
    a single shared permutation.

    :param orig_X: existing feature matrix, one row per segment.
    :param orig_y: existing labels, aligned with ``orig_X``.
    :param session_id: session to add.
    :param freq: frequency passed through to ``session_data_labels``.
    :return: ``(X, y)`` -- the merged, shuffled features and labels.
    """
    new_segments , new_labels = session_data_labels(session_id , freq , is_training=True)
    new_features = feature_extraction_RMS(new_segments)

    merged_X = np.concatenate((orig_X , new_features) , axis=0)
    merged_y = np.concatenate((orig_y , new_labels) , axis=0)

    # One permutation for both arrays keeps each row paired with its label.
    order = list(range(merged_X.shape[0]))
    np.random.shuffle(order)

    return merged_X[order] , merged_y[order]

# Random forest, 16 trees (originally titled "naive bayes"; the cell below trains a RandomForestClassifier)

In [15]:
def cal_time(ss):
    """Subtract from ``ss`` the linear ramp defined by its first two entries.

    With ``step = ss[1] - ss[0]``, entry ``i`` is reduced by ``step * i``.
    The sequence is modified in place and also returned.

    Sequences with fewer than two entries have no defined step and are
    returned unchanged (previously this raised ``IndexError``).

    NOTE(review): after this adjustment the second entry always equals the
    first one -- confirm that this correction is what the timing analysis
    intends.

    :param ss: mutable sequence of numbers (e.g. a list of durations).
    :return: the same object ``ss``, adjusted in place.
    """
    if len(ss) < 2:
        return ss

    step = ss[1] - ss[0]

    for i , value in enumerate(ss):
        ss[i] = value - step * i

    return ss

# Incremental-learning experiment with a 16-tree random forest.
# For every stimulus frequency: repeat 20 times -- train on session 1, then for
# each later session score the current model on it, add that session's
# training segments to the training set, and refit a fresh forest.
for freq in [6 , 7.5 , 8.5 , 10]:
    print('freq = ' , freq)

    times = []  # one list per repetition: (score + refit) duration per session
    accus = []  # one list per repetition: accuracy per session

    for _ in range(20):
        times_sub = []
        accus_sub = []

        # Initial model: session 1 only (combine(freq) was the alternative source).
        train_X_ , train_y = session_data_labels(1 , freq , is_training=True)
        train_X = feature_extraction_RMS(train_X_)  # per-channel RMS features

        rf = RandomForestClassifier(n_estimators=16)
        rf.fit(train_X , train_y)

        for session_id in [3,5,6,7,8,9,11,12,13]:
            session_N_data , session_N_labels = session_data_labels(session_id , freq , is_training=False)
            session_N_data = feature_extraction_RMS(session_N_data)

            # time.clock() was removed in Python 3.8; time.perf_counter() is
            # the documented replacement for measuring short intervals.
            start = time.perf_counter()
            score = rf.score(session_N_data , session_N_labels)
            time1 = time.perf_counter() - start

            accus_sub.append(score)

            # Update the model: extend the training set with this session
            # and refit from scratch.
            train_X , train_y = concat_and_shuffle(train_X , train_y , session_id , freq)

            rf = RandomForestClassifier(n_estimators=16)

            start = time.perf_counter()
            rf.fit(train_X , train_y)
            time2 = time.perf_counter() - start

            times_sub.append(time1 + time2)

        times_sub = cal_time(times_sub)

        times.append(times_sub)
        accus.append(accus_sub)

    times = np.array(times)
    accus = np.array(accus)

    # Average over repetitions, one value per session; float() keeps the
    # printed list free of NumPy scalar wrappers.
    print('aver time : ' , [float(np.mean(column)) for column in times.T] )
    print('aver accu : ' , [float(np.mean(column)) for column in accus.T] )

freq =  6
aver time :  [0.07709084999999724, 0.07709084999999724, 0.0954551250000005, 0.1033475700000082, 0.11233255500000325, 0.12434944000000314, 0.12388603000001197, 0.12346875000000282, 0.1306207500000081]
aver accu :  [0.2538647342995169, 0.23454106280193235, 0.1468599033816425, 0.825, 0.6678743961352656, 0.527536231884058, 0.8231884057971015, 0.8004830917874397, 0.6805555555555556]
freq =  7.5
aver time :  [0.06841061500000194, 0.06841061500000194, 0.07648676000000591, 0.08863412999999128, 0.09785799999999653, 0.11240270499999952, 0.12705591499999686, 0.12403938000001062, 0.13167374000000506]
aver accu :  [0.7086956521739131, 0.5516908212560387, 0.33671497584541055, 0.4398550724637681, 0.5968599033816424, 0.5221014492753623, 0.7543478260869565, 0.9118357487922705, 0.590096618357488]
freq =  8.5
aver time :  [0.07295142999996358, 0.07295142999996358, 0.08632505499996057, 0.09796234999993772, 0.09747609999992904, 0.09489111999988324, 0.0952502599999292, 0.10165756499989129, 0.11970

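# Random forest, 20 trees (originally titled "perceptron"; the cell below trains a RandomForestClassifier)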

In [17]:
def cal_time(ss):
    """Subtract from ``ss`` the linear ramp defined by its first two entries.

    With ``step = ss[1] - ss[0]``, entry ``i`` is reduced by ``step * i``.
    The sequence is modified in place and also returned.

    Sequences with fewer than two entries have no defined step and are
    returned unchanged (previously this raised ``IndexError``).

    NOTE(review): this cell redefines ``cal_time``, which an earlier cell
    already defines; also, after this adjustment the second entry always
    equals the first one -- confirm that this correction is intended.

    :param ss: mutable sequence of numbers (e.g. a list of durations).
    :return: the same object ``ss``, adjusted in place.
    """
    if len(ss) < 2:
        return ss

    step = ss[1] - ss[0]

    for i , value in enumerate(ss):
        ss[i] = value - step * i

    return ss

# Incremental-learning experiment with a 20-tree random forest.
# For every stimulus frequency: repeat 20 times -- train on session 1, then for
# each later session score the current model on it, add that session's
# training segments to the training set, and refit a fresh forest.
for freq in [6 , 7.5 , 8.5 , 10]:
    print('freq = ' , freq)

    times = []  # one list per repetition: (score + refit) duration per session
    accus = []  # one list per repetition: accuracy per session

    for _ in range(20):
        times_sub = []
        accus_sub = []

        # Initial model: session 1 only (combine(freq) was the alternative source).
        train_X_ , train_y = session_data_labels(1 , freq , is_training=True)
        train_X = feature_extraction_RMS(train_X_)  # per-channel RMS features

        rf = RandomForestClassifier(n_estimators=20)
        rf.fit(train_X , train_y)

        for session_id in [3,5,6,7,8,9,11,12,13]:
            session_N_data , session_N_labels = session_data_labels(session_id , freq , is_training=False)
            session_N_data = feature_extraction_RMS(session_N_data)

            # time.clock() was removed in Python 3.8; time.perf_counter() is
            # the documented replacement for measuring short intervals.
            start = time.perf_counter()
            score = rf.score(session_N_data , session_N_labels)
            time1 = time.perf_counter() - start

            accus_sub.append(score)

            # Update the model: extend the training set with this session
            # and refit from scratch.
            train_X , train_y = concat_and_shuffle(train_X , train_y , session_id , freq)

            rf = RandomForestClassifier(n_estimators=20)

            start = time.perf_counter()
            rf.fit(train_X , train_y)
            time2 = time.perf_counter() - start

            times_sub.append(time1 + time2)

        times_sub = cal_time(times_sub)

        times.append(times_sub)
        accus.append(accus_sub)

    times = np.array(times)
    accus = np.array(accus)

    # Average over repetitions, one value per session; float() keeps the
    # printed list free of NumPy scalar wrappers.
    print('aver time : ' , [float(np.mean(column)) for column in times.T] )
    print('aver accu : ' , [float(np.mean(column)) for column in accus.T] )

freq =  6
aver time :  [0.09469644000000699, 0.09469644000000699, 0.10862856500006046, 0.11268674000004922, 0.11681907000015598, 0.11736423500009323, 0.11301141000020606, 0.11716198000024178, 0.11945227500018518]
aver accu :  [0.25555555555555554, 0.2335748792270531, 0.14480676328502418, 0.8252415458937199, 0.6695652173913043, 0.5478260869565218, 0.8143719806763287, 0.8249999999999998, 0.6634057971014492]
freq =  7.5
aver time :  [0.08928006500004812, 0.08928006500004812, 0.11123157499994249, 0.13329333999993195, 0.14163832000015192, 0.16757647000006273, 0.1809880249998514, 0.19692223499980627, 0.2247597199998154]
aver accu :  [0.7022946859903382, 0.5566425120772946, 0.3425120772946859, 0.4346618357487923, 0.5835748792270532, 0.533816425120773, 0.7643719806763285, 0.918599033816425, 0.5804347826086957]
freq =  8.5
aver time :  [0.09044490999997379, 0.09044490999997379, 0.10545898999992005, 0.12021774499980893, 0.11514265499986323, 0.11551947999967069, 0.11826538999955574, 0.12921274999