In [2]:
# Colab-only setup: mount Google Drive and make the data folder the working
# directory, so the relative paths opened later in the notebook resolve there.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/My Drive/colab_data

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/colab_data


In [2]:
# Pin scikit-learn to the 0.24 release for this notebook.
!pip install scikit-learn==0.24

Collecting scikit-learn==0.24
[?25l  Downloading https://files.pythonhosted.org/packages/b1/ed/ab51a8da34d2b3f4524b21093081e7f9e2ddf1c9eac9f795dcf68ad0a57d/scikit_learn-0.24.0-cp37-cp37m-manylinux2010_x86_64.whl (22.3MB)
[K     |████████████████████████████████| 22.3MB 1.8MB/s 
[?25hCollecting threadpoolctl>=2.0.0
  Downloading https://files.pythonhosted.org/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/threadpoolctl-2.1.0-py3-none-any.whl
Installing collected packages: threadpoolctl, scikit-learn
  Found existing installation: scikit-learn 0.22.2.post1
    Uninstalling scikit-learn-0.22.2.post1:
      Successfully uninstalled scikit-learn-0.22.2.post1
Successfully installed scikit-learn-0.24.0 threadpoolctl-2.1.0


In [3]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
sns.set_theme(style="whitegrid")

# from pyeeg.feature_extraction import power_spectral_density
# from pyeeg.utils import get_features

from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score, GridSearchCV, GroupKFold
from sklearn.cluster import KMeans

In [4]:
def power_spectral_density(data, sf=128, nperseg=128, band=(4, 8, 14, 31, 65)):
    """Mean Welch power inside consecutive frequency bands.

    Parameters
    ----------
    data : array_like
        Input signal(s); the spectrum is estimated along the last axis.
    sf : float
        Sampling frequency in Hz, forwarded to ``scipy.signal.welch``.
    nperseg : int
        Segment length forwarded to ``scipy.signal.welch``.
    band : sequence of float
        Ascending band edges in Hz. Each pair of consecutive edges
        ``(lo, hi)`` defines the half-open band ``lo <= f < hi``, so the
        default produces four bands: 4-8, 8-14, 14-31 and 31-65 Hz.

    Returns
    -------
    numpy.ndarray
        Array of shape ``data.shape[:-1] + (len(band) - 1,)`` holding the mean
        power of every band. A band that contains no frequency bin yields NaN.
    """
    edges = np.asarray(band)

    freqs, power = signal.welch(data, sf, nperseg=nperseg)

    # Pick the bins of each band by comparing against the frequency axis.
    # Using the edges as positions into ``freqs`` is only correct when the
    # resolution is exactly 1 Hz per bin (sf == nperseg); masks are correct
    # for any resolution and give the same result in the 1 Hz case.
    features = []
    for low, high in zip(edges[:-1], edges[1:]):
        in_band = (freqs >= low) & (freqs < high)
        features.append(np.mean(power[..., in_band], axis=-1))

    return np.stack(features, axis=-1)
    
def load_deap(s="01"):
    """Load one pickled subject file and return its data and label arrays.

    The file ``data_preprocessed_python/s<s>.dat`` is read relative to the
    current working directory, and the subject id is printed as a progress
    trace.

    Parameters
    ----------
    s : str
        Subject identifier inserted into the file name.

    Returns
    -------
    tuple
        ``(data, label)``: ``data`` keeps the first 32 entries along axis 1 of
        the stored ``'data'`` array; ``label`` keeps column 0 of the stored
        ``'labels'`` array as a 2-D column.
    """
    print(s)
    path = "data_preprocessed_python" + "/s{}.dat".format(s)
    # NOTE: unpickling executes arbitrary code -- only load trusted files.
    with open(path, "rb") as handle:
        record = pickle.load(handle, encoding='latin1')
    return record['data'][:, :32], record['labels'][:, [0]]

# Zero-padded two-digit subject identifiers "01" .. "32".
subjectList = np.array(["{:02d}".format(number) for number in range(1, 33)])

def split_signal(data, label, windows=1, fs=128):
    """Cut the last axis of a 3-D array into fixed-length windows.

    Parameters
    ----------
    data : numpy.ndarray
        3-D array whose last axis is the sample axis.
    label : numpy.ndarray
        2-D array with one row per entry along ``data``'s first axis.
    windows : number
        Window length in seconds.
    fs : number
        Sampling frequency in Hz; each window has ``int(fs * windows)`` samples.

    Returns
    -------
    tuple
        ``(tmpData, tmpLabel)``: ``tmpData`` has the windows stacked on a new
        axis 1, i.e. shape ``(data.shape[0], n_windows, data.shape[1],
        window_length)``; ``tmpLabel`` is ``label`` repeated ``n_windows``
        times along axis 1.

    Raises
    ------
    ValueError
        If ``data`` is not 3-D or is shorter than a single window.
    """
    if len(data.shape) != 3:
        raise ValueError(
            "data must be 3-dimensional, got shape {}".format(data.shape))

    n_windows = int(data.shape[-1] // fs // windows)
    if n_windows < 1:
        raise ValueError(
            "signal of {} samples is shorter than one window".format(data.shape[-1]))

    # Drop the trailing partial window so every window has exactly
    # ``window_length`` samples. Splitting the untrimmed signal into
    # ``n_windows`` equal parts would silently produce longer windows (or
    # fail) whenever the length is not a multiple of the window length.
    window_length = int(fs * windows)
    usable = n_windows * window_length

    tmpData = np.stack(np.split(data[..., :usable], n_windows, axis=2), axis=1)
    tmpLabel = np.repeat(label, n_windows, axis=1)
    return tmpData, tmpLabel

def remove_baseline(data, label, baseline=3):
    """Separate the leading ``baseline`` entries along axis 1 from the rest.

    Parameters
    ----------
    data : numpy.ndarray
        Array with at least two axes; it is cut along axis 1.
    label : numpy.ndarray
        2-D array cut along axis 1 at the same position.
    baseline : int
        Number of leading entries along axis 1 that form the baseline part.

    Returns
    -------
    tuple
        ``(base, base_label, signal, signal_label)``.
    """
    head = slice(None, baseline)
    tail = slice(baseline, None)
    return data[:, head, ...], label[:, head], data[:, tail, ...], label[:, tail]
def label_binarizer(label, threshold=5):
    """Return a copy of ``label`` with values mapped to 0/1 around ``threshold``.

    Entries ``<= threshold`` become 0 and entries ``> threshold`` become 1.
    The input array is left untouched and the copy keeps its dtype.
    """
    binary = np.copy(label)
    np.putmask(binary, label <= threshold, 0)
    np.putmask(binary, label > threshold, 1)
    return binary
def combined_electrode(features):
    """Flatten the last two axes of ``features`` into one, keeping leading axes."""
    leading = tuple(features.shape[:-2])
    return features.reshape(leading + (-1,))
def group_by_trial(data, label, shuffle=False):
    """Flatten the first two axes and tag every row with its first-axis index.

    Parameters
    ----------
    data : numpy.ndarray
        Array of shape ``(trials, slices, ...)``.
    label : numpy.ndarray
        Labels, flattened in C order.
    shuffle : bool
        When true, apply one random permutation (drawn from NumPy's global
        RNG) to all three outputs.

    Returns
    -------
    tuple
        ``(groups, data, label)`` where ``groups`` holds the 1-based trial
        number of each row and ``data`` has shape ``(trials * slices, ...)``.
    """
    n_trials, n_slices, *rest = data.shape
    flat_data = data.reshape((-1, *rest))
    flat_label = np.ravel(label, order='C')
    groups = np.repeat(np.arange(1, n_trials + 1), n_slices)

    if not shuffle:
        return groups, flat_data, flat_label

    order = np.arange(len(groups))
    np.random.shuffle(order)
    return groups[order], flat_data[order], flat_label[order]
def get_features(data, label, feature_func):
    """Run the full windowing / baseline-correction / flattening pipeline.

    Steps, all with the helpers' default arguments:

    1. ``split_signal`` cuts the recordings into windows.
    2. ``label_binarizer`` maps the labels to 0/1.
    3. ``remove_baseline`` separates the leading baseline windows.
    4. ``feature_func`` is applied to the baseline windows, then to the
       remaining windows; ``combined_electrode`` merges the last two axes of
       each result.
    5. The mean baseline feature vector (averaged over axis 1) is subtracted
       from every remaining window.
    6. ``group_by_trial`` flattens the result.

    Returns
    -------
    tuple
        ``(G, X, Y)``: group ids, feature rows and labels.
    """
    windows, window_labels = split_signal(data, label)
    window_labels = label_binarizer(window_labels)

    base_windows, _, trial_windows, trial_labels = remove_baseline(
        windows, window_labels)

    base_features = combined_electrode(feature_func(base_windows))
    trial_features = combined_electrode(feature_func(trial_windows))

    # Average over the baseline windows and re-insert a singleton axis so the
    # subtraction broadcasts across all remaining windows.
    baseline = np.mean(base_features, axis=1)[:, np.newaxis, :]

    return group_by_trial(trial_features - baseline, trial_labels)

In [5]:
# Sanity check: load a single subject and display the shapes of its arrays.
data,label = load_deap('01')
data.shape, label.shape

01


((40, 32, 8064), (40, 1))

In [6]:
# Extract features for every subject and stack them into single arrays.
aG = []
aX = []
aY = []
# Running offset added to each subject's group IDs so that IDs never collide
# across subjects; derived from the IDs actually produced instead of assuming
# a fixed number of trials per subject.
group_offset = 0
for subject in subjectList:
  data, label = load_deap(subject)
  G, X, Y = get_features(data, label, power_spectral_density)
  aG.append(G + group_offset)
  aX.append(X)
  aY.append(Y)
  group_offset += int(G.max())
aG = np.concatenate(aG)
aX = np.concatenate(aX)
aY = np.concatenate(aY)

01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


In [30]:
from sklearn.model_selection import GroupKFold

In [7]:
# Display the shapes of the stacked group IDs, feature matrix and labels.
aG.shape, aX.shape, aY.shape

((76800,), (76800, 128), (76800,))

In [8]:
# Shuffle the sample indices. A dedicated, seeded generator keeps the
# permutation identical across kernel restarts and leaves NumPy's global RNG
# state untouched.
index = np.random.default_rng(42).permutation(len(aG))

In [9]:
# Apply the same permutation to groups, features and labels so rows stay aligned.
# NOTE: this rebinds aG/aX/aY, so re-running the cell permutes them again.
aG = aG[index]
aX = aX[index]
aY = aY[index]
aG.shape

(76800,)

In [10]:
# 4-fold cross-validation with folds formed from the group IDs in aG.
# The same SVC instance is fitted again on every fold and its test accuracy printed.
svc = SVC()
cv = GroupKFold(n_splits=4)
for i_tr, i_te in cv.split(aX, groups=aG):
  print(np.unique(aG[i_te]))# check whether the test groups look shuffled
  
  x_train, y_train = aX[i_tr],aY[i_tr]
  x_test, y_test = aX[i_te], aY[i_te]
  svc.fit(x_train, y_train)
  print(svc.score(x_test, y_test))

[   3    7   11   15   19   23   27   31   35   40   43   47   51   55
   59   63   67   71   75   79   83   87   91   95   99  103  107  111
  115  120  123  127  131  135  139  143  147  151  155  160  163  167
  171  175  179  183  187  191  195  200  203  207  211  215  219  223
  227  231  235  239  243  247  251  255  259  263  267  271  275  280
  283  287  291  295  299  303  307  311  315  320  323  327  331  335
  339  343  347  351  355  360  363  367  371  375  379  383  387  391
  395  400  403  407  411  415  420  424  428  432  436  440  444  448
  452  456  460  464  468  472  476  480  484  488  492  496  500  504
  508  512  516  521  524  528  532  536  540  544  548  552  556  560
  564  568  572  576  580  584  588  592  596  601  604  608  612  616
  620  624  628  632  636  640  644  648  652  656  660  664  668  672
  676  681  684  688  692  696  700  704  708  712  716  720  724  728
  732  736  740  744  748  752  756  761  764  768  772  776  780  784
  788 

In [16]:
# Direct split: a single hold-out split by group instead of cross-validation

In [11]:
from sklearn.model_selection import train_test_split

In [17]:
# Split the unique group IDs (not the individual samples) into train and test
# sets, with a fixed random_state for a repeatable split.
group = np.unique(aG)
g_train, g_test = train_test_split(group, test_size=0.25, random_state=42)
g_test# test set -- check whether it looks shuffled

array([1241,  696,  900,  771,  744,  347,  240,  465,  406,  987,  707,
        573,   66,   45, 1099,  527, 1160,  478,  631, 1046, 1041,  197,
        232,  833,  334, 1069,  395,  343,  199, 1143,  366,  185, 1027,
        129,  924, 1278,   52,  241, 1177,  672,  443, 1147,  222,   24,
        424,  668,  298,   44, 1116,  713,  772,  853,   82,   64,  124,
       1053,   79,  984,  278, 1094,  755,  586,  353,  255,  709,  717,
       1078,  375,  211,  245,  742,  549, 1048,   33,   77,  340,  757,
        479,  913, 1007, 1070,  221,  307,  829,  894,  468, 1268,  886,
       1120,  219,  102,  543,  590,  559,  209,   59,  421,  910, 1113,
       1250,  600,  486,  515,  452,   50, 1011, 1225,  439,   87,  310,
        416,  835,  491,  329,  266,  827,  674,  550,  852,  842, 1086,
        803,  788,  309,  584,  679,  846,  287,  352,  523,  304,  598,
       1001,  753,   30, 1016,  156,   32, 1274,  414, 1222,   71,  262,
        807,  248,  454, 1084,  448,  591,  391,  5

In [None]:
# Boolean masks selecting the samples whose group ID fell into each split.
# Each mask is computed once and reused for both features and labels;
# np.isin replaces the deprecated np.in1d.
train_mask = np.isin(aG, g_train)
test_mask = np.isin(aG, g_test)
X_train, Y_train = aX[train_mask], aY[train_mask]
X_test, Y_test = aX[test_mask], aY[test_mask]

# Fit on the training groups and report accuracy on the held-out groups.
svc = SVC()
svc.fit(X_train, Y_train)
svc.score(X_test, Y_test)