In [24]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.fft import irfft, rfft, rfftfreq
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression

In [2]:
files = ['eeg_fp1_all.csv', 'eeg_fp2_all.csv'] #файлы с парами симметричных каналов

result = []
for file in files:
    data = pd.read_csv(file,index_col=0)
    
    for j in range(len(data)):
        data_np = data.iloc[j].to_numpy()
        ans = []
        ans.append(data_np.mean()) #среднее
        ans.append(data_np.max())  #максимальное
        ans.append(data_np.min())  #минимальное
        ans.append(data_np.std())  #среднеквадратическое отклонение
        
        #пересечение 0
        count = 0
        for i in range(1, len(data_np)):
            if (data_np[i-1] >= 0 and data_np[i] < 0) or (data_np[i-1] <= 0 and data_np[i] > 0):
                count += 1
        ans.append(np.log(count))
        
        #преобразование Фурье
        y = np.abs(rfft(data_np)) # мощность волн определённой частоты 
        x = rfftfreq(len(data_np), 1 / 128) # список всех частот 
        
        #выделение ритмов
        alphaX = x.copy()
        alphaY = y.copy()
        betaX = x.copy()
        betaY = y.copy()
        gammaX = x.copy()
        gammaY = y.copy()
        thetaX = x.copy()
        thetaY = y.copy()
        deltaX = x.copy()
        deltaY = y.copy()


        for i in range(len(alphaX)):
            if (alphaX[i] < 8) or (alphaX[i] > 13):
                alphaY[i] = 0

        for i in range(len(betaX)):
            if (betaX[i] < 14) or (betaX[i] > 40):
                betaY[i] = 0

        for i in range(len(gammaX)):
            if gammaX[i] < 40:
                gammaY[i] = 0

        for i in range(len(thetaX)):
            if (thetaX[i] < 4) or (thetaX[i] > 8):
                thetaY[i] = 0

        for i in range(len(deltaX)):
            if deltaX[i] > 4:
                deltaY[i] = 0

        #мощность, минимальное, максимальное, среднеквадратическое отклонение
        #для каждого ритма
        alphaY = [elem for elem in alphaY if elem > 0]
        ans.append(np.sum(alphaY))
        ans.append(np.min(alphaY))
        ans.append(np.max(alphaY))
        ans.append(np.std(alphaY))

        betaY = [elem for elem in betaY if elem > 0]
        ans.append(np.sum(betaY))
        ans.append(np.min(betaY))
        ans.append(np.max(betaY))
        ans.append(np.std(betaY))

        gammaY = [elem for elem in gammaY if elem > 0]
        ans.append(np.sum(gammaY))
        ans.append(np.min(gammaY))
        ans.append(np.max(gammaY))
        ans.append(np.std(gammaY))
        

        thetaY = [elem for elem in thetaY if elem > 0]
        ans.append(np.sum(thetaY))
        ans.append(np.min(thetaY))
        ans.append(np.max(thetaY))
        ans.append(np.std(thetaY))

        deltaY = [elem for elem in deltaY if elem > 0]
        ans.append(np.sum(deltaY))
        ans.append(np.min(deltaY))
        ans.append(np.max(deltaY))
        ans.append(np.std(deltaY))

        #отношение мощностей ритмов друг к другу
        ans.append(np.sum(alphaY)/np.sum(betaY))
        ans.append(np.sum(alphaY)/np.sum(gammaY))
        ans.append(np.sum(alphaY)/np.sum(thetaY))
        ans.append(np.sum(alphaY)/np.sum(deltaY))
        ans.append(np.sum(betaY)/np.sum(gammaY))
        ans.append(np.sum(betaY)/np.sum(thetaY))
        ans.append(np.sum(betaY)/np.sum(deltaY))
        ans.append(np.sum(gammaY)/np.sum(thetaY))
        ans.append(np.sum(gammaY)/np.sum(deltaY))
        ans.append(np.sum(thetaY)/np.sum(deltaY))    
        
        result.append(ans)
        if j % 100==0:
            print(j)


0
100
200
300
400
500
600
700
800
900
1000
1100
1200
0
100
200
300
400
500
600
700
800
900
1000
1100
1200


In [7]:
data_fp1_fp2 = pd.DataFrame(result[:1280]).join(pd.DataFrame(result[1280:]), rsuffix='_') 
data_fp1_fp2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25_,26_,27_,28_,29_,30_,31_,32_,33_,34_
0,-0.028259,16.481104,-21.300121,3.889422,7.582229,184397.752704,56.389513,1918.256278,302.331620,424625.515731,...,0.447299,4.203771,0.909791,26.993846,9.398128,2.033967,60.348583,0.216423,6.421341,29.670385
1,-0.082712,17.495588,-21.785598,4.260994,7.583756,206373.978669,46.303658,1723.100135,335.499667,442735.645200,...,0.516039,4.351411,0.886788,20.156147,8.432330,1.718451,39.059352,0.203793,4.632095,22.729390
2,0.060689,20.255720,-25.717631,4.317223,7.553287,213100.525157,40.933989,1932.797237,355.249766,461431.080072,...,0.547275,5.090716,0.926878,19.001653,9.301939,1.693625,34.720505,0.182072,3.732609,20.500705
3,-0.027773,17.303013,-23.487709,3.961901,7.547502,189445.073326,23.804941,1702.297517,321.939043,412337.237965,...,0.489491,4.291089,0.825036,23.533134,8.766421,1.685496,48.076697,0.192267,5.484187,28.523771
4,-0.062537,15.558342,-16.637374,3.522773,7.605392,180425.627260,14.943784,1749.548503,292.406653,394931.920223,...,0.459411,4.516639,1.183565,32.865823,9.831368,2.576266,71.539044,0.262046,7.276611,27.768496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,-0.015068,17.973428,-20.996728,4.255723,7.721349,176377.949992,38.018807,1729.939499,324.032823,524708.781314,...,0.423426,2.550230,0.458850,11.030098,6.022840,1.083658,26.049623,0.179925,4.325139,24.038595
1276,-0.074093,20.405910,-30.047000,4.624853,7.782390,187317.194405,24.619376,1727.105607,326.241787,594520.243651,...,0.519095,3.404571,0.495414,8.118670,6.558665,0.954381,15.640040,0.145514,2.384638,16.387631
1277,-0.012218,19.687205,-20.918137,4.211918,7.735433,178690.631835,62.000548,1545.725836,269.183972,530299.228455,...,0.475112,2.526924,0.481724,5.423586,5.318587,1.013918,11.415388,0.190637,2.146320,11.258689
1278,0.048134,34.651109,-83.301852,5.774315,7.963808,195935.740642,22.013065,1949.490436,337.570686,885252.851963,...,0.351206,1.771482,0.547643,8.242653,5.043999,1.559323,23.469576,0.309144,4.652970,15.051134


In [7]:
data_fp1_fp2.to_csv('fp1_fp2_static_signs.csv')

In [8]:
data_all = pd.read_csv('fp1_fp2_static_signs.csv',index_col=0)
data_all

Unnamed: 0,mean,max,min,std,log_via_zero,sum_alpha,min_a,max_a,std_a,sum_beta,...,a/b1,a/g1,a/t1,a/d1,b/g1,b/t1,b/d1,g/t1,g/d1,t/d1
0,-0.028259,16.481104,-21.300121,3.889422,7.582229,184397.752704,56.389513,1918.256278,302.331620,424625.515731,...,0.447299,4.203771,0.909791,26.993846,9.398128,2.033967,60.348583,0.216423,6.421341,29.670385
1,-0.082712,17.495588,-21.785598,4.260994,7.583756,206373.978669,46.303658,1723.100135,335.499667,442735.645200,...,0.516039,4.351411,0.886788,20.156147,8.432330,1.718451,39.059352,0.203793,4.632095,22.729390
2,0.060689,20.255720,-25.717631,4.317223,7.553287,213100.525157,40.933989,1932.797237,355.249766,461431.080072,...,0.547275,5.090716,0.926878,19.001653,9.301939,1.693625,34.720505,0.182072,3.732609,20.500705
3,-0.027773,17.303013,-23.487709,3.961901,7.547502,189445.073326,23.804941,1702.297517,321.939043,412337.237965,...,0.489491,4.291089,0.825036,23.533134,8.766421,1.685496,48.076697,0.192267,5.484187,28.523771
4,-0.062537,15.558342,-16.637374,3.522773,7.605392,180425.627260,14.943784,1749.548503,292.406653,394931.920223,...,0.459411,4.516639,1.183565,32.865823,9.831368,2.576266,71.539044,0.262046,7.276611,27.768496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,-0.015068,17.973428,-20.996728,4.255723,7.721349,176377.949992,38.018807,1729.939499,324.032823,524708.781314,...,0.423426,2.550230,0.458850,11.030098,6.022840,1.083658,26.049623,0.179925,4.325139,24.038595
1276,-0.074093,20.405910,-30.047000,4.624853,7.782390,187317.194405,24.619376,1727.105607,326.241787,594520.243651,...,0.519095,3.404571,0.495414,8.118670,6.558665,0.954381,15.640040,0.145514,2.384638,16.387631
1277,-0.012218,19.687205,-20.918137,4.211918,7.735433,178690.631835,62.000548,1545.725836,269.183972,530299.228455,...,0.475112,2.526924,0.481724,5.423586,5.318587,1.013918,11.415388,0.190637,2.146320,11.258689
1278,0.048134,34.651109,-83.301852,5.774315,7.963808,195935.740642,22.013065,1949.490436,337.570686,885252.851963,...,0.351206,1.771482,0.547643,8.242653,5.043999,1.559323,23.469576,0.309144,4.652970,15.051134


In [9]:
labels = pd.read_csv('deap_all_labels.csv',index_col=0)

## Построение моделей

In [11]:
X_train, X_test, y_train, y_test = train_test_split(data_all, labels['valence'], test_size=0.25, random_state=249) 

In [12]:
results_table = pd.DataFrame()
results_table['criterion'] = ['accuracy_score', 'f1_score', 'precision_score', 'recall_score']
results_table

Unnamed: 0,criterion
0,accuracy_score
1,f1_score
2,precision_score
3,recall_score


In [13]:
clf = svm.SVC(kernel='rbf',C=10)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [14]:
results_table['svc'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc
0,accuracy_score,0.603125
1,f1_score,0.744467
2,precision_score,0.598706
3,recall_score,0.984043


In [1261]:
# #метод опорных векторов с gridsearch
# parameters_svc = {'kernel':('linear', 'poly', 'rbf', 'sigmoid'),'C':[0.0001, 1000], 'gamma':('scale', 'auto')} #'kernel':('linear', 'rbf'),
# svc = svm.SVC()
# clf = GridSearchCV(svc, parameters_svc, scoring = 'accuracy')
# clf.fit(X_train, y_train)
# results = clf.predict(X_test)
# clf.best_params_

In [1262]:
# results_table['svc_grid'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
# results_table

In [15]:
tree = DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)
tree.fit(X_train, y_train)
results = tree.predict(X_test)

In [16]:
results_table['tree'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree
0,accuracy_score,0.603125,0.578125
1,f1_score,0.744467,0.65995
2,precision_score,0.598706,0.626794
3,recall_score,0.984043,0.696809


In [17]:
clf = RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [18]:
results_table['forest'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest
0,accuracy_score,0.603125,0.578125,0.6
1,f1_score,0.744467,0.65995,0.683168
2,precision_score,0.598706,0.626794,0.638889
3,recall_score,0.984043,0.696809,0.734043


In [19]:
clf = KNeighborsClassifier(n_neighbors=55,weights='distance',algorithm='auto')
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [20]:
results_table['knn'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn
0,accuracy_score,0.603125,0.578125,0.6,0.6375
1,f1_score,0.744467,0.65995,0.683168,0.743363
2,precision_score,0.598706,0.626794,0.638889,0.636364
3,recall_score,0.984043,0.696809,0.734043,0.893617


In [1269]:
# best = 0.52
# best_random= 0
# params = {}
# for i in range(0,500):
#     X_train, X_test, y_train, y_test = train_test_split(data_all, labels['valence'], test_size=0.25, random_state=i)
#     clf = KNeighborsClassifier(n_neighbors=25,weights='distance',algorithm='auto')
#     clf.fit(X_train, y_train)
#     results = clf.predict(X_test)
#     if accuracy_score(y_test,results) > best:
#         best = accuracy_score(y_test,results)
#         best_random = i
#     if i % 10 == 0:
#         print(i)

In [21]:
data_nb_x_train = MinMaxScaler().fit(X_train).transform(X_train)
data_nb_x_test = MinMaxScaler().fit(X_test).transform(X_test)
clf = MultinomialNB(alpha=65)
clf.fit(data_nb_x_train, y_train)
results = clf.predict(data_nb_x_test)

In [22]:
results_table['nb'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb
0,accuracy_score,0.603125,0.578125,0.6,0.6375,0.590625
1,f1_score,0.744467,0.65995,0.683168,0.743363,0.713348
2,precision_score,0.598706,0.626794,0.638889,0.636364,0.605948
3,recall_score,0.984043,0.696809,0.734043,0.893617,0.867021


In [25]:
svc = svm.SVC(kernel='rbf',C=10)
clf = BaggingClassifier(base_estimator=svc, n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [26]:
results_table['bagging_svm'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm
0,accuracy_score,0.603125,0.578125,0.6,0.6375,0.590625,0.60625
1,f1_score,0.744467,0.65995,0.683168,0.743363,0.713348,0.744939
2,precision_score,0.598706,0.626794,0.638889,0.636364,0.605948,0.601307
3,recall_score,0.984043,0.696809,0.734043,0.893617,0.867021,0.978723


In [27]:
tree = DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)
clf = BaggingClassifier(base_estimator=tree, n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [28]:
results_table['bagging_tree'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree
0,accuracy_score,0.603125,0.578125,0.6,0.6375,0.590625,0.60625,0.596875
1,f1_score,0.744467,0.65995,0.683168,0.743363,0.713348,0.744939,0.684597
2,precision_score,0.598706,0.626794,0.638889,0.636364,0.605948,0.601307,0.633484
3,recall_score,0.984043,0.696809,0.734043,0.893617,0.867021,0.978723,0.744681


In [29]:
forest = RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)
clf = BaggingClassifier(base_estimator=forest, n_estimators=15, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [30]:
results_table['bagging_forest'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest
0,accuracy_score,0.603125,0.578125,0.6,0.6375,0.590625,0.60625,0.596875,0.55625
1,f1_score,0.744467,0.65995,0.683168,0.743363,0.713348,0.744939,0.684597,0.658654
2,precision_score,0.598706,0.626794,0.638889,0.636364,0.605948,0.601307,0.633484,0.600877
3,recall_score,0.984043,0.696809,0.734043,0.893617,0.867021,0.978723,0.744681,0.728723


In [31]:
estimators = [
    ('rf', RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)),
    ('dt', DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)),
    ('svc', svm.SVC(kernel='rbf',C=10)),
    ('knn',KNeighborsClassifier(n_neighbors=55,weights='distance',algorithm='auto'))
    ]

clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression()) 
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [32]:
results_table['stacking'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest,stacking
0,accuracy_score,0.603125,0.578125,0.6,0.6375,0.590625,0.60625,0.596875,0.55625,0.609375
1,f1_score,0.744467,0.65995,0.683168,0.743363,0.713348,0.744939,0.684597,0.658654,0.74645
2,precision_score,0.598706,0.626794,0.638889,0.636364,0.605948,0.601307,0.633484,0.600877,0.603279
3,recall_score,0.984043,0.696809,0.734043,0.893617,0.867021,0.978723,0.744681,0.728723,0.978723


In [33]:
tree = DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)
clf = AdaBoostClassifier(base_estimator=tree,n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [34]:
results_table['boost_tree'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest,stacking,boost_tree
0,accuracy_score,0.603125,0.578125,0.6,0.6375,0.590625,0.60625,0.596875,0.55625,0.609375,0.590625
1,f1_score,0.744467,0.65995,0.683168,0.743363,0.713348,0.744939,0.684597,0.658654,0.74645,0.666667
2,precision_score,0.598706,0.626794,0.638889,0.636364,0.605948,0.601307,0.633484,0.600877,0.603279,0.639024
3,recall_score,0.984043,0.696809,0.734043,0.893617,0.867021,0.978723,0.744681,0.728723,0.978723,0.696809


In [35]:
tree = RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)
clf = AdaBoostClassifier(base_estimator=tree,n_estimators=10, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [36]:
results_table['boost_forest'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest,stacking,boost_tree,boost_forest
0,accuracy_score,0.603125,0.578125,0.6,0.6375,0.590625,0.60625,0.596875,0.55625,0.609375,0.590625,0.609375
1,f1_score,0.744467,0.65995,0.683168,0.743363,0.713348,0.744939,0.684597,0.658654,0.74645,0.666667,0.664879
2,precision_score,0.598706,0.626794,0.638889,0.636364,0.605948,0.601307,0.633484,0.600877,0.603279,0.639024,0.67027
3,recall_score,0.984043,0.696809,0.734043,0.893617,0.867021,0.978723,0.744681,0.728723,0.978723,0.696809,0.659574


In [118]:
results_table.to_csv('result_fp1_fp2_static_valence.csv')