In [36]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.fft import irfft, rfft, rfftfreq
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression

In [3]:
files = ['bioradio_fp1_all.csv','bioradio_fp2_all.csv'] 

result = []
for file in files:
    data = pd.read_csv(file,index_col=0)
    
    for j in range(len(data)):
        data_np = data.iloc[j].to_numpy()
            
        ans = []
        ans.append(data_np.mean())
        ans.append(data_np.max())
        ans.append(data_np.min())
        ans.append(data_np.std())
        
        count = 0
        for i in range(len(data_np)):
            if (data_np[i-1] >= 0 and data_np[i] < 0) or (data_np[i-1] <= 0 and data_np[i] > 0):
                count += 1
        ans.append(np.log(count))
        
        y = np.abs(rfft(data_np)) # мощность волн определённой частоты 
        x = rfftfreq(len(data_np), 1 / 128) # список всех частот 

        alphaX = x.copy()
        alphaY = y.copy()
        betaX = x.copy()
        betaY = y.copy()
        gammaX = x.copy()
        gammaY = y.copy()
        thetaX = x.copy()
        thetaY = y.copy()
        deltaX = x.copy()
        deltaY = y.copy()


        for i in range(len(alphaX)):
            if (alphaX[i] < 8) or (alphaX[i] > 13):
                alphaY[i] = 0

        for i in range(len(betaX)):
            if (betaX[i] < 14) or (betaX[i] > 40):
                betaY[i] = 0

        for i in range(len(gammaX)):
            if gammaX[i] < 40:
                gammaY[i] = 0

        for i in range(len(thetaX)):
            if (thetaX[i] < 4) or (thetaX[i] > 8):
                thetaY[i] = 0

        for i in range(len(deltaX)):
            if deltaX[i] > 4:
                deltaY[i] = 0


        alphaY = [elem for elem in alphaY if elem > 0]
        ans.append(np.sum(alphaY))
        ans.append(np.min(alphaY))
        ans.append(np.max(alphaY))
        ans.append(np.std(alphaY))

        betaY = [elem for elem in betaY if elem > 0]
        ans.append(np.sum(betaY))
        ans.append(np.min(betaY))
        ans.append(np.max(betaY))
        ans.append(np.std(betaY))

        gammaY = [elem for elem in gammaY if elem > 0]
        ans.append(np.sum(gammaY))
        ans.append(np.min(gammaY))
        ans.append(np.max(gammaY))
        ans.append(np.std(gammaY))
        

        thetaY = [elem for elem in thetaY if elem > 0]
        ans.append(np.sum(thetaY))
        ans.append(np.min(thetaY))
        ans.append(np.max(thetaY))
        ans.append(np.std(thetaY))

        deltaY = [elem for elem in deltaY if elem > 0]
        ans.append(np.sum(deltaY))
        ans.append(np.min(deltaY))
        ans.append(np.max(deltaY))
        ans.append(np.std(deltaY))

        ans.append(np.sum(alphaY)/np.sum(betaY))
        ans.append(np.sum(alphaY)/np.sum(gammaY))
        ans.append(np.sum(alphaY)/np.sum(thetaY))
        ans.append(np.sum(alphaY)/np.sum(deltaY))
        ans.append(np.sum(betaY)/np.sum(gammaY))
        ans.append(np.sum(betaY)/np.sum(thetaY))
        ans.append(np.sum(betaY)/np.sum(deltaY))
        ans.append(np.sum(gammaY)/np.sum(thetaY))
        ans.append(np.sum(gammaY)/np.sum(deltaY))
        ans.append(np.sum(thetaY)/np.sum(deltaY))    
        
        result.append(ans)


In [4]:
data_fp1_fp2 = pd.DataFrame(result[:19]).join(pd.DataFrame(result[19:]), rsuffix='_') 
data_fp1_fp2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25_,26_,27_,28_,29_,30_,31_,32_,33_,34_
0,-2.429206e-16,108.250523,-136.338349,36.385666,4.219508,20058.767499,322.24863,2093.89205,486.748374,37838.131079,...,0.443783,6.776485,0.718231,5789705000000000.0,15.269825,1.61843,1.304626e+16,0.105989,854381800000000.0,8061061000000000.0
1,1.214603e-16,81.343876,-91.414027,25.915732,4.276666,15931.490071,182.974237,1426.224231,339.975711,32328.77447,...,0.434663,4.18401,0.587707,4766507000000000.0,9.625867,1.352097,1.096598e+16,0.140465,1139220000000000.0,8110346000000000.0
2,-1.821904e-16,92.717765,-68.04798,21.719664,4.406719,15470.92669,157.877314,1720.965576,436.796082,28643.495308,...,0.475619,5.248041,0.665438,5131550000000000.0,11.034116,1.399097,1.078919e+16,0.126797,977803000000000.0,7711538000000000.0
3,-2.429206e-16,102.182135,-60.867353,22.013051,4.564348,13299.966939,74.81944,1959.596675,485.391284,33998.60107,...,0.418017,4.584766,0.706576,5057659000000000.0,10.967884,1.690304,1.209916e+16,0.154114,1103144000000000.0,7157980000000000.0
4,2.732857e-16,71.439914,-42.690381,17.643253,4.477337,12230.957131,160.923224,1084.282658,290.337712,24976.366916,...,0.430345,5.031573,0.974284,6292811000000000.0,11.691956,2.263962,1.462272e+16,0.193634,1250665000000000.0,6458905000000000.0
5,-6.073015e-17,72.712237,-54.02601,21.701296,4.406719,13959.16898,82.976011,1862.633428,464.391165,29193.046574,...,0.466947,5.518278,0.884003,7389978000000000.0,11.81779,1.893157,1.582617e+16,0.160196,1339182000000000.0,8359672000000000.0
6,-1.214603e-16,115.825321,-109.626656,25.797008,4.584967,17564.812732,320.909911,2409.949338,473.771379,38224.665867,...,0.554189,4.967581,0.916271,6306888000000000.0,8.963698,1.653355,1.13804e+16,0.18445,1269609000000000.0,6883215000000000.0
7,-6.073015e-17,83.961285,-39.943493,23.0471,4.382027,12463.341237,159.617387,1623.211171,411.097888,34566.420327,...,0.353414,4.052104,0.648192,6110645000000000.0,11.465598,1.834085,1.729033e+16,0.159964,1508018000000000.0,9427222000000000.0
8,-1.457524e-15,137.526124,-242.900578,37.529859,4.304065,27244.472374,368.010298,2819.399341,651.174117,55494.927932,...,0.393976,5.117807,0.814386,5939607000000000.0,12.990135,2.067092,1.507605e+16,0.159128,1160577000000000.0,7293360000000000.0
9,3.643809e-16,130.64746,-69.65773,24.798539,4.382027,15312.752882,228.374275,2166.920587,452.602954,37131.131341,...,0.403003,4.067915,0.744706,6480118000000000.0,10.094005,1.847891,1.607958e+16,0.183068,1592983000000000.0,8701584000000000.0


In [6]:
data_fp1_fp2.to_csv('fp1_fp2_bioradio_1_static_signs.csv')

In [15]:
data_bioradio = pd.read_csv('fp1_fp2_bioradio_1_static_signs.csv',index_col=0)
data_bioradio

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25_,26_,27_,28_,29_,30_,31_,32_,33_,34_
0,-2.429206e-16,108.250523,-136.338349,36.385666,4.219508,20058.767499,322.24863,2093.89205,486.748374,37838.131079,...,0.443783,6.776485,0.718231,5789705000000000.0,15.269825,1.61843,1.304626e+16,0.105989,854381800000000.0,8061061000000000.0
1,1.214603e-16,81.343876,-91.414027,25.915732,4.276666,15931.490071,182.974237,1426.224231,339.975711,32328.77447,...,0.434663,4.18401,0.587707,4766507000000000.0,9.625867,1.352097,1.096598e+16,0.140465,1139220000000000.0,8110346000000000.0
2,-1.821904e-16,92.717765,-68.04798,21.719664,4.406719,15470.92669,157.877314,1720.965576,436.796082,28643.495308,...,0.475619,5.248041,0.665438,5131550000000000.0,11.034116,1.399097,1.078919e+16,0.126797,977803000000000.0,7711538000000000.0
3,-2.429206e-16,102.182135,-60.867353,22.013051,4.564348,13299.966939,74.81944,1959.596675,485.391284,33998.60107,...,0.418017,4.584766,0.706576,5057659000000000.0,10.967884,1.690304,1.209916e+16,0.154114,1103144000000000.0,7157980000000000.0
4,2.732857e-16,71.439914,-42.690381,17.643253,4.477337,12230.957131,160.923224,1084.282658,290.337712,24976.366916,...,0.430345,5.031573,0.974284,6292811000000000.0,11.691956,2.263962,1.462272e+16,0.193634,1250665000000000.0,6458905000000000.0
5,-6.073015e-17,72.712237,-54.02601,21.701296,4.406719,13959.16898,82.976011,1862.633428,464.391165,29193.046574,...,0.466947,5.518278,0.884003,7389978000000000.0,11.81779,1.893157,1.582617e+16,0.160196,1339182000000000.0,8359672000000000.0
6,-1.214603e-16,115.825321,-109.626656,25.797008,4.584967,17564.812732,320.909911,2409.949338,473.771379,38224.665867,...,0.554189,4.967581,0.916271,6306888000000000.0,8.963698,1.653355,1.13804e+16,0.18445,1269609000000000.0,6883215000000000.0
7,-6.073015e-17,83.961285,-39.943493,23.0471,4.382027,12463.341237,159.617387,1623.211171,411.097888,34566.420327,...,0.353414,4.052104,0.648192,6110645000000000.0,11.465598,1.834085,1.729033e+16,0.159964,1508018000000000.0,9427222000000000.0
8,-1.457524e-15,137.526124,-242.900578,37.529859,4.304065,27244.472374,368.010298,2819.399341,651.174117,55494.927932,...,0.393976,5.117807,0.814386,5939607000000000.0,12.990135,2.067092,1.507605e+16,0.159128,1160577000000000.0,7293360000000000.0
9,3.643809e-16,130.64746,-69.65773,24.798539,4.382027,15312.752882,228.374275,2166.920587,452.602954,37131.131341,...,0.403003,4.067915,0.744706,6480118000000000.0,10.094005,1.847891,1.607958e+16,0.183068,1592983000000000.0,8701584000000000.0


In [28]:
labels_bioradio = pd.read_csv('bioradio_raiting_all.csv',index_col=0)
labels_bioradio

Unnamed: 0,valence,arousal,dominance,liking,valence_model,arousal_model
0,7,7,6,4,1,1
1,5,7,3,2,1,1
2,7,3,6,3,1,0
3,2,2,2,1,0,0
4,5,7,8,2,1,1
5,7,3,3,1,1,0
6,8,3,5,3,1,0
7,7,2,3,4,1,0
8,8,7,5,4,1,1
9,5,1,4,1,1,0


In [29]:
data_all = pd.read_csv('fp1_fp2_static_signs.csv',index_col=0)

In [30]:
labels = pd.read_csv('deap_all_labels.csv',index_col=0)

## Построение моделей

In [96]:
X_train, X_test, y_train, y_test = train_test_split(data_all, labels['valence'], test_size=0.25, random_state=249) 

In [97]:
results_table = pd.DataFrame()
results_table['criterion'] = ['accuracy_score', 'f1_score', 'precision_score', 'recall_score']
results_table

Unnamed: 0,criterion
0,accuracy_score
1,f1_score
2,precision_score
3,recall_score


In [98]:
clf = svm.SVC(kernel='rbf',C=10)
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [99]:
results_table['svc'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc
0,accuracy_score,0.842105
1,f1_score,0.914286
2,precision_score,0.842105
3,recall_score,1.0


In [100]:
# #метод опорных векторов с gridsearch
# parameters_svc = {'kernel':('linear', 'poly', 'rbf', 'sigmoid'),'C':[0.0001, 1000], 'gamma':('scale', 'auto')} #'kernel':('linear', 'rbf'),
# svc = svm.SVC()
# clf = GridSearchCV(svc, parameters_svc, scoring = 'accuracy')
# clf.fit(X_train, y_train)
# results = clf.predict(X_test)
# clf.best_params_

In [101]:
# results_table['svc_grid'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
# results_table

In [102]:
tree = DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)
tree.fit(X_train, y_train)
results = tree.predict(data_bioradio)

In [103]:
results_table['tree'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree
0,accuracy_score,0.842105,0.736842
1,f1_score,0.914286,0.83871
2,precision_score,0.842105,0.866667
3,recall_score,1.0,0.8125


In [107]:
clf = RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [108]:
results_table['forest'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest
0,accuracy_score,0.842105,0.736842,0.684211
1,f1_score,0.914286,0.83871,0.8125
2,precision_score,0.842105,0.866667,0.8125
3,recall_score,1.0,0.8125,0.8125


In [109]:
clf = KNeighborsClassifier(n_neighbors=55,weights='distance',algorithm='auto')
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [110]:
results_table['knn'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn
0,accuracy_score,0.842105,0.736842,0.684211,0.842105
1,f1_score,0.914286,0.83871,0.8125,0.914286
2,precision_score,0.842105,0.866667,0.8125,0.842105
3,recall_score,1.0,0.8125,0.8125,1.0


In [111]:
# best = 0.52
# best_random= 0
# params = {}
# for i in range(1,500):
#     clf = svm.SVC(kernel='rbf',C=i)
#     clf.fit(X_train, y_train)
#     results = clf.predict(X_test)
#     if accuracy_score(y_test,results) > best:
#         best = accuracy_score(y_test,results)
#         best_random = i
#     if i % 10 == 0:
#         print(i)

In [112]:
data_nb_x_train = MinMaxScaler().fit(X_train).transform(X_train)
data_nb_x_test = MinMaxScaler().fit(data_bioradio).transform(data_bioradio)
clf = MultinomialNB(alpha=65)
clf.fit(data_nb_x_train, y_train)
results = clf.predict(data_nb_x_test)

In [113]:
results_table['nb'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb
0,accuracy_score,0.842105,0.736842,0.684211,0.842105,0.736842
1,f1_score,0.914286,0.83871,0.8125,0.914286,0.83871
2,precision_score,0.842105,0.866667,0.8125,0.842105,0.866667
3,recall_score,1.0,0.8125,0.8125,1.0,0.8125


In [114]:
svc = svm.SVC(kernel='rbf',C=10)
clf = BaggingClassifier(base_estimator=svc, n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [115]:
results_table['bagging_svm'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                               precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm
0,accuracy_score,0.842105,0.736842,0.684211,0.842105,0.736842,0.368421
1,f1_score,0.914286,0.83871,0.8125,0.914286,0.83871,0.5
2,precision_score,0.842105,0.866667,0.8125,0.842105,0.866667,0.75
3,recall_score,1.0,0.8125,0.8125,1.0,0.8125,0.375


In [116]:
tree = DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)
clf = BaggingClassifier(base_estimator=tree, n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [117]:
results_table['bagging_tree'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree
0,accuracy_score,0.842105,0.736842,0.684211,0.842105,0.736842,0.368421,0.842105
1,f1_score,0.914286,0.83871,0.8125,0.914286,0.83871,0.5,0.914286
2,precision_score,0.842105,0.866667,0.8125,0.842105,0.866667,0.75,0.842105
3,recall_score,1.0,0.8125,0.8125,1.0,0.8125,0.375,1.0


In [118]:
forest = RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)
clf = BaggingClassifier(base_estimator=forest, n_estimators=15, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [119]:
results_table['bagging_forest'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest
0,accuracy_score,0.842105,0.736842,0.684211,0.842105,0.736842,0.368421,0.842105,0.631579
1,f1_score,0.914286,0.83871,0.8125,0.914286,0.83871,0.5,0.914286,0.740741
2,precision_score,0.842105,0.866667,0.8125,0.842105,0.866667,0.75,0.842105,0.909091
3,recall_score,1.0,0.8125,0.8125,1.0,0.8125,0.375,1.0,0.625


In [120]:
estimators = [
    ('rf', RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)),
    ('dt', DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)),
    ('svc', svm.SVC(kernel='rbf',C=10)),
    ('knn',KNeighborsClassifier(n_neighbors=55,weights='distance',algorithm='auto'))
    ]

clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression()) 
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [121]:
results_table['stacking'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest,stacking
0,accuracy_score,0.842105,0.736842,0.684211,0.842105,0.736842,0.368421,0.842105,0.631579,0.842105
1,f1_score,0.914286,0.83871,0.8125,0.914286,0.83871,0.5,0.914286,0.740741,0.914286
2,precision_score,0.842105,0.866667,0.8125,0.842105,0.866667,0.75,0.842105,0.909091,0.842105
3,recall_score,1.0,0.8125,0.8125,1.0,0.8125,0.375,1.0,0.625,1.0


In [122]:
tree = DecisionTreeClassifier(criterion='entropy',
 max_depth=8,
 min_samples_split=15,
 min_samples_leaf=5)
clf = AdaBoostClassifier(base_estimator=tree,n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [123]:
results_table['boost_tree'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest,stacking,boost_tree
0,accuracy_score,0.842105,0.736842,0.684211,0.842105,0.736842,0.368421,0.842105,0.631579,0.842105,0.736842
1,f1_score,0.914286,0.83871,0.8125,0.914286,0.83871,0.5,0.914286,0.740741,0.914286,0.848485
2,precision_score,0.842105,0.866667,0.8125,0.842105,0.866667,0.75,0.842105,0.909091,0.842105,0.823529
3,recall_score,1.0,0.8125,0.8125,1.0,0.8125,0.375,1.0,0.625,1.0,0.875


In [124]:
tree = RandomForestClassifier(criterion='entropy',
 n_estimators=15,
 max_depth=8,
 min_samples_leaf= 12,
 min_samples_split = 5)
clf = AdaBoostClassifier(base_estimator=tree,n_estimators=10, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(data_bioradio)

In [125]:
results_table['boost_forest'] = [accuracy_score(labels_bioradio['valence_model'],results), f1_score(labels_bioradio['valence_model'],results), \
                       precision_score(labels_bioradio['valence_model'],results), recall_score(labels_bioradio['valence_model'],results)]
results_table

Unnamed: 0,criterion,svc,tree,forest,knn,nb,bagging_svm,bagging_tree,bagging_forest,stacking,boost_tree,boost_forest
0,accuracy_score,0.842105,0.736842,0.684211,0.842105,0.736842,0.368421,0.842105,0.631579,0.842105,0.736842,0.842105
1,f1_score,0.914286,0.83871,0.8125,0.914286,0.83871,0.5,0.914286,0.740741,0.914286,0.848485,0.914286
2,precision_score,0.842105,0.866667,0.8125,0.842105,0.866667,0.75,0.842105,0.909091,0.842105,0.823529,0.842105
3,recall_score,1.0,0.8125,0.8125,1.0,0.8125,0.375,1.0,0.625,1.0,0.875,1.0
