In [1]:
import pywt
import pyedflib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal
from scipy.fft import irfft, rfft, rfftfreq
import scaleogram as scg 
from PIL import Image
from torchvision import transforms
import torch
import torch.nn as nn
from torchvision import models
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
import io
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
#Загрузка данных с f3 канала
data_all = pd.read_csv('eeg_f3_all.csv',index_col=0)

In [3]:
#Загрузка данных меток
labels = pd.read_csv('deap_all_labels.csv',index_col=0)

In [4]:
#Формирование классов высокой и низкой valence
valence = []
for elem in labels['0']:
    if elem >= 5:
        valence.append(1)
    else:
        valence.append(0)
labels['valence'] = valence
labels

In [5]:
#Формирование классов высокой и низкой arousal и сохранение в файл
arousal = []
for elem in labels['1']:
    if elem >= 5:
        arousal.append(1)
    else:
        arousal.append(0)
labels['arousal'] = arousal
labels.to_csv('deap_all_labels.csv')

In [4]:
#Извлечение ритмов при помощи вейвлет преобразования
gamma_rhythm = []
beta_rhythm = []
alpha_rhythm = []
theta_rhythm = []
delta_rhythm = []
for i in range(len(data_all)):
    coeff = pywt.swt(data_all.iloc[i], 'db4', level=4)
    gamma_rhythm.append(coeff[3][1]) #Результат первого разложения 30-60Гц, Коэффициенты детализации 
    beta_rhythm.append(coeff[2][1]) #Результат второго разложения 15-30Гц, Коэффициенты детализации 
    alpha_rhythm.append(coeff[1][1]) #Результат третьего разложения 8-15Гц, Коэффициенты детализации 
    theta_rhythm.append(coeff[0][1]) #Результат четвортого разложения 4-8Гц, Коэффициенты детализации 
    delta_rhythm.append(coeff[0][0]) #Результат четвертого разложения 0-4Гц, Коэффициенты аппроксимации

In [18]:
# Альтернативное Извлечение ритмов при помощи преобразования Фурье 
# Разкомментировать Ctrl + /
# gamma_rhythm = []
# beta_rhythm = []
# alpha_rhythm = []
# theta_rhythm = []
# delta_rhythm = []
# for i in range(len(data_all)):
#     y = rfft(data_all.iloc[i].to_numpy()) # мощность волн определённой частоты 
#     x = rfftfreq(len(data_all.iloc[i].to_numpy()), 1 / 128) # список всех частот 

#     alphaX = x.copy()
#     alphaY = y.copy()
#     betaX = x.copy()
#     betaY = y.copy()
#     gammaX = x.copy()
#     gammaY = y.copy()
#     thetaX = x.copy()
#     thetaY = y.copy()
#     deltaX = x.copy()
#     deltaY = y.copy()


#     for i in range(len(alphaX)):
#         if (alphaX[i] < 8) or (alphaX[i] > 13):
#             alphaY[i] = 0
        
#     for i in range(len(betaX)):
#         if (betaX[i] < 14) or (betaX[i] > 40):
#             betaY[i] = 0
        
#     for i in range(len(gammaX)):
#         if gammaX[i] < 40:
#             gammaY[i] = 0

#     for i in range(len(thetaX)):
#         if (thetaX[i] < 4) or (thetaX[i] > 8):
#             thetaY[i] = 0
    
#     for i in range(len(deltaX)):
#         if deltaX[i] > 4:
#             deltaY[i] = 0
    
#     filteredAlpha = irfft(alphaY) # Восстановление графика ЭЭГ по частотам
#     filteredBeta = irfft(betaY)
#     filteredGamma = irfft(gammaY) 
#     filteredTheta = irfft(thetaY)
#     filteredDelta = irfft(deltaY)
    
#     gamma_rhythm.append(filteredGamma)
#     beta_rhythm.append(filteredBeta)
#     alpha_rhythm.append(filteredAlpha)
#     theta_rhythm.append(filteredTheta)
#     delta_rhythm.append(filteredDelta)
    

In [19]:
# Загрузка модели alexnet
model = models.alexnet(pretrained=True)
train_nodes, eval_nodes = get_graph_node_names(model)
create_feature_extractor(model, return_nodes=eval_nodes) # просмотр структуры

AlexNet(
  (features): Module(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Module(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): 

In [20]:
# Получение новой модели alexnet для извлечения признаков с классификатора №3
new_model = create_feature_extractor(model, return_nodes=['classifier.3'])

In [21]:
# Загрузка модели VGG16
model_vgg = models.vgg16(pretrained=True)
train_nodes_vgg, eval_nodes_vgg = get_graph_node_names(model_vgg)
create_feature_extractor(model_vgg, return_nodes=['classifier.2']) #Просмотр структуры

VGG(
  (features): Module(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ce

In [22]:
# Получение новой модели VGG16 для извлечения признаков с классификатора №2
new_model_vgg = create_feature_extractor(model_vgg, return_nodes=['classifier.2']) 

In [25]:
#Вспомогательная функция для преобразования изображения в размер 244x244
transform = transforms.Compose([
  transforms.Resize([224,224]),
  transforms.ToTensor()                              
])

In [24]:
#Массивы для полученных признаков из CNN
input_svm_beta_alex = []
input_svm_beta_vgg = []

In [25]:
coikw = {'alpha': 0.5, 'hatch': '/'}

#Занимает много памяти в процессе, я делала каждые 100 в цикле (0,100),(100,200)..

for i in range(0,1280):
    #Преобразование ритма в изображение
    plot = scg.cws(beta_rhythm[i], figsize = (3.12, 3.12), coikw=coikw, cbar=None)
    #Очистка графика от осей, заголовков
    plot.invert_yaxis()
    plot.set_title(None)
    plot.set_xlabel(None)
    plot.set_ylabel(None)
    plot.axis('off')

    #Сохранение изображения
    img_buf = io.BytesIO()
    plot.get_figure().savefig(img_buf, format='png')
    plt.close('all')

    #Загрузка изображения
    im = Image.open(img_buf)
    im = im.convert('RGB')
    
    #Преобразование данных изображения в формат tensor [1,3,224,224]
    input_tensor = transform(im)
    input_batch = input_tensor.unsqueeze(0) 
    img_buf.close()

    #Извлечение признаков
    features = new_model(input_batch)
    featuresVGG = new_model_vgg(input_batch)
    
    #Сохранение признаков
    input_svm_beta_alex.append(features['classifier.3'][0].detach().numpy())
    input_svm_beta_vgg.append(featuresVGG['classifier.2'][0].detach().numpy())
    
    #Для отслеживания в процессе цикла
    if i % 50 == 0:
        print(i)

0


In [None]:
#Сохранение в файл полученных признаков
pd.DataFrame(input_svm_beta_alex).to_csv('f3_alexnet_beta_fc6_clf3.csv')
pd.DataFrame(input_svm_beta_vgg).to_csv('f3_vgg_beta_fc6_clf2.csv')
print(len(input_svm_beta_alex),len(input_svm_beta_vgg))

## Обучение моделей

In [241]:
#Загрузка признаков 
f3_beta_alexnet = pd.read_csv('f3_alexnet_beta_fc6_clf3.csv',index_col=0)
f3_beta_alexnet

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,0.0,0.683075,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,5.340591,0.000000,0.000000,0.000000,1.431042,0.000000,0.980753,0.0,0.000000,0.0
1,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.644496,...,0.000000,3.651717,0.376836,0.000000,0.000000,0.000000,0.295091,0.0,3.233924,0.0
2,0.0,3.104819,0.0,0.0,0.0,0.0,2.005969,0.0,0.0,0.000000,...,0.000000,5.430515,0.000000,0.492934,0.000000,0.830450,0.000000,0.0,3.254239,0.0
3,0.0,4.475897,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,4.776441,0.000000,2.937954,0.000000,0.000000,0.000000,0.0,0.000000,0.0
4,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,4.180420,...,5.875127,4.546699,0.000000,1.076356,0.000000,0.000000,0.000000,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,0.0,1.056538,0.0,0.0,0.0,0.0,0.194881,0.0,0.0,0.000000,...,0.000000,0.000000,2.909383,0.000000,0.000000,1.170804,0.000000,0.0,0.000000,0.0
1276,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,8.956781,0.000000,2.158442,0.000000,0.000000,0.000000,0.000000,0.0,2.122507,0.0
1277,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,...,0.000000,0.000000,3.258988,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0
1278,0.0,0.000000,0.0,0.0,0.0,0.0,0.587281,0.0,0.0,0.674846,...,4.237354,3.548856,3.429468,0.920785,0.000000,2.718819,0.000000,0.0,0.000000,0.0


In [242]:
#Деление на тестовую и обучающую выборки
#f3_alexnet_beta_fc6_clf3 - random_state 985,814,1096
#f3_vgg_beta_fc6_clf2 - random_state 39,165
#f3_alexnet_beta_fc6_clf3 - random_state 80,795
X_train, X_test, y_train, y_test = train_test_split(f3_beta_alexnet, labels['valence'], test_size=0.25, random_state=985) #, stratify=labels['valence']

In [33]:
#Таблица результатов
results_table = pd.DataFrame()
results_table['criterion'] = ['accuracy_score', 'f1_score', 'precision_score', 'recall_score']
results_table

Unnamed: 0,criterion
0,accuracy_score
1,f1_score
2,precision_score
3,recall_score


## Метод опорных векторов

In [243]:
clf = svm.SVC(kernel='rbf',C=25)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [35]:
results_table['svc'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc
0,accuracy_score,0.61875
1,f1_score,0.695
2,precision_score,0.643519
3,recall_score,0.755435


In [36]:
#метод опорных векторов с gridsearch
parameters_svc = {'kernel':('linear', 'poly', 'rbf', 'sigmoid'),'C':[0.0001, 1000], 'gamma':('scale', 'auto')} #'kernel':('linear', 'rbf'),
svc = svm.SVC()
clf = GridSearchCV(svc, parameters_svc, scoring = 'accuracy')
clf.fit(X_train, y_train)
results = clf.predict(X_test)
clf.best_params_

{'C': 0.0001, 'gamma': 'scale', 'kernel': 'poly'}

In [37]:
results_table['svc_grid'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid
0,accuracy_score,0.61875,0.575
1,f1_score,0.695,0.730159
2,precision_score,0.643519,0.575
3,recall_score,0.755435,1.0


In [38]:
print(classification_report(y_test,results))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       136
           1       0.57      1.00      0.73       184

    accuracy                           0.57       320
   macro avg       0.29      0.50      0.37       320
weighted avg       0.33      0.57      0.42       320



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
#Поиск random_state
# best = 0.55
# best_random= 0
# params = {}
# for i in range(0,1200):
#     X_train, X_test, y_train, y_test = train_test_split(f3_beta_alexnet, labels['valence'], test_size=0.25, random_state=i)
#     clf = svm.SVC(kernel='rbf',C=3)
#     clf.fit(X_train, y_train)
#     results = clf.predict(X_test)
#     if accuracy_score(y_test,results) > best:
#         best = accuracy_score(y_test,results)
#         best_random = i
#     if i % 10 == 0:
#         print(i)

In [None]:
# best_random

## Дерево решений

In [53]:
tree = DecisionTreeClassifier(criterion='gini',
 max_depth=3,
 min_samples_split=15,
 min_samples_leaf=10)
tree.fit(X_train, y_train)
results = tree.predict(X_test)

In [54]:
results_table['tree'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree
0,accuracy_score,0.61875,0.575,0.565625
1,f1_score,0.695,0.730159,0.712215
2,precision_score,0.643519,0.575,0.575251
3,recall_score,0.755435,1.0,0.934783


In [55]:
print(classification_report(y_test,results))

              precision    recall  f1-score   support

           0       0.43      0.07      0.11       136
           1       0.58      0.93      0.71       184

    accuracy                           0.57       320
   macro avg       0.50      0.50      0.41       320
weighted avg       0.51      0.57      0.46       320



## Случайный лес

In [134]:
clf = RandomForestClassifier(criterion='gini',
 n_estimators=4,
 max_depth=2,
 min_samples_leaf= 15,
 min_samples_split = 10)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [135]:
results_table['forest'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest
0,accuracy_score,0.61875,0.575,0.565625,0.571875
1,f1_score,0.695,0.730159,0.712215,0.713987
2,precision_score,0.643519,0.575,0.575251,0.579661
3,recall_score,0.755435,1.0,0.934783,0.929348


## Наивный Байесовский классификатор

In [146]:
clf = MultinomialNB(alpha=1)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [147]:
results_table['nb'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217


## Метод ближайших соседей

In [178]:
clf = KNeighborsClassifier(n_neighbors=100,weights='distance',algorithm='auto')
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [179]:
results_table['knn'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb,knn
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375,0.56875
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458,0.724
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635,0.572785
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217,0.983696


## Bagging SVM

In [183]:
svc = svm.SVC(kernel='rbf',C=5)
clf = BaggingClassifier(base_estimator=svc, n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [184]:
results_table['bagging_svm'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb,knn,bagging_svm
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375,0.56875,0.625
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458,0.724,0.710145
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635,0.572785,0.63913
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217,0.983696,0.798913


## Bagging дерево решений

In [187]:
tree = DecisionTreeClassifier(criterion='gini',
 max_depth=3,
 min_samples_split=15,
 min_samples_leaf=10)
clf = BaggingClassifier(base_estimator=tree, n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [188]:
results_table['bagging_tree'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb,knn,bagging_svm,bagging_tree
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375,0.56875,0.625,0.5875
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458,0.724,0.710145,0.729508
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635,0.572785,0.63913,0.585526
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217,0.983696,0.798913,0.967391


## Bagging случайный лес

In [195]:
forest = RandomForestClassifier(criterion='gini',
 n_estimators=4,
 max_depth=2,
 min_samples_leaf= 15,
 min_samples_split = 10)
clf = BaggingClassifier(base_estimator=forest, n_estimators=5, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [196]:
results_table['bagging_forest'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb,knn,bagging_svm,bagging_tree,bagging_forest
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375,0.56875,0.625,0.5875,0.58125
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458,0.724,0.710145,0.729508,0.72541
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635,0.572785,0.63913,0.585526,0.582237
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217,0.983696,0.798913,0.967391,0.961957


## Stacking

In [204]:
estimators = [
    ('rf', RandomForestClassifier(criterion='gini',
 n_estimators=4,
 max_depth=2,
 min_samples_leaf= 15,
 min_samples_split = 10)),
    ('dt', DecisionTreeClassifier(criterion='gini',
 max_depth=3,
 min_samples_split=15,
 min_samples_leaf=10)),
    ('svc', svm.SVC(kernel='rbf',C=5)),
    ('nb', MultinomialNB(alpha=5)),
    ('knn',KNeighborsClassifier(n_neighbors=50,weights='distance',algorithm='auto'))
    ]

clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression()) 
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [205]:
results_table['stacking'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb,knn,bagging_svm,bagging_tree,bagging_forest,stacking
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375,0.56875,0.625,0.5875,0.58125,0.5625
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458,0.724,0.710145,0.729508,0.72541,0.710744
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635,0.572785,0.63913,0.585526,0.582237,0.573333
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217,0.983696,0.798913,0.967391,0.961957,0.934783


## Boosting дерево решений

In [214]:
tree = DecisionTreeClassifier(criterion='gini',
 max_depth=3,
 min_samples_split=15,
 min_samples_leaf=10)
clf = AdaBoostClassifier(base_estimator=tree,n_estimators=35, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [215]:
results_table['boost_tree'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb,knn,bagging_svm,bagging_tree,bagging_forest,stacking,boost_tree
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375,0.56875,0.625,0.5875,0.58125,0.5625,0.5375
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458,0.724,0.710145,0.729508,0.72541,0.710744,0.597826
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635,0.572785,0.63913,0.585526,0.582237,0.573333,0.597826
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217,0.983696,0.798913,0.967391,0.961957,0.934783,0.597826


## Boosting случайный лес

In [238]:
tree = RandomForestClassifier(criterion='entropy',
 n_estimators=30,
 max_depth=3,
 min_samples_leaf= 15,
 min_samples_split = 5) 
clf = AdaBoostClassifier(base_estimator=tree,n_estimators=10, random_state=0)
clf.fit(X_train, y_train)
results = clf.predict(X_test)

In [239]:
results_table['boost_forest'] = [accuracy_score(y_test,results), f1_score(y_test,results), precision_score(y_test,results), recall_score(y_test,results)]
results_table

Unnamed: 0,criterion,svc,svc_grid,tree,forest,nb,knn,bagging_svm,bagging_tree,bagging_forest,stacking,boost_tree,boost_forest
0,accuracy_score,0.61875,0.575,0.565625,0.571875,0.59375,0.56875,0.625,0.5875,0.58125,0.5625,0.5375,0.584375
1,f1_score,0.695,0.730159,0.712215,0.713987,0.661458,0.724,0.710145,0.729508,0.72541,0.710744,0.597826,0.707692
2,precision_score,0.643519,0.575,0.575251,0.579661,0.635,0.572785,0.63913,0.585526,0.582237,0.573333,0.597826,0.594096
3,recall_score,0.755435,1.0,0.934783,0.929348,0.690217,0.983696,0.798913,0.967391,0.961957,0.934783,0.597826,0.875
