A ideia por trás do VotingClassifier é combinar classificadores de machine learn conceitualmente diferentes e usar um voto para prever os rótulos das classes. 
Tal classificador pode ser útil para um conjunto de modelos de desempenho igualmente bom, a fim de equilibrar suas fraquezas individuais.

[Documentação](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html#sklearn.ensemble.VotingClassifier)

In [21]:
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import StratifiedShuffleSplit, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import preprocessing
from sklearn.metrics import classification_report
import pandas as pd
import os
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Loading some example data
bases_prontas_path = os.path.join("D:\\","FIA","TCC","BASES","")
df = pd.read_csv(bases_prontas_path+'mask_dataset_vgg16_preprocess_input_224_224_3_feature_extracted.csv')
X, y = df.drop(['im_path', 'class'], axis=1), df['class'].values

label_transformer = preprocessing.LabelEncoder()
label_transformer.fit(y)
y = label_transformer.transform(y)

cross_validation = StratifiedShuffleSplit(n_splits=7, test_size=0.3, random_state=42)

In [3]:
# Training classifiers
#clf1 = SGDClassifier(alpha=0.31622776601683794, max_iter=100, random_state=42,tol=0.0001, loss="hinge", penalty = "l2")
#SGD falha no voting, para não gastar mais tempo decidi abortar o SGD
clf2 = KNeighborsClassifier(n_neighbors=6, weights="distance", algorithm= "brute", p= 2, metric = "braycurtis")
clf3 = KNeighborsClassifier(n_neighbors=3, weights="distance", algorithm= "ball_tree", p= 1, metric = "braycurtis")
clf4 = LogisticRegression(random_state=0, max_iter = 10000, solver = "liblinear", penalty= "l1", C= 1, class_weight = "balanced")
clf5 = RandomForestClassifier(max_depth=1000, random_state=42, criterion= "gini", class_weight = "balanced",n_estimators=1000)
clf6 = MLPClassifier(solver='lbfgs', random_state=42, max_iter=300000, learning_rate="invscaling", learning_rate_init=0.5, tol=0.01, hidden_layer_sizes=(50, 3),alpha=0.015038869469554102)
       
estimadores = [#('sgd', clf1), 
               ('knn6', clf2), 
               ('knn3', clf3), 
               ('regLog', clf4),
               ('RdnForest', clf5),
               ('MLP', clf6)]


#eclf = VotingClassifier(estimators=estimadores, weights=[1, 2, 2, 1, 1, 2])
eclf = VotingClassifier(estimators=estimadores, weights=[2, 2, 1, 1, 2])

In [4]:
params = {'voting': ["hard", "soft"]}
grid = GridSearchCV(estimator=eclf, param_grid=params, cv=cross_validation)
grid_historico = grid.fit(X, y)

In [6]:
print(f"O melhor modelo foi:\n{grid.best_estimator_}\nscore: {grid.best_score_}\nParametros:\n{grid.best_params_}")

O melhor modelo foi:
VotingClassifier(estimators=[('knn6',
                              KNeighborsClassifier(algorithm='brute',
                                                   metric='braycurtis',
                                                   n_neighbors=6,
                                                   weights='distance')),
                             ('knn3',
                              KNeighborsClassifier(algorithm='ball_tree',
                                                   metric='braycurtis',
                                                   n_neighbors=3, p=1,
                                                   weights='distance')),
                             ('regLog',
                              LogisticRegression(C=1, class_weight='balanced',
                                                 max_iter=10000, penalty='l1',
                                                 random_state=0,
                                                 solver='liblinear'))

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [26]:
eclf2 = VotingClassifier(estimators=estimadores, voting = 'soft', weights=[2, 2, 1, 1, 2])

In [27]:
eclf2_H = eclf2.fit(X_train,y_train)

In [28]:
### print("[INFO] evaluating network...")
lb = ["without_mask","mask_weared_incorrect","with_mask"]
def Avaliando_modelo(model, NWHead = None,x_test=X_test, y_test=y_test):
    predIdxs = model.predict(x_test)

    # for each image in the testing set we need to find the index of the
    # label with corresponding largest predicted probability
    #predIdxs = np.argmax(predIdxs, axis=1)

    # show a nicely formatted classification report
    # print(classification_report(y_test.argmax(axis=1), predIdxs,
    #                             target_names=lb))
    print(classification_report(y_test, predIdxs,
                                target_names=lb))

    # serialize the model to disk
    print("[INFO] saving mask detector model...")

    if NWHead:
        # plot the training loss and accuracy
        N = 100
        plt.style.use("ggplot")
        plt.figure()
        plt.plot(np.arange(0, N), NWHead.history["loss"], label="train_loss")
        plt.plot(np.arange(0, N), NWHead.history["val_loss"], label="val_loss")
        plt.plot(np.arange(0, N), NWHead.history["accuracy"], label="train_acc")
        plt.plot(np.arange(0, N), NWHead.history["val_accuracy"], label="val_acc")
        plt.title("Training Loss and Accuracy")
        plt.xlabel("Epoch #")
        plt.ylabel("Loss/Accuracy")
        plt.legend(loc="lower left")
        plt.show()
    #return classification_report(y_test.argmax(axis=1), predIdxs,
     #                           target_names=lb,output_dict=True)

In [29]:
Avaliando_modelo(eclf2, eclf2_H)

                       precision    recall  f1-score   support

         without_mask       0.94      0.90      0.92      2670
mask_weared_incorrect       0.81      0.89      0.85      1907
            with_mask       0.93      0.87      0.90      1947

             accuracy                           0.89      6524
            macro avg       0.89      0.89      0.89      6524
         weighted avg       0.90      0.89      0.89      6524

[INFO] saving mask detector model...


AttributeError: 'VotingClassifier' object has no attribute 'history'

<Figure size 432x288 with 0 Axes>