In [22]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

dataset = pd.read_csv('datasets/bests/PCA_10_CNN_19_256_MAX.csv', encoding='utf-8')

X = MinMaxScaler().fit_transform(dataset[dataset.columns[1:]])
y = dataset.classe

In [23]:
## Carregando o Bagging e os algoritmos base (DT), (MLP), (k-NN) e (NB)
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
from sklearn.metrics import confusion_matrix

# Separando dataset em duas partes: treinamento e teste
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Bagging DT

In [24]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com DecisionTree
    bgclassifier = BaggingClassifier(
        estimator=DecisionTreeClassifier(criterion="entropy", max_depth=9), n_estimators=n_estimator
    )

    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10
Accuracy: 0.871
n_estimator:  20
Accuracy: 0.879
n_estimator:  30
Accuracy: 0.887


In [25]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com DecisionTree
    bgclassifier = BaggingClassifier(
        estimator=DecisionTreeClassifier(criterion="entropy", max_depth=9), n_estimators=n_estimator,
        max_features=0.5
    )

    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10
Accuracy: 0.863
n_estimator:  20
Accuracy: 0.875
n_estimator:  30
Accuracy: 0.875


Impacto não tão positivo com o feature selection. Provavelmente porque o dataset tem pouco atributos (PCA 10 componentes).

## Bagging kNN

In [26]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com KNeighbors
    bgclassifier = BaggingClassifier(
        estimator=KNeighborsClassifier(n_neighbors=7, metric="euclidean"),
        n_estimators=n_estimator,
    )
    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10
Accuracy: 0.896
n_estimator:  20
Accuracy: 0.900
n_estimator:  30
Accuracy: 0.896


In [27]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com KNeighbors
    bgclassifier = BaggingClassifier(
        estimator=KNeighborsClassifier(n_neighbors=7, metric="euclidean"),
        n_estimators=n_estimator,
        max_features=0.5
    )
    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10
Accuracy: 0.854
n_estimator:  20
Accuracy: 0.892
n_estimator:  30
Accuracy: 0.867


Os resultados foram piorados com o uso do feature selection.


## Bagging MLP

In [28]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com MLPClassifier
    bgclassifier = BaggingClassifier(
        estimator=MLPClassifier(
            hidden_layer_sizes=516,
            activation="relu",
            solver="adam",
            max_iter=500,
            learning_rate_init=0.0001,
        ),
        n_estimators=n_estimator,
    )

    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10




Accuracy: 0.879
n_estimator:  20




Accuracy: 0.871
n_estimator:  30




Accuracy: 0.879




In [29]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com MLPClassifier
    bgclassifier = BaggingClassifier(
        estimator=MLPClassifier(
            hidden_layer_sizes=516,
            activation="relu",
            solver="adam",
            max_iter=500,
            learning_rate_init=0.0001,
        ),
        n_estimators=n_estimator,
        max_features=0.5
    )

    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10




Accuracy: 0.863
n_estimator:  20




Accuracy: 0.879
n_estimator:  30




Accuracy: 0.871




O uso de feature selection piorou um pouco a acurácia dos modelos.

# Bagging MultinomialNB

In [30]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com MultinomialNB
    bgclassifier = BaggingClassifier(
        estimator=MultinomialNB(fit_prior=True, alpha=1.0), n_estimators=n_estimator
    )

    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10
Accuracy: 0.829
n_estimator:  20
Accuracy: 0.863
n_estimator:  30
Accuracy: 0.825


In [31]:
ns_estimators = [10, 20, 30]
for n_estimator in ns_estimators:
    print("n_estimator: ", n_estimator)
    ## Instanciando Bagging com MultinomialNB
    bgclassifier = BaggingClassifier(
        estimator=MultinomialNB(fit_prior=True, alpha=1.0),
        n_estimators=n_estimator,
        max_features=0.5,
    )

    bgclassifier.fit(X_train, y_train)
    y_pred = bgclassifier.predict(X_test)

    ## Model Accuracy
    acuracia = metrics.accuracy_score(y_test, y_pred)
    print("Accuracy: %.3f" % acuracia)

n_estimator:  10
Accuracy: 0.825
n_estimator:  20
Accuracy: 0.808
n_estimator:  30
Accuracy: 0.817
