In [1]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.tree import ExtraTreeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier
import matplotlib.pyplot as plt
import pandas as pd
import time
import numpy as np
from itertools import product, combinations
import random
from sklearn.model_selection import KFold
import csv

In [3]:
from ensembles import DiversityEnsembleClassifier as DEC
from brute_force_search import BruteForceEnsembleClassifier as BFEC

In [4]:
iris   = datasets.load_iris()
breast = datasets.load_breast_cancer()
wine   = datasets.load_wine()

In [5]:
def compare_results(data, target, n_estimators, csv_file):
    accuracy, f1, precision, recall, auc = 0, 0, 0, 0, 0
    n_samples = int(((data.shape[0]*0.8 * 4) // 5)-4)
    alg = {
                KNeighborsClassifier(): {'n_neighbors':[1, 20, 30, n_samples]},
                RidgeClassifier(): {'alpha':[1.0, 10.0],'max_iter':[10, 100]},
                SVC(): {'C':[1, 1000],'gamma':[0.0001, 0.001]},
                DecisionTreeClassifier(): {'min_samples_leaf':[1, n_samples], 'max_depth':[1, n_samples]},
                ExtraTreeClassifier(): {'min_samples_leaf':[1, n_samples], 'max_depth':[1, n_samples]},
                GaussianNB(): {},
                LinearDiscriminantAnalysis(): {},
                QuadraticDiscriminantAnalysis(): {},
                BernoulliNB(): {},
                LogisticRegression(): {'C':[1, 1000], 'max_iter':[100, 1000]},
                NearestCentroid(): {},
                PassiveAggressiveClassifier(): {'C':[1, 1000], 'max_iter':[100, 1000]},
                SGDClassifier(): {'alpha':[1e-5, 1e-2], 'max_iter':[100, 1000]}
    }     
    
    print('*'*60)
    print('Brute Force Ensemble Classifier')
    print('*'*60)
    aux = int(round(time.time() * 1000))
    ensemble_classifier = BFEC(algorithms=alg, stop_time=100, n_estimators=int(n_estimators), random_state=42)
    
    total_size = 0
    for i in ensemble_classifier.estimators_pool(alg):
        total_size = total_size+1
    if n_estimators < total_size:
        X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)
        search_results = ensemble_classifier.fit(X_train, y_train)
        
        #saving results as pandas dataframe and csv
        search_results_pd = pd.DataFrame.from_dict(search_results, orient='index')
        search_results_pd.to_csv (csv_file, index = None, header=True)

        ensemble = search_results_pd[-1:]["ensemble"].item()
        best_fitness_classifiers = search_results_pd[-1:]["best_fitness_classifiers"].item()

        ensemble_classifier.fit_ensemble(X_train, y_train, ensemble, best_fitness_classifiers)
        y_pred = ensemble_classifier.predict(X_test)
        accuracy += accuracy_score(y_test, y_pred)
        try: f1 += f1_score(y_test, y_pred)
        except: pass
        try: precision += precision_score(y_test, y_pred)
        except: pass
        try: recall += recall_score(y_test, y_pred)
        except: pass
        try: auc += roc_auc_score(y_test, y_pred)
        except: pass
        print('Accuracy :', accuracy)
        if f1>0: print('F1-score :', f1)
        if precision>0: print('Precision:', precision)
        if recall>0: print('Recall   :', recall)
        if auc>0: print('ROC AUC  :', auc)
        print('BFEC done in: ',(int(round(time.time() * 1000)) - aux)/10, 'ms')
    else:
        print('O n_estimators precisa ser menor que o valor total de elementos a serem combinados')

In [6]:
compare_results(data=iris.data, target=iris.target, n_estimators=20, csv_file='iris_results.csv')

************************************************************
Brute Force Ensemble Classifier
************************************************************
Accuracy : 0.9333333333333333
BFEC done in:  1124.2 ms


In [7]:
compare_results(data=wine.data, target=wine.target, n_estimators=20, csv_file='wine_results.csv')

************************************************************
Brute Force Ensemble Classifier
************************************************************
Accuracy : 0.8055555555555556
BFEC done in:  1432.3 ms


In [8]:
compare_results(data=breast.data, target=breast.target, n_estimators=20, csv_file='breast_results.csv')

************************************************************
Brute Force Ensemble Classifier
************************************************************
Accuracy : 0.956140350877193
F1-score : 0.9655172413793103
Precision: 0.9459459459459459
Recall   : 0.9859154929577465
ROC AUC  : 0.9464461185718965
BFEC done in:  3742.7 ms
