In [1]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.tree import ExtraTreeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import VotingClassifier
import matplotlib.pyplot as plt
import pandas as pd
import time
import numpy as np
from itertools import product, combinations
import random
from sklearn.model_selection import KFold

In [3]:
from ensembles import DiversityEnsembleClassifier as DEC
from brute_force_random_search import BruteForceEnsembleClassifier as BFEC

In [4]:
iris   = datasets.load_iris()
breast = datasets.load_breast_cancer()
wine   = datasets.load_wine()

In [5]:
def estimators(estimator_grid):
    for estimator, param_grid in list(estimator_grid.items()): 
        items = sorted(param_grid.items())
        if not items:
            yield (estimator, {})
        else:
            keys, values = zip(*items)
            for v in product(*values):
                params = dict(zip(keys, v))
                yield (estimator, params)

In [6]:
def define_all_possible_ensembles(data, n_estimators=10):
    n_samples = int(((data.shape[0]*0.8 * 4) // 5)-4)
    alg = {
                KNeighborsClassifier(): {'n_neighbors':[1, 20, 30, n_samples]},
                #RidgeClassifier(): {'alpha':[1.0, 10.0],'max_iter':[10, 100]},
                SVC(): {'C':[1, 1000],'gamma':[0.0001, 0.001]},
                DecisionTreeClassifier(): {'min_samples_leaf':[1, n_samples], 'max_depth':[1, n_samples]},
                ExtraTreeClassifier(): {'min_samples_leaf':[1, n_samples], 'max_depth':[1, n_samples]},
                GaussianNB(): {},
                LinearDiscriminantAnalysis(): {},
                QuadraticDiscriminantAnalysis(): {},
                BernoulliNB(): {},
                LogisticRegression(): {'C':[1, 1000], 'max_iter':[100, 1000]},
                #NearestCentroid(): {},
                PassiveAggressiveClassifier(): {'C':[1, 1000], 'max_iter':[100, 1000]},
                SGDClassifier(): {'alpha':[1e-5, 1e-2], 'max_iter':[100, 1000]}
    }
    all_ensembles = []
    for i, classifiers in enumerate(combinations(estimators(alg),n_estimators)):
        all_ensembles.append([i,classifiers])
    return all_ensembles

In [7]:
def compare_results(data, target, n_estimators, csv_file, all_possible_ensembles):
    accuracy, f1, precision, recall, auc = 0, 0, 0, 0, 0  
    
    print('*'*60)
    print('Brute Force Ensemble Classifier')
    print('*'*60)
    aux = int(round(time.time() * 1000))
    ensemble_classifier = BFEC(stop_time=100, n_estimators=int(n_estimators), random_state=42)
    
    X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)
    search_results = ensemble_classifier.fit(X_train, y_train, all_possible_ensembles)
    
    #saving results as pandas dataframe and csv
    search_results_pd = pd.DataFrame.from_dict(search_results, orient='index')
    search_results_pd.to_csv (csv_file, index = None, header=True)

    ensemble = search_results_pd[-1:]["ensemble"].item()
    best_fitness_classifiers = search_results_pd[-1:]["best_fitness_classifiers"].item()
    ensemble_classifier.fit_ensemble(X_train, y_train, ensemble[1], best_fitness_classifiers)
    y_pred = ensemble_classifier.predict(X_test)
    accuracy += accuracy_score(y_test, y_pred)
    try: f1 += f1_score(y_test, y_pred)
    except: pass
    try: precision += precision_score(y_test, y_pred)
    except: pass
    try: recall += recall_score(y_test, y_pred)
    except: pass
    try: auc += roc_auc_score(y_test, y_pred)
    except: pass
    print('Accuracy :', accuracy)
    if f1>0: print('F1-score :', f1)
    if precision>0: print('Precision:', precision)
    if recall>0: print('Recall   :', recall)
    if auc>0: print('ROC AUC  :', auc)
    print('BFEC done in: ',(int(round(time.time() * 1000)) - aux)/10, 'ms')

In [11]:
iris_possible_ensembles = define_all_possible_ensembles(data=iris.data, n_estimators=35)
len(iris_possible_ensembles)
compare_results(data=iris.data, 
                target=iris.target, 
                n_estimators=35, 
                csv_file='iris_random_results.csv', 
                all_possible_ensembles=iris_possible_ensembles)

************************************************************
Brute Force Ensemble Classifier
************************************************************
Accuracy : 0.8666666666666667
BFEC done in:  4171.2 ms


In [9]:
wine_possible_ensembles = define_all_possible_ensembles(data=wine.data, n_estimators=35)
compare_results(data=wine.data, 
                target=wine.target, 
                n_estimators=35, 
                csv_file='wine_random_results.csv', 
                all_possible_ensembles=wine_possible_ensembles)

************************************************************
Brute Force Ensemble Classifier
************************************************************
Accuracy : 0.9722222222222222
BFEC done in:  6663.7 ms


In [10]:
breast_possible_ensembles = define_all_possible_ensembles(data=breast.data, n_estimators=35)
compare_results(data=breast.data, 
                target=breast.target, 
                n_estimators=35, 
                csv_file='breast_random_results.csv', 
                all_possible_ensembles=breast_possible_ensembles)

************************************************************
Brute Force Ensemble Classifier
************************************************************
Accuracy : 0.9824561403508771
F1-score : 0.9864864864864865
Precision: 0.9864864864864865
Recall   : 0.9864864864864865
ROC AUC  : 0.9807432432432431
BFEC done in:  11511.1 ms
