In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import pandas as pd

In [2]:
from DiversityEnsembleClassifier import DiversityEnsembleClassifier

### Loading datasets

In [3]:
# Datasets bundled with scikit-learn; the returned objects expose .data / .target.
breast = datasets.load_breast_cancer()
iris   = datasets.load_iris()
wine   = datasets.load_wine()

# Raw files fetched from the UCI repository. They are read with header=None, so
# columns are addressed by position further down the notebook.
# The yeast file is whitespace-delimited: sep=r'\s+' is the supported replacement
# for the deprecated delim_whitespace=True flag.
yeast  = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/yeast/yeast.data', header=None, sep=r'\s+')
spam   = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data', header=None)

<h3>Testing method</h3>

In [None]:
def _print_model_report(title, y_true, y_pred):
    """Print one model's score table: a banner, accuracy, then best-effort metrics.

    Parameters
    ----------
    title : str
        Heading printed between two dashed rules.
    y_true, y_pred : array-like
        Ground-truth labels and the model's hard predictions for the same rows.

    Accuracy is always printed. F1, precision, recall and ROC AUC are called
    with scikit-learn's default arguments and may raise for some targets
    (e.g. the default binary averaging rejects multiclass labels). Such a
    metric is reported as ``n/a`` together with the reason instead of being
    dropped silently.
    """
    print('-'*60)
    print(title)
    print('-'*60)
    print('Accuracy :', accuracy_score(y_true, y_pred))

    # NOTE: ROC AUC is fed hard class predictions here, not probabilities or
    # decision scores, so it is a coarse estimate of ranking quality.
    best_effort_metrics = (
        ('F1-score :', f1_score),
        ('Precision:', precision_score),
        ('Recall   :', recall_score),
        ('ROC AUC  :', roc_auc_score),
    )
    for label, metric in best_effort_metrics:
        try:
            score = metric(y_true, y_pred)
        except Exception as exc:
            # Still best-effort, but `Exception` (unlike a bare `except:`) lets
            # KeyboardInterrupt / SystemExit propagate, and the reason is shown.
            print(label, 'n/a ({}: {})'.format(type(exc).__name__, exc))
        else:
            print(label, score)


def compare_results(data, target, random_state=42):
    """Fit the diversity ensemble and two baselines, then print their test scores.

    The data is split 80/20 into train and test. A DiversityEnsembleClassifier,
    a 1000-tree RandomForestClassifier and a 1000-estimator AdaBoostClassifier
    are each fitted on the training part and scored on the held-out part.

    Parameters
    ----------
    data : array-like
        Feature matrix, one row per sample.
    target : array-like
        Class labels aligned with ``data``.
    random_state : int, default 42
        Seed shared by the split, the ensemble and both baselines so that a
        re-run reproduces the same numbers.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=random_state)

    # Upper limit for the size-dependent hyper-parameters: 4/5 of the training
    # rows, minus one.
    # NOTE(review): presumably matches the largest fold the ensemble trains on
    # internally - confirm against DiversityEnsembleClassifier.
    upper_bound = ((X_train.shape[0] * 4) // 5) - 1

    # Candidate estimators and their hyper-parameter lists.
    # NOTE(review): the two-element lists look like [low, high] ranges - verify
    # how DiversityEnsembleClassifier interprets them.
    alg = {
        KNeighborsClassifier: {'n_neighbors': [1, upper_bound]},
        SVC: {'C': [1, 1000],
              'gamma': [0.001, 0.0001]},
        DecisionTreeClassifier: {'min_samples_leaf': [1, upper_bound],
                                 'max_depth': [1, upper_bound]},
        RandomForestClassifier: {'min_samples_leaf': [1, upper_bound],
                                 'max_depth': [1, upper_bound],
                                 'n_estimators': [100, 100]},
        GaussianNB: {},
        LinearDiscriminantAnalysis: {},
    }
    dec = DiversityEnsembleClassifier(alg, population_size=25, max_epochs=100,
                                      random_state=random_state, njobs=-1)
    dec.fit(X_train, y_train)
    dec_predict = dec.predict(X_test)

    # Baselines are seeded too; otherwise their scores drift between runs while
    # the ensemble's stay fixed, which makes the comparison unrepeatable.
    rf = RandomForestClassifier(n_estimators=1000, random_state=random_state)
    rf.fit(X_train, y_train)
    rf_predict = rf.predict(X_test)

    ada = AdaBoostClassifier(n_estimators=1000, random_state=random_state)
    ada.fit(X_train, y_train)
    ada_predict = ada.predict(X_test)

    # All models are fitted before anything is reported, so the results banner
    # follows whatever progress output the fits produce.
    print()
    print('#'*60)
    print('Results')
    print('#'*60)
    print()

    _print_model_report('Diversity-based Ensemble Classifier', y_test, dec_predict)
    _print_model_report('Random Forest Classifier', y_test, rf_predict)
    _print_model_report('Ada Boost Classifer', y_test, ada_predict)


In [None]:
# Benchmark the three classifiers on the breast-cancer dataset.
compare_results(data=breast.data, target=breast.target)

Starting genetic algorithm...
------------------------------------------------------------
Epoch 0
------------------------------------------------------------
Generating offspring...done in 15 ms
Fitting and predicting population...

In [None]:
# Benchmark the three classifiers on the iris dataset.
compare_results(data=iris.data, target=iris.target)

In [None]:
# Benchmark the three classifiers on the wine dataset.
compare_results(data=wine.data, target=wine.target)

In [None]:
# Inspect the last column of the yeast frame, which is used as the target below.
yeast.iloc[:, -1].values

In [None]:
# Yeast: features are every column except the first and the last; the last
# column is the target.
# NOTE(review): the skipped first column is presumably a non-numeric identifier - confirm.
compare_results(data=yeast.iloc[:, 1:-1].values, target=yeast.iloc[:, -1].values)

In [None]:
# Spambase: every column but the last is a feature; the last column is the target.
compare_results(data=spam.iloc[:, :-1].values, target=spam.iloc[:, -1].values)