<a href="https://colab.research.google.com/github/acevedosharp/ensemble-testing-chamber/blob/master/ensemble_classifier_combination_tester.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- assemble every possible combination of classifiers into ensembles of size $k \in \{1,2,3,4,5\}$
- 10-fold cross validation


In [84]:
import itertools

import numpy as np
import sklearn.datasets
import sklearn.discriminant_analysis
import sklearn.ensemble
import sklearn.linear_model
import sklearn.naive_bayes
import sklearn.neighbors
import sklearn.neural_network
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.svm
import sklearn.tree
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Candidate base classifiers, keyed by the display name used to identify them
# inside an ensemble combination. All use scikit-learn defaults unless noted.
classifiers = {
    "Linear SVC": sklearn.svm.LinearSVC(),
    "Decission Tree": sklearn.tree.DecisionTreeClassifier(),
    "Extra Tree": sklearn.tree.ExtraTreeClassifier(),
    "Logistic": sklearn.linear_model.LogisticRegression(),
    "Passive Aggressive": sklearn.linear_model.PassiveAggressiveClassifier(),
    "Perceptron": sklearn.linear_model.Perceptron(),
    "Ridge": sklearn.linear_model.RidgeClassifier(),
    "SGD": sklearn.linear_model.SGDClassifier(),
    "Multi-layer Perceptron": sklearn.neural_network.MLPClassifier(),
    "Linear Discriminant": sklearn.discriminant_analysis.LinearDiscriminantAnalysis(),
    "Quadratic Discriminant": sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis(),
    "BernoulliNB": sklearn.naive_bayes.BernoulliNB(),
    # MultinomialNB refuses to fit on negative feature values, and the
    # standardized (zero-mean) data this script trains on always contains
    # them. Rescale to a non-negative range inside a pipeline so this
    # classifier can be fitted like the others.
    "MultinomialNB": sklearn.pipeline.make_pipeline(
        sklearn.preprocessing.MinMaxScaler(),
        sklearn.naive_bayes.MultinomialNB(),
    ),
    "Nearest Neighbors": sklearn.neighbors.KNeighborsClassifier(),
    "Extra Trees": sklearn.ensemble.ExtraTreesClassifier(),
    "Random Forest (10 estimators)": sklearn.ensemble.RandomForestClassifier(n_estimators=10),
    "Gradient Boosting": sklearn.ensemble.GradientBoostingClassifier(),
}

# Benchmark datasets; each entry is the (X, y) pair returned by the loader
# when called with return_X_y=True.
datasets = [
    load_dataset(return_X_y=True)
    for load_dataset in (
        sklearn.datasets.load_digits,
        sklearn.datasets.load_iris,
        sklearn.datasets.load_wine,
        sklearn.datasets.load_breast_cancer,
    )
]

# 10-fold cross validation of every hard-voting ensemble of size k in {1..5}.
#
# Shuffle before splitting: some of the bundled datasets (e.g. iris, wine) are
# stored sorted by class label, so contiguous folds would contain a single
# class. A fixed random_state keeps the folds reproducible between runs.
kf = KFold(n_splits=10, shuffle=True, random_state=0)

# results[(dataset index, tuple of classifier names)] -> [errors, instances],
# accumulated over all folds; errors / instances is the cross-validated
# error rate of that ensemble on that dataset.
results = {}

for ds_idx, (X, Y) in enumerate(datasets):
  for train_index, test_index in kf.split(X):
    # Fit the scaler on the training fold only so that no statistics of the
    # test fold leak into training.
    scaler = StandardScaler().fit(X[train_index])
    X_train, X_test = scaler.transform(X[train_index]), scaler.transform(X[test_index])
    Y_train, Y_test = Y[train_index], Y[test_index]

    # Train every classifier on this fold and predict the test fold once;
    # the predictions are reused by every ensemble containing the classifier
    # instead of being recomputed per combination.
    fold_predictions = {}
    for classifier_name, classifier in classifiers.items():
      classifier.fit(X_train, Y_train)
      fold_predictions[classifier_name] = classifier.predict(X_test)

    # Assemble ensembles of size k in {1,2,3,4,5}
    for k in range(1, 6):
      for combination in itertools.combinations(classifiers, k):
        # One column of predicted labels per ensemble member:
        # shape (# test instances, ensemble size)
        votes = np.column_stack([fold_predictions[name] for name in combination])

        # Hard voting: the most frequent label in each row wins. np.unique
        # returns labels sorted, so ties resolve to the smallest label.
        hard_voting_predictions = np.empty(len(Y_test), dtype=votes.dtype)
        for idx, row in enumerate(votes):
          values, counts = np.unique(row, return_counts=True)
          hard_voting_predictions[idx] = values[np.argmax(counts)]

        # Compare voting predictions against Y_test and record the outcome
        errors = int(np.sum(hard_voting_predictions != Y_test))
        tally = results.setdefault((ds_idx, combination), [0, 0])
        tally[0] += errors
        tally[1] += len(Y_test)
       
          