<a href="https://colab.research.google.com/github/acevedosharp/ensemble-testing-chamber/blob/master/ensemble_classifier_combination_tester.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- assemble all possible combination of ensembles $k \in \{1,2,3,4,5\}$
- 10-fold cross validation


In [13]:
import itertools
import numpy as np
import sklearn
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# classifiers
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

# datasets
from sklearn.datasets import *

from datetime import datetime

In [None]:
classifiers = {
    "Linear SVC": LinearSVC(),
    "Decission Tree": DecisionTreeClassifier(),
    "Extra Tree": ExtraTreeClassifier(),
    "Logistic": LogisticRegression(),
    "Passive Aggressive": PassiveAggressiveClassifier(),
    "Perceptron": Perceptron(),
    "Ridge": RidgeClassifier(),
    "SGD": SGDClassifier(),
    "Multi-layer Perceptron": MLPClassifier(),
    "Linear Discriminant": LinearDiscriminantAnalysis(),
    "Quadratic Discriminant": QuadraticDiscriminantAnalysis(),
    "BernoulliNB": BernoulliNB(),
    "MultinomialNB": MultinomialNB(),
    "Nearest Neighbors": KNeighborsClassifier(),
    "Extra Trees": ExtraTreesClassifier(),
    "Random Forest (10 estimators)": RandomForestClassifier(n_estimators=10),
    "Gradient Boosting": GradientBoostingClassifier()
}

ds_names = [
            "Digits",
            "Iris",
            "Wine",
            "Breast Cancer"
]

datasets = [
            sklearn.datasets.load_breast_cancer(return_X_y=True),
            # sklearn.datasets.load_iris(return_X_y=True),
            # sklearn.datasets.load_wine(return_X_y=True),
            sklearn.datasets.load_digits(return_X_y=True)
]

results = []

kf = KFold(n_splits=10)
for ds_idx, ds in enumerate(datasets):
  print("==================== CHANGING DATASET at ", datetime.now(), "====================")
  X, Y = ds[0], ds[1]
  #X = StandardScaler().fit_transform(X)
  fold_index = 0
  for train_index, test_index in kf.split(X):
    print("==================== CHANGING FOLD at ", datetime.now(), "====================")
    X_train, X_test = X[train_index], X[test_index] # np.take
    Y_train, Y_test = Y[train_index], Y[test_index] # np.take

    # Train every classifier with the new data
    print("++++++++++ began training classifiers at", datetime.now(), "++++++++++")
    for classifier_name, classifier in classifiers.items():
      classifier.fit(X_train, Y_train)
    print("++++++++++ ended training classifiers at", datetime.now(), "++++++++++")
    # Assemble ensembles of size k in {1,2,3,4,5}
    for k in range(1,6):
      print("==================== CHANGING to k = ", k, " at ", datetime.now(), "====================")
      for combination in list(itertools.combinations(classifiers.keys(), k)):
        ensemble = []
        ensemble_description = ""

        # group classifiers (already exist fitted in dict)
        for idx in range(k):
          ensemble.append(classifiers[combination[idx]])
          ensemble_description += combination[idx]
          ensemble_description += ","
        ensemble_description = ensemble_description[:-1]

        # save predictions
        predictions = np.zeros((len(X_test), k)) # (# test instances, ensemble size)
        for idx, classifier in enumerate(ensemble):
          predictions[:,idx] = classifier.predict(X_test)
        
        # do hard voting
        hard_voting_predictions = np.zeros((len(X_test), 1)) # (# test instances, 1)
        for idx in range(predictions.shape[0]):
          values, counts = np.unique(predictions[idx], return_counts=True)
          hard_voting_predictions[idx] = values[np.argmax(counts)]
        
        # compare voting predictions against Y_test
        total_instance_number = len(X_test)
        errors = 0
        for idx in range(hard_voting_predictions.shape[0]):
          if (hard_voting_predictions[idx][0] != Y_test[idx]):
            errors += 1
        score = errors/total_instance_number

        # save result (ensemble description, ensemble size, fold index, dataset, score)
        results.append([ensemble_description, k, fold_index, ds_names[ds_idx], score])

    fold_index += 1
        

++++++++++ began training classifiers at 2021-02-05 11:13:16.192785 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:13:17.573494 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:14:14.344765 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:14:15.556557 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:15:12.659088 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:15:13.994519 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:16:10.501455 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:16:11.843318 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:17:07.963025 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:17:09.339999 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:18:05.722505 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:18:07.194549 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:19:03.501411 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:19:05.072708 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:20:02.367362 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:20:03.819856 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:21:00.199559 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:21:01.409499 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:21:57.999208 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:21:59.148682 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:22:55.347501 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:23:08.565320 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:27:23.904813 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:27:37.177734 ++++++++++
++++++++++ began training classifiers at 2021-02-05 11:31:53.094901 ++++++++++


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


++++++++++ ended training classifiers at 2021-02-05 11:32:06.357711 ++++++++++


### Persist results in bulk to a database