<a href="https://colab.research.google.com/github/acevedosharp/ensemble-testing-chamber/blob/master/ensemble_classifier_combination_tester.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Measuring the effectiveness of ensembles

- assemble all possible combination of ensembles $k \in \{1,2,3,4,5\}$
- 10-fold cross validation


## Experimental setup

In [1]:
import itertools
import numpy as np
import sklearn
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# classifiers
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import ExtraTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

# datasets
from sklearn.datasets import *

from datetime import datetime

In [None]:
ds_names = [
            "Breast Cancer",
            "Digits"
]

datasets = [
            sklearn.datasets.load_breast_cancer(return_X_y=True),
            sklearn.datasets.load_digits(return_X_y=True)
]

In [44]:
ds_names = [
            "Iris",
            "Wine"
]

datasets = [
            sklearn.datasets.load_iris(return_X_y=True),
            sklearn.datasets.load_wine(return_X_y=True)
]



### Execute experiments

In [62]:
classifiers = {
    "Linear SVC": LinearSVC(),
    "Decission Tree": DecisionTreeClassifier(),
    "Extra Tree": ExtraTreeClassifier(),
    "Logistic": LogisticRegression(),
    "Passive Aggressive": PassiveAggressiveClassifier(),
    "Perceptron": Perceptron(),
    "Ridge": RidgeClassifier(),
    "SGD": SGDClassifier(),
    "Multi-layer Perceptron": MLPClassifier(),
    "Linear Discriminant": LinearDiscriminantAnalysis(),
    "Quadratic Discriminant": QuadraticDiscriminantAnalysis(),
    "BernoulliNB": BernoulliNB(),
    "MultinomialNB": MultinomialNB(),
    "Nearest Neighbors": KNeighborsClassifier(),
    "Extra Trees": ExtraTreesClassifier(),
    "Random Forest (10 estimators)": RandomForestClassifier(n_estimators=10),
    "Gradient Boosting": GradientBoostingClassifier()
}

results = []

kf = KFold(n_splits=10)
for ds_idx, ds in enumerate(datasets):
  X, Y = ds[0], ds[1]
  #X = StandardScaler().fit_transform(X)
  fold_index = 0
  for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index] # np.take
    Y_train, Y_test = Y[train_index], Y[test_index] # np.take

    # Train every classifier with the new data
    for classifier_name, classifier in classifiers.items():
      classifier.fit(X_train, Y_train)
    # Assemble ensembles of size k in {1,2,3,4,5}
    for k in range(1,6):
      for combination in list(itertools.combinations(classifiers.keys(), k)):
        ensemble = []
        ensemble_description = ""

        # group classifiers (already exist fitted in dict)
        for idx in range(k):
          ensemble.append(classifiers[combination[idx]])
          ensemble_description += combination[idx]
          ensemble_description += "-"
        ensemble_description = ensemble_description[:-1]

        # save predictions
        predictions = np.zeros((len(X_test), k)) # (# test instances, ensemble size)
        for idx, classifier in enumerate(ensemble):
          predictions[:,idx] = classifier.predict(X_test)
        
        # do hard voting
        hard_voting_predictions = np.zeros((len(X_test), 1)) # (# test instances, 1)
        for idx in range(predictions.shape[0]):
          values, counts = np.unique(predictions[idx], return_counts=True)
          hard_voting_predictions[idx] = values[np.argmax(counts)]
        
        # compare voting predictions against Y_test
        total_instance_number = len(X_test)
        errors = 0
        for idx in range(hard_voting_predictions.shape[0]):
          if (hard_voting_predictions[idx][0] != Y_test[idx]):
            errors += 1
        score = errors/total_instance_number

        # save result (ensemble description, ensemble size, fold index, dataset, score)
        results.append([ensemble_description, k, fold_index, ds_names[ds_idx], score])

    fold_index += 1
        

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

## Analysing results

### Utility functions

In [60]:
def saveResultsAsCsv(filename, headers, rows):
  if len(headers) == len(rows[0]):
    with open(filename,'w') as file:
      file.write(','.join(headers))
      file.write('\n')
      for row in final_results:
        file.write(','.join(list(map(lambda rc: str(rc), row))))
        file.write('\n')
  else:
    raise Exception('length of headers does not match length of single rows.')

def filterFinalResultsByDataset(datasetName):
  return list(filter(lambda res: str(res[2]) == datasetName, final_results))

def loadResultsFromCsv(filename):
  loadedResults = []
  with open(filename) as file:
    loadedResults = list(map(lambda entry: entry.split(','), [x.strip() for x in file.readlines()]))
  file.close()
  print('Headers are:', loadedResults[0])
  del loadedResults[0]
  return loadedResults


### Aggregate data

In [63]:
unique_ensembles = []
final_results = []

for ensemble in list(map(lambda res: res[0], results)):
  if ensemble not in unique_ensembles:
    unique_ensembles.append(ensemble)

for i, ensemble_description in enumerate(unique_ensembles):
  filtered = list(filter(lambda res: str(res[0]) == ensemble_description, results))
  sum = 0
  for entry in filtered:
    sum += float(entry[4])
  mean_error = sum / 10
  final_results.append([filtered[0][0], filtered[0][1], filtered[0][3], mean_error]) # [ensemble_description, ensemble_size, dataset, mean_error]


### Save results

In [66]:
saveResultsAsCsv('finalResultsIris-Wine.csv', ['ensembleDescription', 'k', 'dataset', 'errorRate'], final_results)

### Metrics

In [None]:
# join two results, create a modifier cell to evaluate metrics for a particular dataset

#### Mean error rate for ensembles of size <k>

In [69]:
meanErrorRateSizeK = {}

for k in range(1,6):
  sum = 0
  ensembles_k = list(filter(lambda res: int(res[1]) == k, final_results))
  for res in ensembles_k:
    sum += float(res[3])
  errorRate = sum/len(ensembles_k)
  print(f"Mean error rate in ensembles of size {k}: {errorRate}")
  meanErrorRateSizeK[k] = errorRate

Mean error rate in ensembles of size 1: 0.45599769319492506
Mean error rate in ensembles of size 2: 0.48339484813533246
Mean error rate in ensembles of size 3: 0.2864331987697039
Mean error rate in ensembles of size 4: 0.24967328500027436
Mean error rate in ensembles of size 5: 0.19845510602430919


#### For every classifier, the errorRates of ensembles of different <k>s that contained it

In [18]:
# Mean error rate of ensembles of size <k> that contained <classifier>
for ensemble in unique_ensembles:
  for k in range(1,6):
    sum = 0
    ensembles_k_unique = list(filter(lambda res: (int(res[1]) == k) and (str(res[0]) == ensemble, final_results)))
    for res in ensembles_k:
      sum += float(res[3])
    print(f"Mean error rate in ensembles of size {k} that contained {ensemble}: {sum/len(ensembles_k)}")


TypeError: ignored

#### Distribution of size k of ensembles with an errorRate < \<errorRate\>