In [1]:
from collections import Counter
from sklearn.model_selection import KFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator, ClassifierMixin

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
class PerformanceEvaluator():
  """Benchmark several classifiers on several datasets with cross-validation.

  Parameters
  ----------
  datasets : iterable of (name, X, y)
    Each triple is stored as a dict with the keys 'name', 'X' and 'y'.
  cv : object with a ``split(X, y, groups)`` method, optional
    Cross-validation splitter. ``KFold(n_splits=5)`` is used when omitted.
  groups : optional
    Forwarded unchanged to ``cv.split`` for every dataset.
  """

  def __init__(self, datasets, cv=None, groups=None):
    # Identity check: `== None` would invoke a custom `__eq__` on the splitter.
    self.cv = KFold(n_splits=5) if cv is None else cv
    self.groups = groups
    self.datasets = [
      {'name': name, 'X': X, 'y': y}
      for name, X, y in datasets
    ]

  @staticmethod
  def _take_rows(data, indices):
    """Select rows by position from a pandas object or an indexable array."""
    # `DataFrame[int_array]` is a column lookup, so pandas objects must go
    # through `.iloc`; everything else keeps plain `data[indices]` indexing.
    if hasattr(data, 'iloc'):
      return data.iloc[indices]
    return data[indices]

  def cross_validation_score(self, clf, dataset):
    """Fit and score `clf` once per fold of `self.cv`.

    Parameters
    ----------
    clf : object exposing ``fit(X, y)`` and ``score(X, y)``
    dataset : dict with the keys 'X' and 'y'

    Returns
    -------
    (scores, times) : two lists with one entry per fold; `times` holds the
    duration of each ``fit`` call in seconds.
    """
    times = []
    scores = []
    X, y = dataset['X'], dataset['y']
    for train, test in self.cv.split(X, y, self.groups):
      # Slice before starting the clock so only `fit` itself is timed.
      X_train, y_train = self._take_rows(X, train), self._take_rows(y, train)
      X_test, y_test = self._take_rows(X, test), self._take_rows(y, test)
      # perf_counter is monotonic and high resolution, unlike time.time().
      started = time.perf_counter()
      clf.fit(X_train, y_train)
      times.append(time.perf_counter() - started)
      scores.append(clf.score(X_test, y_test))
    return scores, times

  def score(self, clf, dataset):
    """Cross-validate one ``(name, estimator)`` pair on one dataset dict.

    Returns the summary dict built by `create_score_result`.
    """
    scores, times = self.cross_validation_score(clf[1], dataset)
    return self.create_score_result(clf, dataset, scores, times)

  def create_score_result(self, clf, dataset, scores, times):
    """Summarise per-fold scores and fit times into one result row.

    Accuracy statistics stay numeric; time statistics are formatted as
    strings with two decimals and an 's' suffix.
    """
    return {
      'dataset': dataset['name'],
      'classifier': clf[0],
      'mean accuracy' : np.mean(scores),
      'std accuracy' : np.std(scores),
      'mean time' : "{:.2f}s".format(np.mean(times)),
      'std time' : "{:.2f}s".format(np.std(times)),
    }

  def evaluate(self, clfs):
    """Run every ``(name, estimator)`` pair in `clfs` on every stored dataset.

    Prints one progress line per combination and returns a DataFrame with
    one row per combination, sorted by dataset and then classifier name.
    """
    results = []
    for clf in clfs:
      for dataset in self.datasets:
        print('Evaluating {} with {}'.format(clf[0], dataset['name']))
        results.append(self.score(clf, dataset))

    dataframe = pd.DataFrame(results, columns=['dataset', 'classifier', 'mean accuracy', 'std accuracy', 'mean time', 'std time'])
    # Chained instead of `inplace=True`, so no frame is mutated after creation.
    return dataframe.sort_values(["dataset", "classifier"]).reset_index(drop=True)

In [3]:
# ELM classifier definition, built on scikit-learn's BaseEstimator
class ELM(BaseEstimator, ClassifierMixin):
    """Extreme Learning Machine classifier.

    A single hidden layer with random, untrained input weights and a sigmoid
    activation; only the output weights are learned, in closed form, through
    the pseudo-inverse of the hidden-layer activations.

    Parameters
    ----------
    hidden_layer_size : int
        Number of hidden units.
    random_state : None, int or numpy.random.RandomState, optional
        Source of the random hidden weights and bias. ``None`` (default)
        draws from NumPy's global generator, so ``np.random.seed`` still
        controls the result.

    Attributes set by ``fit``
    -------------------------
    W : ndarray (n_features, hidden_layer_size) -- random input weights.
    bias : ndarray (1, hidden_layer_size) -- random hidden bias.
    H : ndarray (n_samples, hidden_layer_size) -- training activations.
    beta : ndarray (hidden_layer_size, n_classes) -- output weights.
    classes_ : ndarray -- class labels in the column order of ``beta``.
    """

    def __init__(self, hidden_layer_size, random_state=None):
        self.hidden_layer_size = hidden_layer_size
        self.random_state = random_state

        # Placeholders until fit() runs. The bias is no longer drawn here:
        # a bias sized at construction time goes stale (wrong shape) when
        # hidden_layer_size is later changed through set_params().
        self.H = 0
        self.beta = 0
        self.bias = 0
        self.onehotencoder = OneHotEncoder(categories='auto')

    def _random_generator(self):
        """Return the object whose ``uniform`` draws the random weights."""
        seed = self.random_state
        if seed is None:
            return np.random
        if isinstance(seed, np.random.RandomState):
            return seed
        return np.random.RandomState(seed)

    def sigmoid(self, x):
        """Element-wise logistic function."""
        return 1 / (1 + np.exp(-1 * x))

    def fit(self, X, y=None):
        """Draw random hidden weights and solve for the output weights.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Class labels.

        Returns
        -------
        self
        """
        X = np.array(X)
        y = np.array(y)
        # OneHotEncoder only accepts 2-D input; promote a flat label vector.
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        self.input_size = X.shape[1]
        self.output_size = np.unique(y).shape[0]
        y = self.onehotencoder.fit_transform(y).toarray()
        # Column i of the one-hot targets (and of beta) belongs to classes_[i].
        self.classes_ = self.onehotencoder.categories_[0]

        rng = self._random_generator()
        self.W = rng.uniform(-0.5, 0.5, (self.input_size, self.hidden_layer_size))
        # Plain ndarray instead of np.matrix; broadcasting over rows is the same.
        self.bias = rng.uniform(0, 1, (1, self.hidden_layer_size))

        self.H = self.sigmoid(X.dot(self.W) + self.bias)

        # Least-squares output weights: beta = pinv(H) . Y
        self.beta = np.linalg.pinv(self.H).dot(y)

        return self

    def predict(self, X):
        """Predict a class label for every row of X.

        Returns
        -------
        ndarray of shape (n_samples, 1)
            The label whose output unit has the largest activation. For
            labels 0..k-1 this equals the winning column index.
        """
        X = np.array(X)
        h = self.sigmoid(X.dot(self.W) + self.bias)
        winners = np.argmax(np.array(h.dot(self.beta)), axis=1)
        # Map column indices back to the labels seen during fit.
        labels = self.onehotencoder.categories_[0]
        return labels[winners].reshape(-1,1)

In [4]:
from sklearn.datasets import make_classification
from sklearn.datasets import make_gaussian_quantiles

from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline

# One seed for the whole experiment. The ELM defined in this notebook draws its
# random weights from NumPy's global generator, so seeding it here (before the
# estimators are built) makes the result table repeat on Restart & Run All.
SEED = 0
np.random.seed(SEED)

# Synthetic benchmarks: each generator is sampled once with few features (5)
# and once with more features (1100) than samples (1000).
base1_X, base1_y = make_classification(random_state=SEED, n_samples=1000, n_features=5)
base2_X, base2_y = make_classification(random_state=SEED, n_samples=1000, n_features=1100)
base3_X, base3_y = make_gaussian_quantiles(random_state=SEED, n_samples=1000, n_features=5)
base4_X, base4_y = make_gaussian_quantiles(random_state=SEED, n_samples=1000, n_features=1100)

# Targets are kept as column vectors (n_samples, 1), the 2-D layout that the
# OneHotEncoder used inside ELM.fit works on.
base1_y = base1_y.reshape(-1,1)
base2_y = base2_y.reshape(-1,1)
base3_y = base3_y.reshape(-1,1)
base4_y = base4_y.reshape(-1,1)

datasets = [
    ('base1', base1_X, base1_y),
    ('base2', base2_X, base2_y),
    ('base3', base3_X, base3_y),
    ('base4', base4_X, base4_y),
]

# (display name, estimator) pairs, the format PerformanceEvaluator.evaluate reads.
classifiers = [
    ('ELM', ELM(10)),
    ('Perceptron', Perceptron()),
    # Explicit seed so the MLP's random initialisation is repeatable as well.
    ('MLPClassifier', MLPClassifier(random_state=SEED)),
]

# cv=None lets PerformanceEvaluator fall back to its default KFold(n_splits=5).
cv = None
groups = None
pe = PerformanceEvaluator(datasets, cv=cv, groups=groups)
pe.evaluate(classifiers)

Evaluating ELM with base1
Evaluating ELM with base2
Evaluating ELM with base3
Evaluating ELM with base4
Evaluating Perceptron with base1
Evaluating Perceptron with base2
Evaluating Perceptron with base3
Evaluating Perceptron with base4
Evaluating MLPClassifier with base1
Evaluating MLPClassifier with base2
Evaluating MLPClassifier with base3
Evaluating MLPClassifier with base4


Unnamed: 0,dataset,classifier,mean accuracy,std accuracy,mean time,std time
0,base1,ELM,0.955,0.017889,0.01s,0.00s
1,base1,MLPClassifier,0.958,0.011662,6.04s,1.64s
2,base1,Perceptron,0.854,0.084935,0.01s,0.00s
3,base2,ELM,0.54,0.045497,0.02s,0.00s
4,base2,MLPClassifier,0.602,0.031401,8.21s,0.87s
5,base2,Perceptron,0.806,0.017436,0.12s,0.02s
6,base3,ELM,0.504,0.030232,0.01s,0.00s
7,base3,MLPClassifier,0.901,0.01772,2.06s,0.31s
8,base3,Perceptron,0.272,0.046217,0.02s,0.00s
9,base4,ELM,0.337,0.045891,0.03s,0.01s
