In [5]:
from collections import Counter
from sklearn.model_selection import KFold
from sklearn.base import BaseEstimator, ClassifierMixin

import time
import numpy as np
import pandas as pd

In [6]:
class PerformanceEvaluator():
  """Benchmark several classifiers on several datasets with cross-validation.

  For every (classifier, dataset) pair the classifier is fitted once per
  cross-validation fold; the per-fold scores and fit durations are then
  summarised into one row of a pandas DataFrame.
  """

  # Column order of the table returned by `evaluate`.
  RESULT_COLUMNS = ['dataset', 'classifier', 'mean accuracy', 'std accuracy', 'mean time', 'std time']

  def __init__(self, datasets, cv=None, groups=None):
    """Store the datasets and the cross-validation strategy.

    Parameters
    ----------
    datasets : iterable of (name, X, y) triples
        Each triple is stored as a dict with keys 'name', 'X' and 'y'.
    cv : object exposing ``split(X, y, groups)``, optional
        Fold generator. Defaults to ``KFold(n_splits=5)``.
    groups : optional
        Forwarded unchanged to ``cv.split`` for every dataset.
    """
    # Identity test: `== None` would call a user-defined __eq__ on the splitter.
    self.cv = KFold(n_splits=5) if cv is None else cv
    self.groups = groups
    self.datasets = [
      {'name': name, 'X': X, 'y': y}
      for name, X, y in datasets
    ]

  @staticmethod
  def _take(data, indices):
    """Select rows by position from array-like or pandas containers."""
    # pandas objects index by label/column with []; .iloc is positional.
    if hasattr(data, 'iloc'):
      return data.iloc[indices]
    return data[indices]

  def cross_validation_score(self, clf, dataset):
    """Fit and score `clf` on every fold of `dataset`.

    Parameters
    ----------
    clf : estimator exposing ``fit(X, y)`` and ``score(X, y)``
    dataset : dict with keys 'X' and 'y'

    Returns
    -------
    (scores, times) : two lists with one entry per fold; `times` holds the
        duration of the ``fit`` call in seconds.
    """
    scores, times = [], []
    X, y = dataset['X'], dataset['y']
    for train, test in self.cv.split(X, y, self.groups):
      # Slice before starting the clock so only the fit itself is timed.
      X_train, y_train = self._take(X, train), self._take(y, train)
      started = time.perf_counter()  # monotonic, high-resolution timer
      clf.fit(X_train, y_train)
      times.append(time.perf_counter() - started)
      scores.append(clf.score(self._take(X, test), self._take(y, test)))
    return scores, times

  def score(self, clf, dataset):
    """Cross-validate one ``(name, estimator)`` pair and summarise the result."""
    scores, times = self.cross_validation_score(clf[1], dataset)
    return self.create_score_result(clf, dataset, scores, times)

  def create_score_result(self, clf, dataset, scores, times):
    """Build one result row from per-fold scores and fit times.

    `clf` is a ``(name, estimator)`` pair; only the name is used here.
    Times are rendered as strings such as ``"0.12s"``.
    """
    return {
      'dataset': dataset['name'],
      'classifier': clf[0],
      'mean accuracy' : np.mean(scores),
      'std accuracy' : np.std(scores),
      'mean time' : "{:.2f}s".format(np.mean(times)),
      'std time' : "{:.2f}s".format(np.std(times)),
    }

  def evaluate(self, clfs):
    """Evaluate every classifier on every stored dataset.

    Parameters
    ----------
    clfs : iterable of ``(name, estimator)`` pairs

    Returns
    -------
    pandas.DataFrame sorted by dataset and classifier name, with a fresh
    0..n-1 index.
    """
    results = []
    for clf in clfs:
      for dataset in self.datasets:
        print('Evaluating {} with {}'.format(clf[0], dataset['name']))
        results.append(self.score(clf, dataset))

    return (
      pd.DataFrame(results, columns=self.RESULT_COLUMNS)
      .sort_values(['dataset', 'classifier'], axis=0, ascending=True)
      .reset_index(drop=True)
    )

In [7]:
# Definição da classe Perceptron, baseada na classe BaseEstimator
from sklearn.preprocessing import LabelBinarizer
class Perceptron(BaseEstimator, ClassifierMixin):
    """Binary perceptron classifier built on the scikit-learn estimator base classes.

    Labels are encoded internally as -1 / +1 with a LabelBinarizer, and
    `predict` returns values in that encoded space (not the original labels).

    Parameters
    ----------
    lr : float, default=1
        Learning rate applied to every weight update.
    epoch : int, default=500
        Maximum number of passes over the training data.
    random_state : int or None, default=None
        Seed for the initial weights. With None the weights are drawn from
        NumPy's global random state.
    """

    def __init__(self, lr=1, epoch=500, random_state=None):
        self.lr = lr
        self.epoch = epoch
        self.random_state = random_state

    @staticmethod
    def _with_bias(X):
        """Prepend a column of ones so that ``weights[0]`` acts as the bias."""
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def fit(self, X, y=None):
        """Learn the weight vector from samples `X` and binary labels `y`.

        Raises
        ------
        ValueError
            If `y` contains more than two distinct classes.

        Returns
        -------
        self
        """
        X = self._with_bias(np.array(X))

        self.labelbinarizer = LabelBinarizer(neg_label=-1)
        encoded = self.labelbinarizer.fit_transform(y)
        if len(self.labelbinarizer.classes_) > 2:
            # A multi-column indicator matrix flattened to 1-D would no longer
            # line up with the rows of X.
            raise ValueError(
                "Perceptron supports binary targets only; got {} classes".format(
                    len(self.labelbinarizer.classes_)))
        y = encoded.reshape(-1)

        # None keeps using the global NumPy RNG; a seed gives a private generator.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        self.weights = rng.rand(X.shape[1])

        for _ in range(self.epoch):
            updated = False
            for xi, yi in zip(X, y):
                # dist is 0 when the sample is classified correctly.
                dist = yi - np.sign(np.dot(xi, self.weights))
                if dist != 0:
                    self.weights = self.weights + self.lr * dist * xi
                    updated = True
            if not updated:
                # An epoch without updates leaves the weights fixed, so every
                # remaining epoch would repeat it exactly.
                break

        return self

    def predict(self, X, y=None):
        """Predict encoded labels (-1 or +1) for `X`.

        A 1-D `X` is treated as a single sample and yields a scalar; a 2-D `X`
        yields one prediction per row. `y` is accepted but unused.
        """
        X = np.array(X)
        shapelen = len(X.shape)
        X = X if shapelen > 1 else np.array([X])

        pred = np.sign(np.dot(self._with_bias(X), self.weights))
        # A zero activation is not a valid label; assign ties to the negative class.
        pred[pred == 0] = -1
        return pred if shapelen > 1 else pred[0]

    def score(self, X, y, sample_weight=None):
        """Return the mean accuracy of `predict(X)` against labels `y`.

        `y` is given in the original label space and is encoded with the
        mapping learned during `fit`.
        """
        # transform, not fit_transform: re-fitting on the evaluation labels would
        # overwrite the training mapping and mis-encode single-class folds.
        y = self.labelbinarizer.transform(y).reshape(-1)
        return super().score(X, y, sample_weight)

In [8]:
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_classification

from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Perceptron as SkPerceptron

# One seed for every stochastic component so that re-running the notebook
# reproduces the same result table.
SEED = 0
# The custom Perceptron draws its initial weights from NumPy's global RNG.
np.random.seed(SEED)

data = load_breast_cancer()
breast_cancer_X, breast_cancer_y = data.data, data.target

default_X, default_y = make_classification(random_state=SEED)

# (name, X, y) triples consumed by PerformanceEvaluator
datasets = [
    ('default_classification', default_X, default_y),
    ('breast_cancer_dataset', breast_cancer_X, breast_cancer_y),
]

# (name, estimator) pairs; estimators with their own RNG get the shared seed
clfs = [
    ('MyPerceptron', Perceptron()),
    ('Perceptron', SkPerceptron(random_state=SEED)),
    ('SGDClassifier', SGDClassifier(random_state=SEED)),
    ('SVM', svm.SVC()),
]

# cv=None makes the evaluator fall back to its default 5-fold KFold
cv = None
groups = None
pe = PerformanceEvaluator(datasets, cv=cv, groups=groups)
pe.evaluate(clfs)

Evaluating MyPerceptron with default_classification
Evaluating MyPerceptron with breast_cancer_dataset
Evaluating Perceptron with default_classification
Evaluating Perceptron with breast_cancer_dataset
Evaluating SGDClassifier with default_classification
Evaluating SGDClassifier with breast_cancer_dataset
Evaluating SVM with default_classification
Evaluating SVM with breast_cancer_dataset


Unnamed: 0,dataset,classifier,mean accuracy,std accuracy,mean time,std time
0,breast_cancer_dataset,MyPerceptron,0.910402,0.036484,7.20s,0.06s
1,breast_cancer_dataset,Perceptron,0.896336,0.020938,0.00s,0.00s
2,breast_cancer_dataset,SGDClassifier,0.922667,0.030091,0.00s,0.00s
3,breast_cancer_dataset,SVM,0.906924,0.071124,0.01s,0.00s
4,default_classification,MyPerceptron,0.77,0.04,1.30s,0.08s
5,default_classification,Perceptron,0.78,0.107703,0.00s,0.00s
6,default_classification,SGDClassifier,0.79,0.10198,0.00s,0.00s
7,default_classification,SVM,0.82,0.05099,0.00s,0.00s
