In [1]:
import numpy as np

In [None]:
# majority vote ensemble
import operator

from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.externals import six
from sklearn.base import clone
from sklearn.pipeline import _name_estimators

class MajorityVoteClassifier(BaseEstimator, ClassifierMixin):
    """Ensemble classifier that combines several classifiers by voting.

    Parameters
    ----------
    classifiers : list of estimators
        Unfitted classifiers. Each one is cloned and the clone is fitted in
        ``fit``, so the objects passed in are never fitted themselves.
    vote : str, default='classlabel'
        ``'probability'`` predicts the class with the largest (weighted)
        average of the classifiers' predicted probabilities. Any other value
        selects a (weighted) majority vote over the predicted class labels.
    weights : array-like or None, default=None
        One weight per classifier. ``None`` weights all classifiers equally.

    Attributes
    ----------
    lablenc_ : LabelEncoder
        Encoder fitted on ``y``; maps the original labels to 0..n_classes-1.
    classes_ : array
        The original class labels, as stored by the label encoder.
    classifiers_ : list
        The fitted clones of ``classifiers``.
    """

    def __init__(self, classifiers, vote='classlabel', weights=None):
        self.classifiers = classifiers
        # Name -> estimator mapping used by get_params to expose the nested
        # parameters as '<name>__<param>'.
        self.named_classifiers = {
            key: value for key, value in _name_estimators(classifiers)
        }
        self.vote = vote
        self.weights = weights  # None means every classifier counts equally

    def fit(self, X, y):
        """Fit a clone of every classifier on ``X`` and the encoded ``y``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``weights`` is given but does not hold exactly one entry per
            classifier.
        """
        # Fail early with a clear message; otherwise the mismatch only shows
        # up at prediction time as a numpy shape/length error.
        if (self.weights is not None
                and len(self.weights) != len(self.classifiers)):
            raise ValueError(
                'Number of classifiers and weights must be equal; '
                'got %d weights, %d classifiers'
                % (len(self.weights), len(self.classifiers)))

        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_

        # Encode once; every classifier is trained on labels 0..n_classes-1
        # so that np.bincount / np.argmax can be used on their outputs.
        encoded_y = self.lablenc_.transform(y)
        self.classifiers_ = [
            clone(clf).fit(X, encoded_y) for clf in self.classifiers
        ]
        return self

    def predict(self, X):
        """Predict class labels for ``X`` in the original label space.

        Both voting modes return the labels that were passed to ``fit``
        (not the internal 0..n_classes-1 encoding).
        """
        if self.vote == 'probability':
            # Column i of predict_proba corresponds to encoded label i, so
            # the argmax is an encoded label and must be mapped back.
            winners = np.argmax(self.predict_proba(X), axis=1)
            return self.lablenc_.inverse_transform(winners)
        # 'classlabel' vote
        return self.predict_label(X)

    def predict_proba(self, X):
        """Return the (weighted) average of the classifiers' probabilities.

        The result has one row per sample and one column per encoded class.
        """
        probas = np.asarray(
            [clf.predict_proba(X) for clf in self.classifiers_])
        return np.average(probas, axis=0, weights=self.weights)

    def predict_label(self, X):
        """Predict labels by (weighted) majority vote over ``clf.predict``.

        Returns the winning labels mapped back to the original label space.
        """
        # Rows are samples, columns are the individual classifiers' votes.
        predictions = np.asarray(
            [clf.predict(X) for clf in self.classifiers_]).T
        # bincount sums the weights per encoded label; argmax picks the
        # winner (ties resolve to the smallest encoded label).
        maj_vote = np.apply_along_axis(
            lambda votes: np.argmax(
                np.bincount(votes, weights=self.weights)),
            axis=1,
            arr=predictions)
        return self.lablenc_.inverse_transform(maj_vote)

    def get_params(self, deep=True):
        """Return the estimator's parameters.

        With ``deep=False`` only ``classifiers``, ``vote`` and ``weights``
        are returned. With ``deep=True`` the result additionally contains
        every named classifier and its parameters under '<name>__<param>',
        which is what makes nested parameters addressable in a grid search.
        """
        out = super(MajorityVoteClassifier, self).get_params(deep=False)
        if not deep:
            return out
        # Keep the ensemble's own parameters in the deep view as well so
        # that set_params(vote=...) / set_params(weights=...) are accepted.
        out.update(self.named_classifiers)
        for name, step in self.named_classifiers.items():
            for key, value in step.get_params(deep=True).items():
                out['%s__%s' % (name, key)] = value
        return out

In [28]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Load Iris and keep the samples from row 50 onward (classes Iris-Versicolor
# and Iris-Virginica), restricted to two features: sepal width and petal length.
iris = datasets.load_iris()
X = iris.data[50:, [1, 2]]
y = iris.target[50:]

# LabelEncoder maps the labels onto the integers 0 .. n_classes-1.
le = LabelEncoder()
le.fit(y)
y = le.transform(y)

# Show the learned classes and a round trip through the encoder.
print('classes: {}'.format(le.classes_))
encoded_example = le.transform([1, 2])
print('transform [1,2]: {}'.format(encoded_example))
decoded_example = le.inverse_transform([0, 1])
print('inverse transform [0,1]: {}'.format(decoded_example))

classes: [1 2]
transform [1,2]: [0 1]
inverse transform [0,1]: [1 2]


  if diff:


In [29]:
# Split the data 50/50 into training and test sets; the fixed random_state
# makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=1,
)