In [6]:
from sklearn.base import BaseEstimator,ClassifierMixin, TransformerMixin, clone, RegressorMixin
from collections import Counter
from sklearn.model_selection import KFold
import warnings
warnings.filterwarnings('ignore')
def VotingFunc(predictions1, classes, weights=None):
    """Combine per-model label predictions by (optionally weighted) majority vote.

    Parameters
    ----------
    predictions1 : iterable of array-like
        One 1-D sequence of predicted labels per model; all sequences must
        have the same length (one entry per sample).
    classes : sequence
        Known class labels. A label's position in this sequence is its index
        in the vote counter, and ties go to the earliest position.
    weights : dict, optional
        Maps a class label to a multiplier applied to that class's vote
        count. Float multipliers are honoured (counts are kept as floats).

    Returns
    -------
    numpy.ndarray
        The winning label for every sample.

    Raises
    ------
    KeyError
        If a predicted label or a weight key is not present in ``classes``.
    """
    if weights is None:
        weights = {}

    # column_stack needs a real sequence; newer NumPy rejects a bare generator.
    votes = np.column_stack([np.asarray(pr) for pr in predictions1])
    class_to_ind = {classes[i]: i for i in range(len(classes))}
    weighted = [(class_to_ind[label], weight) for label, weight in weights.items()]

    ans = []
    for row in votes:
        # Build a separate integer index array instead of writing indices back
        # into the label array (that breaks for string labels).
        indices = np.array([class_to_ind[label] for label in row], dtype=np.int64)
        # Float counter so that fractional weights are not truncated.
        counter = np.bincount(indices).astype(float)
        for ind, weight in weighted:
            # The counter only spans indices up to the largest one voted for.
            if ind < counter.shape[0]:
                counter[ind] *= weight
        ans.append(classes[np.argmax(counter)])
    return np.array(ans)

class SimpleVoting(BaseEstimator,ClassifierMixin, TransformerMixin):
    """Hard-voting ensemble: every model casts one vote per sample.

    Parameters
    ----------
    models : list of estimators
        Unfitted classifier prototypes. They are cloned in ``fit``, so the
        objects passed in are never fitted themselves.

    Notes
    -----
    Ties are resolved in favour of the class that comes first in the
    ``classes_`` of the first fitted model.
    """

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        """Clone every prototype and fit each clone on the full ``X``, ``y``."""
        self.models_ = [clone(x) for x in self.models]

        for model in self.models_:
            model.fit(X, y)

        # Expose the label set the way scikit-learn classifiers usually do.
        self.classes_ = self.models_[0].classes_
        return self

    def predict(self, X):
        """Return, for each row of ``X``, the label predicted by most models."""
        predictions = np.column_stack([
            model.predict(X) for model in self.models_
        ])
        classes = self.models_[0].classes_
        class_to_ind = {label: ind for ind, label in enumerate(classes)}

        ans = []
        for row in predictions:
            # Map labels to integer indices in a fresh array: bincount only
            # accepts non-negative integers, so voting directly on float or
            # string labels would fail.
            indices = np.array([class_to_ind[label] for label in row], dtype=np.int64)
            counter = np.bincount(indices)
            ans.append(classes[np.argmax(counter)])
        return np.array(ans)
class SoftVoting(BaseEstimator,ClassifierMixin, TransformerMixin):
    """Soft-voting ensemble: class probabilities of all models are summed.

    ``models`` is a list of classifier prototypes that provide
    ``predict_proba``; they are cloned before fitting.
    """

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        """Clone all prototypes, then fit each clone on ``X``, ``y``."""
        fitted = []
        for prototype in self.models:
            fitted.append(clone(prototype))
        self.models_ = fitted

        for estimator in self.models_:
            estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the label whose summed probability over all models is highest."""
        leader = self.models_[0]
        # Accumulate into the first model's probability matrix.
        summed = leader.predict_proba(X)
        for estimator in self.models_[1:]:
            summed += estimator.predict_proba(X)

        winners = np.argmax(summed, axis=1)
        # Column order of predict_proba follows classes_, so indexing maps
        # the winning column back to its label.
        return leader.classes_[winners]
class WeightVoting(BaseEstimator,ClassifierMixin, TransformerMixin):
    """Hard-voting ensemble whose per-class vote counts can be re-weighted.

    Parameters
    ----------
    models : list of estimators
        Unfitted classifier prototypes; cloned in ``fit``.
    weights : dict
        Maps a class label to a multiplier applied to that class's vote
        count before the winner is chosen. ``None`` means no re-weighting.

    Notes
    -----
    Ties are resolved in favour of the class that comes first in the
    ``classes_`` of the first fitted model.
    """

    def __init__(self, models,weights):
        self.models = models
        self.weights = weights

    def fit(self, X, y):
        """Clone every prototype and fit each clone on the full ``X``, ``y``."""
        self.models_ = [clone(x) for x in self.models]

        for model in self.models_:
            model.fit(X, y)

        # Expose the label set the way scikit-learn classifiers usually do.
        self.classes_ = self.models_[0].classes_
        return self

    def predict(self, X):
        """Return the label with the largest weighted vote count per row of ``X``.

        Raises ``KeyError`` if a key of ``weights`` is not a known class.
        """
        predictions = np.column_stack([
            model.predict(X) for model in self.models_
        ])
        classes = self.models_[0].classes_
        class_to_ind = {label: ind for ind, label in enumerate(classes)}
        weights = self.weights if self.weights is not None else {}
        weighted = [(class_to_ind[label], weight) for label, weight in weights.items()]

        ans = []
        for row in predictions:
            # Map labels to integer indices in a fresh array: bincount only
            # accepts non-negative integers.
            indices = np.array([class_to_ind[label] for label in row], dtype=np.int64)
            # Float counter so that fractional weights are not truncated.
            counter = np.bincount(indices).astype(float)
            for ind, weight in weighted:
                # The counter only spans indices up to the largest one voted for.
                if ind < counter.shape[0]:
                    counter[ind] *= weight
            ans.append(classes[np.argmax(counter)])
        return np.array(ans)
class MeanModel(BaseEstimator, RegressorMixin, TransformerMixin):
    """Averaging ensemble: the prediction is the mean of all models' predictions.

    ``models`` is a list of regressor prototypes; they are cloned before
    fitting, so the originals stay unfitted.
    """

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        """Clone all prototypes, then fit each clone on ``X``, ``y``."""
        clones = []
        for prototype in self.models:
            clones.append(clone(prototype))
        self.models_ = clones

        for estimator in self.models_:
            estimator.fit(X, y)
        return self

    def predict(self, X):
        """Return the row-wise mean of the fitted models' predictions."""
        per_model = [estimator.predict(X) for estimator in self.models_]
        # One column per model, one row per sample.
        stacked = np.column_stack(per_model)
        return np.mean(stacked, axis=1)

class StackingWithMeanRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
    """Stacked regressor with out-of-fold meta-features.

    Each base model is trained once per fold; the held-out predictions become
    the meta-model's training features. At prediction time the fold models of
    one base model are averaged into a single meta-feature.

    Parameters
    ----------
    base_models : list of estimators
        Regressor prototypes (cloned for every fold).
    meta_model : estimator
        Prototype of the model trained on the meta-features.
    n_folds : int, default 5
        Number of KFold splits.
    """

    def __init__(self, base_models, meta_model, n_folds=5):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    def fit(self, X, y):
        """Train the fold models and then the meta-model on their out-of-fold output.

        ``X`` and ``y`` are indexed with integer index arrays, so they must
        support that kind of indexing.
        """
        n_base = len(self.base_models)
        # One list per base model; each will hold n_folds models, every one
        # trained on n_folds - 1 folds.
        self.trained_base_models = [[] for _ in range(n_base)]
        self.meta_model_ = clone(self.meta_model)
        splitter = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)

        # Meta-features: one column per base model.
        oof = np.zeros((X.shape[0], n_base))
        for col, prototype in enumerate(self.base_models):
            for fit_rows, held_rows in splitter.split(X, y):
                fold_model = clone(prototype)
                fold_model.fit(X[fit_rows], y[fit_rows])
                held_pred = fold_model.predict(X[held_rows])
                self.trained_base_models[col].append(fold_model)
                oof[held_rows, col] = held_pred

        # Train the meta-model.
        self.meta_model_.fit(oof, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on fold-averaged base-model predictions."""
        n_rows = X.shape[0]
        # Meta-features: one column per base model.
        meta_features = np.zeros((n_rows, len(self.trained_base_models)))
        for col, fold_models in enumerate(self.trained_base_models):
            per_fold = np.zeros((n_rows, len(fold_models)))
            for k, fold_model in enumerate(fold_models):
                per_fold[:, k] = fold_model.predict(X)
            # Average the results of the models trained on different folds.
            meta_features[:, col] = np.mean(per_fold, axis=1)
        return self.meta_model_.predict(meta_features)
class StackingWithVotingClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Stacked classifier with out-of-fold meta-features.

    Each base model is trained once per fold; the held-out label predictions
    become the meta-model's training features. At prediction time the fold
    models of one base model vote, and the winning label is that base model's
    meta-feature.

    Parameters
    ----------
    base_models : list of estimators
        Classifier prototypes (cloned for every fold).
    meta_model : estimator
        Prototype of the classifier trained on the meta-features.
    n_folds : int, default 5
        Number of KFold splits.

    Notes
    -----
    Predicted labels are stored in float arrays, so labels must be numeric.
    """

    def __init__(self, base_models, meta_model, n_folds=5):
        self.base_models = base_models
        self.meta_model = meta_model
        self.n_folds = n_folds

    def fit(self, X, y):
        """Train the fold models and then the meta-model on their out-of-fold output.

        ``X`` and ``y`` are indexed with integer index arrays, so they must
        support that kind of indexing.
        """
        # One list per base model; each will hold n_folds models, every one
        # trained on n_folds - 1 folds.
        self.trained_base_models = [list() for x in self.base_models]
        self.meta_model_ = clone(self.meta_model)
        # Full label set of the training data. A single fold model may never
        # have seen a rare class, so its own classes_ is not a safe source.
        self.classes_ = np.unique(y)
        kfold = KFold(n_splits=self.n_folds, shuffle=True, random_state=156)
        # Meta-features: one column per base model.
        out_of_fold_predictions = np.zeros((X.shape[0], len(self.base_models)))
        for i, base_model in enumerate(self.base_models):
            for train_index, holdout_index in kfold.split(X, y):
                instance = clone(base_model)
                instance.fit(X[train_index], y[train_index])
                y_pred = instance.predict(X[holdout_index])
                self.trained_base_models[i].append(instance)
                out_of_fold_predictions[holdout_index, i] = y_pred

        # Train the meta-model.
        self.meta_model_.fit(out_of_fold_predictions, y)
        return self

    def predict(self, X):
        """Predict with the meta-model on the fold-voted base-model predictions."""
        # Meta-features: one column per base model.
        meta_features = np.zeros((X.shape[0], len(self.trained_base_models)))
        for j, model_type in enumerate(self.trained_base_models):
            model_type_pred = np.zeros((X.shape[0], len(model_type)))
            for i, model in enumerate(model_type):
                model_type_pred[:, i] = model.predict(X)

            # Vote over ALL fold models of this base model. The column count is
            # the number of folds, not the number of base-model types.
            fold_votes = [model_type_pred[:, i] for i in range(len(model_type))]
            meta_features[:, j] = VotingFunc(fold_votes, classes=self.classes_)
        y_pred = self.meta_model_.predict(meta_features)
        return y_pred

In [7]:
# Imports for this cell. numpy is bound to `np` here, which is the name the
# ensemble helpers defined in the previous cell look up when they are called.
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

# Data: iris, restricted to feature columns 1 and 2.
iris = datasets.load_iris()
X = iris.data[:, 1:3]
y = iris.target

# Base classifiers (clf4 serves as the stacking meta-model below).
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1, max_depth=100000)
clf3 = GaussianNB()
clf4 = XGBClassifier()

print('5-fold cross validation:\n')

# Ensembles built from the three base classifiers.
eclf = SimpleVoting([clf1, clf2, clf3])
eclf1 = SoftVoting([clf1, clf2, clf3])
eclf2 = WeightVoting([clf1, clf2, clf3], weights={2: 1})
eclf3 = StackingWithVotingClassifier(base_models=[clf1, clf2, clf3], meta_model=clf4)

# (label, estimator) pairs in the order they are reported.
candidates = [
    ('Logistic Regression', clf1),
    ('Random Forest', clf2),
    ('Naive Bayes', clf3),
    ('HardVoting', eclf),
    ('SoftVoting', eclf1),
    ("WeightVoting", eclf2),
    ("Stacking", eclf3),
]
labels = [name for name, _ in candidates]

for label, clf in candidates:
    scores = model_selection.cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    # Mean and standard deviation of the per-fold accuracies.
    summary = (scores.mean(), scores.std(), label)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % summary)

5-fold cross validation:

Accuracy: 0.90 (+/- 0.05) [Logistic Regression]
Accuracy: 0.93 (+/- 0.05) [Random Forest]
Accuracy: 0.91 (+/- 0.04) [Naive Bayes]
Accuracy: 0.95 (+/- 0.05) [HardVoting]
Accuracy: 0.95 (+/- 0.04) [SoftVoting]
Accuracy: 0.95 (+/- 0.05) [WeightVoting]
Accuracy: 0.95 (+/- 0.03) [Stacking]


In [8]:
# Imports for this cell.
import numpy as np
from sklearn import datasets
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import GaussianNB

# Data: iris, restricted to feature columns 1 and 2.
iris = datasets.load_iris()
X = iris.data[:, 1:3]
y = iris.target

# Base classifiers; compared with the previous cell, the random forest is
# replaced by an SGD classifier.
clf1 = LogisticRegression(random_state=1)
# NOTE(review): recent scikit-learn releases spell this loss "log_loss";
# "log" matches the version this notebook was run with - confirm before upgrading.
clf2 = SGDClassifier(loss = "log",random_state=2)
clf3 = GaussianNB()

print('5-fold cross validation:\n')

# Ensembles built from the three base classifiers.
eclf = SimpleVoting([clf1, clf2, clf3])
eclf1 = SoftVoting([clf1, clf2, clf3])
eclf2 = WeightVoting([clf1, clf2, clf3], weights={2: 1})

# (label, estimator) pairs in the order they are reported.
candidates = [
    ('Logistic Regression', clf1),
    ('SGD', clf2),
    ('Naive Bayes', clf3),
    ('HardVoting', eclf),
    ('SoftVoting', eclf1),
    ("WeightVoting", eclf2),
]
labels = [name for name, _ in candidates]

for label, clf in candidates:
    scores = model_selection.cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    # Mean and standard deviation of the per-fold accuracies.
    summary = (scores.mean(), scores.std(), label)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % summary)

5-fold cross validation:

Accuracy: 0.90 (+/- 0.05) [Logistic Regression]
Accuracy: 0.67 (+/- 0.18) [SGD]
Accuracy: 0.91 (+/- 0.04) [Naive Bayes]
Accuracy: 0.93 (+/- 0.03) [HardVoting]
Accuracy: 0.90 (+/- 0.04) [SoftVoting]
Accuracy: 0.93 (+/- 0.03) [WeightVoting]
