In [1]:
import glob
import os
import random
import warnings

# joblib is a hard dependency of scikit-learn, which this notebook already uses.
import joblib
import numpy as np
from deepstack.base import Member
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.utils.data_utils import get_file

Using TensorFlow backend.


In [2]:
# Fetch the CIFAR-10 sample archive through Keras' `get_file` helper.
data_path = get_file('cifar10_samples.npz',
            'https://github.com/jcborges/DeepStack/releases/download/Cifar10/cifar10_samples.npz')
# An .npz archive is opened lazily by `np.load`; the `with` block makes sure
# the underlying file handle is closed once all arrays have been read.
with np.load(data_path) as data:
    # Relies on the order in which the arrays are stored in the archive
    # (train inputs, train labels, validation inputs, validation labels).
    x_train, y_train, x_val, y_val = [array for _, array in data.items()]

In [3]:
import keras
from keras.utils.data_utils import get_file

# All four pre-trained models are assets of the same DeepStack release.
_RELEASE_URL = 'https://github.com/jcborges/DeepStack/releases/download/Cifar10/'


def _fetch_model(filename):
    """
    Fetches `filename` from the release via `get_file` and loads it.
    Args:
        filename: name of the .h5 asset (also used as the local file name)
    Returns:
        tuple (path, model): the path returned by `get_file` and the model
        returned by `keras.models.load_model` for that path
    """
    local_path = get_file(filename, _RELEASE_URL + filename)
    return local_path, keras.models.load_model(local_path)


#Source Code: https://github.com/jcborges/DeepStack/releases/download/Cifar10/cifar10SimpleCNN.h5
model_path1, model1 = _fetch_model('cifar10SimpleCNN.h5')

#Source Code: https://github.com/jcborges/DeepStack/releases/download/Cifar10/cifar10SimpleCNN2.py
model_path2, model2 = _fetch_model('cifar10SimpleCNN2.h5')

#Source Code: https://github.com/jcborges/DeepStack/releases/download/Cifar10/cifar10keras.py
model_path3, model3 = _fetch_model('cifar10keras.h5')

#Source Code: https://github.com/jcborges/DeepStack/releases/download/Cifar10/cifar10vgg.py
model_path4, model4 = _fetch_model('cifar10vgg.h5')

In [4]:
from deepstack.base import KerasMember
# Wrap every pre-trained network as an ensemble member. All members receive
# the same (inputs, labels) pairs for training and validation and are named
# "model1" ... "model4" in the order of the loaded models.
member1, member2, member3, member4 = [
    KerasMember(name="model{}".format(position), keras_model=network,
                train_batches=(x_train, y_train), val_batches=(x_val, y_val))
    for position, network in enumerate((model1, model2, model3, model4), start=1)
]

In [69]:
from deepstack.ensemble import Ensemble

class StackEnsemble(Ensemble):
    """
    Stacking ensemble: a meta-model is fitted on the stacked predictions of
    the base-learners (members) and used to produce the final prediction.
    """

    def __init__(self, model=None):
        """
        Constructor of a Stacking Ensemble.
        Members are added afterwards with `add_member` / `add_members`.
        Args:
            model: ensemble model which should serve as meta-model.
                `sklearn.ensemble.RandomForestRegressor` per default for
                predicting class probabilities.
        """
        if model is None:
            model = RandomForestRegressor(
                n_estimators=100, max_depth=3, n_jobs=20)
        self.model = model
        # Initialize Parameters:
        self.members = []
        self._nmembers = 0
        self.predictions = None
        self._y_squeezed = False  # Flags if labels dimension must be squeezed

    def __repr__(self):
        reps = [member.name for member in self.members]
        return "<StackEnsemble: [" + ", ".join(reps) + "]>"

    def __str__(self):
        reps = [member.name for member in self.members]
        # The blank after "with" was missing ("with4 Base-Learners").
        return "StackEnsemble: with " + \
            str(self._nmembers) + " Base-Learners [" + ", ".join(reps) + "]"

    def add_members(self, members):
        """
        Adds ensemble Members to the Stack and checks afterwards (`_test`)
        that all members are compatible with each other.
        Args:
            members: a list containing instances of class `Member`
        """
        for member in members:
            self.add_member(member)
        self._test()

    def add_member(self, member):
        """
        Adds a ensemble Member to the Stack.
        Missing validation / training predictions of the member are
        calculated here. A failure while doing so is reported as a warning
        and leaves the corresponding attribute as `None`.
        Args:
            member: an instance of class `Member`
        """
        self.members.append(member)
        self._nmembers += 1
        if member.val_probs is None:
            try:
                member.val_probs = member._calculate_val_predictions()
            except Exception as e:
                warnings.warn(str(e))
        if member.train_probs is None:
            try:
                member.train_probs = member._calculate_train_predictions()
            except Exception as e:
                warnings.warn(str(e))

    def fit(self, X=None, y=None, kwargs=None):
        """
        Trains the meta-model
        Args:
            X: training data for meta-learner
            y: training classes for meta-learner
            kwargs: (optional) dict with further arguments for the fit
                function of the meta-model
        If `X` or `y` is missing, the meta-model is trained on the members'
        training predictions and the training classes of the first member.
        Returns:
            whatever the `fit` method of the meta-model returns
        """
        if kwargs is None:  # avoids a shared mutable default argument
            kwargs = {}
        assert(len(self.members) > 1)
        # Assumption: all members have same train_batches.classes
        if X is None or y is None:
            # kwargs are forwarded; they used to be dropped on this path
            return self._fit_train(kwargs)
        if X.ndim >= 3:
            # Flatten everything but the sample axis
            X = X.reshape(X.shape[0], np.prod(X.shape[1:]))
        self._y_squeezed = False
        try:
            return self.model.fit(X, y, **kwargs)
        except ValueError:  # Normally bad input shape for non-multi-output models
            # Retry with class indices instead of one row per class
            self._y_squeezed = True
            y_flat = np.argmax(y, axis=1)
            return self.model.fit(X, y_flat, **kwargs)

    def predict(self, X=None, predict_proba=False, kwargs=None):
        """
        Meta-Model prediction for the class' probabilities as a regression
        problem.
        Args:
            X: input data to be predicted. Per default the members'
                `submission_probs` are used.
            predict_proba: if should call method `predict_proba`
                instead of `predict`. `predict_proba` is also used when
                the labels had to be squeezed during `fit`.
            kwargs: (optional) dict with further arguments for the
                prediction function
        Returns:
            the predicted probabilities as np.array
        Raises:
            ValueError: if the meta-model offers no usable predict function
        """
        if kwargs is None:  # avoids a shared mutable default argument
            kwargs = {}
        if X is None:
            X = self._get_pred_X()
        if X.ndim >= 3:
            # Same flattening rule as in `fit` (was restricted to ndim == 3)
            X = X.reshape(X.shape[0], np.prod(X.shape[1:]))
        if (predict_proba or self._y_squeezed) and hasattr(self.model, 'predict_proba'):
            self.predictions = self.model.predict_proba(X, **kwargs)
            print("Calling predict_proba")
        elif hasattr(self.model, 'predict'):
            self.predictions = self.model.predict(X, **kwargs)
            print("Calling predict")
        else:
            raise ValueError("Model has no predict function")
        return np.array(self.predictions)

    def describe(self, probabilities_val=None, metric=None,
                 maximize=True):
        """
        Prints information about the performance of base and meta learners
        based on validation data.
        Args:
            probabilities_val: (optional) probabilities/prediction on
                validation data
            metric: (optional) evaluation metric function.
                Default: `sklearn.metrics.roc_auc_score`
            maximize: if metric should be maximized (otherwise minimized).
                Kept for interface compatibility; it does not influence
                the printed or returned scores.
        Returns:
            the score of the ensemble on the validation data
        """
        if metric is None:
            metric = metrics.roc_auc_score
        if probabilities_val is None:
            probabilities_val = self._predict_val()
        # Assumption: all members have same val_classes
        val_classes = self.members[0].val_classes
        for member in self.members:
            model_score = _calculate_metric(
                member.val_classes, member.val_probs, metric)
            text = member.name + " - {}: {:1.4f}".format(
                metric.__name__, model_score)
            print(text)
        ensemble_score = _calculate_metric(val_classes, probabilities_val, metric)
        print("StackEnsemble {}: {:1.4f}".format(
            metric.__name__, ensemble_score))
        return ensemble_score

    def _get_X(self, attrname):
        """
        Stacks the attribute `attrname` of all members.
        Returns:
            np.array whose entry [i][j] is element i of member j's
            `attrname`
        """
        # Look the attribute up once per member instead of once per sample
        per_member = [getattr(member, attrname) for member in self.members]
        # Assumption: all members have same train_probs length
        n_samples = len(per_member[0])
        return np.array([[probs[i] for probs in per_member]
                         for i in range(n_samples)])

    def _get_train_X(self):
        return self._get_X("train_probs")

    def _get_val_X(self):
        return self._get_X("val_probs")

    def _get_pred_X(self):
        return self._get_X("submission_probs")

    def _fit_train(self, kwargs=None):
        """
        Fits the meta-model on the members' training predictions.
        Args:
            kwargs: (optional) dict with further arguments for the fit
                function of the meta-model
        """
        return self.fit(self._get_train_X(),
                        self.members[0].train_classes, kwargs)

    def _fit_submission(self):
        """
        Fits model on training and validation data.
        Useful when training the meta-learner for final submission prediction
        """
        X1 = self._get_train_X()
        X2 = self._get_val_X()
        y1 = self.members[0].train_classes
        y2 = self.members[0].val_classes
        X = np.concatenate((X1, X2))
        y = np.concatenate((y1, y2))
        return self.fit(X, y)

    def _predict_val(self):
        return self.predict(self._get_val_X())

    def _test(self):
        """
        Test assumption that all members' classes have same shape and values.
        Names should be unique.
        This is an internal condition for class structures.
        """
        if self._nmembers < 2:
            return True
        n_pairs = self._nmembers - 1
        # Compare each member with its successor
        same_train = [np.array_equal(self.members[i].train_classes,
                                     self.members[i + 1].train_classes)
                      for i in range(n_pairs)]
        same_val = [np.array_equal(self.members[i].val_classes,
                                   self.members[i + 1].val_classes)
                    for i in range(n_pairs)]
        assert all(same_train), "members have different train_classes"
        assert all(same_val), "members have different val_classes"
        names = [self.members[i].name for i in range(self._nmembers)]
        assert len(names) == len(set(names)), "member names are not unique"
        return True

    def save(self, folder="./premodels/"):
        """
        Saves meta-learner and base-learner of ensemble into folder / directory
        Args:
            folder: the folder where models should be saved to.
                Create if not exists.
        Returns:
            the ensemble itself
        """
        # Also creates missing parent directories
        os.makedirs(folder, exist_ok=True)
        for member in self.members:
            member.save(folder=folder)
        members = self.members
        # Reset base-learners. These are loaded independently
        self.members = None
        self._nmembers = 0
        try:
            joblib.dump(self, os.path.join(folder, "stackensemble.joblib"))
        finally:
            # Restore the members even if dumping fails
            self.members = members
            self._nmembers = len(members)
        return self

    @classmethod
    def load(cls, folder="./premodels/"):
        """
        Loads meta-learner and base-learners from folder / directory
        Args:
            folder: directory where models should be loaded from
        Returns:
            loaded StackEnsemble with Members
        """
        # joblib.load unpickles arbitrary objects: only load trusted folders
        stack = joblib.load(os.path.join(folder, "stackensemble.joblib"))
        stack.members = []
        stack._nmembers = 0  # add_member counts the members again
        if folder[-1] != os.sep:
            folder += os.sep
        # Sorted for a reproducible member order.
        # NOTE(review): this is not necessarily the order the members had
        # when the meta-model was fitted - confirm before predicting.
        for fn in sorted(glob.glob(folder + "**/")):
            member = Member.load(fn)
            stack.add_member(member)
        return stack


def _calculate_metric(y_true, y_pred, metric=None):  # TODO: Refactor
    if metric is None:
        metric = metrics.roc_auc_score
    try:
        return metric(y_true, y_pred)
    except ValueError:
        pass

    try:
        y_true_cat = to_categorical(y_true)
        return metrics.roc_auc_score(y_true_cat, y_pred)
    except ValueError:
        pass

    # Classification Task
    y_t = y_true
    if y_true.ndim > 1:
        y_t = np.argmax(y_true, axis=1)
    y_p = y_pred
    if y_pred.ndim > 1:
        y_p = np.argmax(y_pred, axis=1)
    return metric(y_t, y_p)


In [92]:
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor, ExtraTreesClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn import metrics
from sklearn import tree

# Meta-learner: a scikit-learn StackingClassifier that combines a random
# forest and an extra-trees classifier through a logistic regression.
estimators = [
    ('rf2', RandomForestClassifier(verbose=0, n_estimators=200, max_depth=15, n_jobs=20, min_samples_split=30)),
    ('etr2', ExtraTreesClassifier(verbose=0, n_estimators=200, max_depth=10, n_jobs=20, min_samples_split=20))
]
clf = StackingClassifier(
    estimators=estimators, final_estimator=LogisticRegression()
)

# The meta-learner is handed to the constructor directly. Previously a default
# regressor and a RandomForestClassifier were built and then overwritten
# without ever being fitted.
stack = StackEnsemble(model=clf)
stack.add_members([member1, member2, member3, member4])
stack.fit()
stack.describe(metric=metrics.accuracy_score)

  n_iter_i = _check_optimize_result(solver, opt_res, max_iter)


Calling predict_proba
model1 - accuracy_score: 0.6116
model2 - accuracy_score: 0.6571
model3 - accuracy_score: 0.5500
model4 - accuracy_score: 0.6062
StackEnsemble accuracy_score: 0.6969


0.6969

In [None]:
# Stack the four members with a random forest regressor as meta-learner.
# `describe()` without a metric reports `sklearn.metrics.roc_auc_score`.
forest_regressor = RandomForestRegressor(
    verbose=0,
    n_estimators=200,
    max_depth=15,
    n_jobs=20,
    min_samples_split=20,
)
stack = StackEnsemble(model=forest_regressor)
stack.add_members([member1, member2, member3, member4])
stack.fit()
stack.describe()

In [None]:
# Stack the four members with a random forest classifier as meta-learner.
# `describe()` without a metric reports `sklearn.metrics.roc_auc_score`.
forest_classifier = RandomForestClassifier(
    verbose=0,
    n_estimators=200,
    max_depth=15,
    n_jobs=20,
    min_samples_split=20,
)
stack = StackEnsemble(model=forest_classifier)
stack.add_members([member1, member2, member3, member4])
stack.fit()
stack.describe()

In [None]:
# Rebuild the meta-learner input by hand: the stacked validation predictions
# of all members, with the last two axes merged into one feature axis.
val_stacked = stack._get_val_X()
n_rows = val_stacked.shape[0]
n_features = val_stacked.shape[1] * val_stacked.shape[2]
X = val_stacked.reshape(n_rows, n_features)
tt = stack.model.predict_proba(X)

In [None]:
# Shape of the `predict_proba` output computed in the previous cell.
tt.shape