# In [None]:
from stacking_estimator import StackingEstimator
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import RFE
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline, make_union
from sklearn.kernel_approximation import RBFSampler
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.preprocessing import FunctionTransformer
from sklearn.feature_selection import VarianceThreshold
from xgboost import XGBClassifier
from copy import copy

# Classification pipelines keyed by target name.
# The seedable scikit-learn / XGBoost estimators configured here are pinned to
# random_state=42 so that repeated fits are reproducible; this includes the
# RBFSampler steps, which would otherwise draw a fresh random feature map on
# every fit.
pipelines = {
    'ATP': make_pipeline(
        StackingEstimator(estimator=DecisionTreeClassifier(criterion="gini", max_depth=6,
                                                           min_samples_leaf=13, min_samples_split=5, random_state=42)),
        DecisionTreeClassifier(criterion="entropy", max_depth=2, min_samples_leaf=5, min_samples_split=15, random_state=42)
    ),
    'Blank': make_pipeline(
        StackingEstimator(estimator=DecisionTreeClassifier(criterion="gini", max_depth=9, min_samples_leaf=10,
                                                           min_samples_split=5, random_state=42)),
        # NOTE(review): newer scikit-learn releases renamed loss="log" to
        # "log_loss"; left unchanged here -- confirm against the pinned version.
        SGDClassifier(alpha=0.001, eta0=1.0, fit_intercept=False, l1_ratio=0.5, learning_rate="invscaling",
                      loss="log", penalty="elasticnet", power_t=0.1, random_state=42)
    ),
    'D-erythronate-4-phosphate': make_pipeline(
        RFE(estimator=ExtraTreesClassifier(criterion="entropy", max_features=1.0,
                                           n_estimators=100, random_state=42), step=0.4),
        MultinomialNB(alpha=0.1, fit_prior=True)
    ),
    'D-threonate-4-phosphate': make_pipeline(
        # Seeded like every other stochastic step in this dict.
        RBFSampler(gamma=0.5, random_state=42),
        XGBClassifier(learning_rate=0.001, max_depth=2, min_child_weight=2, n_estimators=100,
                      n_jobs=1, subsample=0.8500000000000001, random_state=42, verbosity=0)
    ),
    # NOTE(review): key spelling ("phospate") kept as-is; presumably it has to
    # match labels used elsewhere -- confirm before renaming.
    'Glycerol-2-phospate': make_pipeline(
        StackingEstimator(estimator=GaussianNB()),
        MLPClassifier(alpha=0.01, learning_rate_init=0.001, random_state=42)
    ),
    'Glycerol-3-phosphate': make_pipeline(
        make_union(
            StackingEstimator(estimator=GradientBoostingClassifier(learning_rate=1.0, max_depth=4,
                                                                   max_features=0.1, min_samples_leaf=15,
                                                                   min_samples_split=14, n_estimators=100,
                                                                   random_state=42, subsample=0.9000000000000001)),
            # Second union branch: the input passed through copy.copy.
            FunctionTransformer(copy)
        ),
        GaussianNB()
    ),
    'L-erythronate-4-phosphate': make_pipeline(
        StackingEstimator(estimator=GradientBoostingClassifier(learning_rate=0.5, max_depth=8,
                                                               max_features=0.6000000000000001, min_samples_leaf=2,
                                                               min_samples_split=17, n_estimators=100,
                                                               random_state=42, subsample=0.45)),
        GaussianNB()
    ),
    'phosphoenolpyruvate': make_pipeline(
        RFE(estimator=ExtraTreesClassifier(criterion="entropy", max_features=0.6000000000000001,
                                           n_estimators=100, random_state=42), step=0.7000000000000001),
        KNeighborsClassifier(n_neighbors=3, p=1, weights="distance")
    ),
    'Pyrophosphate': make_pipeline(
        # Seeded like every other stochastic step in this dict.
        RBFSampler(gamma=0.6000000000000001, random_state=42),
        MLPClassifier(alpha=0.1, learning_rate_init=0.01, random_state=42)
    ),
    # Bare estimator rather than a pipeline (no preprocessing steps).
    'TTP': XGBClassifier(learning_rate=0.01, max_depth=9,
                         min_child_weight=13, n_estimators=100,
                         n_jobs=1, subsample=1.0,
                         random_state=42, verbosity=0)
}


def set_param_recursive(pipeline_steps, parameter, value):
    """Recursively set a parameter on every object in a pipeline that has it.

    Each object in ``pipeline_steps`` is visited.  If it exposes a list of
    child ``(name, obj)`` tuples under ``steps``, ``transformer_list`` or
    ``estimators``, those children are visited too.  If it wraps another
    object under ``estimator``, the wrapped object is visited the same way,
    so a wrapped pipeline or a chain of wrappers is handled as well.
    Objects are modified in place via ``setattr``; objects that do not
    already have ``parameter`` as an attribute are left untouched.

    Parameters
    ----------
    pipeline_steps: array-like
        List of (str, obj) tuples from a scikit-learn pipeline or related object
    parameter: str
        The parameter to assign a value for in each pipeline object
    value: any
        The value to assign the parameter to in each pipeline object

    Returns
    -------
    None
    """
    # Attribute names under which container objects keep (name, obj) tuples.
    container_attrs = ("steps", "transformer_list", "estimators")

    for _, obj in pipeline_steps:
        for attr in container_attrs:
            if hasattr(obj, attr):
                set_param_recursive(getattr(obj, attr), parameter, value)

        if hasattr(obj, "estimator"):
            # Treat the wrapped estimator as a one-element step list so that
            # it gets the parameter itself *and* any containers or wrappers
            # nested inside it are reached as well.
            nested = getattr(obj, "estimator")
            set_param_recursive([("estimator", nested)], parameter, value)

        if hasattr(obj, parameter):
            setattr(obj, parameter, value)
            
            
# Second set of classifiers, keyed by the same target names as `pipelines`.
# Entries are either a bare estimator or a make_pipeline(...) chain.  No
# random_state is passed to any estimator in this dict.
msa_pipelines = {
    'ATP': XGBClassifier(
        learning_rate=0.1,
        max_depth=4,
        min_child_weight=13,
        n_estimators=100,
        n_jobs=1,
        subsample=0.9500000000000001,
        verbosity=0,
    ),
    'Blank': make_pipeline(
        StackingEstimator(
            estimator=MLPClassifier(alpha=0.0001, learning_rate_init=0.001),
        ),
        GaussianNB(),
    ),
    # Four StackingEstimator-wrapped models in sequence, then Bernoulli NB.
    'D-erythronate-4-phosphate': make_pipeline(
        StackingEstimator(
            estimator=SGDClassifier(
                alpha=0.01,
                eta0=0.01,
                fit_intercept=False,
                l1_ratio=0.5,
                learning_rate="invscaling",
                loss="perceptron",
                penalty="elasticnet",
                power_t=0.1,
            ),
        ),
        StackingEstimator(
            estimator=ExtraTreesClassifier(
                bootstrap=True,
                criterion="gini",
                max_features=0.7000000000000001,
                min_samples_leaf=20,
                min_samples_split=4,
                n_estimators=100,
            ),
        ),
        StackingEstimator(
            estimator=KNeighborsClassifier(n_neighbors=12, p=1, weights="distance"),
        ),
        StackingEstimator(
            estimator=LogisticRegression(C=0.1, dual=False, penalty="l2"),
        ),
        BernoulliNB(alpha=1.0, fit_prior=False),
    ),
    'D-threonate-4-phosphate': XGBClassifier(
        learning_rate=0.01,
        max_depth=10,
        min_child_weight=2,
        n_estimators=100,
        n_jobs=1,
        subsample=1.0,
        verbosity=0,
    ),
    'Glycerol-2-phospate': DecisionTreeClassifier(
        criterion="entropy",
        max_depth=3,
        min_samples_leaf=5,
        min_samples_split=9,
    ),
    'Glycerol-3-phosphate': make_pipeline(
        StackingEstimator(
            estimator=MLPClassifier(alpha=0.1, learning_rate_init=0.001),
        ),
        StackingEstimator(
            estimator=XGBClassifier(
                learning_rate=0.1,
                max_depth=7,
                min_child_weight=16,
                n_estimators=100,
                n_jobs=1,
                subsample=0.55,
                verbosity=0,
            ),
        ),
        LinearSVC(C=5.0, dual=True, loss="squared_hinge", penalty="l2", tol=0.1),
    ),
    'L-erythronate-4-phosphate': make_pipeline(
        # Union of two copy.copy pass-through transformers.
        make_union(
            FunctionTransformer(copy),
            FunctionTransformer(copy),
        ),
        VarianceThreshold(threshold=0.0001),
        BernoulliNB(alpha=1.0, fit_prior=False),
    ),
    'phosphoenolpyruvate': ExtraTreesClassifier(
        bootstrap=True,
        criterion="entropy",
        max_features=0.25,
        min_samples_leaf=2,
        min_samples_split=9,
        n_estimators=100,
    ),
    'Pyrophosphate': ExtraTreesClassifier(
        bootstrap=False,
        criterion="gini",
        max_features=0.35000000000000003,
        min_samples_leaf=18,
        min_samples_split=9,
        n_estimators=100,
    ),
    'TTP': make_pipeline(
        StackingEstimator(
            estimator=MultinomialNB(alpha=10.0, fit_prior=False),
        ),
        XGBClassifier(
            learning_rate=0.01,
            max_depth=9,
            min_child_weight=4,
            n_estimators=100,
            n_jobs=1,
            subsample=0.45,
            verbosity=0,
        ),
    ),
}