## Combining models

Pipelines are containers of steps. A step can be one of the following:

- Transformer
- Estimator
- Pipeline
- FeatureUnion

Now we will take a closer look at `FeatureUnion`.

In [1]:
import sklearn
from sklearn import pipeline

In [2]:
from sklearn import svm
from sklearn.datasets import samples_generator
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.pipeline import Pipeline

### New ###
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
from sklearn.pipeline import FeatureUnion
import numpy as np
# Reproducible synthetic dataset: 1000 samples, 5 informative features, no redundant ones.
X, y = samples_generator.make_classification(
    n_samples=1000,
    n_informative=5,
    n_redundant=0,
    random_state=42,
)

### Stacking features from PCA and KernelPCA

In [3]:
# Two decompositions with default settings; the FeatureUnion stacks their
# outputs into a single feature matrix.
features = [
    ("pca", PCA()),
    ("kernel_pca", KernelPCA()),
]
feature_combiner = FeatureUnion(transformer_list=features)

In [4]:
feature_combiner.fit_transform(X).shape

(1000, 511)

### Learning on top of the stacked features

In [5]:
clf = svm.SVC(kernel='linear')

In [6]:
svmpipe = Pipeline([("feature_combination",feature_combiner), ('svc', clf)])

In [7]:
svmpipe.fit(X,y)

Pipeline(memory=None,
     steps=[('feature_combination', FeatureUnion(n_jobs=None,
       transformer_list=[('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('kernel_pca', KernelPCA(alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto',...r', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [8]:
svmpipe.steps[0][1].transform(X).shape

(1000, 511)

## Combining model probabilities as features

In [9]:
from shutil import rmtree
from tempfile import mkdtemp
# Create a fresh temporary directory and delete it again straight away.
# NOTE(review): after rmtree the path stored in `cachedir` no longer exists, and
# no cell visible here uses it -- presumably a leftover from a
# Pipeline(memory=...) caching example; confirm before relying on it.
cachedir = mkdtemp()
rmtree(cachedir)


In [10]:
# Univariate selector that keeps the k=40 best features according to f_regression.
# NOTE(review): not used by any cell visible here, and f_regression is a
# regression score while y comes from make_classification -- confirm intent.
anova_filter = SelectKBest(f_regression, k=40)

In [11]:
# Notice we cannot do this since classifiers don't have a transform method
# THIS WILL PRODUCE AN ERROR
#model_combiner = FeatureUnion([("logistic", sklearn.linear_model.LogisticRegression()),\
#                                ("svm", clf)])

## ClassifierTransformer: features from model probabilities

In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection._split import check_cv
from sklearn.base import is_classifier, clone

In [13]:
class ClassifierTransformer(sklearn.base.BaseEstimator, sklearn.base.TransformerMixin):
    """Expose a classifier's predictions as features for a downstream model.

    The targets are discretised into ``n_classes`` integer-coded bins, one clone
    of ``estimator`` is fitted on the training part of every cross-validation
    split, and ``transform`` returns the class probabilities together with the
    predicted class for every row.

    Parameters
    ----------
    estimator : object
        Classifier providing ``fit``, ``predict`` and ``predict_proba``.
    n_classes : int, default=2
        Number of bins the targets are discretised into; also the number of
        probability columns produced by ``transform``.
    cv : int or splitter, default=3
        Forwarded to ``check_cv``.
    """

    def __init__(self, estimator=None, n_classes=2, cv=3):
        self.estimator = estimator
        self.n_classes = n_classes
        self.cv = cv

    def _get_labels(self, y):
        """Discretise ``y`` into ``n_classes`` bins coded ``0 .. n_classes - 1``.

        The sorted unique values of ``y`` are cut into consecutive groups of
        ``len(unique) // n_classes`` values; the last bin takes everything from
        its lower edge upwards.

        Note: when ``y`` has fewer unique values than ``n_classes`` the group
        size is 0, every intermediate bin is empty and all samples end up in
        the last class (``n_classes - 1``).

        ``y`` must support element-wise comparison (e.g. a numpy array).
        """
        y_labels = np.zeros(len(y))
        y_us = np.sort(np.unique(y))
        stepsize = int(len(y_us) / self.n_classes)

        for i_class in range(self.n_classes):
            lower_edge = y_us[i_class * stepsize]
            if i_class + 1 == self.n_classes:
                # Last bin is open-ended so that no sample is left unlabelled.
                y_labels[y >= lower_edge] = i_class
            else:
                upper_edge = y_us[(i_class + 1) * stepsize]
                y_labels[np.logical_and(y >= lower_edge, y < upper_edge)] = i_class
        return y_labels

    def fit(self, X, y):
        """Fit one clone of ``estimator`` per cross-validation split.

        The targets (expected to be real values, not classes as in this
        example) are first turned into integer-coded ``y_labels`` according to
        the discretisation done in ``_get_labels``. Each clone is trained on
        the training indices of one split and stored in ``self.estimators_``.

        Returns
        -------
        self
        """
        y_labels = self._get_labels(y)

        cv = check_cv(self.cv, y_labels, classifier=is_classifier(self.estimator))
        self.estimators_ = []

        for train_indices, _ in cv.split(X, y_labels):
            self.estimators_.append(
                clone(self.estimator).fit(X[train_indices], y_labels[train_indices])
            )
        return self

    def transform(self, X, y=None):
        """Build the feature matrix from the per-split estimators.

        Returns an array of shape ``(n_samples, n_classes + 1)`` made of

        X_prob: ``n_classes`` columns with the probability of each datapoint
                being of a particular class.

        X_pred: one column with the chosen class (coded as an integer) for
                each datapoint.

        The splits are recomputed here from ``X`` (and ``y`` when given) rather
        than reused from ``fit``.
        NOTE(review): because ``fit`` splits on the discretised labels and this
        method usually receives ``y=None``, the two sets of folds may differ,
        so rows can be scored by an estimator that was trained on them --
        confirm whether out-of-fold predictions are intended.
        """
        cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))

        X_prob = np.zeros((X.shape[0], self.n_classes))
        X_pred = np.zeros(X.shape[0])
        for estimator, (_, test) in zip(self.estimators_, cv.split(X)):
            proba = estimator.predict_proba(X[test])
            seen_classes = getattr(estimator, "classes_", None)
            if seen_classes is None:
                # No class information available: keep the positional layout.
                X_prob[test] = proba
            else:
                # predict_proba only has columns for the labels the estimator saw
                # during fit. Put each one in the column of its label so that a
                # missing class yields probability 0 instead of a broadcast value
                # or a shape error.
                columns = np.asarray(seen_classes).astype(int)
                X_prob[np.ix_(test, columns)] = proba
            X_pred[test] = estimator.predict(X[test])

        return np.hstack([X_prob, X_pred[:, np.newaxis]])

In [14]:
def get_rfc():
    """Return a new RandomForestClassifier with this notebook's fixed settings.

    Every call builds a separate, unfitted instance, so each caller gets its
    own forest.
    """
    forest_params = dict(
        n_estimators=100,
        max_features=0.5,
        max_depth=None,
        max_leaf_nodes=270,
        min_impurity_decrease=0.0001,
        random_state=123,
        n_jobs=-1,
    )
    return RandomForestClassifier(**forest_params)

In [15]:
# Stack three feature blocks and classify them with the linear SVC:
#   - the PCA + KernelPCA union (the same `feature_combiner` object used by svmpipe),
#   - c1 / c2: random-forest outputs for a 2-bin and a 10-bin discretisation of y.
svmpipe2 = Pipeline(
    steps=[
        (
            "fu",
            FeatureUnion(
                transformer_list=[
                    ("feature_combination", feature_combiner),
                    ("c1", ClassifierTransformer(get_rfc(), n_classes=2, cv=3)),
                    ("c2", ClassifierTransformer(get_rfc(), n_classes=10, cv=3)),
                ]
            ),
        ),
        ("svc", clf),
    ]
)

In [16]:
svmpipe2.fit(X,y)

Pipeline(memory=None,
     steps=[('fu', FeatureUnion(n_jobs=None,
       transformer_list=[('feature_combination', FeatureUnion(n_jobs=None,
       transformer_list=[('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('kernel_pca', KernelPCA(alpha...r', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [17]:
svmpipe2.steps[0][1].transform(X).shape

(1000, 525)

In [18]:
svmpipe2.score(X, y)

0.93

In [19]:
# Baseline for comparison with svmpipe2: same outer "fu" FeatureUnion and SVC,
# but with the c1 / c2 classifier-derived members disabled, so only the
# PCA + KernelPCA features reach the SVC.
svmpipe3 = Pipeline([ 
                     ('fu', FeatureUnion([ ("feature_combination",feature_combiner),
        #                                   ("c1", ClassifierTransformer(get_rfc(), n_classes=5, cv=3)),
        #                                   ("c2", ClassifierTransformer(get_rfc(), n_classes=10, cv=3)),
                                         ])),
                     ('svc', clf)])

In [20]:
svmpipe3.fit(X, y);
svmpipe3.steps[0][1].transform(X).shape

(1000, 511)

In [21]:
# Feature counts are the widths of the transform outputs shown above:
# (1000, 525) with c1/c2 stacked in (svmpipe2), (1000, 511) without them (svmpipe3).
n_with_stack, n_without_stack = 525, 511
n_added = n_with_stack - n_without_stack
print("The c1 and c2 stacking of features adds {} features".format(n_added))
print("Then {} features come from c1__n_classes=2, c2__n_classes=10 and a constant +2\
 which is\n\t one column for the predicted class of c1\n\t one column for the predicted class of c2".format(n_added))

The c1 and c2 stacking of features adds 14 features
Then 14 features crom from c1__n_classes=2, c2__n_classes=10 and a constant +2 which is
	 one column for the predicted class of c1
	 one column for the predicted class of c2


### Compare with the same pipeline where the SVM does not receive the outputs of c1 and c2 as input

In [22]:
# Rebuild and refit the plain pipeline (PCA + KernelPCA features -> linear SVC)
# so its training score can be compared with svmpipe2's.
svmpipe = Pipeline([("feature_combination",feature_combiner), ('svc', clf)])
svmpipe.fit(X, y)

Pipeline(memory=None,
     steps=[('feature_combination', FeatureUnion(n_jobs=None,
       transformer_list=[('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('kernel_pca', KernelPCA(alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto',...r', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [23]:
svmpipe.score(X, y)

0.842

In [24]:
svmpipe.steps[0][1].transform(X).shape

(1000, 511)

### Selecting values for the different parts of a pipeline via cross-validation

In [25]:
from sklearn.model_selection import GridSearchCV

In [26]:
# Keys follow the nested step names joined by "__": "fu" is the outer
# FeatureUnion of svmpipe2, "feature_combination" / "c1" / "c2" are its members,
# and the last part is the parameter being varied.
# The grid holds 3 * 3 * 5 * 5 = 225 combinations.
# NOTE(review): KernelPCA() is created with default arguments; presumably
# `degree` only matters for a polynomial kernel -- confirm it has any effect here.
pipeline_grid = {"fu__feature_combination__pca__n_components":[10,15,20], 
                 "fu__feature_combination__kernel_pca__degree":[2,3,4],
                 "fu__c1__n_classes":[2,3,4,5,10],
                 "fu__c2__n_classes":[2,3,4,5,10],
                }

In [27]:
pipe_cv = GridSearchCV(svmpipe2, param_grid=pipeline_grid, n_jobs = -1, cv=3)

In [28]:
pipe_cv.fit(X,y)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('fu', FeatureUnion(n_jobs=None,
       transformer_list=[('feature_combination', FeatureUnion(n_jobs=None,
       transformer_list=[('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('kernel_pca', KernelPCA(alpha...r', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))]),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'fu__feature_combination__pca__n_components': [10, 15, 20], 'fu__feature_combination__kernel_pca__degree': [2, 3, 4], 'fu__c1__n_classes': [2, 3, 4, 5, 10], 'fu__c2__n_classes': [2, 3, 4, 5, 10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [29]:
pipe_cv.score(X,y)

0.93

In [31]:
n_configurations = len(pipe_cv.cv_results_["params"])

In [32]:
print("There are a total of {} tested combinations of hyperparams".format( n_configurations))

There are a total of 225 tested combinations of hyperparams


In [33]:
pipe_cv.cv_results_["params"][0]

{'fu__c1__n_classes': 2,
 'fu__c2__n_classes': 2,
 'fu__feature_combination__kernel_pca__degree': 2,
 'fu__feature_combination__pca__n_components': 10}

In [34]:
pipe_cv.cv_results_["params"][1]

{'fu__c1__n_classes': 2,
 'fu__c2__n_classes': 2,
 'fu__feature_combination__kernel_pca__degree': 2,
 'fu__feature_combination__pca__n_components': 15}

In [35]:
pipe_cv.cv_results_['mean_test_score']

array([0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928,
       0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928,
       0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928,
       0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928,
       0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928,
       0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928,
       0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825,
       0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825,
       0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825,
       0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825,
       0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928, 0.928,
       0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825,
       0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825,
       0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825, 0.825,
       0.825, 0.825,