## Getting ready...

In [19]:
from sklearn.preprocessing import OrdinalEncoder
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [20]:
# Toy frame with two categorical columns, three rows each; built column by column.
example = pd.DataFrame({
    'rank': ['first', 'second', 'third'],
    'importance': ['very much', 'very little', 'average'],
})

## How to do it...

We need the `ToString` class from *Processing categorical data*:

In [21]:
class ToString(BaseEstimator, TransformerMixin):
    """Stateless transformer that casts its input to strings.

    The input only needs to expose ``astype``; the result of
    ``X.astype(str)`` is returned unchanged.
    """

    def fit(self, X, y=None, **fit_params):
        """Learn nothing and return ``self``; all arguments are ignored."""
        return self

    def transform(self, X, y=None, **fit_params):
        """Return ``X`` cast to ``str``; ``y`` and ``fit_params`` are ignored."""
        # NOTE(review): with pandas input this presumably also turns missing
        # values into plain strings, which would leave nothing for a later
        # imputer step to fill in -- confirm against the pandas version in use.
        as_text = X.astype(str)
        return as_text

    def fit_transform(self, X, y=None, **fit_params):
        """Call ``fit`` and then return ``transform(X)``."""
        self.fit(X, y, **fit_params)
        converted = self.transform(X)
        return converted

In [22]:
# Ordinal encoder: one list of categories per column, in column order. The
# position of a value inside its list becomes its integer code
# (e.g. 'first' -> 0, 'very little' -> 2).
oe = OrdinalEncoder(
    categories=[
        ['first', 'second', 'third'],
        ['very much', 'average', 'very little'],
    ]
)

# One-hot pipeline: cast to strings, fill with the constant 'missing', then
# one-hot encode. The step name 'onehot' is used later to look up the fitted
# categories.
categorical_pipeline = Pipeline(
    steps=[
        ('string_converter', ToString()),
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ]
)

## How it works...

In [23]:
# Ordinal codes (first two columns) side by side with the one-hot columns.
# .todense() converts the pipeline's result to a dense np.matrix, which is why
# the displayed value is a `matrix`.
np.hstack([
    oe.fit_transform(example),
    categorical_pipeline.fit_transform(example).todense(),
])

matrix([[0., 0., 1., 0., 0., 0., 0., 1.],
        [1., 2., 0., 1., 0., 0., 1., 0.],
        [2., 1., 0., 0., 1., 1., 0., 0.]])

In [24]:
def derive_ohe_columns(df, pipeline):
    """Build ``<column>_<level>`` names for one-hot encoded columns.

    Each column of ``df`` is paired, in order, with the matching entry of
    ``pipeline.named_steps.onehot.categories_``; one name is produced per
    level of that entry. Pairing stops at the shorter of the two sequences.

    Parameters
    ----------
    df : object exposing ``columns``
        Source of the column labels.
    pipeline : object exposing ``named_steps.onehot.categories_``
        Source of the per-column levels.

    Returns
    -------
    list of str
        Names ordered by column, then by level within the column.
    """
    names = []
    column_levels = zip(df.columns, pipeline.named_steps.onehot.categories_)
    for column, levels in column_levels:
        for level in levels:
            # !s forces str() on both parts, matching plain concatenation.
            names.append(f"{column!s}_{level!s}")
    return names

In [25]:
# Labels for the stacked matrix: the original column names (ordinal codes)
# first, followed by the derived one-hot column names.
[*example.columns.tolist(), *derive_ohe_columns(example, categorical_pipeline)]

['rank',
 'importance',
 'rank_first',
 'rank_second',
 'rank_third',
 'importance_average',
 'importance_very little',
 'importance_very much']