In [5]:
import re

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

In [6]:
class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Select columns from a DataFrame and return them as a NumPy array.

    Parameters
    ----------
    columns : column label or list of column labels
        Used as-is to index the input in :meth:`transform`
        (``X[columns]``).
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this transformer learns nothing.

        ``y`` is optional so the selector can be fitted without a target,
        e.g. ``selector.fit(X)``.
        """
        return self

    def transform(self, X):
        """Return ``X[self.columns].values``, the selected columns as an array."""
        return X[self.columns].values

In [7]:
class ImputerByRegression(BaseEstimator, TransformerMixin):
    """Fill missing values of one column with a regressor's predictions.

    The regressor is trained on the rows where ``feature`` is present,
    using ``columns`` as predictors, and is then used to predict
    ``feature`` on the rows where it is missing.

    Parameters
    ----------
    feature : column label
        Column whose missing values are imputed.
    columns : list of column labels
        Predictor columns handed to the regressor.
    estimator : object with ``fit`` and ``predict``
        Regressor template. It is never fitted itself: :meth:`fit` trains a
        copy, stored as ``estimator_``.
    """

    def __init__(self, feature, columns, estimator=RandomForestRegressor()):
        self.feature = feature
        self.columns = columns
        self.estimator = estimator

    def fit(self, X, y=None):
        """Train a copy of ``estimator`` on the rows where ``feature`` is known.

        Returns ``self`` so calls can be chained.
        """
        from sklearn.base import clone

        known_rows = X[X[self.feature].notnull()]

        # Fit a copy rather than ``self.estimator``: the default argument is a
        # single instance shared by every imputer created without an explicit
        # estimator, so fitting it directly would let one imputer overwrite
        # another's model. ``safe=False`` falls back to a deep copy for
        # objects that are not scikit-learn estimators.
        model = clone(self.estimator, safe=False)
        model.fit(known_rows[self.columns], known_rows[self.feature])
        self.estimator_ = model
        return self

    def transform(self, X):
        """Return a copy of ``X`` with missing ``feature`` values predicted.

        The input frame is left untouched. Rows where ``feature`` is already
        present keep their value.
        """
        filled = X.copy()
        missing = filled[self.feature].isnull()
        # Skip prediction when nothing is missing; predicting on zero rows
        # is pointless and some regressors reject empty input.
        if missing.any():
            filled.loc[missing, self.feature] = self.estimator_.predict(
                filled.loc[missing, self.columns]
            )
        return filled

In [8]:


class AddAttributes(BaseEstimator, TransformerMixin):
    """Replace the name column of an array with a title ("treatment") column.

    Parameters
    ----------
    add_isAlone : bool, default True
    add_FamSize : bool, default True
        Both flags are stored on the instance but are not read by
        :meth:`transform` in this class.
        NOTE(review): presumably placeholders for features to be added later.
    """

    # (label, pattern) pairs tried in order. Titles are matched as whole
    # words so that surnames merely containing the letters (e.g. "Williams"
    # contains "ms", "Hammer" contains "mme") are not mistaken for a title.
    # NOTE(review): "Mlle" replaces the original "Mile", which looks like a
    # typo for the French title abbreviation; confirm against the data.
    _TITLE_PATTERNS = (
        ("Miss", re.compile(r"\b(?:Miss|Mlle|Ms)\b", re.IGNORECASE)),
        ("Mrs", re.compile(r"\b(?:Mrs|Mme)\b", re.IGNORECASE)),
        ("Mr", re.compile(r"\bMr\b", re.IGNORECASE)),
        ("Master", re.compile(r"\bMaster\b", re.IGNORECASE)),
    )

    def __init__(self, add_isAlone=True, add_FamSize=True):
        self.add_isAlone = add_isAlone
        self.add_FamSize = add_FamSize

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this transformer learns nothing."""
        return self

    def create_treatment_column(self, single_data):
        """Map one name string to its title label.

        Returns one of ``"Miss"``, ``"Mrs"``, ``"Mr"``, ``"Master"`` or
        ``"Others"``. ``"Others"`` is returned when no known title is found
        or when ``single_data`` is not a string (e.g. a missing value).
        """
        if not isinstance(single_data, str):
            return "Others"
        for label, pattern in self._TITLE_PATTERNS:
            if pattern.search(single_data):
                return label
        return "Others"

    def transform(self, X):
        """Drop column 0 of ``X`` and append the title derived from it.

        ``X`` is treated as a 2-D array whose first column holds the names.
        The result has the remaining columns of ``X`` followed by one title
        column, with the same number of rows as ``X``.
        """
        X = np.asarray(X)
        # Classify name by name: the helper works on a single string.
        treatment = np.array(
            [self.create_treatment_column(name) for name in X[:, 0]]
        )
        # ``X[:, 1:]`` removes the name *column* and keeps every row, so the
        # row count matches ``treatment``.
        return np.c_[X[:, 1:], treatment]