### Stacking with multiple regressors

In [1]:
import importlib
import helper_functions
import pandas as pd
importlib.reload(helper_functions)
from helper_functions import *
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.pipeline import Pipeline, FeatureUnion, _transform_one
from sklearn.externals.joblib import Parallel, delayed

In [2]:
# Defining pipeline as in other examples
trans_pipeline = Pipeline([
    ('impute_numerical', DFTransform(lambda X: fill_numerical_nans(X))),
    ('impute_categorical', DFTransform(lambda X: impute_categorical(X))),
    ('impute_special_cases', DFTransform(lambda X: impute_special_cases(X))),
    ('drop_features', DFTransform(lambda X: drop_features(X))),
    ('ordinal_features', DFTransform(lambda X: encode_ordinals(X))),
    ('check_nans', DFTransform(lambda X: check_nans(X))),
    ('encode_dummies', DFTransform(lambda X: create_dummies(X)))
    ])

In [3]:
# Load the data
train_df =  pd.read_csv('data/train.csv')
X_train = train_df.drop(['SalePrice','Id'], axis=1)
y_train = train_df['SalePrice']
X_test = pd.read_csv('data/test.csv').drop(['Id'], axis=1)
X_train, y_train = prepare_inputs(X_train, y_train)

# Transforming the input
X_combined = pd.concat((X_train, X_test)).reset_index(drop=True) 
X_tranformed = trans_pipeline.fit_transform(X_combined)

# Split the transformed input back
X_train_trans = X_tranformed[:X_train.shape[0]] 
X_test_trans = X_tranformed[X_train.shape[0]:]

Creating dummies...
Starting with input of shape: (2915, 78)
Returning output of shape: (2915, 219)


In [4]:
from sklearn.model_selection import train_test_split

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_train_trans, y_train, test_size=.2, random_state=42)

# Split the training set into two subsets for prediction and blending
X_train_subset1, X_train_subset2, y_train_subset1, y_train_subset2 = \
                                    train_test_split(X_train, y_train, test_size=.3, random_state=42)

In [5]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import Ridge

# Define first estimator
adaboost_estimator = AdaBoostRegressor(base_estimator=Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=42, solver='cholesky', tol=0.001),
         learning_rate=.001, loss='linear', n_estimators=100,
         random_state=42)

In [6]:
# Train the first estimator on the first subset
adaboost_estimator.fit(X_train_subset1, y_train_subset1)

AdaBoostRegressor(base_estimator=Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=42, solver='cholesky', tol=0.001),
         learning_rate=0.001, loss='linear', n_estimators=100,
         random_state=42)

In [7]:
# Predict with the first estimator on the second subset
prediction_adaboost = adaboost_estimator.predict(X_train_subset2)

In [9]:
from sklearn.linear_model import Lasso

# Define second regressor
lasso_estimator = Lasso(alpha=0.0001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=True, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [10]:
# Train the second estimator on the first subset
lasso_estimator.fit(X_train_subset1, y_train_subset1)

Lasso(alpha=0.0001, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=True, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [17]:
# Predict with the second estimator on the second subset
prediction_lasso = lasso_estimator.predict(X_train_subset2)

In [18]:
from sklearn.ensemble import BaggingRegressor

# Not we define a bagging ensemble for the blending
bagging_estimator = BaggingRegressor(Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,random_state=42), 
            n_estimators=500, max_samples=200, bootstrap=True, n_jobs=4)

In [19]:
# This blender now we train on the predictions of the first layer
X_blended = np.column_stack((prediction_nn, prediction_adaboost))

bagging_estimator.fit(X_blended, y_train_subset2)

BaggingRegressor(base_estimator=Ridge(alpha=10, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=42, solver='auto', tol=0.001),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=200, n_estimators=500, n_jobs=4, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [21]:
# And now we use the stack to make a prediction on unseen data
test_prediction_ada = adaboost_estimator.predict(X_test)
test_prediction_lasso = lasso_estimator.predict(X_test)

X_test_blended = np.column_stack((test_prediction_lasso, test_prediction_ada))
y_predicted = bagging_estimator.predict(X_test_blended)

In [22]:
print_benchmark(y_test, y_predicted)

R2-score: 0.864335241262
RMSE (log): 0.14637856702473265
