This kernel adopts the idea of [Eliot Barril](https://www.kaggle.com/eliotbarr/house-prices-advanced-regression-techniques/stacking-starter/code). To understand this Ensemble Model, please review this [article](http://mlwave.com/kaggle-ensembling-guide/).

In [1]:
import pandas as pd
import numpy as np
import pickle
import os

from scipy.stats import skew


from sklearn.cross_validation import KFold
from sklearn.cross_validation import cross_val_score
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.model_selection import KFold, GridSearchCV

from sklearn.linear_model import BayesianRidge, ElasticNet, HuberRegressor
from sklearn import svm
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, BaggingRegressor

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor

import xgboost as xgb
from xgboost import XGBRegressor


from math import sqrt

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')



# Load data

In [2]:
with open("input/preprocessed_data_3.pkl", "rb") as f:
    train_data = pickle.load(f)
    test_data = pickle.load(f)
    ids = pickle.load(f)
    labels = pickle.load(f)

In [3]:
type(train_data)

pandas.core.frame.DataFrame

In [4]:
len(labels)

1458

In [5]:
ntrain = train_data.shape[0]
ntest = test_data.shape[0]

# Build model

## First level model

1. BayesianRidge
2. ElasticNet
3. HuberRegressor
4. SVM RBF (SVR)
5. SVM Linear (SVR(kernel="linear"))
6. RandomForestRegressor
7. AdaBoostRegressor
8. BaggingRegressor 
9. XGBRegressor

Steps:
1. Tuning hyperparameter for each model
2. Stacking all model
4. Run XGB on top

### Tuning parameters

In [6]:
def hyperparaTuning(clf, train, labels, param_grid):
    cv = KFold(n_splits=5, shuffle=True, random_state=45).split(train)
    score = make_scorer(mean_squared_error)
    grid_search = GridSearchCV(clf, param_grid, cv=cv, scoring=score)
    grid_search.fit(train, labels)
    return grid_search.grid_scores_, grid_search.best_params_, grid_search.best_score_, grid_search.best_estimator_

#### BayesianRidge

In [7]:
br = BayesianRidge()

In [8]:
param_grid_br = {'alpha_1': [1e-04, 1e-2, 0, 0.1, 0.2], 
                 'alpha_2': [1e-04, 1e-2, 0, 0.1, 0.2],
                 'lambda_1': [1e-04, 1e-2, 0, 0.1, 0.2], 
                 'lambda_2': [1e-04, 1e-2, 0, 0.1, 0.2]
    
}

In [10]:
hyperparaTuning(br, train_data, labels, param_grid_br)

([mean: 0.01227, std: 0.00145, params: {'alpha_1': 0.0001, 'lambda_1': 0.0001, 'lambda_2': 0.0001, 'alpha_2': 0.0001},
  mean: 0.01226, std: 0.00140, params: {'alpha_1': 0.0001, 'lambda_1': 0.0001, 'lambda_2': 0.01, 'alpha_2': 0.0001},
  mean: 0.01227, std: 0.00145, params: {'alpha_1': 0.0001, 'lambda_1': 0.0001, 'lambda_2': 0, 'alpha_2': 0.0001},
  mean: 0.01238, std: 0.00122, params: {'alpha_1': 0.0001, 'lambda_1': 0.0001, 'lambda_2': 0.1, 'alpha_2': 0.0001},
  mean: 0.01254, std: 0.00113, params: {'alpha_1': 0.0001, 'lambda_1': 0.0001, 'lambda_2': 0.2, 'alpha_2': 0.0001},
  mean: 0.01227, std: 0.00145, params: {'alpha_1': 0.0001, 'lambda_1': 0.01, 'lambda_2': 0.0001, 'alpha_2': 0.0001},
  mean: 0.01226, std: 0.00140, params: {'alpha_1': 0.0001, 'lambda_1': 0.01, 'lambda_2': 0.01, 'alpha_2': 0.0001},
  mean: 0.01227, std: 0.00145, params: {'alpha_1': 0.0001, 'lambda_1': 0.01, 'lambda_2': 0, 'alpha_2': 0.0001},
  mean: 0.01238, std: 0.00122, params: {'alpha_1': 0.0001, 'lambda_1': 0.0

#### ElasticNet

In [34]:
en = ElasticNet()

In [92]:
param_grid_en = {'alpha': [0.1, 0.2, 0.3, 0.8], 
                 'l1_ratio': [1.5, 2, 3]
    
}

In [93]:
hyperparaTuning(en, train_data, labels, param_grid_en)

([mean: 0.06366, std: 0.00585, params: {'l1_ratio': 1.5, 'alpha': 0.1},
  mean: 0.08408, std: 0.00743, params: {'l1_ratio': 2, 'alpha': 0.1},
  mean: 0.14313, std: 0.01307, params: {'l1_ratio': 3, 'alpha': 0.1},
  mean: 0.14502, std: 0.01284, params: {'l1_ratio': 1.5, 'alpha': 0.2},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 2, 'alpha': 0.2},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 3, 'alpha': 0.2},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 1.5, 'alpha': 0.3},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 2, 'alpha': 0.3},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 3, 'alpha': 0.3},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 1.5, 'alpha': 0.8},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 2, 'alpha': 0.8},
  mean: 0.15995, std: 0.01061, params: {'l1_ratio': 3, 'alpha': 0.8}],
 {'alpha': 0.2, 'l1_ratio': 2},
 0.15995149307632398,
 ElasticNet(alpha=0.2, copy_X=True, fit_intercept=True, l1_ratio=2,
       max_iter=1000, normali

#### HuberRegressor

In [43]:
hr = HuberRegressor()

In [45]:
param_grid_hr = {'alpha': [1000,100,10, 1],
              'epsilon' : [1.2,1.25,1.50],
              'tol' : [1e-10, 1e-9]}

In [44]:
hyperparaTuning(hr, train_data, labels, param_grid_hr)

([mean: 0.01377, std: 0.00216, params: {'epsilon': 1.2, 'tol': 1e-10, 'alpha': 1000},
  mean: 0.01377, std: 0.00216, params: {'epsilon': 1.2, 'tol': 1e-09, 'alpha': 1000},
  mean: 0.01370, std: 0.00217, params: {'epsilon': 1.25, 'tol': 1e-10, 'alpha': 1000},
  mean: 0.01370, std: 0.00217, params: {'epsilon': 1.25, 'tol': 1e-09, 'alpha': 1000},
  mean: 0.01351, std: 0.00215, params: {'epsilon': 1.5, 'tol': 1e-10, 'alpha': 1000},
  mean: 0.01351, std: 0.00215, params: {'epsilon': 1.5, 'tol': 1e-09, 'alpha': 1000},
  mean: 0.01244, std: 0.00194, params: {'epsilon': 1.2, 'tol': 1e-10, 'alpha': 100},
  mean: 0.01244, std: 0.00194, params: {'epsilon': 1.2, 'tol': 1e-09, 'alpha': 100},
  mean: 0.01265, std: 0.00172, params: {'epsilon': 1.25, 'tol': 1e-10, 'alpha': 100},
  mean: 0.01265, std: 0.00172, params: {'epsilon': 1.25, 'tol': 1e-09, 'alpha': 100},
  mean: 0.01262, std: 0.00193, params: {'epsilon': 1.5, 'tol': 1e-10, 'alpha': 100},
  mean: 0.01262, std: 0.00193, params: {'epsilon': 1.5,

#### SVM RBF (SVR)

In [57]:
svm_rbf = svm.SVR()

In [65]:
param_grid_svr = {'tol': [0.0001, 0.001, 0.01, 0.1], 
                  'C': [1.0, 1.5, 2.0, 2.5], 
                  'epsilon': [1, 2, 3, 4]
}

In [66]:
hyperparaTuning(svm_rbf, train_data, labels, param_grid_svr)

([mean: 0.07818, std: 0.00867, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.0001},
  mean: 0.07819, std: 0.00867, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.001},
  mean: 0.07831, std: 0.00881, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.01},
  mean: 0.07734, std: 0.00962, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.1},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.0001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.01},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.1},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.0001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.01},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.1},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon

#### SVM Linear (SVR(kernel="linear"))

In [68]:
svm_ln = svm.SVR(kernel="linear")

In [69]:
param_grid_svm = {'tol': [0.0001, 0.001, 0.01, 0.1], 
                  'C': [1.0, 1.5, 2.0, 2.5], 
                  'epsilon': [1, 2, 3, 4]
}

In [70]:
hyperparaTuning(svm_ln, train_data, labels, param_grid_svm)

([mean: 0.07539, std: 0.00931, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.0001},
  mean: 0.07536, std: 0.00931, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.001},
  mean: 0.07527, std: 0.00918, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.01},
  mean: 0.07580, std: 0.01050, params: {'C': 1.0, 'epsilon': 1, 'tol': 0.1},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.0001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.01},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 2, 'tol': 0.1},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.0001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.001},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.01},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon': 3, 'tol': 0.1},
  mean: 0.16039, std: 0.01136, params: {'C': 1.0, 'epsilon

#### RandomForestRegressor

In [71]:
rf = RandomForestRegressor()

In [84]:
param_grid_rf = {'max_depth': [3, 5],
                 'min_samples_split': [3, 5],
                 'min_samples_leaf': [9, 11],
                 'min_weight_fraction_leaf': [0.001, 0.1, 0]    
}

In [85]:
hyperparaTuning(rf, train_data, labels, param_grid_rf)

([mean: 0.04019, std: 0.00372, params: {'min_samples_leaf': 9, 'max_depth': 3, 'min_weight_fraction_leaf': 0.001, 'min_samples_split': 3},
  mean: 0.04414, std: 0.00454, params: {'min_samples_leaf': 9, 'max_depth': 3, 'min_weight_fraction_leaf': 0.1, 'min_samples_split': 3},
  mean: 0.03961, std: 0.00442, params: {'min_samples_leaf': 9, 'max_depth': 3, 'min_weight_fraction_leaf': 0, 'min_samples_split': 3},
  mean: 0.04053, std: 0.00375, params: {'min_samples_leaf': 9, 'max_depth': 3, 'min_weight_fraction_leaf': 0.001, 'min_samples_split': 5},
  mean: 0.04587, std: 0.00444, params: {'min_samples_leaf': 9, 'max_depth': 3, 'min_weight_fraction_leaf': 0.1, 'min_samples_split': 5},
  mean: 0.03837, std: 0.00177, params: {'min_samples_leaf': 9, 'max_depth': 3, 'min_weight_fraction_leaf': 0, 'min_samples_split': 5},
  mean: 0.04002, std: 0.00407, params: {'min_samples_leaf': 11, 'max_depth': 3, 'min_weight_fraction_leaf': 0.001, 'min_samples_split': 3},
  mean: 0.04613, std: 0.00485, params:

#### AdaBoostRegressor

In [100]:
ab = AdaBoostRegressor()

In [101]:
param_grid_ab = {'learning_rate': [0.001, 0.01, 0.1]
    
}

In [102]:
hyperparaTuning(ab, train_data, labels, param_grid_ab)

([mean: 0.04335, std: 0.00331, params: {'learning_rate': 0.001},
  mean: 0.04311, std: 0.00311, params: {'learning_rate': 0.01},
  mean: 0.03454, std: 0.00442, params: {'learning_rate': 0.1}],
 {'learning_rate': 0.001},
 0.043353672553229423,
 AdaBoostRegressor(base_estimator=None, learning_rate=0.001, loss='linear',
          n_estimators=50, random_state=None))

#### BaggingRegressor

In [81]:
bg = BaggingRegressor()

In [96]:
param_grid_bg = {'n_estimators': [10, 30],
                 'max_samples': [3, 5],
                 'max_features': [3, 5]
    
}

In [97]:
hyperparaTuning(bg, train_data, labels, param_grid_bg)

([mean: 0.15771, std: 0.01575, params: {'max_features': 3, 'max_samples': 3, 'n_estimators': 10},
  mean: 0.14597, std: 0.01412, params: {'max_features': 3, 'max_samples': 3, 'n_estimators': 30},
  mean: 0.14346, std: 0.01416, params: {'max_features': 3, 'max_samples': 5, 'n_estimators': 10},
  mean: 0.13105, std: 0.01191, params: {'max_features': 3, 'max_samples': 5, 'n_estimators': 30},
  mean: 0.14961, std: 0.02279, params: {'max_features': 5, 'max_samples': 3, 'n_estimators': 10},
  mean: 0.13807, std: 0.01719, params: {'max_features': 5, 'max_samples': 3, 'n_estimators': 30},
  mean: 0.12285, std: 0.02094, params: {'max_features': 5, 'max_samples': 5, 'n_estimators': 10},
  mean: 0.12127, std: 0.01531, params: {'max_features': 5, 'max_samples': 5, 'n_estimators': 30}],
 {'max_features': 3, 'max_samples': 3, 'n_estimators': 10},
 0.15770689001274149,
 BaggingRegressor(base_estimator=None, bootstrap=True,
          bootstrap_features=False, max_features=3, max_samples=3,
          n

#### XGBRegressor

In [111]:
xgb = XGBRegressor(objective = 'reg:linear')

In [128]:
param_grid_xgb = {'max_depth': [3], 
                  'learning_rate': [0.01], 
                  'n_estimators': [100], 
                  'gamma': [0.01], 
                  'min_child_weight': [5], 
                  'subsample': [0.6], 
                  'colsample_bytree': [0.1], 
                  'reg_alpha': [0.01], 
                  'reg_lambda': [0.01]
    
}

In [129]:
hyperparaTuning(xgb, train_data, labels, param_grid_xgb)

([mean: 17.86428, std: 0.12644, params: {'gamma': 0.01, 'reg_alpha': 0.01, 'colsample_bytree': 0.1, 'subsample': 0.6, 'min_child_weight': 5, 'max_depth': 3, 'reg_lambda': 0.01, 'n_estimators': 100, 'learning_rate': 0.01}],
 {'colsample_bytree': 0.1,
  'gamma': 0.01,
  'learning_rate': 0.01,
  'max_depth': 3,
  'min_child_weight': 5,
  'n_estimators': 100,
  'reg_alpha': 0.01,
  'reg_lambda': 0.01,
  'subsample': 0.6},
 17.864279712249921,
 XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.1,
        gamma=0.01, learning_rate=0.01, max_delta_step=0, max_depth=3,
        min_child_weight=5, missing=None, n_estimators=100, nthread=-1,
        objective='reg:linear', reg_alpha=0.01, reg_lambda=0.01,
        scale_pos_weight=1, seed=0, silent=True, subsample=0.6))

## Stacking first models

In [6]:
x_train = np.array(train_data)
x_test = np.array(test_data)
y_train = labels

In [7]:
y_train = y_train.reset_index()
del y_train['index']
y_train = y_train['SalePrice']

In [17]:
NFOLDS = 5
SEED = 3

In [20]:
kf = KFold(n_splits=5, shuffle=True, random_state=45).split(train_data)

In [21]:
#out of folds
def get_oof(clf):
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    for i, (train_index, test_index) in enumerate(kf):
        x_tr = x_train[train_index]
        y_tr = y_train[train_index]
        x_te = x_train[test_index]

        clf.fit(x_tr, y_tr)

        oof_train[test_index] = clf.predict(x_te)
        oof_test_skf[i, :] = clf.predict(x_test)

    oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)

In [22]:
br = BayesianRidge(alpha_1=0.2, alpha_2=0, compute_score=False, copy_X=True,
                   fit_intercept=True, lambda_1=0, lambda_2=0.2, n_iter=300,
                   normalize=False, tol=0.001, verbose=False)

en = ElasticNet(alpha=0.2, copy_X=True, fit_intercept=True, l1_ratio=2,
                max_iter=1000, normalize=False, positive=False, precompute=False,
                random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

hr = HuberRegressor(alpha=1, epsilon=1.2, fit_intercept=True, max_iter=100,
                    tol=1e-10, warm_start=False)

svm_rbf = svm.SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=2, gamma='auto',
          kernel='rbf', max_iter=-1, shrinking=True, tol=0.0001, verbose=False)

svm_ln = svm.SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=2, gamma='auto',
             kernel='linear', max_iter=-1, shrinking=True, tol=0.0001, verbose=False)

rf = RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=3,
                           max_features='auto', max_leaf_nodes=None,
                           min_impurity_split=1e-07, min_samples_leaf=11,
                           min_samples_split=5, min_weight_fraction_leaf=0.1,
                           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
                           verbose=0, warm_start=False)

ab = AdaBoostRegressor(base_estimator=None, learning_rate=0.001, loss='linear',
                       n_estimators=50, random_state=None)

xgb = XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.1,
        gamma=0.01, learning_rate=0.01, max_delta_step=0, max_depth=3,
        min_child_weight=5, missing=None, n_estimators=100, nthread=-1,
        objective='reg:linear', reg_alpha=0.01, reg_lambda=0.01,
        scale_pos_weight=1, seed=0, silent=True, subsample=0.6)

In [23]:
br_oof_train, br_oof_test = get_oof(br)
en_oof_train, en_oof_test = get_oof(en)
hr_oof_train, hr_oof_test = get_oof(hr)
svr_oof_train, svr_oof_test = get_oof(svm_rbf)
svm_oof_train, svm_oof_test = get_oof(svm_ln)
rf_oof_train, rf_oof_test = get_oof(rf)
ab_oof_train, ab_oof_test = get_oof(ab)
xgb_oof_train, xgb_oof_test = get_oof(xgb)

In [24]:
print("Br-CV: {}".format(sqrt(mean_squared_error(y_train, br_oof_train))))
print("EN-CV: {}".format(sqrt(mean_squared_error(y_train, en_oof_train))))
print("HR-CV: {}".format(sqrt(mean_squared_error(y_train, hr_oof_train))))
print("SVR-CV: {}".format(sqrt(mean_squared_error(y_train, svr_oof_train))))
print("SVM-CV: {}".format(sqrt(mean_squared_error(y_train, svm_oof_train))))
print("RF-CV: {}".format(sqrt(mean_squared_error(y_train, rf_oof_train))))
print("AB-CV: {}".format(sqrt(mean_squared_error(y_train, ab_oof_train))))
print("XGB-CV: {}".format(sqrt(mean_squared_error(y_train, xgb_oof_train))))

Br-CV: 0.11196441712692931
EN-CV: 12.030652568617453
HR-CV: 12.030652568617453
SVR-CV: 12.030652568617453
SVM-CV: 12.030652568617453
RF-CV: 12.030652568617453
AB-CV: 12.030652568617453
XGB-CV: 12.030652568617453


## Second level model

In [15]:
x_train = np.concatenate((xgb_oof_train, br_oof_train, en_oof_train, hr_oof_train, rf_oof_train), axis=1)
x_test = np.concatenate(((xgb_oof_train, br_oof_train, en_oof_train, hr_oof_train, rf_oof_train), axis=1)

SyntaxError: unexpected EOF while parsing (<ipython-input-15-17dba03eb73a>, line 2)

In [None]:
xgb_tuning = XGBRegressor(objective = 'reg:linear')

In [None]:
param_grid = {'max_depth': [3], 
                  'learning_rate': [0.1], 
                  'n_estimators': [50, 100], 
                  'gamma': [0.01, 0.1, 1], 
                  'min_child_weight': [3, 5, 7], 
                  'subsample': [0.01, 0.1, 1], 
                  'colsample_bytree': [0.1, 0.5, 1], 
                  'reg_alpha': [0.01, 0.1, 1], 
                  'reg_lambda': [0.01, 0.1, 1]
    
}

In [None]:
hyperparaTuning(xgb_tuning, x_train, labels, param_grid)

In [None]:
xgb_final = .fit(x_train, labels)

# Submit result

In [26]:
def modelSubmit(model, dtest, name):
    ln_pred = model.predict(dtest)
    pred = np.expm1(ln_pred)
    submission = pd.DataFrame({"Id": ids, "SalePrice": pred})
    filename = os.path.join(name + ".csv")
    submission.to_csv('output/' + filename, index=False)

In [27]:
modelSubmit(xgb_final, x_test, 'xgstacker_4')