In [None]:
# Switch the working directory to the project folder so that the local `common`
# module and the data files referenced below resolve.
# NOTE(review): assumes Google Drive is already mounted at /content/drive; no
# mount call is visible in this part of the notebook - confirm.
%cd '/content/drive/MyDrive/Colab Notebooks/kaggle/house_pricing'

In [None]:
!pip install -q catboost
!pip install -q mlens
!pip install -q lightgbm

In [None]:
from common import *

from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor

from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import mean_absolute_error

from mlens.ensemble import SuperLearner, BlendEnsemble
from mlens.metrics.metrics import rmse
from mlens.model_selection import Evaluator

from tqdm.notebook import tqdm

from itertools import combinations

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the train features, train target and test features, then echo their
# shapes on one line as a quick sanity check.
x_train, y_train, x_test = read_preprocessed_data('preprocessed')
print(*(split.shape for split in (x_train, y_train, x_test)))

---
#### Lasso

In [None]:
# Lasso wrapped so that it is fitted on log1p(target); predictions are mapped
# back to the original scale with expm1.
# NOTE(review): scikit-learn advises against Lasso(alpha=0) - with alpha=0 the
# objective is plain least squares and LinearRegression is the recommended
# estimator. Left unchanged here because swapping it would alter the scores;
# confirm whether alpha=0 is intentional.
lasso = TransformedTargetRegressor(
    regressor=Lasso(alpha=0),
    func=np.log1p,
    inverse_func=np.expm1,
)
lasso_cv_score = get_cv_score(x_train, y_train, lasso)
print(lasso_cv_score)

---
#### Ridge

In [None]:
# Ridge regression scored with the shared cross-validation helper.
# Unlike the Lasso cell, no target transform is applied in this cell.
ridge = Ridge(alpha=0.18)
ridge_cv_score = get_cv_score(x_train, y_train, ridge)
print(ridge_cv_score)

---
#### LGBM

In [None]:
%%time
params = {'num_leaves': 7, 'n_estimators': 4262, 'min_sum_hessian_in_leaf': 9, 
          'min_data_in_leaf': 1, 'max_bin': 67, 'learning_rate': 0.01, 
          'feature_fraction': 0.28, 'bagging_freq': 2, 'bagging_fraction': 0.59}
lgbm = LGBMRegressor()
lgbm.set_params(**params)
print(get_cv_score(x_train, y_train, lgbm))

---
#### XGBoost

In [None]:
%%time
params = {'subsample': 0.3, 'n_estimators': 3378, 'min_child_weight': 0, 
          'max_depth': 4, 'learning_rate': 0.009, 'lambda': 120, 'gamma': 79, 
          'colsample_bytree': 0.4, 'alpha': 52}
xgb = XGBRegressor(**params, objective='reg:squarederror')
print(get_cv_score(x_train, y_train, xgb))

---
#### CatBoost

In [None]:
%%time
params = {'n_estimators': 1751, 'learning_rate': 0.05, 'l2_leaf_reg': 5, 'depth': 6}
catboost = CatBoostRegressor(logging_level='Silent', **params)
print(get_cv_score(x_train, y_train, catboost))

---
#### Ensemble

In [None]:
def create_combs(names, models):
    """Pair every combination of two or more models with the matching names.

    Args:
        names: Sequence of labels, one per entry of ``models``.
        models: Sequence of objects to combine, in the same order as ``names``.

    Returns:
        A ``zip`` iterator of ``(name_list, model_list)`` tuples, ordered by
        combination size (pairs first) and, within a size, in
        ``itertools.combinations`` order. The iterator is single-use; wrap it
        in ``list(...)`` to traverse it more than once. It is empty when fewer
        than two entries are supplied.

    Raises:
        ValueError: If ``names`` and ``models`` differ in length. The two lists
            are combined independently, so a mismatch would silently pair
            names with the wrong models.
    """
    if len(names) != len(models):
        raise ValueError(
            'names and models must have the same length, got '
            f'{len(names)} and {len(models)}'
        )

    sizes = range(2, len(names) + 1)
    name_comb = [list(group) for size in sizes
                 for group in combinations(names, size)]
    model_comb = [list(group) for size in sizes
                  for group in combinations(models, size)]
    return zip(name_comb, model_comb)

In [None]:
# %%capture
%%time
from tqdm.notebook import tqdm
import warnings
warnings.filterwarnings('ignore')
models = [ridge, lasso, lgbm, xgb, catboost]
names = ['ridge', 'lasso', 'lgbm', 'xgb', 'catboost']
metamodel = LinearRegression()
combs = create_combs(names, models)
print(type(combs))
best_combination = [1e9, '']
for n, m in tqdm(list(combs)):
    ensemble = SuperLearner(scorer=mean_absolute_error, random_state=0, folds=5)
    ensemble.add(m)
    ensemble.add_meta(metamodel)
    error = get_cv_score(x_train, y_train, ensemble)
    print(n, error)
    if error < best_combination[0]:
        best_combination[0] = error
        best_combination[1] = n
    
print('best combination', best_combination)