## Importing Libraries

In [1]:
import pandas as pd

from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error, r2_score

## Configurations

In [2]:
# reproducibility / cross-validation settings
SEED = 42   # seed passed to every estimator for reproducibility
FOLDS = 5   # number of cross-validation folds

# locations of the processed train and test CSV files
train_path = "data/processed/train_processed.csv"
test_path = "data/processed/test_processed.csv"

In [3]:
# read the train and test CSV files (in that order) into DataFrames
df_train, df_test = map(pd.read_csv, (train_path, test_path))

In [4]:
# split each frame into features (every column except 'MedHouseVal')
# and the target column 'MedHouseVal'
X_train, y_train = df_train.drop(columns='MedHouseVal'), df_train['MedHouseVal']
X_test, y_test = df_test.drop(columns='MedHouseVal'), df_test['MedHouseVal']

## Model Evaluation

### Evaluating the ensemble of tuned CatBoostRegressor, XGBRegressor and LGBMRegressor

In [5]:
# hyperparameters for the CatBoostRegressor base learner
catboost_params = dict(
    iterations=1485,
    learning_rate=0.09566208338756467,
    depth=8,
    l2_leaf_reg=4,
    border_count=149,
    random_strength=0.3221392635228121,
    bagging_temperature=0.16171642890213989,
)

# seeded with SEED; verbose=False turns off CatBoost's training log
tuned_CBR = CatBoostRegressor(
    random_state=SEED,
    verbose=False,
    **catboost_params,
)

In [6]:
# hyperparameters for the XGBRegressor base learner
xgb_params = dict(
    n_estimators=535,
    learning_rate=0.057582817878155085,
    max_depth=10,
    min_child_weight=10,
    subsample=0.7124624923071757,
    colsample_bytree=0.7561466714445289,
    gamma=0.0433184458126587,
    reg_alpha=0.934442472471591,
    reg_lambda=2.07724971668887,
)

# seeded with SEED for reproducibility
tuned_XGB = XGBRegressor(random_state=SEED, **xgb_params)

In [7]:
# hyperparameters for the LGBMRegressor base learner
# NOTE(review): `subsample` is set but `subsample_freq` is not; LightGBM only
# performs bagging when the bagging frequency is > 0 -- confirm whether row
# subsampling was intended to be active here.
lgbm_params = dict(
    n_estimators=214,
    learning_rate=0.10979784611692994,
    max_depth=9,
    num_leaves=75,
    min_child_samples=33,
    subsample=0.939496220559003,
    colsample_bytree=0.8775803119044167,
    reg_alpha=0.8615502636228614,
    reg_lambda=0.8363793385956172,
)

# seeded with SEED; verbose=-1 is passed to quieten LightGBM's logging
tuned_LGBM = LGBMRegressor(
    random_state=SEED,
    verbose=-1,
    **lgbm_params,
)

In [8]:
# stacking ensemble: the three tuned boosters are the base estimators and
# Ridge is the final estimator, with FOLDS-fold cross-validation
ensemble = StackingRegressor(
    cv=FOLDS,
    final_estimator=Ridge(),
    estimators=[
        ('catboost', tuned_CBR),
        ('lgbm', tuned_LGBM),
        ('xgb', tuned_XGB),
    ],
)

In [9]:
# fit the stacked ensemble on the training split, then predict the test split
ensemble.fit(X_train, y_train)
y_pred = ensemble.predict(X_test)

# score the test predictions with each regression metric, keyed by display name
ensemble_results = {
    metric_name: metric_fn(y_test, y_pred)
    for metric_name, metric_fn in (
        ('MSE', mean_squared_error),
        ('MAE', mean_absolute_error),
        ('RMSE', root_mean_squared_error),
        ('R2 Score', r2_score),
    )
}

# single-row frame with one column per metric
ensemble_results_df = pd.DataFrame([ensemble_results])

NameError: name 'X_val' is not defined

In [None]:
# display the one-row DataFrame of the ensemble's test-set metrics
# (columns: MSE, MAE, RMSE, R2 Score)
ensemble_results_df