```
     _                                     ____             _       _   ____                                    
    / \   _ __ ___   __ _ _______  _ __   / ___|  ___   ___(_) __ _| | |  _ \ _ __ ___   __ _ _ __ ___  ___ ___ 
   / _ \ | '_ ` _ \ / _` |_  / _ \| '_ \  \___ \ / _ \ / __| |/ _` | | | |_) | '__/ _ \ / _` | '__/ _ \/ __/ __|
  / ___ \| | | | | | (_| |/ / (_) | | | |  ___) | (_) | (__| | (_| | | |  __/| | | (_) | (_| | | |  __/\__ \__ \
 /_/   \_\_| |_| |_|\__,_/___\___/|_| |_| |____/ \___/ \___|_|\__,_|_| |_|   |_|  \___/ \__, |_|  \___||___/___/
                                                                                        |___/                   
```

### Module
__VotingRegressor__ combines conceptually different machine learning regressors and returns the average of their predicted values.

### Goal
Investigating the relationship between independent variables or features and a dependent variable or outcome.

### Tools
1. Pandas
2. scikit-learn
3. GradientBoostingRegressor
4. ExtraTreesRegressor
5. VotingRegressor

### Requirement
1. File Definition
2. Data Preparation
3. hotspot_spi.csv generated
 
### Data Source
__${WORKDIR}__/data/output/hotspot_spi.csv

In [None]:
import os
import sys

# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is where the project-local modules
# (functions_regression, load_dataset) live — confirm against the repo layout.
supervised_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)

# Guard the append so that re-running this cell does not keep adding the same
# directory to sys.path.
if supervised_dir not in sys.path:
    sys.path.append(supervised_dir)

# Bare expression: displayed as the cell output in the notebook.
sys.path

In [None]:
import pandas as pd
import numpy as np

import functions_regression as freg
from  load_dataset import LoadDataset, SpiType

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import VotingRegressor

from sklearn.ensemble import RandomForestRegressor

from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split

## Get the data

In [None]:
# Load the regression features (X) and target (y) through the project loader.
load_dataset = LoadDataset()
X, y = load_dataset.return_X_y_regr(spi_type=SpiType.INDICATORS)

# Remember the feature names now: scale() is applied to X next and the names
# are needed later to rebuild a labelled DataFrame.
columns_names = X.columns

# Standardize features and target.
# NOTE(review): scaling happens before the train/test split, so the scaling
# statistics are computed over rows that later end up in the test set.
X, y = scale(X), scale(y)

In [None]:
# Sanity-check the dimensions of the scaled feature matrix and target.
print(f"X.shape: {X.shape} y.shape: {y.shape}")

### Split dataset into train and test sets

In [None]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Report the size of each partition.
print(f"X_train.shape: {X_train.shape} y_train.shape: {y_train.shape}")
print(f"X_test.shape: {X_test.shape} y_test.shape: {y_test.shape}")

## Modeling

### Build, train, and predict with the model

In [None]:
# Base learners. Every estimator gets a fixed seed so that the ensemble is
# reproducible from run to run (the random forest previously had none).
gbr = GradientBoostingRegressor(random_state=1)
etr = ExtraTreesRegressor(random_state=1)
rfr = RandomForestRegressor(random_state=1)

# Ensemble of the three base learners under the names "gbr", "etr" and "rfr".
regressor = VotingRegressor(
    estimators=[
        ("gbr", gbr),
        ("etr", etr),
        ("rfr", rfr),
    ]
)

# Fit on the training split only. Fitting on the full (X, y) would let the
# model see the test rows and make every test-set metric optimistic.
regressor = regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

### Finding the Best Hyperparameters

*Note: The execution of the code below may take a few minutes or hours.*

*Uncomment and run it when you need to optimize hyperparameters.*

In [None]:
# from sklearn.model_selection import (GridSearchCV)
# import warnings

# warnings.filterwarnings('ignore')

# space = dict()
# space['loss'] = ['squared_error', 'absolute_error', 'huber', 'quantile']
# space['criterion'] = ['friedman_mse', 'squared_error', 'mse']
# space['learning_rate'] = [n for n in np.arange(0.01, 1.0, 0.01)]
# space['min_samples_split'] = [n for n in range(5)]
# space['max_depth'] = [n for n in range(10)]
# space['n_estimators'] = [n for n in range(500)]

# gridsearch = GridSearchCV(regressor, param_grid = space, scoring='accuracy', cv=2)
# gridsearch.fit(X_train, y_train)

# print("Tuned Hyperparameters :", gridsearch.best_params_)
# print("Accuracy :",gridsearch.best_score_)

### Model Evaluation

In [None]:
# Hand the fitted ensemble, the full data, both splits and the test-set
# predictions to the project's evaluation helper.
# NOTE(review): which metrics it reports is defined in functions_regression,
# not visible here.
freg.evaluate_model(regressor, X, y, X_train, y_train, X_test, y_test, y_pred)

#### Plot Error Iterations

In [None]:
# Arguments for the training-deviance plot.
# The number of boosting iterations is read from the fitted gradient-boosting
# member of the ensemble. The previous code read params["n_estimators"] while
# `params` itself was still being defined, which raises NameError on a fresh
# kernel.
# NOTE(review): the helper receives the whole VotingRegressor as "model";
# confirm that plot_training_deviance supports an ensemble rather than
# requiring the gradient-boosting estimator itself.
params = {
    "model": regressor,
    "n_estimators": regressor.named_estimators_["gbr"].n_estimators,
    "X_test": X_test,
    "y_test": y_test,
    "y_pred": y_pred,
}

freg.plot_training_deviance(**params)

In [None]:
# Arguments for the feature-importance plot. The scaled feature matrix is
# wrapped back into a DataFrame so the original column names are available.
params = dict(
    model=regressor,
    dataset=pd.DataFrame(X, columns=columns_names),
    X_test=X_test,
    y_test=y_test,
)

freg.plot_feature_importance(**params)

In [None]:
# from sklearn.metrics import mean_squared_error
# for x in range(150, 1200, 50):
#     _ = regressor.set_params(n_estimators=x, warm_start=True)
#     _ = regressor.fit(X_train, y_train)
#     xb = mean_squared_error(y_test, regressor.predict(X_test))
#     print("x={}, {:.4f}".format(x, xb))