# Emsemble methods to predict $ET_0$
- Random Forest
- Gradient Boosting

Libs required

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import warnings
from sklearn import metrics, ensemble
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import StandardScaler, Normalizer

warnings.filterwarnings("ignore")

In [2]:
X_treino = pd.read_csv('X_treino.csv', index_col=['data'], parse_dates=['data'])
y_treino = pd.read_csv('y_treino.csv', index_col=['data'], parse_dates=['data'])
X_teste = pd.read_csv('X_teste.csv', index_col=['data'], parse_dates=['data'])
y_teste = pd.read_csv('y_teste.csv', index_col=['data'], parse_dates=['data'])

Grid Search for Random Forest

In [4]:
model_rf = ensemble.RandomForestRegressor(random_state=42)
n_estimators = [20, 50, 100, 150, 200, 250]
max_depth = [3, 4, 5, 6, 7, 8, 9, 10]

param_grid = {'n_estimators': n_estimators, 
              'max_depth': max_depth}

gs = GridSearchCV(estimator = model_rf, 
                  param_grid = param_grid, 
                  scoring = 'neg_mean_squared_error', 
                  cv=10)

gs = gs.fit(X_treino, y_treino)
print(gs.best_params_)

{'max_depth': 10, 'n_estimators': 150}


Grid Search for Gradient Boosting

In [7]:
model_gb = ensemble.GradientBoostingRegressor(random_state=42)
n_estimators = [20, 50, 100, 150, 200, 250]
max_depth = [3, 4, 5, 6, 7, 8, 9, 10]
learning_rate = [ 0.1, 0.05, 0.01, 0.001]

param_grid = {'n_estimators': n_estimators, 
              'max_depth': max_depth, 
              'learning_rate': learning_rate}

gs = GridSearchCV(estimator = model_gb, 
                  param_grid = param_grid, 
                  scoring = 'neg_mean_squared_error', 
                  cv = 10)
gs = gs.fit(X_treino, y_treino)

print(gs.best_params_)

{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 250}


Prediction Models

In [9]:
models = {}
models['Random_Forest'] = {'model': ensemble.RandomForestRegressor(max_depth=10, 
                                                        n_estimators=150,
                                                        random_state=42)}
models['Gradient_Boosting'] = {'model': ensemble.GradientBoostingRegressor(learning_rate=0.1, 
                                                            max_depth=3, 
                                                            n_estimators=250, 
                                                            random_state=42)}

In [10]:
for key in models:
    print('processando {}...'.format(key))
    models[key]['model'].fit(X_treino, y_treino)
    y_pred = models[key]['model'].predict(X_teste)
    models[key]['rmse'] = np.sqrt(metrics.mean_squared_error(y_teste, y_pred))
    models[key]['mae'] = np.sqrt(metrics.mean_absolute_error(y_teste, y_pred))

processando Random_Forest...
processando Gradient_Boosting...


In [11]:
df = pd.DataFrame(models)
df_T = df.transpose()
df_T[['rmse', 'mae']]

Unnamed: 0,rmse,mae
Random_Forest,0.00788666,0.0795553
Gradient_Boosting,0.00838521,0.082527
