In [18]:
import json
from IPython.display import clear_output
import time
import warnings
import os

from sklearn.neural_network import MLPRegressor
from sklearn import preprocessing
from sklearn.model_selection import ParameterGrid
from tqdm import tnrange, tqdm_notebook
import pandas as pd
import numpy as np


import fit_predict_models as fpm
import utils as ut

# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so warnings raised by the libraries
# used below (e.g. while fitting models) are hidden as well — consider
# narrowing it to specific categories.
warnings.filterwarnings('ignore')

%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
def do_grid_search(predicted, real, test_size, val_size,
                   parameters=None, n_repeats=10, metric='MSE'):
    """Grid-search the MLP settings and time window of the additive hybrid model.

    Each combination of the grid is scored by calling
    ``fpm.additive_hybrid_model`` ``n_repeats`` times with
    ``ut.result_options.val_result`` and averaging the ``metric`` entry of the
    returned results. The combination with the lowest average is kept; ties
    keep the combination that was evaluated first.

    Parameters
    ----------
    predicted, real
        Forwarded unchanged to ``fpm.additive_hybrid_model``.
    test_size, val_size
        Forwarded unchanged to ``fpm.additive_hybrid_model``.
    parameters : dict, optional
        Grid handed to ``ParameterGrid``. It must provide the keys
        ``'hidden_layer_sizes'``, ``'max_iter'``, ``'learning_rate_init'`` and
        ``'time_window'``. When omitted, the grid that used to be hard-coded
        in this function is used.
    n_repeats : int, default 10
        Number of evaluations averaged per combination.
    metric : str, default 'MSE'
        Key read from each result of ``fpm.additive_hybrid_model``; lower is
        treated as better.

    Returns
    -------
    dict
        ``{'best_result': {'time_window': ..., metric: ...}, 'model': ...}``
        where ``'model'`` is the ``MLPRegressor`` instance that was passed as
        ``base_model`` for the winning combination (``None`` when the grid is
        empty).

    Raises
    ------
    ValueError
        If ``n_repeats`` is smaller than 1.
    """
    if n_repeats < 1:
        raise ValueError('n_repeats must be at least 1')

    if parameters is None:
        # NOTE(review): per the scikit-learn documentation, learning_rate_init
        # is only used by the 'sgd' and 'adam' solvers; with solver='lbfgs'
        # below this dimension likely just repeats the same fit — confirm
        # before trimming the grid.
        parameters = {'hidden_layer_sizes': [2, 5, 10, 15, 20, 50],
                      'max_iter': [1000],
                      'learning_rate_init': [0.1, 0.9, 0.001, 0.0001],
                      'time_window': [2, 5, 10, 15, 20]
                      }

    result_type = ut.result_options.val_result
    best_model = None
    best_result = {'time_window': 0, metric: None}

    list_params = list(ParameterGrid(parameters))

    for params in tqdm_notebook(list_params, desc='GridSearch'):

        forecaster = MLPRegressor(activation='logistic', alpha=0.0001,
                                  batch_size='auto', beta_1=0.9,
                                  beta_2=0.999, early_stopping=False, epsilon=1e-08,
                                  hidden_layer_sizes=(params['hidden_layer_sizes'],),
                                  learning_rate='constant',
                                  learning_rate_init=params['learning_rate_init'],
                                  max_iter=params['max_iter'], momentum=0.9,
                                  nesterovs_momentum=True, power_t=0.5,
                                  random_state=None,
                                  shuffle=False, solver='lbfgs', tol=0.00000001,
                                  validation_fraction=0.1,
                                  verbose=False, warm_start=False)

        # random_state=None makes single runs non-deterministic, hence the
        # average over several evaluations of the same configuration.
        scores = [
            fpm.additive_hybrid_model(predicted, real,
                                      time_window=params['time_window'],
                                      base_model=forecaster,
                                      test_size=test_size,
                                      val_size=val_size,
                                      result_options=result_type,
                                      title='None',
                                      type_data=0)[metric]
            for _ in range(n_repeats)
        ]
        result = np.mean(np.array(scores))

        current_best = best_result[metric]
        # The first combination is always recorded. Afterwards a NaN score can
        # never win, and a recorded NaN is replaced by the first real score:
        # a plain ``current_best > result`` is always False once NaN is stored,
        # which would freeze the search on that combination.
        is_better = current_best is None or (
            not np.isnan(result)
            and (np.isnan(current_best) or result < current_best)
        )
        if is_better:
            best_model = forecaster
            best_result[metric] = result
            best_result['time_window'] = params['time_window']

    return {'best_result': best_result, 'model': best_model}

In [30]:
def run_all_bases(base_model, data, n_runs=30):
    """Grid-search and then repeatedly run the hybrid model for every dataset.

    For each entry of ``data`` the spreadsheet at ``entry['path_data']`` is
    read (sheet ``'Planilha1'``), the ``'Target'`` column is used as the real
    series and the ``base_model`` column as the base forecast.
    ``do_grid_search`` picks the time window and model, and
    ``fpm.additive_hybrid_model`` is then called ``n_runs`` times with
    ``ut.result_options.save_result``, using a title under
    ``./models_pkl/<type_data>-zhang_<base_model>_MLP/``.

    Parameters
    ----------
    base_model : str
        Name of the spreadsheet column holding the base forecast
        (e.g. ``'ARIMA'``); also embedded in the output title.
    data : iterable of dict
        One mapping per dataset with the keys ``'path_data'``, ``'name'``,
        ``'test_size'``, ``'val_size'`` and ``'type_data'``.
    n_runs : int, default 30
        Number of ``save_result`` runs per dataset.

    Returns
    -------
    None
    """
    for base in data:
        # Wipe the previous dataset's cell output so only the current one shows.
        clear_output()

        # Context manager so the spreadsheet handle is closed after parsing.
        with open(base['path_data'], 'rb') as workbook:
            base_actual = pd.read_excel(workbook, sheet_name='Planilha1')

        real = base_actual['Target'].values
        predicted = base_actual[base_model].values

        data_title = 'zhang_' + str(base_model) + '_MLP'

        print(base['name'])
        test_size = base['test_size']
        val_size = base['val_size']
        type_data = base['type_data']

        gs_result = do_grid_search(predicted=predicted, real=real,
                                   test_size=test_size, val_size=val_size)
        print(gs_result)

        run_name = f'{type_data}-{data_title}'
        # makedirs + exist_ok: re-running the notebook, or a missing
        # ./models_pkl parent, no longer aborts with an OSError.
        os.makedirs(f'./models_pkl/{run_name}', exist_ok=True)
        title_temp = f'{run_name}/{run_name}'

        best_window = gs_result['best_result']['time_window']
        best_model = gs_result['model']

        # Loop variable is intentionally unused (the original reused ``i`` and
        # shadowed the dataset entry of the outer loop).
        for _ in range(n_runs):
            fpm.additive_hybrid_model(predicted=predicted, real=real,
                                      time_window=best_window,
                                      base_model=best_model, test_size=test_size,
                                      val_size=val_size,
                                      result_options=ut.result_options.save_result,
                                      title=f'./models_pkl/{title_temp}',
                                      type_data=type_data)
            # NOTE(review): one-second pause kept from the original; presumably
            # it spaces out the saved results — confirm against fpm.
            time.sleep(1)

In [31]:
# Load the list of dataset descriptors consumed by run_all_bases; each entry
# is read there for 'path_data', 'name', 'test_size', 'val_size' and
# 'type_data'.
# NOTE(review): the "60_20_20" in the file name presumably refers to the
# train/validation/test split — confirm.
with open('./models_configuration_60_20_20.json') as f:
    data = json.load(f)
    
# Run the full pipeline using the 'ARIMA' spreadsheet column as base forecast.
run_all_bases('ARIMA', data)

recife


HBox(children=(IntProgress(value=0, description='GridSearch', max=1, style=ProgressStyle(description_width='in…

{'best_result': {'time_window': 15, 'MSE': 0.7106812611954381}, 'model': MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(2,), learning_rate='constant',
             learning_rate_init=0.01, max_iter=1000, momentum=0.9,
             n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
             random_state=None, shuffle=False, solver='lbfgs', tol=1e-08,
             validation_fraction=0.1, verbose=False, warm_start=False)}


In [32]:
# Same pipeline, now with the 'ARIMAX' spreadsheet column as base forecast;
# relies on `data` loaded by the earlier configuration cell.
run_all_bases('ARIMAX', data)

recife


HBox(children=(IntProgress(value=0, description='GridSearch', max=1, style=ProgressStyle(description_width='in…

{'best_result': {'time_window': 15, 'MSE': 0.46972471473208843}, 'model': MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto', beta_1=0.9,
             beta_2=0.999, early_stopping=False, epsilon=1e-08,
             hidden_layer_sizes=(2,), learning_rate='constant',
             learning_rate_init=0.01, max_iter=1000, momentum=0.9,
             n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
             random_state=None, shuffle=False, solver='lbfgs', tol=1e-08,
             validation_fraction=0.1, verbose=False, warm_start=False)}
