In [33]:
import json
from IPython.display import clear_output
import time
import warnings
import os
import glob
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn.model_selection import ParameterGrid
from tqdm import tnrange, tqdm_notebook
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams

import fit_predict_models as fpm
import utils as ut

warnings.filterwarnings('ignore')
%matplotlib inline
rcParams['figure.figsize'] = 15, 6

%load_ext autoreload

%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
def do_grid_search(linear_forecast,real,nonlinear_forecast,test_size,val_size):

    parameters = {
                  'c': [0.001, 0.01, 10, 100, 1000], 
                  'gamma': [0.1, 0.01, 0.001, 0.0001],
                  'epsilon': [0.1, 0.01, 0.001], 
                  'tol': [0.01, 0.001, 0.0001],
                  'time_window':[2,5,10,15,20]
                 }

    best_model = None
    metric = 'MSE'
    best_result = {'time_window':0,metric:None}
    result_type = ut.result_options.val_result

    list_params=list(ParameterGrid(parameters))
    
    for params in tqdm_notebook(list_params,desc='GridSearch'):
        
        result = None

        forecaster = SVR(C=params['c'], gamma=params['gamma'], 
                         epsilon=params['epsilon'], tol=params['tol'],
                         max_iter=100000)
 
        result = fpm.nolic_model(linear_forecast=linear_forecast, 
                             nonlinear_forecast=nonlinear_forecast, 
                             real=real, 
                             time_window=1, 
                             base_model=forecaster, 
                             test_size=test_size,
                             val_size=val_size,
                             title='nolic_model', 
                             result_options=result_type,
                             type_data=0)[metric]
                        

        if best_result[metric] == None:
            best_model = forecaster
            best_result[metric] = result
            best_result['time_window'] = params['time_window']
        else:

            if best_result[metric] > result:
                best_model = forecaster
                best_result[metric] = result
                best_result['time_window'] = params['time_window']

    result_model = {'best_result': best_result, 'model': best_model}
    return result_model

In [38]:
def run_all_bases(base_model, error_model, data):
    for i in data:
        print(i['name'])
        test_size = i['test_size']
        val_size = i['val_size']
        type_data = i['type_data']
        
        model_path = str(type_data)+'-zhang_'+base_model+'_'+error_model
        model_path = './models_pkl/'+model_path
        
        model_execs = glob.glob(model_path+'/*')
        result, _, hybrid_arima = ut.do_computations(i, model_execs, 
                                             'MSE', 'test_metrics')
        print(result)
        base_actual = pd.read_excel(open(i['path_data']
                                      , 'rb'), sheet_name='Planilha1')
        
        real = base_actual['Target'].values

        predicted = base_actual[base_model].values

        error_forecaster_tw = len(predicted) - len(hybrid_arima['predicted_values'])

        linear_forecast = predicted[error_forecaster_tw:]

        nonlinear_forecast = hybrid_arima['predicted_values']
        real = real[error_forecaster_tw:]
        data_title = 'nolic_'+base_model+'_'+error_model+'_SVR'

        gs_result = do_grid_search(linear_forecast=linear_forecast,
                                   real=real,
                                   nonlinear_forecast=nonlinear_forecast,
                                   test_size=test_size,
                                   val_size=val_size
                                  )

        print(gs_result)
        os.mkdir(f'./models_pkl/{type_data}-{data_title}')
        title_temp = f'{type_data}-{data_title}/{type_data}-{data_title}'  

        fpm.nolic_model(linear_forecast=linear_forecast, 
                        nonlinear_forecast=nonlinear_forecast, 
                        real=real, 
                        time_window=gs_result['best_result']['time_window'], 
                        base_model=gs_result['model'], 
                        test_size=test_size,
                        val_size=val_size,
                        title=f'./models_pkl/{title_temp}', 
                        result_options=ut.result_options.save_result,
                        type_data=type_data)


In [39]:
with open('./models_configuration_60_20_20.json') as f:
    data = json.load(f)

In [40]:
run_all_bases('ARIMA', 'SVR', data)

fortaleza
        MSE       MAPE       ARV     theil       MAE
0  0.132765  10.026068  0.351027  0.949472  0.283099


HBox(children=(IntProgress(value=0, description='GridSearch', max=900, style=ProgressStyle(description_width='…


{'best_result': {'time_window': 2, 'MSE': 0.108366210343824}, 'model': SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.001,
    kernel='rbf', max_iter=100000, shrinking=True, tol=0.0001, verbose=False)}


TypeError: nolic_model() got an unexpected keyword argument 'normalize'

In [None]:
run_all_bases('ARIMAX', 'SVR', data)

In [None]:
run_all_bases('ARIMA', 'MLP', data)

In [None]:
run_all_bases('ARIMAX', 'MLP', data)