In [7]:
import json
from IPython.display import clear_output
import time
import warnings
import os

from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn.model_selection import ParameterGrid
from tqdm import tnrange, tqdm_notebook
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams

import fit_predict_models as fpm
import utils as ut

# Suppress every warning for the rest of the session (no category filter is given,
# so this also hides any warnings raised by the libraries used below).
warnings.filterwarnings('ignore')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size as (width, height).
rcParams['figure.figsize'] = 15, 6

# Load IPython's autoreload extension ...
%load_ext autoreload

# ... and use mode 2 so edits to imported modules are picked up without restarting the kernel.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
def do_grid_search(predicted,real,test_size,val_size):
    """Grid-search SVR hyper-parameters and the time window of the additive hybrid model.

    For every combination in the grid (5 * 4 * 3 * 3 * 5 = 900 candidates) a fresh
    ``SVR`` is built and handed to ``fpm.additive_hybrid_model``; the ``'MSE'`` entry
    of the returned result is used as the score and the lowest score wins.

    Parameters
    ----------
    predicted, real, test_size, val_size
        Forwarded unchanged to ``fpm.additive_hybrid_model``.

    Returns
    -------
    dict
        ``{'best_result': {'time_window': ..., 'MSE': ...}, 'model': SVR}``.
        With an empty grid the initial placeholders are returned
        (``time_window`` 0, ``MSE`` ``None``, ``model`` ``None``).
    """
    parameters = {
                  'c': [0.001, 0.01, 10, 100, 1000],
                  'gamma': [0.1, 0.01, 0.001, 0.0001],
                  'epsilon': [0.1, 0.01, 0.001],
                  'tol': [0.01, 0.001, 0.0001],
                  'time_window':[2,5,10,15,20]
                 }

    best_model = None
    metric = 'MSE'
    best_result = {'time_window':0,metric:None}
    # Scores are requested with the "val_result" option of the project's utils module.
    result_type = ut.result_options.val_result

    list_params=list(ParameterGrid(parameters))

    for params in tqdm_notebook(list_params,desc='GridSearch'):

        forecaster = SVR(C=params['c'], gamma=params['gamma'],
                         epsilon=params['epsilon'], tol=params['tol'], max_iter=100000)

        result = fpm.additive_hybrid_model(predicted,real,
                                           time_window=params['time_window'],
                                           base_model=forecaster,
                                           test_size=test_size,
                                           val_size=val_size,
                                           result_options=result_type,
                                           title='None',
                                           type_data=0)[metric]

        current_best = best_result[metric]
        no_best_yet = current_best is None
        # NaN is the only value that is not equal to itself. Without this check a NaN
        # stored as the first score could never be displaced, because every
        # comparison against NaN evaluates to False.
        best_is_nan = (not no_best_yet) and current_best != current_best
        result_is_nan = result != result

        if no_best_yet or current_best > result or (best_is_nan and not result_is_nan):
            best_model = forecaster
            best_result[metric] = result
            best_result['time_window'] = params['time_window']

    result_model = {'best_result': best_result, 'model': best_model}
    return result_model

In [9]:
def run_all_bases(base_model, data):
    """Run the grid search and the final saving pass for every dataset in ``data``.

    For each entry the workbook at ``entry['path_data']`` (sheet ``'Planilha1'``) is
    read, the ``'Target'`` column is taken as the real series and the column named
    ``base_model`` as the base forecast. ``do_grid_search`` picks the SVR and time
    window, then ``fpm.additive_hybrid_model`` is called once more with
    ``ut.result_options.save_result`` and a title inside
    ``./models_pkl/<type_data>-zhang_<base_model>_SVR/``.

    Parameters
    ----------
    base_model : str
        Name of the workbook column holding the base forecasts; also embedded in
        the output directory name.
    data : iterable of dict
        Each entry must provide ``'path_data'``, ``'name'``, ``'test_size'``,
        ``'val_size'`` and ``'type_data'``.

    Returns
    -------
    None
    """
    for i in data:
        # Wipes the cell output, including what the previous dataset printed.
        clear_output()
        # Context manager so the workbook handle is closed as soon as it is parsed.
        with open(i['path_data'], 'rb') as workbook:
            base_actual = pd.read_excel(workbook, sheet_name='Planilha1')

        real = base_actual['Target'].values

        predicted = base_actual[base_model].values

        data_title = 'zhang_'+str(base_model)+'_SVR'

        print(i['name'])
        test_size=i['test_size']
        val_size= i['val_size']
        type_data = i['type_data']
        gs_result = do_grid_search(predicted=predicted,real=real,
                                   test_size=test_size,val_size=val_size)
        print(gs_result)
        # makedirs(exist_ok=True) creates a missing ./models_pkl parent and tolerates
        # re-runs; os.mkdir would raise here, after the whole grid search has
        # already been paid for.
        os.makedirs(f'./models_pkl/{type_data}-{data_title}', exist_ok=True)
        title_temp = f'{type_data}-{data_title}/{type_data}-{data_title}'

        fpm.additive_hybrid_model(predicted=predicted,real=real,
                                 time_window=gs_result['best_result']['time_window'],
                                 base_model=gs_result['model'],test_size=test_size,
                                 val_size=val_size, result_options=ut.result_options.save_result,
                                 title=f'./models_pkl/{title_temp}', type_data=type_data)


In [10]:
# Load the per-dataset settings consumed by run_all_bases. The encoding is pinned to
# UTF-8 so decoding does not depend on the platform's locale default.
with open('./models_configuration_60_20_20.json', encoding='utf-8') as f:
    data = json.load(f)
    
# Grid-search and save the hybrid model built on the 'ARIMA' column of every dataset.
run_all_bases('ARIMA', data)

recife


HBox(children=(IntProgress(value=0, description='GridSearch', max=900, style=ProgressStyle(description_width='…


{'best_result': {'time_window': 15, 'MSE': 0.03705198544054392}, 'model': SVR(C=10, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.01,
    kernel='rbf', max_iter=100000, shrinking=True, tol=0.01, verbose=False)}


In [None]:
# Same pipeline over the same dataset list, using the 'ARIMAX' column as the base forecast.
run_all_bases(base_model='ARIMAX', data=data)

fortaleza


HBox(children=(IntProgress(value=0, description='GridSearch', max=900, style=ProgressStyle(description_width='…