# Baseline models

In [2]:
from statsmodels.tsa.ar_model import AutoReg, ar_select_order
from pmdarima.arima import auto_arima
from arch.univariate import arch_model

### AR model

In [31]:
def train_loop_ar(path,num_sim,str_sims,plot_res=False):
    """Fit an autoregressive baseline on each simulated dataset and score it.

    For every name in ``str_sims`` the archive
    ``path + 'Temp/sim<num_sim>_<name>.npz'`` is loaded and its ``y_train``
    and ``y_test`` arrays are joined along axis 0. The AR lag set is chosen
    once, by BIC with ``maxlag=10``, on the training part. The model is then
    re-fitted on an expanding window and asked for a one-step-ahead forecast
    of each test observation.

    Parameters
    ----------
    path : str
        Base directory. The file name is appended by plain string
        concatenation, so the value must already end with a path separator.
    num_sim
        Simulation identifier; converted with ``str`` to build the file name.
    str_sims : sequence of str
        Dataset names, one archive per entry.
    plot_res : bool, optional
        When true, plot true values against predictions for each dataset.

    Returns
    -------
    pandas.DataFrame
        One row per dataset with the columns ``rmse``, ``mae``, ``mape`` and
        ``r2`` taken from the result of ``calculate_metrics``.
    """
    metric_cols = ['rmse','mae','mape','r2']
    df_sim = pd.DataFrame()
    n_sets = len(str_sims)

    # loop over all datasets in the simulation setup
    for j, sim_name in enumerate(str_sims):

        # progress message
        print('Dataset: ', sim_name, '(',j+1,'/',n_sets,')')

        # load data
        data_name = 'sim'+str(num_sim)+'_'+sim_name
        with np.load(path+'Temp/'+data_name+'.npz') as data:
            y_train = data['y_train']
            y_test = data['y_test']

        # concatenate series into a single array
        train_size = y_train.shape[0]
        test_size = y_test.shape[0]
        y = np.concatenate((y_train,y_test),axis=0)

        # expanding window forecast: at step i the model sees the training
        # data plus the first i test observations
        pred = []
        ar_lags = None
        for i in range(test_size):
            n_obs = train_size + i
            window = y[:n_obs]
            if i == 0:
                # select the lag set once, on the training data only
                sel = ar_select_order(window, maxlag=10, ic='bic')
                ar_lags = sel.ar_lags
                res = sel.model.fit()
            else:
                res = AutoReg(window, lags=ar_lags).fit()
            # The first out-of-sample observation of a model fitted on n_obs
            # points has index n_obs. The previous code asked for index
            # test_size + i, which is the next point only when the train and
            # test sets happen to have the same length.
            pred.append(res.predict(start=n_obs, end=n_obs))

        # evaluate on test set (helpers are defined elsewhere in the notebook)
        df_result = format_predictions(pred, y_test)
        result_metrics = calculate_metrics(df_result)

        # plot results
        if plot_res:
            plt.figure(figsize=(15,5))
            plt.plot(df_result.value, label="True")
            plt.plot(df_result.prediction, label="Prediction")
            plt.legend()
            plt.title("Test Set")
            plt.show()

        # append metrics on test set as a single row
        row = np.expand_dims(tuple(result_metrics[c] for c in metric_cols),axis=0)
        df_metrics = pd.DataFrame(row,columns=metric_cols)
        df_sim = pd.concat([df_sim,df_metrics],axis=0, ignore_index=True)

    # return results
    return df_sim

### ARIMA model

In [None]:
def train_loop_arima(path,num_sim,str_sims):
    """Score an automatically selected ARIMA baseline on each dataset.

    For every name in ``str_sims`` the archive
    ``path + 'Temp/sim<num_sim>_<name>.npz'`` is read and its ``y_train`` and
    ``y_test`` arrays are stacked along axis 0. ``auto_arima`` picks the
    model once on the training part (BIC, ADF test, non-seasonal, stepwise).
    Before each later forecast the model is updated with the single test
    observation that has just become available, then asked for a
    one-period-ahead prediction.

    Parameters
    ----------
    path : str
        Base directory; the file name is appended by string concatenation.
    num_sim
        Simulation identifier, converted with ``str`` for the file name.
    str_sims : sequence of str
        Dataset names, one archive per entry.

    Returns
    -------
    pandas.DataFrame
        One row per dataset with the columns ``rmse``, ``mae``, ``mape`` and
        ``r2`` read from the result of ``calculate_metrics``.
    """
    metric_names = ['rmse','mae','mape','r2']
    all_metrics = pd.DataFrame()
    total = len(str_sims)

    for idx in range(total):
        sim_name = str_sims[idx]

        # progress message
        print('Dataset: ', sim_name, '(',idx+1,'/',total,')')

        # read the train and test arrays from the archive
        file_stem = 'sim'+str(num_sim)+'_'+sim_name
        with np.load(path+'Temp/'+file_stem+'.npz') as archive:
            y_train = archive['y_train']
            y_test = archive['y_test']

        # one contiguous series; the last n_test entries are the test set
        n_test = y_test.shape[0]
        series = np.concatenate((y_train,y_test),axis=0)
        n_train = series.shape[0] - n_test

        # expanding window, one-step-ahead forecasts
        forecasts = []
        for step in range(n_test):
            if step == 0:
                # model selection happens once, on the training part only
                model = auto_arima(
                    series[:n_train],
                    start_p=1,
                    start_q=1,
                    max_p=5,
                    max_q=5,
                    d=None,
                    seasonal=False,
                    information_criterion='bic',
                    test='adf',
                    trace=False,
                    suppress_warnings=True,
                    stepwise=True,
                )
            else:
                # feed the observation that preceded the point being forecast
                newest = series[n_train+step-1:n_train+step]
                model.update(np.reshape(newest,(1,)))
            forecasts.append(model.predict(n_periods=1))

        # evaluate on test set (helpers are defined elsewhere in the notebook)
        df_result = format_predictions(forecasts, y_test)
        scores = calculate_metrics(df_result)

        # add this dataset's metrics as a new row
        row = np.expand_dims(tuple(scores[name] for name in metric_names),axis=0)
        all_metrics = pd.concat(
            [all_metrics, pd.DataFrame(row,columns=metric_names)],
            axis=0,
            ignore_index=True,
        )

    return all_metrics

### GARCH model

In [None]:
def train_loop_garch(path,num_sim,str_sims):
    """Score a constant-mean GARCH(p, q) baseline on each simulated dataset.

    For every name in ``str_sims`` the archive
    ``path + 'Temp/sim<num_sim>_<name>.npz'`` is loaded and its ``y_train``
    and ``y_test`` arrays are joined along axis 0. The orders ``p`` and ``q``
    are chosen once by a grid search over 1..5 x 1..5 on the training part,
    keeping the fit with the smallest BIC. That fit produces the first
    forecast. For every later test observation the model is re-fitted with
    the selected orders on an expanding window.

    Parameters
    ----------
    path : str
        Base directory; the file name is appended by string concatenation.
    num_sim
        Simulation identifier, converted with ``str`` for the file name.
    str_sims : sequence of str
        Dataset names, one archive per entry.

    Returns
    -------
    pandas.DataFrame
        One row per dataset with the columns ``rmse``, ``mae``, ``mape`` and
        ``r2`` read from the result of ``calculate_metrics``.
    """
    metric_cols = ['rmse','mae','mape','r2']
    # same (p, q) ordering as itertools.product([1..5], [1..5])
    param_list = [(p, q) for p in range(1, 6) for q in range(1, 6)]

    df_sim = pd.DataFrame()
    n_sets = len(str_sims)

    # loop over all datasets in the simulation setup
    for j, sim_name in enumerate(str_sims):

        # progress message
        print('Dataset: ', sim_name, '(',j+1,'/',n_sets,')')

        # load data
        data_name = 'sim'+str(num_sim)+'_'+sim_name
        with np.load(path+'Temp/'+data_name+'.npz') as data:
            y_train = data['y_train']
            y_test = data['y_test']

        # concatenate series into a single array
        train_size = y_train.shape[0]
        test_size = y_test.shape[0]
        y = np.concatenate((y_train,y_test),axis=0)

        # expanding window forecast
        pred = []
        best_p, best_q = param_list[0]
        for i in range(test_size):
            window = y[:train_size+i]
            if i == 0:
                # Grid search on the training data. Every fit is kept so the
                # BIC-minimal one can produce the first forecast; previously
                # the last grid entry (p=5, q=5) was used for that step.
                fits = [
                    arch_model(window, mean='Constant', vol='Garch', p=p, q=q, dist='Normal', rescale=False).fit(disp="off")
                    for p, q in param_list
                ]
                bic_min_ind = int(np.argmin([fit.bic for fit in fits]))
                best_p, best_q = param_list[bic_min_ind]
                model = fits[bic_min_ind]
            else:
                # rescale=False matches the setting used during order selection
                garch = arch_model(window, mean='Constant', vol='Garch', p=best_p, q=best_q, dist='Normal', rescale=False)
                model = garch.fit(disp="off")

            # compute the one-step forecast once and read both moments from it
            fc = model.forecast(horizon=1, reindex=True)
            mean_next = fc.mean['h.1'].iloc[-1]
            var_next = fc.variance['h.1'].iloc[-1]
            # NOTE(review): the point forecast is the conditional mean plus
            # one conditional standard deviation -- kept as in the original;
            # confirm this is the intended target.
            pred.append(mean_next+np.sqrt(var_next))

        # evaluate on test set (helpers are defined elsewhere in the notebook)
        df_result = format_predictions(np.expand_dims(np.array(pred),axis=1), y_test)
        result_metrics = calculate_metrics(df_result)

        # append metrics on test set as a single row
        row = np.expand_dims(tuple(result_metrics[c] for c in metric_cols),axis=0)
        df_metrics = pd.DataFrame(row,columns=metric_cols)
        df_sim = pd.concat([df_sim,df_metrics],axis=0, ignore_index=True)

    return df_sim