# AR model

In [2]:
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

In [31]:
def _expanding_window_ar_forecasts(y, test_size):
    """Produce one-step-ahead AR forecasts for the last ``test_size`` points of ``y``.

    The lag set is selected once (BIC, up to 10 lags) on the data preceding
    the first test point; the model is then re-fitted with those lags on an
    expanding window that grows by one observation per step.

    Returns a list with one ``predict`` result per test point.
    """
    first_test_idx = y.shape[0] - test_size
    ar_lags = None
    pred = []
    for step in range(test_size):
        # Everything strictly before the observation being forecast.
        history = y[:first_test_idx + step]
        if step == 0:
            # Lag selection is done only once, on the initial window.
            sel = ar_select_order(history, maxlag=10, ic='bic')
            ar_lags = sel.ar_lags
            res = sel.model.fit()
        else:
            res = AutoReg(history, lags=ar_lags).fit()
        # Prediction indices are zero-based positions in the fitted sample,
        # so the first out-of-sample point is at index len(history).
        # (Indexing with test_size+step instead would return an in-sample
        # fitted value unless len(train)+len(val) happened to equal test_size.)
        next_idx = history.shape[0]
        pred.append(res.predict(start=next_idx, end=next_idx))
    return pred


def train_loop_ar(path,num_sim,str_sims,plot_res=False):
    """Fit and evaluate an AR model on every dataset of a simulation setup.

    For each name in ``str_sims`` the file
    ``path + 'Temp/sim<num_sim>_<name>.npz'`` is loaded; it must contain the
    arrays ``y_train``, ``y_val`` and ``y_test``. The three arrays are
    concatenated and one-step-ahead forecasts for the test part are produced
    with an expanding window. The forecasts are passed to
    ``format_predictions`` and ``calculate_metrics`` (defined elsewhere).

    Parameters
    ----------
    path : str
        Prefix of the data directory; ``'Temp/'`` is appended verbatim.
    num_sim : int or str
        Simulation identifier used in the file name.
    str_sims : sequence of str
        Dataset names to loop over.
    plot_res : bool, default False
        If true, plot true values against predictions for each dataset.

    Returns
    -------
    pandas.DataFrame
        One row per dataset with columns ``rmse``, ``mae``, ``mape``, ``r2``
        (an empty DataFrame when ``str_sims`` is empty).
    """
    metric_names = ['rmse','mae','mape','r2']
    metric_rows = []

    # loop over all datasets in the simulation setup
    for j, sim_name in enumerate(str_sims):

        # print dataset
        print('Dataset: ', sim_name, '(',j+1,'/',len(str_sims),')')

        # load data
        data_name = 'sim'+str(num_sim)+'_'+sim_name
        with np.load(path+'Temp/'+data_name+'.npz') as data:
            y_train = data['y_train']
            y_val = data['y_val']
            y_test = data['y_test']

        # concatenate series into a single array
        test_size = y_test.shape[0]
        y = np.concatenate((y_train,y_val,y_test),axis=0)

        # expanding window forecast over the test part
        pred = _expanding_window_ar_forecasts(y, test_size)

        # evaluate on test set
        df_result = format_predictions(pred, y_test)
        result_metrics = calculate_metrics(df_result)

        # plot results
        if plot_res:
            plt.figure(figsize=(15,5))
            plt.plot(df_result.value, label="True")
            plt.plot(df_result.prediction, label="Prediction")
            plt.legend()
            plt.title("Test Set")
            plt.show()

        # collect metrics on test set
        metric_rows.append([result_metrics[name] for name in metric_names])

    # return results (built once instead of concatenating inside the loop)
    if not metric_rows:
        return pd.DataFrame()
    return pd.DataFrame(np.asarray(metric_rows), columns=metric_names)