# GARCH model

In [1]:
#import numpy as np
#import matplotlib.pyplot as plt
#import pandas as pd
#import itertools
#from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

In [2]:
from sklearn.preprocessing import MinMaxScaler
from arch.univariate import arch_model

In [3]:
#def format_predictions(predictions, values):
#    vals = np.concatenate(values, axis=0).ravel()
#    preds = np.concatenate(predictions, axis=0).ravel()
#    df_result = pd.DataFrame(data={"value": vals, "prediction": preds})
#    df_result = df_result.sort_index()
#    return df_result

#def calculate_metrics(df):
#    return {'rmse' : mean_squared_error(df.value, df.prediction) ** 0.5,
#            'mae' : mean_absolute_error(df.value, df.prediction),
#            'mape': mean_absolute_percentage_error(df.value, df.prediction),
#            'r2' : r2_score(df.value, df.prediction)}

In [4]:
def train_loop_garch(path,num_sim,str_sims):
    
    df_sim = pd.DataFrame()
    
    # loop over all datasets in the simulation setup
    for j in range(len(str_sims)):
        
        # print dataset
        print('No. of dataset: ', j+1,'/',len(str_sims))

        # load data
        data_name = 'sim'+str(num_sim)+'_'+str_sims[j]
        with np.load(path+'Temp/'+data_name+'.npz') as data:
            y_train = data['y_train']
            y_val = data['y_val']
            y_test = data['y_test']

        # concatenate series into a single array
        test_size = y_test.shape[0]
        y = np.concatenate((y_train,y_val,y_test),axis=0)

        # rolling/expanding window forecast
        i=0
        pred = []
        while i < test_size:
            # rolling window
            #model = auto_arima(y[-(w_size+test_size-i):-(test_size-i)], start_p=1, start_q=1,
            #                  max_p=5, max_q=5,            
            #                  d=None,          
            #                  seasonal=False,   
            #                  information_criterion='bic',
            #                  test='adf',
            #                  trace=False,  
            #                  suppress_warnings=True, 
            #                  stepwise=True)
            # expanding window
            if i == 0:
                bic = []
                param_list = list(itertools.product([1,2,3,4,5], [1,2,3,4,5]))
                for j in range(len(param_list)):
                    garch = arch_model(y[:-test_size], mean='Constant', vol='Garch', p=param_list[j][0], q=param_list[j][1], dist='Normal',rescale=False)
                    model = garch.fit(update_freq=5)
                    bic.append(model.bic)
                bic_min_ind = np.argmin(bic)
            else:
                garch = arch_model(y[:-(test_size-i)], mean='Constant', vol='Garch', p=param_list[bic_min_ind][0], q=param_list[bic_min_ind][1], dist='Normal')
                model = garch.fit()
            pred.append(model.forecast(horizon=1, reindex=True).mean['h.1'].iloc[-1]+np.sqrt(model.forecast(horizon=1, reindex=True).variance['h.1'].iloc[-1]))
            i = i+1

        # evaluate on test set
        df_result = format_predictions(np.expand_dims(np.array(pred),axis=1), y_test)
        result_metrics = calculate_metrics(df_result)
        #print(result_metrics)

        # plot results
        plt.figure(figsize=(15,5))
        plt.plot(df_result.value, label="True")
        plt.plot(df_result.prediction, label="Prediction")
        plt.legend()
        plt.title("Test Set")
        plt.show()

        # append metrics on test set
        df_metrics = pd.DataFrame(np.expand_dims((result_metrics['rmse'],result_metrics['mae'],result_metrics['mape'],result_metrics['r2'],),axis=0),columns=['rmse','mae','mape','r2'])
        df_sim = pd.concat([df_sim,df_metrics],axis=0, ignore_index=True)

    return df_sim

In [7]:
#path = 'C:/Users/Meier/Dropbox (Institut für Statistik)/Structural Breaks + DL/Simulation/Python Code/' 

In [8]:
#path= 'C:/Users/Johan/Dropbox (Institut für Statistik)/Structural Breaks + DL/Simulation/Python Code/'

In [9]:
#str_sims = ['Ref','Tau0.1Ref','Tau0.1Beta0.5','Tau0.1Beta1.0','Tau0.1Beta2.0','Tau0.2Ref','Tau0.2Beta0.5','Tau0.2Beta1.0','Tau0.2Beta2.0','Tau0.3Ref','Tau0.3Beta0.5','Tau0.3Beta1.0','Tau0.3Beta2.0','Tau0.4Ref','Tau0.4Beta0.5','Tau0.4Beta1.0','Tau0.4Beta2.0','Tau0.5Ref','Tau0.5Beta0.5','Tau0.5Beta1.0','Tau0.5Beta2.0','Tau0.6Ref','Tau0.6Beta0.5','Tau0.6Beta1.0','Tau0.6Beta2.0','Tau0.7Ref','Tau0.7Beta0.5','Tau0.7Beta1.0','Tau0.7Beta2.0'] 

In [10]:
#num_sim = 1

In [None]:
#df_sim = train_loop_garch(path=path,num_sim=num_sim,str_sims=str_sims)

In [13]:
#df_sim

Unnamed: 0,rmse,mae,mape,r2
0,257.556215,257.049398,1.739022,-1013.884241
1,252.047633,251.534881,1.701583,-970.936039
2,254.065677,253.551051,1.715239,-986.56215
3,253.652104,253.138056,1.712441,-983.349625
4,253.310103,252.792535,1.710081,-980.696998
5,249.898896,249.376025,1.686896,-954.434934
6,250.745138,250.225267,1.692669,-960.916736
7,250.322877,249.802546,1.689802,-957.679684
8,250.110942,249.590576,1.688365,-956.057046
9,235.428986,234.871368,1.588414,-846.993076
