# Main

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools
import time

In [2]:
# Base folder of the simulation; the code below reads from / writes to the
# 'Temp/' and 'Results/' sub-folders of this path.
# Alternative location kept for reference:
#path = 'C:/Users/Meier/Institut für Statistik Dropbox/Johanna Meier/Structural Breaks + DL/Simulation/Python Code/'
path = "C:/Users/root/Desktop/Sim1/"

In [3]:
# Forecasting model to fit; one of 'RNN', 'LSTM', 'GRU', 'AR', 'ARMA'.
str_model = "RNN"
# Data-generating process used for the simulation; one of 'AR', 'ARMA'.
setting = "ARMA"

In [4]:
# Number of repetitions of the simulation loop.
reps = 2

In [5]:
# Length of each simulated sample.
sim_length = 1000
# Break locations.
tau = np.array([0.2, 0.5, 0.8])
# Break size factors.
fac_beta = np.array([0.5, 1.0, 2.0])

In [6]:
# Proportion of the test set.
test_size = 0.1
# Proportion of the training set.
train_size = 0.85
# Number of lags used as features.
lags = 1

In [7]:
# Define the coefficients of the data-generating process and the list of
# parameter combinations (`params_list`) that later cells iterate over.
if setting == 'AR':
    params = np.array([0.1,0.4,0.7,-0.4]) # AR (phi)
    params_list = params                  # one combination per AR coefficient
elif setting == 'ARMA':
    params = [np.array([0.4,-0.4]),np.array([0.3,-0.3])] # ARMA (phi, theta)
    # every (phi, theta) pair is one parameter combination
    params_list = list(itertools.product(params[0],params[1]))
    print(params_list)
else:
    # without this guard `params` / `params_list` would stay undefined (or keep
    # stale values from an earlier run of this cell) and fail much later
    raise ValueError("Unknown setting %r; expected 'AR' or 'ARMA'" % (setting,))

[(0.4, 0.3), (0.4, -0.3), (-0.4, 0.3), (-0.4, -0.3)]


Run external notebooks.

In [8]:
%run "Helper_functions.ipynb" # notebook containing helper functions
%run "Simulate_data.ipynb" # notebook containing simulation functions
%run "DL_models.ipynb" # notebook containing sequential deep learning models
%run "Baseline_models.ipynb" # notebook containing AR, ARIMA and GARCH models

Get names of simulation settings.

In [9]:
# Names of the simulation settings, derived from the break locations (tau)
# and the break size factors (fac_beta).
str_sims = get_str_sims(tau, fac_beta)

Run simulation.

In [None]:
# Main simulation loop.
# For every repetition: seed the random number generators, simulate and split
# the data (files written to Temp/), fit the model selected by `str_model` on
# every parameter combination, append the repetition's results to
# Temp/interm_results.npy and refresh the aggregated metrics in Results/.

# fail fast on an unsupported model, before any file in Temp is deleted
dl_models = ('RNN', 'LSTM', 'GRU')
baseline_models = ('AR', 'ARMA')
if str_model not in dl_models + baseline_models:
    raise ValueError('Unknown str_model %r; expected one of: %s' % (str_model, ', '.join(dl_models + baseline_models)))
is_dl_model = str_model in dl_models

temp_dir = path+'Temp/'
interm_file = temp_dir+'interm_results.npy'

# DL hyper-parameters do not change between repetitions, so set them once
if is_dl_model:
    batch_size = 256                  # batch size
    input_dim = lags                  # number of lagged features in X
    hidden_dim = 10                   # number of hidden nodes per layer
    layer_dim = 1                     # number of layers
    output_dim = 1                    # output dimension (1 for univariate output)
    dropout = 0                       # dropout proportion (only before the last sequential layer)
    learning_rate = 1e-3              # learning rate for Adam optimizer
    weight_decay = 1e-6               # weight decay for Adam optimizer

    # save model parameters in dict
    model_params = {'input_dim': input_dim, 'hidden_dim' : hidden_dim,'layer_dim' : layer_dim, 'output_dim' : output_dim, 'dropout_prob' : dropout}

# number of parameter combinations; `params_list` is used for every model so
# that the count matches the simulated setting (for setting 'ARMA', `params`
# itself only holds the two coefficient arrays, not their combinations)
n_combos = len(params_list)

# start timer
timer_start = time.time()
print('Simulation start: %s' %time.ctime(int(timer_start)))

# delete all files in Temp folder
# NOTE(review): the helper is called as 'emtpy_temp' (sic); the spelling is
# kept because the name is defined outside this notebook.
emtpy_temp(temp_dir)

# run specified number of repetitions
for i in range(reps):

    # print repetition
    print('Repetition: ',i+1)

    # seed (repetition index as seed makes every repetition reproducible)
    # NOTE(review): torch is not imported in this notebook; presumably it is
    # brought into scope by one of the %run notebooks - confirm.
    np.random.seed(i)
    torch.manual_seed(i)

    # delete all simulation files in Temp folder
    del_sim(temp_dir)

    # simulate data for given setting and parameters (save csv-files in Temp);
    # pass the configured `lags` so the simulated features agree with input_dim
    sim_data(setting=setting, params=params, tau=tau, fac_beta=fac_beta, test_size=test_size, l=sim_length, path=temp_dir, lags=lags, verbose=False)

    # split all data into train, val, and test (save npz-files in Temp)
    split_data_val(setting=setting, params=params, path=temp_dir, train_size=train_size)

    # check simulated data
    #sim = pd.read_csv(path+'Temp/sim1.csv' ,sep=',',na_values = 'NA')
    #data1 = np.load(path+'Temp/sim2_%s.npz' %sim.columns[0])
    #data2 = np.load(path+'Temp/sim2_%s.npz' %sim.columns[1])
    #plt.plot(data1['y_test'])
    #plt.plot(data2['y_test'])
    #plt.show()
    #del data1,data2

    # fit the selected model on every parameter combination
    results = []
    for j in range(n_combos):
        print('Parameter combination: ', j+1,'/',n_combos)
        if is_dl_model:
            # DL forecast (RNN / LSTM / GRU)
            df_sim = train_loop(model_name=str_model,model_params=model_params,num_sim=j+1,str_sims=str_sims,path=path,batch_size=batch_size, learning_rate=learning_rate, weight_decay=weight_decay)
        elif str_model == 'AR':
            # forecast AR model
            df_sim = train_loop_ar(path=path,num_sim=j+1,str_sims=str_sims,plot_res=False)
        else:
            # forecast ARMA model
            df_sim = train_loop_arma(path=path,num_sim=j+1,str_sims=str_sims)
        results.append(df_sim)

    # forecast GARCH model (disabled; to enable, add 'GARCH' to baseline_models
    # and a matching branch in the loop above)
    #        df_sim = train_loop_garch(path=path,num_sim=j+1,str_sims=str_sims)

    # save intermediate results (a new axis 1 indexes the repetition)
    new_results = np.expand_dims(np.asarray(results),axis=1)
    if i==0:
        arr_results = new_results
    else:
        prev_results = np.load(interm_file)
        arr_results = np.concatenate((prev_results,new_results),axis=1) # shape: no. of params x rep x no. of settings x no. of metrics
        del prev_results
    np.save(interm_file,arr_results)

    # calculate metrics (the array just written is reused instead of re-read)
    arr_mean, arr_std, arr_min, arr_max, arr_median = get_results(arr_results) # aggregate over reps (axis 1)
    np.savez(path+'Results/'+setting+'_'+str_model+'_results.npz',mean=arr_mean,std=arr_std,minimum=arr_min,maximum=arr_max,median=arr_median)
    del arr_results

    # report elapsed time for the first ten repetitions, then every 50th
    if (i < 10) or (i % 50 == 0):
        print('Elapsed: %s' %time_format(time.time() - timer_start))

print('Simulation end: %s' %time.ctime(int(time.time())))

Simulation start: Mon Jun  5 14:20:24 2023
Repetition:  1
Parameter combination:  1 / 4
Dataset:  Ref ( 1 / 13 )
Dataset:  Tau0.2Ref ( 2 / 13 )
Dataset:  Tau0.2Beta0.5 ( 3 / 13 )
Dataset:  Tau0.2Beta1.0 ( 4 / 13 )
Dataset:  Tau0.2Beta2.0 ( 5 / 13 )
Dataset:  Tau0.5Ref ( 6 / 13 )
Dataset:  Tau0.5Beta0.5 ( 7 / 13 )
Dataset:  Tau0.5Beta1.0 ( 8 / 13 )
Dataset:  Tau0.5Beta2.0 ( 9 / 13 )
Dataset:  Tau0.8Ref ( 10 / 13 )
Dataset:  Tau0.8Beta0.5 ( 11 / 13 )
Dataset:  Tau0.8Beta1.0 ( 12 / 13 )
Dataset:  Tau0.8Beta2.0 ( 13 / 13 )
Parameter combination:  2 / 4
Dataset:  Ref ( 1 / 13 )
Dataset:  Tau0.2Ref ( 2 / 13 )
Dataset:  Tau0.2Beta0.5 ( 3 / 13 )
Dataset:  Tau0.2Beta1.0 ( 4 / 13 )
Dataset:  Tau0.2Beta2.0 ( 5 / 13 )
