# How do the DMA results vary with the parameter settings?
In the following, I run a grid search over the parameters `alpha` and `lambda` and compare the resulting forecast statistics of DMA and DMS.

In [5]:
import numpy as np
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
import pickle
import pandas as pd
import itertools
import random
import plotly.express as px

from src.data import import_data
from src.data.data_class import Data
from src.models.preliminaries import Settings
from src.models.dma import DMA

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Specify Settings

In [6]:
# Create the settings object and override the attributes this notebook needs.
# NOTE(review): the meaning of the numeric codes set below (tcodey,
# forgetting_method, expert_opinion, prior_theta, plag, hlag) is defined by
# Settings / DMA in src.models and is not visible in this notebook.
params = Settings() # initialize Settings
# adjust settings
params.use_y = ['CPI']  # use as y
params.use_x = ['unemp',    # or employment
                'cons_private',
                'invest_private_housing',
                'GDP',
                'prod_ind', # or prod_constr
                'interest_rate_short',
                'interest_rate_long',
                'dax',
                'M1',
                'infl_exp_current_year',    # or infl_exp_next_year, infl_exp_2_year_ahead
                'trade_exp',
                'CPI_house_energy',  # or PCI_energy_ or HICP_energy
                'supply_index_global',  # or 'supply_index_eu'
                # 'cons_gov',
                'business_conf_manufacturing'
                ]  # indep vars
# tcodesX is not hard-coded here: a later cell reads the codes from
# selected_data.csv and assigns params.tcodesX. Old manual list, kept for reference:
# params.tcodesX = [1, 1, 5, 1, 1, 1, 1, 5, 5, 1, 5, 5, 1]
params.tcodey = 1
params.first_sample_ends = '2012-12-31'
params.restricted_vars = ['intercept', 'CPI']
params.forgetting_method = 2
params.expert_opinion = 2
params.h_fore = 4  # presumably the forecast horizon; overwritten again in the last cell
params.prior_theta = 1
params.plag = 2
params.hlag = 2

In [7]:
# Directory holding the processed data, resolved relative to the parent of the
# current working directory (kept as a plain string for os.path.join below).
data_path = str(Path.cwd().parent / 'data' / 'processed')
# Unpickle the seasonally adjusted data set.
# pickle can execute arbitrary code: only load files produced by this project.
with Path(data_path, 'df_sa.pkl').open('rb') as f:
    df = pickle.load(f)

In [8]:
# Look up the transformation code of every regressor in the selection table.
selection = pd.read_csv(os.path.join(data_path, 'selected_data.csv'))
# Fail early with a readable message; the positional lookup below would
# otherwise stop with a bare IndexError for an unknown variable code.
known_codes = set(selection['var code'])
missing = [v for v in params.use_x if v not in known_codes]
if missing:
    raise KeyError(f"variables without an entry in selected_data.csv: {missing}")
tcodesX = []
for v in params.use_x:
    v_ind = np.where(selection['var code'] == v)[0][0]  # position of the first matching row
    # v_ind is a position, so read it with .iloc instead of relying on the
    # index labels happening to equal the row positions.
    tcodesX.append(selection['trans_code'].iloc[v_ind])
params.tcodesX = tcodesX

In [9]:
# Wrap the loaded data and the settings in the project's Data container
# (what Data does with them is defined in src.data.data_class, not visible here).
data = Data(df, params)

## Run grid search

In [10]:
# Candidate values for both parameters: 0.93, 0.95, 0.97, 0.99.
# np.linspace hits both end points exactly; np.arange with a float step needs a
# fudged stop value (0.991) and may return an unexpected number of elements.
alpha_range = np.linspace(0.93, 0.99, 4)
lambdaa_range = np.linspace(0.93, 0.99, 4)
# Every (alpha, lambda) combination as one row; alpha varies fastest.
grid = np.column_stack([
    np.tile(alpha_range, len(lambdaa_range)),
    np.repeat(lambdaa_range, len(alpha_range)),
])
grid

array([[0.93, 0.93],
       [0.95, 0.93],
       [0.97, 0.93],
       [0.99, 0.93],
       [0.93, 0.95],
       [0.95, 0.95],
       [0.97, 0.95],
       [0.99, 0.95],
       [0.93, 0.97],
       [0.95, 0.97],
       [0.97, 0.97],
       [0.99, 0.97],
       [0.93, 0.99],
       [0.95, 0.99],
       [0.97, 0.99],
       [0.99, 0.99]])

In [11]:
# Run DMA once per (alpha, lambda) row of `grid` and tabulate the forecast statistics.
# NOTE(review): the output saved with this cell stops with a MemoryError during
# the first run, so the loop has not been observed to finish.
stats = ['alpha', 'lambda', 'MAFE_DMA', 'MSFE_DMA', 'BIAS_DMA', 'MAFE_DMS', 'MSFE_DMS', 'BIAS_DMS']
rows = []    # one list of statistics per grid point
labels = []  # row labels, str() of the (alpha, lambda) pair
for g in grid:
    a, lam = g[0], g[1]
    print(f'run dma for alpha = {a} and lambda = {lam}')
    # params is modified in place and keeps the last pair after the loop
    params.alpha = a
    params.lamda = lam
    dma = DMA(params, data)
    dma.run_dma()
    dma.forecast_statistics(plot_fe=False, plot_y_fe=False, print_stats=False)
    rows.append([a, lam, dma.MAFE_DMA, dma.MSFE_DMA, dma.BIAS_DMA, dma.MAFE_DMS, dma.MSFE_DMS, dma.BIAS_DMS])
    labels.append(str(g))
# Build the frame once instead of enlarging an empty (object-dtype) frame row by
# row; if a run fails, the results gathered so far remain available in `rows`.
pd_stats = pd.DataFrame(rows, index=labels, columns=stats)
pd_stats

run dma for alpha = 0.93 and lambda = 0.93


MemoryError: 

## Repeat the grid search for different forecast horizons

In [None]:
def grid_search_a_l(params, data, param_grid=None):
    """Run DMA for every (alpha, lambda) pair and tabulate the forecast statistics.

    Parameters
    ----------
    params : Settings
        Settings object handed to ``DMA``. Its ``alpha`` and ``lamda``
        attributes are overwritten for every grid point and keep the values of
        the last pair when the function returns.
    data : Data
        Passed unchanged to ``DMA``.
    param_grid : array-like of (alpha, lambda) pairs, optional
        Grid to search. When omitted, the notebook-level ``grid`` is used,
        which keeps existing two-argument calls working.

    Returns
    -------
    pandas.DataFrame
        One row per grid point, labelled with ``str(pair)``, with the columns
        alpha, lambda, MAFE_DMA, MSFE_DMA, BIAS_DMA, MAFE_DMS, MSFE_DMS, BIAS_DMS.
    """
    # Fall back to the global grid only when the caller did not supply one.
    search_grid = grid if param_grid is None else param_grid
    stats = ['alpha', 'lambda', 'MAFE_DMA', 'MSFE_DMA', 'BIAS_DMA', 'MAFE_DMS', 'MSFE_DMS', 'BIAS_DMS']
    rows = []
    labels = []
    # np.asarray gives list input the same row type (and str() label) as an ndarray grid
    for g in np.asarray(search_grid):
        a, lam = g[0], g[1]
        print(f'run dma for alpha = {a} and lambda = {lam}')
        params.alpha = a
        params.lamda = lam
        dma = DMA(params, data)
        dma.run_dma()
        # NOTE(review): print_stats is left at its default here, unlike the
        # stand-alone loop earlier in the notebook -- confirm this is intended.
        dma.forecast_statistics(plot_fe=False, plot_y_fe=False)
        rows.append([a, lam, dma.MAFE_DMA, dma.MSFE_DMA, dma.BIAS_DMA, dma.MAFE_DMS, dma.MSFE_DMS, dma.BIAS_DMS])
        labels.append(str(g))
    # Build the frame once instead of enlarging an empty object-dtype frame per row.
    return pd.DataFrame(rows, index=labels, columns=stats)

In [None]:
# Repeat the grid search for h_fore = 1, 4 and 8 and keep one result frame per
# horizon (assigning every run to pd_stats_1 would keep only the h_fore = 8 results).
# NOTE(review): `data` was built while params.h_fore was 4; if Data depends on
# h_fore it has to be rebuilt (data = Data(df, params)) before each run -- confirm.
params.h_fore = 1
pd_stats_1 = grid_search_a_l(params, data)
params.h_fore = 4
pd_stats_4 = grid_search_a_l(params, data)
params.h_fore = 8
pd_stats_8 = grid_search_a_l(params, data)