# Dynamic Model Averaging (DMA) Application for Germany
This notebooks runs DMA on German data and produces a short analysis.

In [45]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
import pickle
import pandas as pd
import numpy as np
import itertools
import random
import plotly
import plotly.express as px
import scipy.stats as stats
import matplotlib.pyplot as plt

from src.data import import_data
from src.data.data_class import Data
from src.models.preliminaries import Settings
from src.models.dma import DMA
from src.models.tvp import TVP
from src.models.ar import AR

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Preliminaries
Specify options for the DMA. For instance, which variables to include and how to transform them.

In [46]:
params = Settings() # initialize Settings
# adjust settings
params.use_y = ['CPI']  # use as y
params.use_x = ['unemp',    # or employment
                'GDP',
                'prod_ind', # or prod_constr (might be better for h=4)
                # 'interest_rate_short',
                'interest_rate_long',
                'dax',
                'M3',
                'infl_exp_current_year',    # or infl_exp_next_year, infl_exp_2_year_ahead
                # 'trade_exp',
                'CPI_house_energy',  # or PCI_energy_ or HICP_energy
                #'supply_index_global',  # or 'supply_index_eu'
                #'business_conf_manufacturing'
                ]  # indep vars
# params.tcodesX = [1, 1, 5, 1, 1, 1, 1, 5, 5, 1, 5, 5, 1]
params.tcodey = 1
params.first_sample_ends = '1995-12-31'
params.restricted_vars = ['intercept', 'CPI']
params.forgetting_method = 2
params.expert_opinion = 2
params.miss_treatment = 1
params.h_fore = 4
params.prior_theta = 1
params.plag = 3
params.hlag = 1

# params.print_setting_options() # print explanation to settings
params.print_settings() # print settings

The following preliminary settings are specified:
intercept : 1
plag : 3
hlag : 1
use_x : ['unemp', 'GDP', 'prod_ind', 'interest_rate_long', 'dax', 'M3', 'infl_exp_current_year', 'CPI_house_energy']
use_y : ['CPI']
tcodesX : 1
tcodey : 1
miss_treatment : 1
lamda : 0.99
alpha : 0.9
kappa : 0.95
forgetting_method : 2
prior_theta : 1
initial_V_0 : 1
restricted_vars : ['intercept', 'CPI']
initial_DMA_weights : 1
expert_opinion : 2
weighting : normal
degrees : None
h_fore : 4
first_sample_ends : 1995-12-31


## Prepare data
Load the data and transform as specified above.

In [47]:
# path where processed data is stored
data_path = os.path.join(Path().cwd().parent, 'data', 'processed')
# get seasonally adjusted data
with open(os.path.join(data_path, 'df_sa.pkl'), 'rb') as f:
    df = pickle.load(f) # load raw data
df.describe()

var code,CPI_house_energy,PCI_energy_,HICP_energy,HICP_excl_energy,CPI,deflator_GDP,unemp,employment,GDP,cons_private,...,interest_rate_long,M3,M1,business_conf_manufacturing,business_conf_construct,business_conf_service,business_conf_retail,cons_conf_tendency,business_situation,residential_permits
count,123.0,108.0,104.0,108.0,123.0,123.0,123.0,122.0,123.0,123.0,...,123.0,123.0,123.0,123.0,123.0,107.0,123.0,123.0,123.0,111.0
mean,86.044251,87.012686,84.341505,92.755311,0.004721,0.004284,-0.003422,0.001091,0.002326,0.001503,...,3.463803,73.05957,65.05182,-6.727642,-21.869919,16.915888,-15.618699,99.946968,99.928214,29339.023758
std,14.225694,20.686686,20.142079,8.29487,0.004461,0.004434,0.033366,0.004588,0.011852,0.012954,...,2.511013,32.274109,43.14558,13.369695,20.970658,15.694436,10.782244,1.088837,1.342361,12214.43955
min,54.290421,54.877066,50.172049,79.809765,-0.006218,-0.004797,-0.096044,-0.010906,-0.078606,-0.092427,...,-0.604967,28.239989,16.992672,-42.2,-55.7,-22.0,-41.2,96.952924,95.408918,12903.224314
25%,73.991181,66.273204,65.311428,85.461263,0.002036,0.001382,-0.023105,-0.001753,-0.000176,-0.001626,...,1.381471,41.848292,27.878798,-15.4,-42.45,9.85,-24.2,99.334636,99.292014,20146.329471
50%,86.943304,93.19568,90.987561,91.996135,0.004041,0.003839,-0.00673,0.000564,0.003102,0.002517,...,3.829682,70.994604,56.696744,-6.1,-23.0,16.5,-17.8,99.964845,99.990484,25712.133386
75%,99.834497,103.481214,101.972046,99.628004,0.006383,0.005843,0.019722,0.003528,0.005931,0.005418,...,5.147533,93.580208,87.016878,0.8,-7.15,22.55,-6.4,100.702736,100.968382,37431.426816
max,109.463789,148.621775,115.784535,109.9383,0.019812,0.021539,0.101676,0.016532,0.047462,0.050932,...,8.387854,145.8097,178.189434,25.6,20.9,50.1,13.5,102.119658,102.336481,64217.291026


In [48]:
# get transformations for variables
selection = pd.read_csv(os.path.join(data_path, 'selected_data.csv'))
tcodesX = []
for v in params.use_x:
    v_ind = np.where(selection['var code'] == v)[0][0]
    v_tcode = selection['trans_code'][v_ind]
    tcodesX.append(v_tcode)
    # print(f'{v} has tcode {v_tcode}')
params.tcodesX = tcodesX
tcodesX

[1, 1, 1, 2, 5, 5, 1, 5]

In [49]:
data = Data(df, params)
data.X#.describe()

Unnamed: 0,intercept,CPI,CPI_t-1,CPI_t-2,CPI_t-3,unemp,unemp_t-1,GDP,GDP_t-1,prod_ind,...,interest_rate_long,interest_rate_long_t-1,dax,dax_t-1,M3,M3_t-1,infl_exp_current_year,infl_exp_current_year_t-1,CPI_house_energy,CPI_house_energy_t-1
1992-06-30,1,0.051887,0.026074,0.077893,0.073117,0.058676,0.039356,0.000244,0.006485,-0.025882,...,-0.100993,-0.169521,0.203294,0.130635,0.087451,0.068367,0.000000,0.000000,0.109428,0.009703
1992-09-30,1,0.034222,0.051887,0.026074,0.077893,0.051202,0.058676,0.000652,0.000244,-0.007698,...,-0.249259,-0.100993,-0.521336,0.203294,0.082945,0.087451,0.000000,0.000000,0.074084,0.109428
1992-12-31,1,0.021642,0.034222,0.051887,0.026074,0.039888,0.051202,-0.005779,0.000652,-0.044108,...,-0.377744,-0.249259,-0.001904,-0.521336,0.062675,0.082945,0.000000,0.000000,-0.008607,0.074084
1993-03-31,1,0.079248,0.021642,0.034222,0.051887,0.045907,0.039888,-0.011396,-0.005779,-0.024222,...,-0.379998,-0.377744,0.280591,-0.001904,0.074250,0.062675,0.000000,0.000000,0.175808,-0.008607
1993-06-30,1,0.035956,0.079248,0.021642,0.034222,0.033728,0.045907,0.003713,-0.011396,-0.015659,...,-0.300516,-0.379998,0.154760,0.280591,0.057944,0.074250,0.000000,0.000000,0.068710,0.175808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-31,1,0.015118,-0.010759,0.009828,-0.013438,0.015914,0.030221,0.000529,0.047462,0.023214,...,-0.164526,0.090759,0.260767,0.260248,0.097808,0.074030,0.323362,0.388508,-0.001485,-0.008648
2021-03-31,1,0.051631,0.015118,-0.010759,0.009828,-0.017454,0.015914,-0.002053,0.000529,0.021215,...,0.137813,-0.164526,0.508383,0.260767,0.079375,0.097808,0.892655,0.323362,0.045326,-0.001485
2021-06-30,1,0.028673,0.051631,0.015118,-0.010759,-0.033867,-0.017454,0.042307,-0.002053,0.000572,...,0.085919,0.137813,-0.106134,0.508383,0.042220,0.079375,1.594912,0.892655,0.026074,0.045326
2021-09-30,1,0.065886,0.028673,0.051631,0.015118,-0.018046,-0.033867,-0.029169,0.042307,-0.047596,...,0.013626,0.085919,0.096111,-0.106134,0.082219,0.042220,1.881401,1.594912,0.036153,0.026074


In [50]:
# for v in data.columns:
#     dat_v = data[v]
#     fig = px.line(dat_v, x=dat_v.index, y=dat_v.name)
#     fig.show()

## Run DMA

In [51]:
dma = DMA(params, data)
dma.run_dma()

100%|██████████| 115/115 [02:12<00:00,  1.15s/it]

DMA finished





## Analysis
### Baseline example

In [52]:
dma.forecast_statistics(unit='percent', plot_fe=False, plot_y_fe=True)

         MAFE      MSFE      BIAS
DMA  0.814479  0.011917  0.078286
DMS  0.713983  0.009659  0.092735


In [53]:
fig = dma.plot_inclusion_prob(seperate_plots=True, return_fig=True)
fig.show()

In [54]:
dma.calc_E_size(out='plot')

## Compare results to less sophisticated models

In [55]:
params.use_x = None
data_ar = Data(df, params)
ar = AR(params, data_ar)
ar.fit_predict()
ar.forecast_statistics(unit='percent', print_stats=False)

In [56]:
tvp = TVP(params, data)
tvp.fit()
tvp.forecast_statistics(unit='percent',print_stats=False)

In [57]:
stats = ['MAFE', 'MSFE', 'BIAS']
stats_DMA = [dma.MAFE_DMA, dma.MSFE_DMA, dma.BIAS_DMA]
stats_DMS = [dma.MAFE_DMS, dma.MSFE_DMS, dma.BIAS_DMS]
stats_TVP = [tvp.MAFE, tvp.MSFE, tvp.BIAS]
stats_AR = [ar.MAFE, ar.MSFE, ar.BIAS]
stats_pd = pd.DataFrame.from_dict(data={'DMA': stats_DMA,
                                        'DMS': stats_DMS,
                                        'TVP': stats_TVP,
                                        'AR': stats_AR},
                                orient='index',
                                columns=stats)
print(stats_pd)

         MAFE      MSFE      BIAS
DMA  0.008145  0.000119  0.000783
DMS  0.007140  0.000097  0.000927
TVP  0.009511  0.000165 -0.000965
AR   0.008057  0.000131  0.003743


In [58]:
ar.forecast_statistics(unit='percent', plot_y_fe=True)

                     MAFE      MSFE      BIAS
Forecasts stats  0.805716  0.013119  0.374306


In [59]:
plotly.io.templates.default = "plotly_white"
fig = dma.forecast_statistics(unit='percent', plot_y_fe=True, return_fig=True, print_stats=False)
# fig.write_image("test.png")
fig.show()

In [60]:
z = [1 if np.array_equal(m, np.array([0,1,2,3,4,5])) else 0 for m in dma.models]
ind = np.nonzero(z)
inc_prob_ar = dma.prob_update[:, ind].flatten()
inc_prob_ar.mean()

0.0010922626817450062

In [61]:
dma.models

[array([0, 1, 2, 3, 4, 5, 6, 7]),
 array([0, 1, 2, 3, 4, 5, 6, 8]),
 array([0, 1, 2, 3, 4, 5, 6, 9]),
 array([ 0,  1,  2,  3,  4,  5,  6, 10]),
 array([ 0,  1,  2,  3,  4,  5,  6, 11]),
 array([ 0,  1,  2,  3,  4,  5,  6, 12]),
 array([ 0,  1,  2,  3,  4,  5,  6, 13]),
 array([ 0,  1,  2,  3,  4,  5,  6, 14]),
 array([ 0,  1,  2,  3,  4,  5,  6, 15]),
 array([ 0,  1,  2,  3,  4,  5,  6, 16]),
 array([ 0,  1,  2,  3,  4,  5,  6, 17]),
 array([ 0,  1,  2,  3,  4,  5,  6, 18]),
 array([ 0,  1,  2,  3,  4,  5,  6, 19]),
 array([ 0,  1,  2,  3,  4,  5,  6, 20]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 8]),
 array([0, 1, 2, 3, 4, 5, 6, 7, 9]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 10]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 11]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 12]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 13]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 14]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 15]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 16]),
 array([ 0,  1,  2,  3,  4,  5,  6,  7, 