# This notebook contains examples on simulated data (DFM vs. DDFM)

In [1]:
import os,sys,inspect
# Put the directory two levels above this file's directory at the front of
# sys.path (presumably so the project packages imported below resolve — confirm).
_frame_file = inspect.getfile(inspect.currentframe())
currentdir = os.path.dirname(os.path.abspath(_frame_file))
parentdir = os.path.dirname(os.path.dirname(currentdir))
sys.path.insert(0, parentdir)
import random
import numpy as np
import pandas as pd
from synthetic_dgp.simulate import SIMULATE
from statsmodels.tsa.statespace.dynamic_factor_mq import DynamicFactorMQ as DFM
from models.ddfm import DDFM
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Reproducibility: seed both Python's and NumPy's global random generators.
seed = 123456789
random.seed(seed)
np.random.seed(seed)
# Polynomial degree handed to the simulator. With poly_degree == 1 the setup is
# treated as linear and a single-layer encoder structure is used further below.
poly_degree = 1
# Result storage.
# results_dfm: index 0 = score of the smoothed factors, index 1 = score of the filtered factors.
results_dfm = np.zeros(2)
# Fixed: np.zeros() without a shape raises TypeError. Length 2 matches the
# printed result further down; only index 0 is written in this notebook.
results_ddfm = np.zeros(2)
# Parameters of the simulated data.
n_obs = 50              # first positional argument of SIMULATE.simulate
r = 3                   # passed to SIMULATE as `r` (presumably the number of factors — confirm)
portion_missings = 0.2  # passed to SIMULATE.simulate (presumably the share of missing entries — confirm)
# Build one simulator per `n` setting (40, then 150); every other argument is shared.
sim_40, sim_150 = (
    SIMULATE(seed=seed, n=n_setting, r=r, poly_degree=poly_degree)
    for n_setting in (40, 150)
)

## Simulated data, n = 40

In [3]:
# Generate the data from the n=40 simulator and read the factor count off the
# second dimension of its `f` attribute.
x = sim_40.simulate(n_obs, portion_missings=portion_missings)
r_true = sim_40.f.shape[1]
# Linear case (poly_degree == 1): one encoder layer of width r_true.
# Otherwise: four layers of widths 6*r_true, 4*r_true, 2*r_true, r_true.
structure_encoder = (
    (r_true,)
    if poly_degree == 1
    else (r_true * 6, r_true * 4, r_true * 2, r_true)
)

In [4]:
%%time
# Baseline: statsmodels DynamicFactorMQ with one factor block of order 1.
# The factor count is capped by the number of columns in x.
n_factors = min(r_true, x.shape[1])
dyn_fact_mdl = DFM(pd.DataFrame(x), factors=n_factors, factor_orders=1)
# Allow up to 10000 iterations; disp=1000 controls how often progress is printed.
res_dyn_fact_mdl = dyn_fact_mdl.fit(maxiter=10000, disp=1000)

EM start iterations, llf=-1315.7
EM converged at iteration 548, llf=-1233.3, convergence criterion=9.9807e-07 < tolerance=1e-06
Wall time: 20.8 s


In [5]:
# Score the DFM factor estimates against the simulator's `f`.
dfm_factors = res_dyn_fact_mdl.factors
# Slot 0: smoothed estimates.
results_dfm[0] = sim_40.evaluate(dfm_factors.smoothed.values, f_true=sim_40.f)
# Slot 1: filtered estimates.
results_dfm[1] = sim_40.evaluate(dfm_factors.filtered.values, f_true=sim_40.f)
print(results_dfm)

[0.91742822 0.91610462]


In [6]:
%%time
# Deep dynamic factor model on the same data, using the encoder structure chosen above.
ddfm_options = dict(
    structure_encoder=structure_encoder,
    factor_oder=1,  # keyword name kept exactly as in the original call
    use_bias=False,
    link='relu',
    max_iter=1000,
)
deep_dyn_fact_mdl = DDFM(pd.DataFrame(x), **ddfm_options)
deep_dyn_fact_mdl.fit()

@Info - Note: in data, last datapoint must be last observation.
@Info: Convergence achieved in 3 iterations!
Wall time: 12.8 s


In [None]:
# Display the fitted DDFM's `factors_filtered` attribute.
# NOTE(review): this cell has no execution count or saved output — confirm the
# attribute exists, and consider showing only a slice if it is large.
deep_dyn_fact_mdl.factors_filtered

In [7]:
# Average the DDFM `factors` over axis 0 (presumably the axis of repeated
# draws — confirm) and score the mean against the simulator's `f`.
# Only slot 0 of results_ddfm is filled here.
ddfm_factors_mean = np.mean(deep_dyn_fact_mdl.factors, axis=0)
results_ddfm[0] = sim_40.evaluate(ddfm_factors_mean, f_true=sim_40.f)
print(results_ddfm)

[0.92822108 0.        ]



## Simulated data, n = 150

In [8]:
# Generate the data from the n=150 simulator and read the factor count off the
# second dimension of its `f` attribute. This rebinds x, r_true and structure_encoder.
x = sim_150.simulate(n_obs, portion_missings=portion_missings)
r_true = sim_150.f.shape[1]
# Linear case (poly_degree == 1): one encoder layer of width r_true.
# Otherwise: four layers of widths 6*r_true, 4*r_true, 2*r_true, r_true.
structure_encoder = (
    (r_true,)
    if poly_degree == 1
    else (r_true * 6, r_true * 4, r_true * 2, r_true)
)

In [None]:
%%time
# Baseline: statsmodels DynamicFactorMQ with one factor block of order 1.
# The factor count is capped by the number of columns in x.
n_factors = min(r_true, x.shape[1])
dyn_fact_mdl = DFM(pd.DataFrame(x), factors=n_factors, factor_orders=1)
# NOTE(review): the iteration cap is 1000 here but 10000 in the n=40 section,
# and the saved output for this cell shows only the EM start line — confirm
# that the lower cap is intended.
res_dyn_fact_mdl = dyn_fact_mdl.fit(maxiter=1000, disp=1000)

EM start iterations, llf=-4879.9


In [None]:
# Score the DFM factor estimates against the n=150 simulator's `f`.
# This overwrites both slots of results_dfm written in the n=40 section.
dfm_factors = res_dyn_fact_mdl.factors
# Slot 0: smoothed estimates.
results_dfm[0] = sim_150.evaluate(dfm_factors.smoothed.values, f_true=sim_150.f)
# Slot 1: filtered estimates.
results_dfm[1] = sim_150.evaluate(dfm_factors.filtered.values, f_true=sim_150.f)
print(results_dfm)

In [None]:
%%time
# Deep dynamic factor model on the n=150 data, using the encoder structure chosen above.
ddfm_options = dict(
    structure_encoder=structure_encoder,
    factor_oder=1,  # keyword name kept exactly as in the original call
    use_bias=False,
    link='relu',
    max_iter=1000,
)
deep_dyn_fact_mdl = DDFM(pd.DataFrame(x), **ddfm_options)
deep_dyn_fact_mdl.fit()

In [None]:
# Average the DDFM `factors` over axis 0 (presumably the axis of repeated
# draws — confirm) and score the mean against the n=150 simulator's `f`.
# This overwrites slot 0 of results_ddfm written in the n=40 section.
ddfm_factors_mean = np.mean(deep_dyn_fact_mdl.factors, axis=0)
results_ddfm[0] = sim_150.evaluate(ddfm_factors_mean, f_true=sim_150.f)
print(results_ddfm)