# Examples on simulated data: comparing DFM and DDFM factor estimates

In [2]:
import os,sys,inspect
# Directory of the currently executing frame, then the directory two levels above it.
# Neither value feeds the path insertion below and no visible cell reads them; they are
# kept so that anything relying on these names keeps working.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
parentdir = os.path.dirname(parentdir)
# Location of the ddfm sources (provides `synthetic_dgp` and `models`).
# Set the DDFM_PATH environment variable to point at another checkout; when it is not
# set, the original author's location is used, so existing setups behave as before.
ddfm_path = os.environ.get('DDFM_PATH', '/Users/paoloandreini/Desktop/DDFM-main/ddfm/')
sys.path.insert(0, ddfm_path)
import random
import numpy as np
import pandas as pd
from synthetic_dgp.simulate import SIMULATE
from statsmodels.tsa.statespace.dynamic_factor_mq import DynamicFactorMQ as DFM
from models.ddfm import DDFM
import warnings
# NOTE(review): this filter has no category or module restriction, so every warning
# raised after this point is suppressed for the rest of the session.
warnings.filterwarnings("ignore")

In [3]:
# --- reproducibility: one seed for Python's RNG, NumPy's global RNG and both simulators ---
seed = 123
np.random.seed(seed)
random.seed(seed)

# --- simulation settings ---
# poly_degree == 1 is the linear case; the cells below then use a single-layer encoder.
poly_degree = 2
r = 3                   # forwarded to SIMULATE as `r` (presumably the number of factors - confirm)
n_obs = 150             # forwarded to .simulate() in the cells below
portion_missings = 0.2  # forwarded to .simulate() as `portion_missings`

# --- result containers: two slots for the DFM scores, one for the DDFM score ---
results_dfm, results_ddfm = np.zeros(2), np.zeros(1)

# --- simulators: identical settings except for `n` (40 and 150) ---
sim_40, sim_150 = (
    SIMULATE(seed=seed, n=n_sim, r=r, poly_degree=poly_degree)
    for n_sim in (40, 150)
)

## Simulated data with n = 40

In [4]:
# Draw the panel from the n=40 simulator and read the factor count off its true factors.
x = sim_40.simulate(n_obs, portion_missings=portion_missings)
r_true = sim_40.f.shape[1]
# Linear case (poly_degree == 1): a single encoder layer of width r_true.
# Otherwise: four layers narrowing from 6*r_true down to r_true.
structure_encoder = (
    (r_true,)
    if poly_degree == 1
    else (6 * r_true, 4 * r_true, 2 * r_true, r_true)
)

In [5]:
%%time
# Fit the DFM benchmark (statsmodels DynamicFactorMQ) on the simulated panel.
# The number of factors is capped at the number of columns of x.
n_factors_dfm = min(r_true, x.shape[1])
dyn_fact_mdl = DFM(
    pd.DataFrame(x),
    factors=n_factors_dfm,
    factor_orders=1,
)
# Fit with up to 10000 iterations; disp=1000 is passed through as the display setting.
res_dyn_fact_mdl = dyn_fact_mdl.fit(disp=1000, maxiter=10000)

EM start iterations, llf=-4141.3
EM converged at iteration 311, llf=-3765, convergence criterion=9.9547e-07 < tolerance=1e-06
CPU times: user 28.2 s, sys: 1min 10s, total: 1min 38s
Wall time: 17.8 s


In [6]:
# Score the DFM factor estimates against the simulator's true factors:
# slot 0 <- smoothed estimate, slot 1 <- filtered estimate.
for slot, kind in enumerate(("smoothed", "filtered")):
    estimate = getattr(res_dyn_fact_mdl.factors, kind)
    results_dfm[slot] = sim_40.evaluate(estimate.values, f_true=sim_40.f)
print(results_dfm)

[0.74008908 0.73827788]


In [7]:
%%time
# Fit the DDFM on the same panel, using the encoder layout chosen above.
# Keyword names are passed exactly as in the original call (including `factor_oder`).
ddfm_kwargs = {
    "structure_encoder": structure_encoder,
    "factor_oder": 1,
    "use_bias": False,
    "link": 'relu',
    "max_iter": 1000,
}
deep_dyn_fact_mdl = DDFM(pd.DataFrame(x), **ddfm_kwargs)
deep_dyn_fact_mdl.fit()

@Info - Note: Sorting data.


2023-04-22 01:23:21.372274: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-04-22 01:23:21.373269: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-22 01:23:21.482824: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


@Info: iteration: 10 - new loss: 0.16868445318095723 - delta: 0.006150200448205526
@Info: iteration: 20 - new loss: 0.16673032082091727 - delta: 0.0014921661234730426
@Info: Convergence achieved in 22 iterations - new loss: 0.16682674339105666 - delta: 0.00033498932928478786 < 0.0005
CPU times: user 48.3 s, sys: 28.8 s, total: 1min 17s
Wall time: 42.9 s


In [8]:
# Average the DDFM factors along axis 0 (presumably across draws - confirm against DDFM),
# then score the average against the simulator's true factors.
ddfm_factors_mean = np.mean(deep_dyn_fact_mdl.factors, axis=0)
results_ddfm[0] = sim_40.evaluate(ddfm_factors_mean, f_true=sim_40.f)
print(results_ddfm)

[0.93867971]



## Simulated data with n = 150

In [9]:
# Draw the panel from the n=150 simulator; this rebinds x, r_true and structure_encoder.
x = sim_150.simulate(n_obs, portion_missings=portion_missings)
r_true = sim_150.f.shape[1]
# Linear case (poly_degree == 1): a single encoder layer of width r_true.
# Otherwise: four layers narrowing from 6*r_true down to r_true.
structure_encoder = (
    (r_true,)
    if poly_degree == 1
    else (6 * r_true, 4 * r_true, 2 * r_true, r_true)
)

In [10]:
%%time
# Fit the DFM benchmark (statsmodels DynamicFactorMQ) on the n=150 panel.
# The number of factors is capped at the number of columns of x.
n_factors_dfm = min(r_true, x.shape[1])
dyn_fact_mdl = DFM(
    pd.DataFrame(x),
    factors=n_factors_dfm,
    factor_orders=1,
)
# NOTE(review): maxiter is 1000 here but 10000 in the n=40 cell - confirm the difference
# is intentional.
res_dyn_fact_mdl = dyn_fact_mdl.fit(disp=1000, maxiter=1000)

EM start iterations, llf=-11413
EM converged at iteration 320, llf=-10232, convergence criterion=9.9605e-07 < tolerance=1e-06
CPU times: user 14min 4s, sys: 9min 47s, total: 23min 51s
Wall time: 3min 18s


In [11]:
# Score the DFM factor estimates against the n=150 simulator's true factors:
# slot 0 <- smoothed estimate, slot 1 <- filtered estimate.
# NOTE(review): this writes into the same array slots as the n=40 run, replacing those scores.
for slot, kind in enumerate(("smoothed", "filtered")):
    estimate = getattr(res_dyn_fact_mdl.factors, kind)
    results_dfm[slot] = sim_150.evaluate(estimate.values, f_true=sim_150.f)
print(results_dfm)

[0.77804294 0.77757014]


In [12]:
%%time
# Fit the DDFM on the n=150 panel, using the encoder layout chosen above.
# Keyword names are passed exactly as in the original call (including `factor_oder`).
ddfm_kwargs = {
    "structure_encoder": structure_encoder,
    "factor_oder": 1,
    "use_bias": False,
    "link": 'relu',
    "max_iter": 1000,
}
deep_dyn_fact_mdl = DDFM(pd.DataFrame(x), **ddfm_kwargs)
deep_dyn_fact_mdl.fit()

@Info - Note: Sorting data.
@Info: iteration: 10 - new loss: 0.2129121431217014 - delta: 0.0003581649351550417
@Info: Convergence achieved in 10 iterations - new loss: 0.2129121431217014 - delta: 0.0003581649351550417 < 0.0005
CPU times: user 25.3 s, sys: 20.3 s, total: 45.7 s
Wall time: 20.5 s


In [13]:
# Average the DDFM factors along axis 0 (presumably across draws - confirm against DDFM),
# then score the average against the n=150 simulator's true factors.
# NOTE(review): this replaces the n=40 score held in the same slot.
ddfm_factors_mean = np.mean(deep_dyn_fact_mdl.factors, axis=0)
results_ddfm[0] = sim_150.evaluate(ddfm_factors_mean, f_true=sim_150.f)
print(results_ddfm)

[0.95944693]
