# 1. Factor model to reduce dimensionality

$$\eta_t=\Phi \eta_{t-1} + \nu_t$$
$$x_t=\Lambda \eta_t + \epsilon_t$$

Where $\nu_t\sim \mathcal{N}_k(0,I)$ and $\epsilon_t\sim \mathcal{N}_p(0, \Sigma_0)$, with $k < p$. We can use the Kalman filter to estimate this model and fill missing values. The latent factors $\eta_t$ are going to be used to build the predictive model.

**Open question:** How can I add structure so that the factors represent components of different frequencies, in other words, components that change at different paces?

In [1]:
import sys
sys.path.append('../')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from src.d01_data.dengue_data_api import DengueDataApi
from src.d00_utils.utils import variable_analysis

In [2]:
from scipy import signal

# Load the data without interpolation, so missing values are left for the
# factor model to handle, and split it without shuffling.
dda = DengueDataApi(interpolate=False)
data_splits = dda.split_data(random=False)
x1, x2, y1, y2 = data_splits

In [3]:
# Sanity check: does the first feature split still contain any missing values?
np.isnan(x1).values.any()

True

In [4]:
from src.d04_modeling.dynamic_factor_model import DynamicFactorModel
from collections import defaultdict

# Model configuration: number of latent factors and the order of their dynamics.
factors = 5
factor_orders = 2

# Fitted models are stored in a dict keyed by (factors, factor_orders).
dfm_model = {}
model_key = (factors, factor_orders)
dfm_model[model_key] = DynamicFactorModel(
    x1.copy(),
    y1.copy(),
    factors=factors,
    factor_orders=factor_orders,
    idiosyncratic_ar1=True,
)

# Fit, then show the in-sample evaluation as the cell output.
current_dfm = dfm_model[model_key]
current_dfm.fit()
current_dfm.insample_model_evaluation()

KeyboardInterrupt: 

In [None]:
# Pull the fitted results and the underlying model for a single city.
city = 'sj'
selected_dfm = dfm_model[(factors, factor_orders)]
res_dfm = selected_dfm.get_model_results(city)
mod_dfm = selected_dfm.get_model(city)

# Display the summary of the fitted results for this city.
res_dfm.summary()

In [None]:
# Compare one observed feature with the factor model's prediction for the
# selected city.
fig, ax = plt.subplots(figsize=(16, 8))
# col = np.random.choice(x1.columns)
col = 'ndvi_ne'
endog, exog = dfm_model[(factors, factor_orders)].format_data_arimax(x1.loc[city], y1.loc[city], interpolate=False)
# res = res_dfm.extend(endog=exog)
ax.plot(exog[col], label='obs')
ax.plot(res_dfm.predict()[col], label='predicted')
# Title and label let the figure stand on its own when the notebook is skimmed.
ax.set_title("%s (%s): observed vs. predicted" % (col, city))
ax.set_ylabel(col)
ax.legend()
plt.show()

In [None]:
# Take a copy before renaming the columns: if the results object hands back
# its own frame, relabelling it in place would silently change the results
# and make this cell unsafe to re-run.
eta_filtered = res_dfm.factors['filtered'].copy()
eta_filtered.columns = ['x%i' % i for i in range(factors)]
eta_filtered_cov = res_dfm.factors['filtered_cov']

# Plot every filtered factor against the shared index.
fig, ax = plt.subplots(figsize=(8, 6))
for factor in eta_filtered.columns:
    ax.plot(eta_filtered.index, eta_filtered[factor], label=factor)
ax.set_title("Filtered latent factors")
ax.set_ylabel("factor value")
ax.legend()
plt.show()

In [None]:
# For each latent factor, report the coefficient stored under the
# 'L1.i->i' parameter name, then print the head of its variable analysis.
for i, col in enumerate(eta_filtered.columns[:factors]):
    ar_coefficient = res_dfm.params['L1.%i->%i' % (i, i)]
    print("[x%i] AR coefficient: %.4f" % (i, ar_coefficient))
    analysis = variable_analysis(eta_filtered[col], col, ylim=[1e-2, 1e3])
    print(analysis.head())