# Factorizing the synthetic dataset

In [None]:
import numpy as np
import pandas as pd

import networkx as nx

%matplotlib inline
import matplotlib.pyplot as plt

Let's import `TRMFRegressor`, which provides a scikit-learn-like, but still
incompatible, interface.

In [None]:
from trmf import TRMFRegressor

<br>

## Synthetic dataset

Let's fix the seed

In [None]:
# Seeded generator: the random draws made through this object are
#  reproducible across kernel restarts.
random_state = np.random.RandomState(seed=8945634)

We will play with a latent autoregression dataset that we are about to generate.

In [None]:
# Dimensions of the synthetic problem.
n_samples = 120     # length of every series
n_components = 4    # number of latent factors
n_targets = 16      # number of observed series
n_order = 8         # order p of each latent AR(p) process

<br>

### Generating stationary lag polynomials

Generate some roots within the unit circle:
* if $r \sim \mathrm{U}[0, 1]$ and $\phi \sim \mathrm{U}[0, 2\pi]$ then
the complex-valued random variable $Z = \sqrt{r} e^{i \phi}$ has a uniform
distribution within the unit disc.

In [None]:
# radii and angles of the candidate roots: one row per latent factor,
#  one column per root of that factor's lag polynomial
rad = random_state.uniform(0, 1, size=(n_components, n_order))
phi = random_state.uniform(0, 2, size=(n_components, n_order)) * np.pi

# NOTE: dividing by 4 shrinks the angles from [0, 2 pi] to [0, pi / 2], so
#  the roots end up in the first quadrant rather than over the whole disc
phi /= 4

# polar to cartesian form: z = sqrt(r) * (cos(phi) + i * sin(phi))
roots = np.sqrt(rad) * (np.cos(phi) + np.sin(phi) * 1.j)

#### Proof

Indeed, in multivariate calculus the change of variables under integration is performed thus
$$
\int_R f(x) dx
    = \int_S
        f(g(u)) \lvert J \rvert du
    \,, $$
where the Jacobian is given by $\tfrac{\partial g}{\partial u^{\mathrm{T}}}$. Therefore, for
any measurable rectangle $A\times B$ in $\mathbb{R}^2$ we have
$$
\int_0^1 \int_0^{2\pi} \tfrac1{2 \pi}
    1_{A \times B} \bigl(\sqrt{r} \cos \phi, \sqrt{r} \sin \phi\bigr) dr d\phi
    = \bigl[
        x,y = \sqrt{r} \cos \phi, \sqrt{r} \sin \phi,\, \det J = \tfrac12
    \bigr]
    % = \iint_{B[0,1]} \tfrac1{2 \pi}
    %     1_{A \times B} \bigl(x, y\bigr) 2 \tfrac12 dx dy
    = \iint_{B[0,1]} \tfrac1{\pi} 1_{A \times B} \bigl(x, y\bigr) dx dy
    \,, $$
where $B[0, 1] = \{x\in \mathbb{R}^2\colon \|x\|_2 \leq 1\}$ and the Jacobian is 
$$
\det J
    = \begin{vmatrix}
        \tfrac{\partial x}{\partial r} & \tfrac{\partial x}{\partial \phi} \\
        \tfrac{\partial y}{\partial r} & \tfrac{\partial y}{\partial \phi} \\
    \end{vmatrix}
    = \begin{vmatrix}
        \tfrac1{2\sqrt{r}} \cos \phi & - \sqrt{r} \sin \phi \\
        \tfrac1{2\sqrt{r}} \sin \phi &   \sqrt{r} \cos \phi
    \end{vmatrix}
    = \tfrac1{2\sqrt{r}} \sqrt{r} \cos^2 \phi - \tfrac1{2\sqrt{r}} (-\sqrt{r}) \sin^2 \phi
    = \tfrac12 \cos^2 \phi + \tfrac12 \sin^2 \phi
    = \tfrac12
    \,. $$
Note that the area of $B[0, 1]$ is $r^2 \pi = 1\cdot \pi$.

Next to avoid negative autocorrelation, let's confine the roots to the right-half of the unit disc.

In [None]:
# roots.real = np.abs(roots.real)

Let's make sure that the complex roots are in conjugate pairs, so that the lag polynomial has real coefficients.

* we keep at least one real root (two when the order is even) and fill the rest with conjugate pairs.

In [None]:
# number of conjugate pairs; every remaining slot is filled with a real root
n_cplx = max(0, n_order - 1) // 2
n_real = n_order - 2 * n_cplx

# leading columns supply one member of each pair, trailing columns
#  supply the real roots (their imaginary parts are discarded)
cplx_roots = roots[:, :n_cplx]
real_roots = roots[:, -n_real:].real
roots = np.concatenate((cplx_roots, cplx_roots.conj(), real_roots), axis=-1)

For the lag polynomial we have the following:
$$
    \Phi(L)
        = 1 - \sum_{k=1}^p \phi_k L^k
        = L^p \Bigl(
            L^{-p} - \sum_{k=1}^p \phi_k L^{k-p}
        \Bigr)
        = L^p \Bigl(
            z^p + \sum_{k=1}^p (-\phi_k) z^{p-k}
        \Bigr) \Big\vert_{z = L^{-1}}
        = L^p \prod_{k=1}^p (z - z_k) \Big\vert_{z = L^{-1}}
        = \prod_{k=1}^p (1 - z_k L)
    \,, $$

where $(z_k)_{k=1}^p\in \mathbb{C}$ are the roots of the $p$-th order polynomial
$q(z) = z^p - \phi_1 z^{p-1} - \cdots - \phi_p = \phi_0 z^p + \sum_{k=1}^p (-\phi_k) z^{p-k}$
with $\phi_0 = 1$.

#### Some intuition

We have the following formal series in terms of the lag operator $L$, provided $\lvert z \rvert < 1$, $z\in \mathbb{C}$:
$$
    (1 - z L)^{-1} = \sum_{k\geq 0} z^k L^k
    \,. $$
If each $z_k$ is such, then its associated order-$1$ lag polynomial $(1-z_k L)$ is invertible. Since the lag operators
commute, the whole lag polynomial $\Phi(L)$ is invertible.

Therefore, if all $z_k\in \mathbb{C}$ lie inside the unit circle, then the lag polynomial represents a stationary (causal) autoregression process of order $p$.

#### From the roots to the polynomial

The function `np.poly` computes the coefficients of a polynomial $q(z)$ which has roots $(z_k)_{k=1}^p\in \mathbb{C}$,
where
$$
q(z)
    = \alpha_0 z^p + \sum_{k=1}^p \alpha_k z^{p - k}
    = \prod_{k=1}^p (z - z_k)
    \,, $$
with $\alpha_0=1$. Therefore to get the coefficients of the corresponding lag
polynomial we just need to map $(z_k)_{k=1}^p \mapsto (\alpha_k)_{k=0}^p$ and then
flip the sign of each one $\phi_k = - \alpha_k$ for $k=1,\,\ldots,\,p$.

In [None]:
# `np.poly` gives the monic coefficients (alpha_0 = 1, alpha_1, ..., alpha_p)
#  for each row of roots; drop alpha_0 and flip the sign of the rest
monic_coef = np.stack([np.poly(row_of_roots) for row_of_roots in roots], axis=0)
real_phi = -monic_coef[:, 1:]

Let's make sure the roots are within the unit circle.

In [None]:
# One colour of the Matplotlib property cycle per latent factor, repeated for
#  each of its roots. The sizes are read off `roots` itself, so the cell
#  keeps working when the number of factors is not four.
colors = np.repeat([f"C{k % 10}" for k in range(roots.shape[0])],
                   roots.shape[1])

fig = plt.figure(figsize=(7, 7))

# polar axes: the angle is the argument of a root, the radius is its modulus
ax = fig.add_subplot(111, projection="polar")
ax.scatter(np.angle(roots), np.abs(roots), c=colors, s=50)
# radial limit just above 1, so that the unit circle is visible
ax.set_rlim(0, 1.1)

plt.show()

Now $y_{t-p:t}$ returns $y_{t-p}, y_{t-p+1},\,\ldots,\,y_{t-1}$, which means that
to get the next $y_t$ we must multiply each by $\phi_p,\,\ldots,\,\phi_1$ respectively,
i.e. $\phi$ in reverse order.

In [None]:
# reverse the lag axis so that column 0 holds phi_p and the last column phi_1
real_ar_coef = real_phi[:, ::-1].copy()

# heat-map of the coefficients: one row per factor, one column per lag
plt.imshow(real_ar_coef, cmap=plt.cm.hot)

Simulate the autoregressive process.

```python
# this is suitable for filtering, aka one-step-ahead in-sample prediction
np.correlate(noise_copy[:-1, 0], real_ar_coef[0], mode="valid")
```

In [None]:
# innovations of the latent processes; the recursion below turns them,
#  in place, into autoregressive series (the first `n_order` rows are
#  left as pure noise)
noise = random_state.normal(scale=0.1, size=(n_samples, n_components))
for t in range(n_order, n_samples):
    # the columns in `real_ar_coef` are ordered from $p$, the least
    #  recent lag, up to $1$, the most recent lag, which matches the row
    #  order of the window `noise[t-n_order:t]`
    noise[t] += np.einsum("il,li->i", real_ar_coef, noise[t-n_order:t])
# noise-free variant of the update, kept disabled:
#     noise[t] = np.einsum("il,li->i", real_ar_coef, noise[t-n_order:t])

# keep a copy: the name `noise` is reused for the observation noise later on
real_factors = noise.copy()

Generate factor loadings

In [None]:
# non-negative loadings: draws from U[-1, 2] with the negative part set to zero
real_loadings = random_state.uniform(-1, 2, size=(n_components, n_targets))
real_loadings = real_loadings.clip(min=0)

# observation noise of the targets
noise = random_state.normal(scale=0.1, size=(n_samples, n_targets))

# level of each target; with `scale=0` every entry equals 50
mean = random_state.normal(50, scale=0, size=(1, n_targets))

# observed series: level + factors times loadings + noise
targets = mean + real_factors @ real_loadings + noise


<br>

Plot the factors

In [None]:
# three panels per row, with enough rows to hold every factor
n_cols = 3
n_rows = -(-n_components // n_cols)  # ceiling division
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, sharex=True, sharey=False,
                         figsize=(12, 1.5 * n_rows))

# one factor per panel; panels beyond the last factor stay empty
for j, ax in enumerate(axes.ravel()[:n_components]):
    ax.plot(real_factors[:, j])

plt.show()
plt.close()

Plot the observed series

In [None]:
# three panels per row, with enough rows to hold every target
n_cols = 3
n_rows = -(-n_targets // n_cols)  # ceiling division
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, sharex=True, sharey=True,
                         figsize=(12, 1.5 * n_rows))

# one observed series per panel; panels beyond the last target stay empty
for j, ax in enumerate(axes.ravel()[:n_targets]):
    ax.plot(targets[:, j], lw=2)
    ax.set_title(f"""target {j}""")

plt.tight_layout()
plt.show()

In [None]:
# heat-map of the true loadings: rows are factors, columns are targets
plt.imshow(real_loadings, cmap=plt.cm.hot)

In [None]:
# l1 norm of every column of the loadings, i.e. one value per target
plt.plot(np.linalg.norm(real_loadings, ord=1, axis=0))

<br>

The temporal regularized matrix factorization can model only
stationary latent time series. Thus integrated processes
break the model.

In [None]:
# targets = targets.cumsum(axis=0)

### Factorize the matrix with TRMF

Split the synthetic data into train and test periods

In [None]:
from sklearn.model_selection import train_test_split

# `shuffle=False` keeps the rows in their original (time) order, so the
#  trailing quarter of the sample becomes the test period
train_targets, test_targets = train_test_split(
    targets, test_size=.25, shuffle=False)

Never forget to centre and scale the train dataset!

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# standardizer that both centres (`with_mean`) and scales (`with_std`) every
#  column; it is fitted on the train targets in the cells below
scl = StandardScaler(with_mean=True, with_std=True)

Let's fit a model with more factors but the same AR$(p)$ latent process.

In [None]:
# sizes handed to the model in the "RUN!" cell
n_order = 8
n_components = 4

Original setup: no intercept and no regressors

In [None]:
# Baseline configuration, kept for reference: the "Better results" cell
#  right after this one overrides every value set here.
C_Z, C_F, C_phi = 5e-2, 5e-1, 1e-6
eta_Z, eta_F, adj = 0.95, 0.0, None
# no regressors and, as the description of this setup says, no intercept
#  (it used to be `True`, which contradicted that description)
C_B, regressors, fit_intercept = 0., None, False

n_max_mf_iter = 2

Better results: fit intercept, place more emphasis on latent autoregression,
regularize the autoregression coefficients more.

In [None]:
# settings forwarded to `TRMFRegressor` in the "RUN!" cell
C_Z = 5e-2
C_F = 5e-1
C_phi = 1e-2

eta_Z = 0.99
eta_F = 0.0
adj = None

C_B = 0.
regressors = None
fit_intercept = True

n_max_mf_iter = 5  # default

RUN!

In [None]:
# keyword settings collected from the configuration cell above
trmf_settings = dict(
    C_Z=C_Z, C_F=C_F, C_phi=C_phi,
    eta_Z=eta_Z, eta_F=eta_F, adj=adj, C_B=C_B,
    fit_regression=False, fit_intercept=fit_intercept,
    nonnegative_factors=True, n_max_mf_iter=n_max_mf_iter,
)
trmf = TRMFRegressor(n_components, n_order, **trmf_settings)

# standardize the train period, then fit on the standardized series
YY = scl.fit_transform(train_targets)
trmf.fit(YY)

In [None]:
# explicit parameter setting; it replaces the model fitted in the previous cell
par = dict(
    n_order=7,
    n_components=12,
    eta_Z=0.95,
    C_phi=0.1,
    C_Z=10.0,
    C_F=0.01,
)

# options that stay fixed regardless of `par`
fixed_settings = dict(
    eta_F=0., adj=None, C_B=0., fit_regression=False,
    fit_intercept=True, nonnegative_factors=True, n_max_mf_iter=5,
)
trmf = TRMFRegressor(**par, **fixed_settings)

# standardize the train period, then fit on the standardized series
YY = scl.fit_transform(train_targets)
trmf.fit(YY)

The estimated coefficients

In [None]:
# heat-map of the fitted factor loadings
plt.imshow(trmf.loadings_, cmap=plt.cm.hot)

In [None]:
# heat-map of the fitted autoregression coefficients, one row per factor
plt.imshow(trmf.ar_coef_, cmap=plt.cm.hot)

In [None]:
# fitted intercept; presumably of shape (1, n_targets), so the transpose
#  draws it as a single line over the targets -- TODO confirm
plt.plot(trmf.intercept_.T)

In [None]:
# heat-map of the fitted regression coefficients
# NOTE(review): the model was built with `fit_regression=False`; confirm
#  what `coef_` holds in that case
plt.imshow(trmf.coef_, cmap=plt.cm.hot)

Let's have a look at the actual values

In [None]:
# raw values of the fitted loadings
trmf.loadings_

In [None]:
# raw values of the fitted autoregression coefficients
trmf.ar_coef_

In [None]:
# raw values of the fitted intercept
trmf.intercept_

In [None]:
# raw values of the fitted regression coefficients
trmf.coef_

<br>

### Autoregression diagnostics

Set the forecast horizon

In [None]:
n_ahead = len(test_targets)  # forecast over the whole test period ...
n_horizon = 24               # ... and this many extra steps beyond it

Let's inspect the roots of the lag polynomial

In [None]:
# Assuming every row of `ar_coef_` runs from lag p down to lag 1 (which is
#  how the in-sample filter cell below uses it), `np.r_[-coef_, 1]` lists
#  the coefficients of the lag polynomial 1 - phi_1 L - ... - phi_p L^p from
#  the highest power down to the constant, as `np.roots` expects. Its roots
#  are the reciprocals 1 / z_k, hence the division.
# NOTE(review): `np.roots` returns fewer roots when leading coefficients are
#  zero, in which case `np.stack` would fail on ragged rows -- confirm that
#  the fitted phi_p is never exactly zero.
# NOTE: this rebinds `roots`, which held the true roots of the simulation.
roots = 1 / np.stack([np.roots(np.r_[-coef_, 1]) for coef_ in trmf.ar_coef_], axis=0)

The reciprocals of the roots should lie within the $\mathbb{C}$ unit circle

In [None]:
# one plasma colour per factor, repeated for each of its `n_order` roots
shades = np.linspace(0, 1, num=trmf.n_components)
base_colors = plt.cm.plasma(shades)
colors = base_colors.repeat(trmf.n_order, axis=0)

fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, projection="polar")

# polar scatter: argument against modulus of every root
argument, modulus = np.angle(roots), np.abs(roots)
ax.scatter(argument, modulus, c=colors, s=50)
ax.set_rlim(0, 1.1)

# fig.savefig(f"./factor_ar_roots.pdf")
plt.show()
plt.close()

Compute the in-sample $AR(p)$ filter: the would-be predicted value of $y_t$
$$ \hat{y}_{t\mid t-1}
    = \sum_{k=1}^p \phi_k y_{t-k}
    \,. $$

In [None]:
# one-step-ahead predictions, factor by factor: each output value is the
#  window of `n_order` preceding factor values weighted by that factor's
#  coefficients (which are already stored in the order the window needs)
filtered_columns = []
for factor_series, factor_coef in zip(trmf.factors_.T, trmf.ar_coef_):
    # the last value is dropped: it could only predict a step past the sample
    filtered_columns.append(
        np.correlate(factor_series[:-1], factor_coef, mode="valid"))
in_sample_filter = np.stack(filtered_columns, axis=1)

# the first `n_order` steps have no complete window, so the fitted
#  factors themselves stand in for their predictions
in_smaple_forecast = np.concatenate(
    (trmf.factors_[:trmf.n_order], in_sample_filter))

Get the in-sample $R^2$ score

In [None]:
from sklearn.metrics import r2_score

# skip the first `n_order` rows: there the "forecast" is a verbatim copy
#  of the factors and would inflate the score
burn_in = trmf.n_order
r2_factor_scores = r2_score(trmf.factors_[burn_in:],
                            in_smaple_forecast[burn_in:],
                            multioutput='raw_values')

plt.title("$R^2$ score of the autoregression")
plt.plot(r2_factor_scores)

# plt.savefig(f"./factor_insample_r2.pdf")
plt.show()
plt.close()

Sort the factors by their in-sample $R^2$ score from best to worst.

In [None]:
# ascending argsort, reversed: the best-scoring factor comes first
factor_order = r2_factor_scores.argsort()[::-1]

Explicitly compute the factor forecasts

In [None]:
# forecast the latent factors over the test period plus the extra horizon
dynamic_forecast = trmf.forecast_factors(n_ahead + n_horizon)

Paste them with the ones inferred from the train.

In [None]:
# fitted factors followed, along the time axis, by their dynamic forecast
factor_pasted = np.concatenate([trmf.factors_, dynamic_forecast], axis=0)

Paste and plot them together

In [None]:
# in-sample one-step-ahead forecasts followed by the dynamic forecast
factor_forecast = np.concatenate([in_smaple_forecast, dynamic_forecast], axis=0)

Make a quilt-plot of the recovered factors, their in-sample forecasts and dynamic out-of-sample forecasts

In [None]:
from math import sqrt, floor, ceil

n_plots, aspect = len(factor_order), (4, 3)

# Choose a grid with roughly the `aspect` ratio that always has at least
#  `n_plots` panels. Rounding the two sides independently could produce
#  too small a grid (e.g. 2 x 3 for 7 plots), and the `zip` below would
#  then silently drop the worst-scoring factors.
n_cols = max(1, ceil(sqrt(aspect[0] * float(n_plots) / aspect[1])))
n_rows = max(1, ceil(n_plots / n_cols))

# width and height of a single panel, in inches
coef_w, coef_h = 3, 2

figsize = n_cols * coef_w, n_rows * coef_h

fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False,
                         sharex=True, sharey=True,
                         figsize=figsize, facecolor="white", dpi=320)

# length of the train period: the forecasts past it are the dynamic ones
n_train = len(trmf.factors_)

# plot the factors, best in-sample score first
for j, ax in zip(factor_order, axes.flat):
    ax.set_title(f"fc. {j} ({r2_factor_scores[j]:1.2f})")

    # fitted factor, its in-sample forecast, and the same forecast
    #  extended by the dynamic out-of-sample part
    l1, = ax.plot(trmf.factors_[:, j], lw=2)
    l2, = ax.plot(factor_forecast[:n_train, j],
                  zorder=2, alpha=0.75, lw=2)
    l3, = ax.plot(factor_forecast[:, j],
                  zorder=1, alpha=0.5, lw=1)
# end for

# cross out the panels that are left without a factor
for unused_ax in axes.flat[n_plots:]:
    y_min, y_max = unused_ax.get_ylim()
    unused_ax.plot([0, len(factor_forecast)], [y_min, y_max], c="k", lw=2, alpha=.25)
    unused_ax.plot([0, len(factor_forecast)], [y_max, y_min], c="k", lw=2, alpha=.25)
# end for

if n_plots > 0:
    # attach the legend to the last panel that actually shows a factor,
    #  which is the one the handles `l1` and `l2` belong to
    last_used_ax = axes.flat[n_plots - 1]
    last_used_ax.legend(*zip((l1, "estimate"), (l2, "forecast")), loc="best")

fig.tight_layout()
# fig.savefig(f"./factor_forcast_best_{ff}.pdf")
plt.show()
plt.close()

<br>

### Analysis and in-sample performance

Compute train estimates

In [None]:
# reconstruction of the standardized train targets: factors times loadings,
#  shifted by the intercept
estimated = np.dot(trmf.factors_, trmf.loadings_)
estimated += trmf.intercept_

# `regressors` is `None` in the configuration cells, so this branch is idle
if regressors is not None:
    # regressors = np.empty((len(trmf.factors_), 0))
    # add the contribution of the exogenous regressors
    estimated += np.dot(regressors, trmf.coef_)

Determine which regressors to use

In [None]:
# regressors for the forecast period; none are used unless `regressors` is set
predicted_features = None

if regressors is not None:
    # test-period features padded with zero rows for the extra horizon
    # NOTE(review): `test_features` is not defined in any visible cell, so
    #  this branch would raise a NameError if `regressors` were set
    predicted_features = np.concatenate([
        test_features, np.zeros((n_horizon, test_features.shape[1])),
    ], axis=0)

Compute the predictions:
$$ 
    \hat{Y}_{t+h\mid t}
        = \hat{X}_{t+h\mid t} F
    \,,\quad
    \hat{X}_{t+h\mid t}
        = \sum_{k=1}^p \mathop{\text{diag}}\bigl(\hat{\theta}_{\cdot k}\bigr) \hat{X}_{t+h-k\mid t}
    \,. $$

In [None]:
# dynamic forecast over the test period plus the extra horizon
dynamic_prediction = trmf.predict(X=predicted_features,
                                  n_ahead=n_ahead + n_horizon)

# in-sample estimates followed by the dynamic forecast
predicted = np.concatenate((estimated, dynamic_prediction), axis=0)

# undo the standardization to get back to the scale of the targets
predicted_target = scl.inverse_transform(predicted)

Compute the $R^2$ scores on the train for the targets

In [None]:
# The leading rows of `predicted_target` are the in-sample estimates, one
#  per train observation. Slicing by the train length, rather than with a
#  negative stop, stays correct even when `n_ahead + n_horizon` is zero
#  (a stop of `-0` would give an empty array).
train_predicted_target = predicted_target[:len(train_targets)]
r2_target_score = r2_score(train_targets, train_predicted_target,
                           multioutput="raw_values")

# plt.plot(np.abs(fitted - YY).std(axis=0))
plt.title("$R^2$ score of the reconstruction")
plt.plot(r2_target_score)

# plt.savefig(f"./target_insample_r2.pdf")
plt.show()
plt.close()

Metrics

In [None]:
from sklearn.metrics import mean_squared_error

# reconstruction error of the model on the train period
trmf_mse = mean_squared_error(train_targets, train_predicted_target)

# naive baseline: every observation is predicted by the one right before it
current_values, previous_values = train_targets[1:], train_targets[:-1]
lastknown_mse = mean_squared_error(current_values, previous_values)

print(f"""train >>>\nTRMF: {trmf_mse}\nRunning Last: {lastknown_mse}""")

Compute the $R^2$ scores on the test

In [None]:
# rows of `predicted_target` that cover the test period: they sit between
#  the train estimates and the trailing `n_horizon` extra steps; the stop
#  has to be open-ended when there are no extra steps
if n_horizon > 0:
    test_rows = slice(-n_ahead - n_horizon, -n_horizon)
else:
    test_rows = slice(-n_ahead, None)
test_predicted_target = predicted_target[test_rows]

r2_target_test_score = r2_score(test_targets, test_predicted_target,
                                multioutput="raw_values")

plt.title("$R^2$ score the holdout")
plt.plot(r2_target_test_score)

# plt.savefig(f"./target_test_r2.pdf")
plt.show()
plt.close()

In [None]:
# dynamic forecast of the model against the test period
trmf_mse = mean_squared_error(test_targets, test_predicted_target)

# baseline 1: the last train observation repeated over the whole test period
last_train_repeated = train_targets[[-1] * len(test_targets)]
last_mse = mean_squared_error(test_targets, last_train_repeated)

# baseline 2: every test observation predicted by the one right before it
lastknown_mse = mean_squared_error(test_targets[1:], test_targets[:-1])


report = (f"""test >>>\nTRMF: {trmf_mse}\n"""
          f"""Last train: {last_mse}\n"""
          f"""Running Last: {lastknown_mse}""")
print(report)

Sort the targets by their $R^2$ score on the test period, from best to worst

In [None]:
# ascending argsort, reversed: the target with the best test score comes first
target_order = r2_target_test_score.argsort()[::-1]

Tile them on one canvas.

In [None]:
# four panels per row, with enough rows to hold every target
n_cols = 4
n_rows = -(-n_targets // n_cols)  # ceiling division
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, sharex=True, sharey=True,
                         figsize=(12, 1.5 * n_rows))

# observed series with the prediction on top; the train period is shaded
for j, ax in zip(target_order, axes.flat):
    ax.plot(targets[:, j], lw=2)
    ax.plot(predicted_target[:, j], zorder=2)
    ax.axvspan(0, len(train_targets) - 1, color="k", zorder=-1, alpha=0.05)
    ax.set_title(f"""target {j} ({r2_target_score[j]:4.2f}, {r2_target_test_score[j]:4.2f})""")

# cross out the panels that are left without a target
for ax in axes.flat[n_targets:]:
    y_min, y_max = ax.get_ylim()
    x_min, x_max = ax.get_xlim()
    right_edge = len(predicted_target)
    ax.plot([0, right_edge], [y_min, y_max], c="k", lw=2, alpha=.25)
    ax.plot([0, right_edge], [y_max, y_min], c="k", lw=2, alpha=.25)
#     ax.set_frame_on(False) ; ax.set_xticks([]) ; ax.set_yticks([])

plt.tight_layout()
plt.show()

In fact the model will be reestimated upon arrival of new data, so
this validation strategy, where we compare dynamic forecasts with
the actual data is incompatible with the usage scenario.

<br>

In [None]:
# deliberate stop: the failing assertion halts "Run All" before the
#  grid-search section below
assert False, """STOP!"""

## Grid search over the parameters

In [None]:
from sklearn.model_selection import ParameterGrid

Define a grid for studying the effects of the parameters on the test multi-step-ahead forecast accuracy.

In [None]:
# 16 x 16 x 4 x 4 x 4 x 10 = 163840 settings in total
grid = ParameterGrid({
    "n_components": np.arange(1, 17),
    "n_order": np.arange(1, 17),
    "C_Z": np.logspace(-2, 1, num=4),
    "C_F": np.logspace(-2, 1, num=4),
    "C_phi": np.logspace(-2, 1, num=4),
    "eta_Z": np.linspace(0.05, 0.95, num=10),
})

Create the base instance

In [None]:
# template estimator: `helper` clones it and overrides `n_components`,
#  `n_order` and the other grid parameters through `set_params`
base = TRMFRegressor(n_components=1, n_order=0, fit_regression=False,
                     fit_intercept=True, nonnegative_factors=True, n_max_mf_iter=5)

... and a data transformer.

In [None]:
# scaler that is fitted on the train targets right before the search and
#  that `helper` uses to map the forecasts back to the original scale
transformer = StandardScaler(with_mean=True, with_std=True)

A helper fit function.

In [None]:
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.metrics import mean_squared_error


def helper(par, train, test, base=base,
           transformer=transformer):
    """Fit a fresh model with the settings `par` and score its forecast.

    Parameters
    ----------
    par : dict
        Settings applied to a clone of `base` through `set_params`.
    train : array-like
        Data the clone is fitted on. The forecast is mapped back with
        `transformer.inverse_transform`, so `train` is expected to have
        been passed through `transformer` by the caller.
    test : array-like
        Hold-out observations the forecast is compared against; its
        length sets the number of steps forecast ahead.
    base : estimator
        Template model; it is cloned and never fitted itself.
    transformer : transformer
        Already fitted transformer used to undo the scaling of the forecast.

    Returns
    -------
    tuple
        The pair `(par, mse)`, where `mse` is the mean squared error between
        `test` and the back-transformed forecast.
    """
    # a private copy of the template with this run's settings, then the fit
    model = clone(base).set_params(**par)
    model = model.fit(train)

    # forecast as many steps as there are test rows, on the original scale
    forecast = model.predict(n_ahead=len(test))
    forecast = transformer.inverse_transform(forecast)

    return par, mean_squared_error(test, forecast)

Run the grid search in parallel on all virtual cores.

In [None]:
import pickle
import gzip, time

# The search is disabled on purpose: flip the condition to re-run it
#  (the recorded run took about 560 minutes).
if False:
    # time-stamped archive name, so that a new run never overwrites an old one
    stamp = time.strftime("%Y%m%d%H%M%S")
    filename = f"./synth_results_{stamp}.gz"
    print(f"results to be saved to {filename}")

    # standardize the train period once, up front; `helper` relies on the
    #  fitted `transformer` to undo the scaling of its forecasts
    train_scaled = transformer.fit_transform(train_targets)

    # one job per grid setting, spread over all available cores
    pool = Parallel(n_jobs=-1, verbose=1)
    jobs = (delayed(helper)(par, train_scaled, test_targets) for par in grid)
    results = pool(jobs)

    # store the list of (parameters, score) pairs as a gzipped pickle
    with gzip.open(filename, "wb", compresslevel=6) as fout:
        pickle.dump(results, fout)
# end if

`[Parallel(n_jobs=-1)]: Done 163840 out of 163840 | elapsed: 559.7min finished`

Load the experiment results

In [None]:
import os

# Load the cached experiment unless the grid search has just been run in
#  this session, in which case `results` already exists.
if "results" not in globals():
    filename = "./synth_results_20180906004401.gz"
    if not os.path.exists(filename):
        # fail with a message that says what is missing and how to get it
        raise FileNotFoundError(
            f"Cached grid-search results not found at {filename!r}. "
            "Enable the grid-search cell above to regenerate them "
            "(the recorded run over 163840 settings took 559.7 minutes).")

    # NOTE: unpickling can execute arbitrary code; only load archives that
    #  were produced by this notebook
    with gzip.open(filename, "rb") as fin:
        results = pickle.load(fin)

Reshape into a pandas series

In [None]:
import pandas as pd

# keys = set(k for g in grid.param_grid for k in g.keys())
# fixed order of the hyperparameters: it defines the levels of the index
keys = ['n_order', 'n_components', 'eta_Z', 'C_phi', 'C_Z', 'C_F']

# map every setting, written as a tuple of values in `keys` order, to its score
# NOTE(review): `helper` returns the mean squared error, so the name "rmse"
#  used from here on looks like a misnomer -- confirm
data = {tuple(par[k] for k in keys): rmse for par, rmse in results}

# series over a sorted multi-index whose levels are named after `keys`
sr = pd.Series(data, name="rmse")
sr = sr.sort_index().rename_axis(keys)

Gather the results into a data cube

In [None]:
# grid values of each hyperparameter, in the order of `keys`
stepping = [grid.param_grid[0][k] for k in keys]

# one cube axis per hyperparameter; this relies on `sr` being sorted by
#  its index and on the grid being complete
cube = sr.values.reshape([len(axis_values) for axis_values in stepping])

Get the best parameters

In [None]:
# position of the smallest value in the flattened cube ...
flat_index = cube.argmin()

# ... converted into one index per cube axis
index = np.unravel_index(flat_index, cube.shape)

# look up the grid value that each of those indices stands for
best_ = dict(
    (name, grid.param_grid[0][name][position])
    for name, position in zip(keys, index)
)

Display

In [None]:
# the best setting together with the score stored in the cube for it
best_, cube[index]

Let's inspect the tightness of the parameters around the best:
take all settings whose rmse falls within the lowest $0.1\%$
quantile of the scores.

In [None]:
# score below which the best 0.1% of the settings lie
threshold = np.quantile(cube, 0.001)

# multi-dimensional positions of the settings at or below the threshold
selected = np.flatnonzero(cube <= threshold)
indices = np.unravel_index(selected, cube.shape)

# one column per hyperparameter, holding the grid values of the selected
#  settings, plus their score as the last column
columns = {k: s[ii] for k, s, ii in zip(keys, stepping, indices)}
columns["rmse"] = cube[indices]

df_sens = pd.DataFrame(columns).sort_values("rmse")

Inspect top 15

In [None]:
# the frame is sorted by score in ascending order, so these are the 15 best settings
df_sens.head(n=15)

The median among the lower $0.1\%$ quantile.

In [None]:
# column-wise median over the selected settings
df_sens.median()

Inspect the hyperparameter modes

In [None]:
# histogram of every column: the three `C_*` columns are put on a log10
#  scale first, the remaining columns are passed through unchanged
df_sens.apply({
    "C_F": np.log10, "C_Z": np.log10, "C_phi": np.log10,
    "n_order": lambda x: x, "n_components": lambda x: x,
    "eta_Z": lambda x: x, "rmse": lambda x: x
}).hist()

Choose the x-y-z axes along which to plot the slices.

In [None]:
# !conda install pyqt -n py37 -y

In [None]:
%matplotlib qt5

In [None]:
# (parameter name, axis scale) for the x, y and z axes, in that order
ax = (
    ('C_F', 'log'),
    ('C_Z', 'log'),
    ('n_order', 'lin'),
)
# ax = ('C_F', 'log'), ('C_Z', 'log'), ('eta_Z', 'lin')

# ax = ('n_order', 'lin'), ('n_components', 'lin'), ('C_Z', 'log')

# ax = ('C_F', 'log'), ('C_Z', 'log'), ('C_phi', 'log')

In [None]:
# (parameter name, axis scale) for the x, y and z axes, in that order;
#  this overrides the selection made in the previous cell
ax = (
    ('n_order', 'lin'),
    ('n_components', 'lin'),
    ('eta_Z', 'lin'),
)
# ax = ('n_order', 'lin'), ('n_components', 'lin'), ('C_phi', 'log')
# ax = ('n_order', 'lin'), ('n_components', 'lin'), ('C_F', 'log')

Get the axis indices and the meshgrid to plot on

In [None]:
# split the selection into parameter names and axis scales
labels, scales = zip(*ax)
lx, ly, lz = labels
sx, sy, sz = scales

# position of each selected parameter among the cube axes
axes = [keys.index(label) for label in labels]

# grid of (x, y) values on which every z-layer is drawn
xy = np.meshgrid(stepping[axes[0]], stepping[axes[1]])

Reduce over the unselected dimensions and reorder them in `x-y-z` order.

In [None]:
# collapse every cube axis that was not selected by taking the minimum over it
reduced = cube.min(axis=tuple(np.delete(np.r_[:cube.ndim], axes)))

# shuffle the dimensions: the axes of `reduced` are in natural order
#  0, 1, 2, i.e. sorted by their position in the original cube. The inner
#  `argsort` maps the original hi-dim axes to the `reduced` axes. The outer
#  `argsort` ranks each entry of `axes`, which tells `transpose` how to
#  re-shuffle the axes into x-y-z order.
reduced = reduced.transpose(np.argsort(np.argsort(axes)))

log-normalize the data cube

In [None]:
# log-scale the scores, then rescale them linearly onto [0, 1]
log_scores = np.log2(reduced)
shifted = log_scores - log_scores.min(keepdims=True)
values = shifted / shifted.max(keepdims=True)

Plot the effects on rmse of the different parameters. Make an interactive 3d-plot

In [None]:
from mpl_toolkits.mplot3d import Axes3D


fig = plt.figure(figsize=(8, 8))
# The 3d axes get a name of their own on purpose: `ax` holds the axis
#  selection, and overwriting it here would break re-running the cell
#  that unpacks that selection.
ax3d = fig.add_subplot(111, projection='3d', proj_type='ortho',
                       xlabel=f"{sx}({lx})", ylabel=f"{sy}({ly})",
                       zlabel=f"{sz}({lz})")

# coordinates of the layers, moved to a log10 scale where requested
(xx, yy), zz = xy, stepping[axes[-1]]
xx = np.log10(xx) if sx == "log" else xx
yy = np.log10(yy) if sy == "log" else yy
zz = np.log10(zz) if sz == "log" else zz

ax3d.view_init(30, 225)
ax3d.set_zlim(zz.min(), zz.max())
ax3d.set_title(f"{lx}-{ly}-{lz}")

# one flat, semi-transparent layer per z value, coloured by the normalized
#  score; `values` is indexed (x, y, z) while the meshgrid is laid out
#  (y, x), hence the transpose
for i, zk in enumerate(zz):
    layer = np.full_like(xy[0], zk, dtype=float)
    ax3d.plot_surface(xx, yy, layer, alpha=0.5, lw=0, shade=False, rstride=1,
                      cstride=1, facecolors=plt.cm.CMRmap(values[..., i].T))

# end for
# plt.tight_layout()
plt.show()

<br>

In [None]:
# deliberate stop: the failing assertion halts "Run All" before the
#  extra cells below
assert False, """STOP!"""

#### Extra

Generate some basic features

In [None]:
# the first two targets delayed by two steps; the two leading rows stay zero
features = np.zeros_like(targets[:, :2])
features[2:] = targets[:-2, :2]

In [None]:
# random directed graph with one node per target and edge probability 0.2
# NOTE(review): no seed is passed, so the graph differs between runs
G = nx.erdos_renyi_graph(n_targets, 0.2, directed=True)

In [None]:
# adjacency matrix of the random graph ...
adj = nx.adjacency_matrix(G)
# ... which is discarded right away: `adj` ends up being `None`
adj = None

Plot the dynamics of the latent factors

In [None]:
# three panels per row, with enough rows to hold every fitted factor
n_cols = 3
n_rows = (trmf.n_components + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 1.5*n_rows),
                         sharex=True, sharey=False)

# Segment ends expressed as lengths instead of negative stops: a stop of
#  `-n_horizon` would give an empty slice when `n_horizon` is zero.
n_train = len(trmf.factors_)
n_train_and_test = n_train + n_ahead

for j, ax in zip(range(trmf.n_components), axes.flat):
    # train period on top, then train + test, then everything
    ax.plot(factor_pasted[:n_train, j], lw=2)
    ax.plot(factor_pasted[:n_train_and_test, j], zorder=-1)
    ax.plot(factor_pasted[:, j], zorder=-2, alpha=0.5)

# cross out the panels that are left without a factor
for ax in axes.flat[trmf.n_components:]:
    y_min, y_max = ax.get_ylim()
    ax.plot([0, len(factor_pasted)], [y_min, y_max], c="k", lw=2, alpha=.25)
    ax.plot([0, len(factor_pasted)], [y_max, y_min], c="k", lw=2, alpha=.25)

plt.show()
plt.close()

<br>