In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde

from skfda.datasets import make_gaussian_process
from skfda.misc.covariances import Exponential, Gaussian, Brownian
from skfda.misc.metrics import l2_distance, l2_norm
from skfda.preprocessing.dim_reduction.feature_extraction import FPCA
from skfda.representation.basis import BSpline

In [None]:
def get_fpca(n_components=1, style=None):
    """Create an FPCA estimator and, optionally, a basis for its components.

    Parameters
    ----------
    n_components : int, default 1
        Number of components requested from ``FPCA``. For ``style='bspline'``
        the same number is used as ``n_basis`` of the B-spline basis.
    style : {None, 'bspline'}, default None
        ``None`` builds a plain ``FPCA``; ``'bspline'`` additionally builds a
        ``BSpline`` basis and passes it to ``FPCA`` as ``components_basis``.

    Returns
    -------
    fpca : FPCA
        The (unfitted) estimator.
    basis : BSpline or None
        The basis handed to the estimator, ``None`` for ``style=None``.

    Raises
    ------
    ValueError
        If ``style`` is neither ``None`` nor ``'bspline'``.
    """
    if style is None:
        return FPCA(n_components=n_components), None

    if style == 'bspline':
        basis = BSpline(n_basis=n_components)
        return FPCA(n_components=n_components, components_basis=basis), basis

    # fail loudly instead of reaching a return statement with unbound names
    raise ValueError(f"Unknown style {style!r}; expected None or 'bspline'.")

# decide on a basis for the FPCA; this value is passed as `style` to get_fpca
# in the cells below (get_fpca handles None and 'bspline')
style = 'bspline'

Idea: create three covariance functions and sample Gaussian processes. Then do FPCA and compute Functional Graphical Lasso. Let the variables be called $v_0$, $v_1$ and $v_2$. 

In [None]:
# sizes of the simulated data set
n_ts = 100          # passed as n_features to make_gaussian_process below
n_samples = 10**3   # number of curves sampled per variable

# define three variables via their covariance functions
cov1 = Gaussian(variance=1.0, length_scale=8.0)
cov2 = Gaussian(variance=1.0, length_scale=4.0)
cov3 = Brownian(variance=0.1)

all_cov = [cov1, cov2, cov3]

# derive the number of variables from the list so the two cannot drift apart
n_var = len(all_cov)

In [None]:
# draw one heatmap per covariance function; the return value is assigned to `_`
# so that the cell does not display it
for _cov_fn in (cov1, cov2, cov3):
    _ = _cov_fn.heatmap()

In [None]:
# sample gaussian processes: one data set of n_samples curves per covariance function
# NOTE(review): every variable is drawn with the same random_state=20 -- confirm
# that sharing the seed across variables is intended.
samples = [
    make_gaussian_process(
        n_samples=n_samples,
        n_features=n_ts,
        start=0.0,
        stop=25.0,
        cov=all_cov[j],
        random_state=20,
    )
    for j in range(n_var)
]

# one label array per variable ('v0', 'v1', ...), used as plotting group below
labels = [np.full(n_samples, f'v{j}') for j in range(n_var)]


In [None]:
# plot sampled time series, one panel per variable

colors = ['darkred', 'C1', 'grey']

fig, axs = plt.subplots(n_var, 1, figsize=(10, 12))

for j, ax in zip(range(n_var), axs.ravel()):
    var_name = f'v{j}'
    samples[j].plot(
        axes=ax,
        group=labels[j],
        group_colors={var_name: colors[j]},
        alpha=0.2,
        lw=0.95,
    )
    ax.set_title(f'Sampled time series for {var_name}')

In [None]:
# do FPCA, reconstruct and plot deviation
# left column: reconstructed curves, right column: original minus reconstruction
q = 5
fig, axs = plt.subplots(n_var, 2, figsize=(20, 12))

for j in range(n_var):
    this_ds = samples[j]
    ax_recov, ax_err = axs[j]
    curve_style = dict(
        group=labels[j],
        group_colors={f'v{j}': colors[j]},
        alpha=0.3,
        lw=0.95,
    )

    # fit a fresh FPCA with q components on this variable
    fpca, _ = get_fpca(n_components=q, style=style)
    fpca.fit(this_ds)

    # project onto the components and map back to curves
    recov_ds = fpca.inverse_transform(fpca.transform(this_ds))

    recov_ds.plot(axes=ax_recov, **curve_style)
    ax_recov.set_title(f'FPCA transformed time series for v{j}')

    (this_ds - recov_ds).plot(axes=ax_err, **curve_style)
    ax_err.set_title(f'Recovery error of FPCA for v{j}')
    # share the y-range with the left panel so the error is shown on the same scale
    ax_err.set_ylim(ax_recov.get_ylim())
    
    

In [None]:
# plot error for increasing number of FPCA components

this_ds = samples[0]
all_q = range(4, 10)
all_err = list()

for n_q in all_q:
    fpca, _ = get_fpca(n_components=n_q, style=style)
    fpca.fit(this_ds)

    # compute the scores once and reuse them for the printout and the reconstruction
    scores = fpca.transform(this_ds)
    print("Data shape after FPCA transformation: ", scores.shape)

    # reconstruct ds from FPCA
    recov_ds = fpca.inverse_transform(scores)
    # per-sample l2 distance to the original, divided by the l2 norm of the original
    this_err = l2_distance(this_ds, recov_ds) / l2_norm(this_ds)

    all_err.append(np.median(this_err))

fig, ax = plt.subplots()
ax.plot(all_q, all_err, c='darkgray', lw=4, marker='p', markersize=9, markeredgecolor='k')
ax.set_xlabel('Number of FPCA components')
ax.set_ylabel('Median l2 error')
ax.grid(ls='-', lw=.5)

In [None]:
# inspect the shape of the raw data matrix of this_ds (set to samples[0] in the previous cell)
this_ds.data_matrix.shape

## Fourth variable construction

We want to create a fourth variable $v_3$ that is constructed from the first variable $v_0$ as follows: we do FPCA, then permute the coefficients, and then transform back to a time series using the original (non-permuted) basis. This creates a variable whose FPCA coefficients are correlated with those of $v_0$, but where the relation is less obvious when looking only at the time series.

In [None]:
# from now on use 8 components
# (n_comp is the n_components passed to get_fpca in the cells below)
n_comp = 8

In [None]:
v0_ds = samples[0]
fpca, _ = get_fpca(n_components=n_comp, style=style)
fpca.fit(v0_ds)

# FPCA coefficients (scores) of v0, one column per component
Z = fpca.transform(v0_ds)

# permute the coefficient columns; a fixed seed keeps the notebook reproducible
# under "Restart & Run All"
perm_seed = 20
_perm = np.random.default_rng(perm_seed).permutation(n_comp)
Z = Z[:, _perm]

# manual retransform (see: https://github.com/GAA-UAM/scikit-fda/blob/ab9da5c17522ad1b4ef7493bd908d0f1de6f2aec/skfda/preprocessing/dim_reduction/_fpca.py#L543)
# Pick the reconstruction that matches the representation of the fitted
# components instead of hard-coding one branch.
components = fpca.components_
if hasattr(components, "data_matrix"):
    # components stored as values on a grid
    W = components.data_matrix
    additional_args = {"data_matrix": np.einsum('nc,c...->n...', Z, W)}
else:
    # components stored as basis coefficients
    W = components.coefficients
    additional_args = {"coefficients": Z @ W}

# combine the permuted coefficients with the non-permuted components and add the mean back
v3_ds = fpca.mean_.copy(**additional_args, sample_names=(None,) * len(Z)) + fpca.mean_



In [None]:
# transform the constructed variable v3_ds with the FPCA object fitted on v0_ds;
# the resulting coefficients are shown as the cell output
fpca.transform(v3_ds)

In [None]:
# add new time series
# The cell is idempotent: anything left at index 3 by a previous run of this
# cell is dropped before appending, so re-running does not grow the lists or n_var.
_v3_index = 3
_lb3 = np.array(['v3'] * n_samples)
for _seq, _item in ((samples, v3_ds), (labels, _lb3), (colors, 'steelblue')):
    del _seq[_v3_index:]
    _seq.append(_item)

n_var = len(samples)

# plot again
fig, axs = plt.subplots(n_var, 1, figsize=(10, 12))
for j in range(n_var):
    ax = axs.ravel()[j]
    samples[j].plot(axes=ax, group=labels[j], group_colors={f'v{j}': colors[j]}, alpha=0.2, lw=0.95)
    ax.set_title(f'Sampled time series for v{j}')

## Functional Graphical Lasso

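Now, compute for each variable the FPCA coefficients with $n_{comp}$ components (the code below fits the FPCA on $v_0$ and reuses it to transform every variable). Then, concatenate the FPCA coefficients of all variables for each sample and compute their correlations. With this, we are ready to compute the Functional Graphical Lasso.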

If we choose $n_{comp}=8$, we expect $4\cdot8=32$ variables in total, since there are now four functional variables.

In [None]:
from gglasso.helper.basic_linalg import scale_array_by_diagonal

# fit FPCA for first dataset only; all variables are transformed with this one fit
fpca, _ = get_fpca(n_components=n_comp, style=style)
fpca.fit(samples[0])

# compute FPCA coefficients for each variable (transform only, no refit)
# NOTE: an earlier experiment permuted the coefficient columns of the second
# variable (j == 1) at this point; it is disabled.
all_traf = [fpca.transform(samples[j]) for j in range(n_var)]

# put the per-variable coefficient blocks side by side
fpca_samples = np.concatenate(all_traf, axis=1)
print("(N,p) = ", fpca_samples.shape)

# covariance of the stacked coefficients, then scaled to correlations
S = scale_array_by_diagonal(np.cov(fpca_samples, rowvar=False))

print("S has shape ", S.shape)



In [None]:
# positions of the separator lines: one after each block of n_comp columns
block_edges = [(j + 1) * n_comp for j in range(n_var)]
n_total = n_var * n_comp

# heatmap of the stacked FPCA coefficients
fig, ax = plt.subplots()
sns.heatmap(fpca_samples, ax=ax, alpha=1, vmin=-10, vmax=10, cmap='coolwarm', cbar=True)
ax.vlines(block_edges, 0, n_samples, color='k', lw=4)

# heatmap of the correlation matrix S with the block structure marked
fig, ax = plt.subplots()
sns.heatmap(S, ax=ax, cmap="coolwarm", vmax=1, vmin=-1, alpha=1.)
ax.hlines(block_edges, 0, n_total, color='k', lw=3)
ax.vlines(block_edges, 0, n_total, color='k', lw=3)


In [None]:
from gglasso.helper.utils import lambda_max_fsgl
from gglasso.solver.functional_sgl_admm import ADMM_FSGL

# problem dimensions: p blocks of M FPCA coefficients each
p, M = n_var, n_comp
pM = p * M

# regularization grid: 10 log-spaced values between 0.1 * lambda_max and
# lambda_max, reversed so that the grid starts at lambda_max
lambda_max = lambda_max_fsgl(S, M)
lambda_min = 0.1 * lambda_max

n_lambda = 10
lo_exp, hi_exp = np.log10(lambda_min), np.log10(lambda_max)
lambda_range = np.logspace(lo_exp, hi_exp, n_lambda)[::-1]

In [None]:
# display the regularization values that the solver loop below iterates over
lambda_range

In [None]:
def plot_fsgl_heatmap(sol, p, M, ax=None):
    """Draw ``sol['Theta']`` as a heatmap with block separator lines.

    Parameters
    ----------
    sol : mapping
        Must provide the matrix to plot under the key ``'Theta'``.
    p : int
        Number of separator lines drawn in each direction.
    M : int
        Spacing between separator lines (lines are placed at M, 2M, ..., pM).
    ax : matplotlib axes, optional
        Axes to draw on; a new figure is created when omitted.

    Returns
    -------
    None
    """
    if ax is None:
        _, ax = plt.subplots()

    theta = sol['Theta']
    # symmetric colour range at half of the largest absolute entry
    color_limit = 0.5 * np.abs(theta).max()
    sns.heatmap(theta, cmap="coolwarm", vmin=-color_limit, vmax=color_limit, ax=ax)

    separators = [(j + 1) * M for j in range(p)]
    extent = p * M
    ax.hlines(separators, 0, extent, color='k', lw=3)
    ax.vlines(separators, 0, extent, color='k', lw=3)

In [None]:
# Solve along the regularization grid. The first solve starts from the
# identity; each later solve starts from the previous solution's 'Omega'.
Omega_0 = np.eye(pM)
all_sol = {}

for _lam in lambda_range:
    sol, info = ADMM_FSGL(
        S, _lam, M, Omega_0,
        rho=1., max_iter=100, tol=1e-7, rtol=1e-6,
        update_rho=True, verbose=True, measure=True, latent=False, mu1=None,
    )

    Omega_0 = sol['Omega']
    # keep a (shallow) copy of the solution dict, keyed by its lambda
    all_sol[_lam] = sol.copy()

In [None]:
# one heatmap per regularization value, stacked vertically
n_lam = len(lambda_range)
fig, axs = plt.subplots(n_lam, 1, figsize=(11, 100))

for ax, _lam in zip(axs, lambda_range):
    plot_fsgl_heatmap(all_sol[_lam], p, M, ax=ax)
    ax.set_title(f"Functional Graphical Lasso for lambda={_lam}")