In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import gaussian_kde

from skfda.datasets import make_gaussian_process
from skfda.misc.covariances import *
from skfda.misc.metrics import l2_distance, l2_norm
from skfda.preprocessing.dim_reduction.feature_extraction import FPCA
from skfda.representation.basis import BSpline, Fourier

In [None]:
# time interval on which all processes are sampled
time_range = (0,25)

def get_basis(style, n_components):
    """Return the basis used to represent the sampled time series.

    Parameters
    ----------
    style : str
        Either ``'bspline'`` (B-spline basis of order 4) or ``'fourier'``
        (Fourier basis with ``domain_range=time_range``).
    n_components : int
        Number of basis functions.

    Returns
    -------
    BSpline or Fourier
        The requested basis object.

    Raises
    ------
    ValueError
        If ``style`` is not one of the supported names.
    """
    if style == 'bspline':
        return BSpline(n_basis=n_components, order=4)
    if style == 'fourier':
        return Fourier(domain_range=time_range, n_basis=n_components, period=None)
    # an unsupported style used to fall through to `return basis` with
    # `basis` unbound; fail with an explicit message instead
    raise ValueError(f"Unknown basis style {style!r}; expected 'bspline' or 'fourier'.")

# decide on a basis
style = 'bspline'

Idea: create three covariance functions and sample Gaussian processes. Then transform into a chosen basis representation and compute Functional Graphical Lasso. Let the variables be called $v_0$, $v_1$ and $v_2$. 

In [None]:
# problem dimensions
n_var = 3            # number of functional variables
n_ts = 100           # passed as n_features when sampling
n_samples = 1_000    # sampled curves per variable

# one covariance function per variable
cov1 = Gaussian(length_scale=8.0, variance=0.5)
cov2 = Gaussian(length_scale=4.0, variance=0.5)
cov3 = Brownian(variance=0.2)

all_cov = [cov1, cov2, cov3]

In [None]:
# show each covariance function as a heatmap
for _cov in (cov1, cov2, cov3):
    _ = _cov.heatmap()

In [None]:
# sample gaussian processes
samples = list()
labels = list()

# One shared, seeded random stream for all variables. Passing the same integer
# seed to every call would restart the identical pseudo-random stream for each
# process, which couples the sampled variables with each other. The first
# variable still receives the same draws as with `random_state=20`.
_sample_rng = np.random.RandomState(20)

# iterate over the covariance functions themselves, so that this cell does not
# depend on the current value of n_var
for j, _cov in enumerate(all_cov):

    _ds = make_gaussian_process(
            n_samples=n_samples,
            n_features=n_ts,
            start=time_range[0],
            stop=time_range[1],
            cov=_cov,
            random_state=_sample_rng
            )

    # one label per sampled curve, used for grouping/colouring in the plots
    _lb = np.array([f'v{j}'] * n_samples)

    samples.append(_ds)
    labels.append(_lb)

In [None]:
# draw the sampled curves, one panel per variable
colors = ['darkred', 'C1', 'grey']

fig, axs = plt.subplots(n_var, 1, figsize=(10, 12))

for j, ax in enumerate(axs.ravel()):
    _palette = {f'v{j}': colors[j]}
    samples[j].plot(axes=ax, group=labels[j], group_colors=_palette, alpha=0.2, lw=0.95)
    ax.set_title(f'Sampled time series for v{j}')

In [None]:
# expand every variable in the chosen basis, map it back to the grid and
# plot both the reconstruction and its deviation from the sampled curves
q = 7
fig, axs = plt.subplots(n_var, 2, figsize=(20,12))

for j in range(n_var):
    # left panel: reconstruction, right panel: deviation
    ax, ax2 = axs[j]
    _plot_kw = dict(group=labels[j], group_colors={f'v{j}': colors[j]}, alpha=0.3, lw=0.95)

    # basis representation with q basis functions
    basis = get_basis(style, q)
    traf = samples[j].to_basis(basis)

    # evaluate the basis representation on the original grid points
    recov_ds = traf.to_grid(samples[j].grid_points)

    # difference between the sampled curves and their reconstruction
    diff = samples[j] - recov_ds

    recov_ds.plot(axes=ax, **_plot_kw)
    ax.set_title(f'Basis representation - recomputed time series for v{j}')

    diff.plot(axes=ax2, **_plot_kw)
    ax2.set_title(f'Recovery error for v{j}')
    # same y-limits as the left panel, so both are directly comparable
    ax2.set_ylim(ax.get_ylim())
    
    

In [None]:
# display the basis representation assigned in the last iteration of the
# preceding loop (i.e. the one of the last variable)
traf

## Fourth variable construction

We want to create a fourth variable $v_3$ that is constructed from the first variable $v_0$ as follows: we expand $v_0$ in the chosen basis (this basis expansion plays the role of the FPCA here), permute the resulting coefficients, and then transform back to a time series using the original, unpermuted basis. This creates a variable whose coefficients are correlated with those of $v_0$, but where the relation is less obvious when only looking at the time series.

In [None]:
# from now on use 9 components
# (number of basis functions per variable passed to get_basis below)
n_comp = 9

In [None]:
# basis coefficients of the first variable
_v0 = samples[0]
basis = get_basis(style, n_comp)
_traf = _v0.to_basis(basis)

# fixed seed, so the permutation is the same on every run
_perm = np.random.default_rng(1917).permutation(n_comp)

# reorder the coefficient columns; the basis itself is left untouched
_traf.coefficients = _traf.coefficients[:, _perm]

# evaluate the permuted expansion on the original grid points
v3_ds = _traf.to_grid(_v0.grid_points)

In [None]:
# Register the new variable exactly once. Appending unconditionally (together
# with `n_var += 1`) would add another copy of v3 every time this cell is
# re-run and break the plotting loop below.
if not any('v3' in _lb for _lb in labels):
    # add new time series
    samples.append(v3_ds)
    _lb3 = np.array([f'v3'] * n_samples)
    labels.append(_lb3)
    colors.append('steelblue')

# derive the number of variables from the data instead of incrementing it
n_var = len(samples)

# plot again
fig, axs = plt.subplots(n_var,1, figsize=(10,12))
for j in range(n_var):
    ax = axs.ravel()[j]
    samples[j].plot(axes=ax, group=labels[j], group_colors={f'v{j}': colors[j]}, alpha=0.2, lw=0.95)
    ax.set_title(f'Sampled time series for v{j}')

In [None]:
# draw the basis functions for n_comp components
basis = get_basis(style, n_comp)

fig, ax = plt.subplots(figsize=(10, 5))
basis.plot(axes=ax)

fig.suptitle("Plot of the basis functions")

## Reconstruction error

In [None]:
# plot the relative reconstruction error for an increasing number of basis functions
fig, ax = plt.subplots()

# candidate basis sizes; Fourier has always odd number of basis elements
all_q = [5,7,9,11,13] if style == 'fourier' else range(4,12)

for j in range(n_var):
    all_err = list()

    for _n_basis in all_q:
        basis = get_basis(style, _n_basis)
        this_traf = samples[j].to_basis(basis)

        # evaluate the basis representation on the original grid points
        recov_ds = this_traf.to_grid(samples[j].grid_points)

        # l2 distance to the sampled curves, relative to their l2 norm;
        # summarised by the median over all curves
        this_err = l2_distance(samples[j], recov_ds) / l2_norm(samples[j])
        all_err.append(np.median(this_err))

    ax.plot(all_q, all_err, c=colors[j], lw = 4, marker='p', markersize=10, markeredgecolor='k', alpha=0.8, label=f'v{j}')

ax.set_xlabel('Number of FPCA components')
ax.set_ylabel('Median l2 error')
ax.grid(ls = '-', lw = .5)
ax.legend()

## Functional Graphical Lasso

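Now, for each variable, compute a representation with $n_{comp}$ components (here: the coefficients in the chosen basis, which take the role of the FPCA coefficients). Then, for each sample, concatenate the coefficients of all variables and compute their correlations. With this, we are ready to compute the Functional Graphical Lasso.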

If we choose $n_{comp}=9$, we expect $4\cdot9=36$ variables in total.

In [None]:
from gglasso.helper.basic_linalg import scale_array_by_diagonal
from gglasso.helper.utils import lambda_max_fsgl, frob_norm_per_block
from gglasso.solver.functional_sgl_admm import ADMM_FSGL

# coefficient matrix of every variable, all with respect to the same basis
basis = get_basis(style, n_comp)
all_traf = [samples[j].to_basis(basis).coefficients for j in range(n_var)]

# place the per-variable coefficient matrices side by side
fpca_samples = np.hstack(all_traf)
print("(N,p) = ", fpca_samples.shape)

# covariance between the columns of the stacked coefficient matrix ...
S = np.cov(fpca_samples, rowvar=False)
# ... scaled to correlations
S = scale_array_by_diagonal(S)

print("S has shape ", S.shape)

In [None]:
# positions at which the coefficient block of one variable ends
_block_ends = [(j+1)*n_comp for j in range(n_var)]
_n_total = n_var*n_comp

# heatmap of the stacked coefficients, with lines between the variables
fig, ax = plt.subplots()
sns.heatmap(fpca_samples, ax=ax, alpha=1, vmin=-5, vmax=5, cmap='coolwarm',cbar=True)
ax.vlines(_block_ends, 0, n_samples, color='k', lw=4)

# heatmap of S on a symmetric colour scale, with the block structure marked
fig, ax = plt.subplots()
_v = 1.
sns.heatmap(S, ax=ax, cmap="coolwarm", vmax=_v, vmin=-_v, alpha=1.)
ax.hlines(_block_ends, 0, _n_total, color='k', lw=3)
ax.vlines(_block_ends, 0, _n_total, color='k', lw=3)

# Frobenius norm of each subblock of S, rounded to three decimals
fig, ax = plt.subplots()
_block_norms = np.round(frob_norm_per_block(S, n_comp),3)
sns.heatmap(_block_norms, annot=True, ax = ax)

In [None]:
# number of variables, block size and total dimension
p, M = n_var, n_comp
pM = p*M

# range of regularization values: from lambda_max down to 10% of it
lambda_max = lambda_max_fsgl(S, M)
lambda_min = 0.1 * lambda_max

# 10 logarithmically spaced values, largest first
_log_lo, _log_hi = np.log10(lambda_min), np.log10(lambda_max)
lambda_range = np.logspace(_log_lo, _log_hi, 10)[::-1]

In [None]:
# display the regularization values (ordered from largest to smallest)
lambda_range

In [None]:
# largest absolute entry of S
np.abs(S).max()

In [None]:
from matplotlib.colors import LogNorm, Normalize

def plot_fsgl_heatmap(Omega, p, M, ax=None):
    """Draw ``Omega`` as a heatmap with lines marking blocks of size ``M``.

    Parameters
    ----------
    Omega : array
        Matrix to display.
    p : int
        Number of blocks per dimension.
    M : int
        Size of each block.
    ax : matplotlib axes, optional
        Axes to draw on. A new figure is created if omitted.
    """
    if ax is None:
        _, ax = plt.subplots()

    # symmetric colour range at 10% of the largest absolute entry
    clim = 0.1*np.abs(Omega).max()
    sns.heatmap(Omega, cmap="coolwarm", vmin=-clim, vmax=clim, ax = ax)

    # separator lines after every block of M rows / columns
    edges = [(j+1)*M for j in range(p)]
    extent = p*M
    ax.hlines(edges, 0, extent, color='k', lw=3)
    ax.vlines(edges, 0, extent, color='k', lw=3)

In [None]:
# start from the identity; afterwards every solve is initialised with the
# 'Omega' of the previous one
Omega_0 = np.eye(pM)
all_sol = {}

for _lam in lambda_range:
    sol, info = ADMM_FSGL(
        S, _lam, M, Omega_0,
        rho=1., max_iter=1000, tol=1e-7, rtol=1e-7,
        update_rho=True, verbose=False, measure=True, latent=False, mu1=None,
    )

    Omega_0 = sol['Omega'].copy()
    # keep the solution, keyed by its regularization value
    all_sol[_lam] = sol.copy()

In [None]:
# one row per regularization value in each of the two figures
_n_lam = len(lambda_range)
fig, axs = plt.subplots(_n_lam, figsize=(7,50))
fig2, axs2 = plt.subplots(_n_lam, figsize=(5,25))

for _lam, _ax_full, _ax_block in zip(lambda_range, axs, axs2):
    _theta = all_sol[_lam]['Theta']

    # full matrix with block separators
    plot_fsgl_heatmap(_theta, p, M, ax=_ax_full)
    _ax_full.set_title(f"Functional Graphical Lasso for lambda={_lam}")

    # Frobenius norm of each block, rounded to two decimals
    _block_norms = np.round(frob_norm_per_block(_theta, M),2)
    sns.heatmap(_block_norms, vmin=0, vmax=.1, ax = _ax_block, annot=True)
    _ax_block.set_title(f"lambda={_lam}")

In [None]:
# pick the solution for the second entry of lambda_range (the second-largest
# regularization value, since the range is sorted in decreasing order)
Omega = all_sol[lambda_range[1]]['Theta']

# plot it with seaborn's default colour limits
sns.heatmap(Omega, cmap="coolwarm")