## Forward-backward splitting for time-varying graphical lasso
This notebook shows how to minimise the time-varying graphical lasso with element-wise penalty norms across time-points.

First of all, as always, let's create a bunch of data.
For this task, we generate each variable so that it evolves over time according to trigonometric functions, such as `sin` and `cos`.

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from scipy.spatial.distance import squareform
from scipy import signal
from regain import datasets, utils

from sklearn.datasets import load_iris
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, ShuffleSplit

from skopt.searchcv import BayesSearchCV
from skopt.space import Real, Categorical, Integer

In [None]:
# Toy example: N points spaced 1/fs apart of a sinusoid, corrupted by
# Gaussian noise. The seed makes the noise draw repeatable.
np.random.seed(7)

fs = 10e3
N = 100
amp = 2 * np.sqrt(2)
freq = 1.0
noise_power = 0.001 * fs / 2
time = np.arange(N) / fs

# Clean sinusoid plus zero-mean normal noise with standard deviation
# sqrt(noise_power).
z = amp * np.sin(2 * np.pi * freq * time) + np.random.normal(
    scale=np.sqrt(noise_power), size=time.shape)
plt.plot(z);

In [None]:
# T = 4

# x = np.tile(np.linspace(0, T-1, T), (n_interactions, 1))
# zz = amp * signal.square(2 * np.pi * freq * x + phase, duty=.5)
# plt.plot(x.T, zz.T);

In [None]:
# Fix the seed so the sampled phases, frequencies and amplitudes are repeatable.
np.random.seed(7)

n_samples, n_dim_obs, T = 10, 4, 5
n_dim_lat = 0  # we want to use the TGL, without latent variables

# One signal per pair of distinct variables (strict upper triangle).
upper_idx = np.triu_indices(n_dim_obs, 1)
n_interactions = upper_idx[0].size

# The same T time stamps 0, ..., T-1 repeated on every row.
x = np.tile(np.linspace(0, T-1, T), (n_interactions, 1))

# Draw order matters for reproducibility: phase, then frequency, then amplitude.
phase = np.random.rand(n_interactions, 1)
freq = np.random.rand(n_interactions, 1) - .70
A = np.random.rand(n_interactions, 1)

# One sinusoid per interaction, with negative values clipped to zero.
y = np.maximum(A * np.sin(2. * np.pi * freq * x + phase), 0)

plt.plot(x.T, y.T);

In [None]:
# For every time point (a column of y), expand the condensed vector into a
# symmetric matrix and add the identity so the diagonal is one.
Y = np.array([squareform(column) + np.eye(n_dim_obs) for column in y.T])
# Sanity check through regain's positive_definite helper.
assert utils.positive_definite(Y)

In [None]:
from sklearn.gaussian_process import kernels

# RBF kernel matrix of the interaction values at each time point; each
# column of y is passed as an (n_interactions, 1) array.
ker = np.array([
    kernels.RBF(length_scale=.2)(y[:, j:j + 1])
    for j in range(y.shape[1])
])

Generate the data starting from the inverse covariance matrices.

In [None]:
# Seed NumPy's global generator so the synthetic data set is repeatable.
np.random.seed(7)

# Sizes handed to datasets.make_dataset further down in this cell.
n_samples, n_dim_obs, T = 1000, 5, 10

def make_sin(n_dim_obs, n_dim_lat, T, shape='smooth', closeness=1):
    """Generate T matrices whose off-diagonal entries follow clipped waves.

    Every pair of distinct variables gets its own wave with a random phase
    (uniform in [0, 1)), frequency (uniform in [-0.5, 0.5)) and amplitude
    (uniform in [0.5, 1)), evaluated on T evenly spaced points in
    ``[0, (T - 1) / closeness]``. Negative values are set to zero; the
    values at each time point fill the off-diagonal of a symmetric matrix
    and the final matrix is ``(off_diagonal + 2 * I) / 2``, i.e. it has a
    unit diagonal.

    Parameters
    ----------
    n_dim_obs : int
        Size of each square matrix.
    n_dim_lat : any
        Accepted but not used by this function.
    T : int
        Number of time points (matrices) to generate.
    shape : str
        ``'smooth'`` uses a sine wave; any other value uses a square wave
        with a 50% duty cycle.
    closeness : number
        Divides the length of the time grid; larger values place the time
        points closer together.

    Returns
    -------
    tuple
        ``(Y, Y, zeros)`` where ``Y`` has shape (T, n_dim_obs, n_dim_obs)
        and ``zeros`` is an all-zero array of the same shape.

    Raises
    ------
    AssertionError
        If ``utils.positive_definite(Y)`` is falsy.
    """
    # Number of entries in the strict upper triangle, one wave per entry.
    n_interactions = np.triu_indices(n_dim_obs, 1)[0].size
    time_grid = np.tile(
        np.linspace(0, (T-1.) / closeness, T), (n_interactions, 1))

    # Keep this draw order (phase, frequency, amplitude) for reproducibility.
    phase = np.random.rand(n_interactions, 1)
    freq = np.random.rand(n_interactions, 1) - .50
    amplitude = (np.random.rand(n_interactions, 1) + 1) / 2.

    angle = 2. * np.pi * freq * time_grid + phase
    if shape == 'smooth':
        wave = np.sin(angle)
    else:
        wave = signal.square(angle, duty=.5)

    # threshold: drop the negative half of every wave
    clipped = np.maximum(amplitude * wave, 0)

    twice_identity = 2 * np.eye(n_dim_obs)
    stacked = np.array(
        [squareform(column) + twice_identity for column in clipped.T])
    Y = stacked / 2.

    assert utils.positive_definite(Y)
    return Y, Y, np.zeros_like(Y)

# Build the synthetic data set through regain, passing make_sin as the
# generation mode; shape='square' and closeness=2.4 are presumably forwarded
# to make_sin -- TODO confirm against datasets.make_dataset.
data = datasets.make_dataset(n_samples=n_samples, n_dim_obs=n_dim_obs, n_dim_lat=0, T=T,
                             time_on_axis='last',
                             mode=make_sin, shape='square', closeness=2.4)

# Plot the condensed form of every matrix in data.thetas (presumably one
# matrix per time point). checks=None is falsy, so squareform's input checks
# are disabled.
# NOTE(review): under Python 2 the comprehension variable `y` leaks and
# overwrites the global `y` defined in an earlier cell.
plt.plot(np.array([squareform(y, checks=None) for y in data.thetas]), '-');

### Let's run the time-varying graphical lasso

In [None]:
# Random train/test partition of the generated data, using scikit-learn's
# defaults (no explicit test size or random_state is given).
X = data.data
X_tr, X_ts = train_test_split(X)

In [None]:
# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.
# Without this import the cell raises NameError on Python 3.
try:
    from importlib import reload
except ImportError:  # Python 2: keep using the builtin `reload`
    pass

# Re-import the regain modules so that edits made to them on disk are picked
# up without restarting the kernel.
from regain import update_rules
reload(update_rules)
from regain.forward_backward import time_graph_lasso_
reload(time_graph_lasso_)

# Fit the forward-backward time-varying graphical lasso on the training split.
tglfb = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=1, gamma='max', alpha='max', beta=10, eps=.8, delta=.5,
    time_norm=1, max_iter=500, time_on_axis='last').fit(X_tr)

In [None]:
# Inspect the alpha stored on the estimator (it was constructed with alpha='max').
tglfb.alpha

In [None]:
# Compare the estimated precision matrices with the generating ones.
# no_diagonal=0 presumably keeps the diagonal entries in the comparison.
utils.structure_error(data.thetas, tglfb.precision_, no_diagonal=0)

### Bayesian optimisation
Since we have many hyper-parameters, we rely on Bayesian optimisation to select the best ones, treating the scoring function of our algorithm as a black box modelled by the Gaussian process that underlies the Bayesian optimisation.

This procedure is performed via the `scikit-optimize` package.

In [None]:
# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.
# Without this import the cell raises NameError on Python 3.
try:
    from importlib import reload
except ImportError:  # Python 2: keep using the builtin `reload`
    pass

# Re-import the modules under development so that on-disk edits are picked up
# without restarting the kernel. `time_graph_lasso_` must already have been
# imported by an earlier cell.
from regain import utils
reload(utils)
from regain import prox
reload(prox)
reload(time_graph_lasso_)

from skopt import searchcv
reload(searchcv)
# Full data set as a NumPy array; it is used both for the search and for the
# direct fit below.
data_grid = np.array(data.data)

# Search space for the Bayesian optimisation: alpha in [75, 150] and beta in
# [1, 100], both sampled with a uniform prior. The time_norm dimension is
# currently disabled.
domain = {'alpha': Real(75, 150, prior='uniform'),
          'beta': Real(1e0, 1e2, prior='uniform'),
#           'time_norm': Categorical([1, 2])
         }

# Base estimator; the alpha and beta given here are the values used when the
# estimator is fitted directly rather than through the search.
mdl = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=1, tol=1e-4, max_iter=10000, gamma='max', beta=20, alpha=150, time_norm=1,
    time_on_axis='last', eps=0.25, choose='gamma')
    
# Three random splits, each holding out half of the data.
cv = ShuffleSplit(3, test_size=0.5)
    
# NOTE(review): `iid` has been deprecated in recent scikit-learn /
# scikit-optimize releases -- confirm the installed version still accepts it.
ltgl = searchcv.BayesSearchCV(
    mdl, domain, n_iter=50, cv=cv, verbose=2, n_jobs=-1, iid=False, n_points=5)

# callback handler
def on_step(optim_result):
    """Search callback: print the best score found so far.

    ``optim_result`` is accepted but not used; the score is read from the
    module-level ``ltgl`` search object. Returns None, so it never asks the
    search to stop.
    """
    best = ltgl.best_score_
    print("best score: %s" % best)
    # Disabled early stop, kept for reference:
    #     if score >= 0.98:
    #         print('Interrupting!')
    #         return True


# The Bayesian search is currently disabled; the base estimator is fitted
# directly on the full data set instead.
# NOTE(review): later cells read ltgl.best_estimator_, ltgl.cv_results_ and
# ltgl.best_params_, which presumably exist only after ltgl.fit has run.
# ltgl.fit(data_grid, callback=on_step)
mdl.fit(data_grid)

In [None]:
# Structure-recovery error of the fitted model against the generating
# matrices. thresholding=1 with eps=1e-7 presumably treats entries below eps
# as zero -- TODO confirm against regain.utils.structure_error.
utils.structure_error(data.thetas, mdl.precision_, no_diagonal=0, thresholding=1, eps=1e-7)

In [None]:
# Rebind mdl to the best estimator found by the search.
# NOTE(review): the ltgl.fit call in the search cell is commented out, so this
# presumably fails unless the search is re-enabled and run first.
mdl = ltgl.best_estimator_

In [None]:
# Inspect the n_iter_ attribute of the current model.
mdl.n_iter_

In [None]:
# Score the current model on the full data set (the same data it was fitted on
# when mdl.fit(data_grid) was used).
mdl.score(data_grid)

In [None]:
# Cross-validation results of the search (requires a fitted ltgl).
ltgl.cv_results_

In [None]:
# Hyper-parameters selected by the search (requires a fitted ltgl).
ltgl.best_params_

In [None]:
# Inspect the precision matrices estimated by the current model.
mdl.precision_

### GridSearchCV
As for hyper-parameter tuning, one may instead choose to fix a grid of parameters and select the best combination.
For this we can use `GridSearchCV`, from the `scikit-learn` library.

In [None]:
# data_grid = np.array(data.data).transpose(1,2,0)

# Exhaustive grid: three log-spaced values (1e-2, 1e-1, 1e0) for each of
# alpha, beta and gamma, crossed with both time norms.
param_grid = {
    'alpha': np.logspace(-2, 0, 3),
    'beta': np.logspace(-2, 0, 3),
    'gamma': np.logspace(-2, 0, 3),
    'time_norm': [1, 2],
}

mdl = time_graph_lasso_.TimeGraphLassoForwardBackward(
    time_on_axis='last', verbose=0)

# Two random splits, each holding out 20% of the data for scoring.
cv = ShuffleSplit(n_splits=2, test_size=0.2)
ltgl = GridSearchCV(estimator=mdl, param_grid=param_grid, cv=cv, verbose=1)
ltgl.fit(data_grid)