# Forward-backward splitting for time-varying graphical lasso
This notebook shows how to minimise the time-varying graphical lasso with element-wise penalty norms across time-points.

First of all, as always, let's create a bunch of data.
For this task, we generate each variable so that it changes over time according to a prescribed behaviour, namely an evolution driven by trigonometric functions such as `sin` and `cos`.

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from scipy.spatial.distance import squareform
from regain import datasets, utils

from sklearn.datasets import load_iris
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, ShuffleSplit

from skopt.searchcv import BayesSearchCV
from skopt.space import Real, Categorical, Integer

In [2]:
# np.random.seed(7)

# fs = 10e3
# N = 100
# amp = 2*np.sqrt(2)
# freq = 1.0
# noise_power = 0.001 * fs / 2
# time = np.arange(N) / fs
# z = amp*np.sin(2*np.pi*freq*time)
# z += np.random.normal(scale=np.sqrt(noise_power), size=time.shape)
# plt.plot(z);

In [3]:
# T = 4

# x = np.tile(np.linspace(0, T-1, T), (n_interactions, 1))
# zz = amp * signal.square(2 * np.pi * freq * x + phase, duty=.5)
# plt.plot(x.T, zz.T);

Generate the data starting from the inverse covariance matrices.

In [4]:
# Seed NumPy's global random generator before generating the synthetic data.
np.random.seed(7)

# Problem size for the "square" scenario.
n_samples, n_dim_obs, T = 100, 10, 10

# Re-import regain.datasets before use (presumably to pick up local edits to the module).
reload(datasets)
data = datasets.make_dataset(
    n_samples=n_samples,
    n_dim_obs=n_dim_obs,
    n_dim_lat=0,
    T=T,
    time_on_axis='last',
    mode='sin',
    shape='square',
    closeness=2.4,
    normalize=1,
)

# # smooth
# n_samples = 100
# n_dim_obs = 10
# T = 10

# reload(datasets)
# data = datasets.make_dataset(n_samples=n_samples, n_dim_obs=n_dim_obs, n_dim_lat=0, T=T,
#                              time_on_axis='last',
#                              mode='sin', shape='smooth', closeness=2.4, normalize=1)

# plt.step(np.array([squareform(y, checks=None) for y in data.thetas]), '-|');
# plt.savefig("/home/fede/Dropbox/Latent variables networks/forward backward time varying graphical lasso/smooth_signal.pdf")

### Let's run the forward-backward solver

In [5]:
# Data array produced by make_dataset above (generated with time_on_axis='last').
X = data.data
# Hold-out split with scikit-learn's default settings.
# NOTE(review): X_tr / X_ts are not used by the cells visible below, which all fit on the full X.
X_tr, X_ts = train_test_split(X)

In [7]:
# Re-import the solver modules before use (presumably to pick up local edits).
from regain import update_rules
reload(update_rules)
from regain.forward_backward import time_graph_lasso_
reload(time_graph_lasso_)

# Build the forward-backward estimator and fit it on the full data array X.
tglfb = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=2,
    gamma=1,
    alpha='max',
    beta=2.5,
    delta=.0001,
    choose='gamma',
    lamda=1,
    tol=1e-2,
    eps=0.8,
    time_norm=1,
    max_iter=200,
    time_on_axis='last',
).fit(X)

obj: 10816.3431, rnorm: 0.0508305, snorm: 8.7482,eps_pri: 0.2535, eps_dual: 0.0100
obj: 10807.3082, rnorm: 0.0571810, snorm: 9.0349,eps_pri: 0.2533, eps_dual: 0.0100
obj: 10797.8971, rnorm: 0.0653418, snorm: 9.4111,eps_pri: 0.2531, eps_dual: 0.0100
obj: 10788.3819, rnorm: 0.0731711, snorm: 9.5152,eps_pri: 0.2528, eps_dual: 0.0100
obj: 10778.6975, rnorm: 0.0823681, snorm: 9.6843,eps_pri: 0.2525, eps_dual: 0.0100
obj: 10769.1126, rnorm: 0.0913933, snorm: 9.5849,eps_pri: 0.2522, eps_dual: 0.0100
obj: 10759.8130, rnorm: 0.1000273, snorm: 9.2996,eps_pri: 0.2518, eps_dual: 0.0100
obj: 10751.4027, rnorm: 0.1092637, snorm: 8.4103,eps_pri: 0.2514, eps_dual: 0.0100
obj: 10751.3882, rnorm: 0.1289854, snorm: 0.0145,eps_pri: 0.2509, eps_dual: 0.0100
obj: 10748.9953, rnorm: 0.1201356, snorm: 2.3929,eps_pri: 0.2507, eps_dual: 0.0100
obj: 10744.5265, rnorm: 0.1015227, snorm: 4.4687,eps_pri: 0.2504, eps_dual: 0.0100
obj: 10742.5018, rnorm: 0.0841034, snorm: 2.0247,eps_pri: 0.2504, eps_dual: 0.0100
obj:

obj: 10725.6433, rnorm: 0.0052906, snorm: 0.0063,eps_pri: 0.2468, eps_dual: 0.0100
obj: 10725.6459, rnorm: 0.0049207, snorm: 0.0026,eps_pri: 0.2467, eps_dual: 0.0100
obj: 10725.6508, rnorm: 0.0059412, snorm: 0.0049,eps_pri: 0.2467, eps_dual: 0.0100
obj: 10725.6829, rnorm: 0.0086201, snorm: 0.0321,eps_pri: 0.2467, eps_dual: 0.0100
obj: 10725.6418, rnorm: 0.0032059, snorm: 0.0411,eps_pri: 0.2467, eps_dual: 0.0100
obj: 10725.6339, rnorm: 0.0015985, snorm: 0.0079,eps_pri: 0.2467, eps_dual: 0.0100
obj: 10725.6328, rnorm: 0.0006555, snorm: 0.0011,eps_pri: 0.2467, eps_dual: 0.0100


In [8]:
# Value of `alpha` held by the fitted estimator (the constructor was given alpha='max').
tglfb.alpha

0.9773705360453293

In [9]:
# `n_iter_` attribute of the fitted estimator, for comparison with max_iter=200.
tglfb.n_iter_

114

In [84]:
# Score the estimated precision matrices against the ground truth in `data.thetas`;
# the result is a dict of confusion-matrix style metrics (tp, fp, precision, recall, ...).
utils.structure_error(data.thetas, tglfb.precision_, no_diagonal=0, thresholding=0, eps=1e-5)

{'accuracy': 0.638,
 'average_precision': 0.9426096157848678,
 'balanced_accuracy': 0.5516666666666666,
 'dor': 8.045454545454545,
 'f1': 0.7652399481193256,
 'fall_out': 0.88,
 'false_omission_rate': 0.1724137931034483,
 'fdr': 0.37367303609341823,
 'fn': 10,
 'fp': 352,
 'miss_rate': 0.016666666666666666,
 'nlr': 0.1388888888888889,
 'npv': 0.8275862068965517,
 'plr': 1.1174242424242424,
 'precision': 0.6263269639065817,
 'prevalence': 0.6,
 'recall': 0.9833333333333333,
 'specificity': 0.12,
 'tn': 48,
 'tp': 590}

In [32]:
# NOTE(review): this cell used to contain `tglfb = mdl`. `mdl` is only created in the
# Bayesian-optimisation section further down, so on a fresh top-to-bottom run the
# assignment raised NameError and stopped execution. The alias is disabled so that the
# cells below keep using the `tglfb` fitted above; re-enable it after the search has
# been run if the tuned model should be compared instead.
# tglfb = mdl

In [21]:
import os
import sys

# Location of the external TVGL checkout that provides `inferGraphL1` and `TVGL`.
# Override it with the TVGL_PATH environment variable; the default is the path that was
# previously hard-coded here.
tvgl_path = os.environ.get("TVGL_PATH", "/home/fede/src/TVGL")
if tvgl_path not in sys.path:  # do not append the same entry again when the cell is re-run
    sys.path.append(tvgl_path)

# Re-import the TVGL modules before use (presumably to pick up local edits).
import inferGraphL1; reload(inferGraphL1)
import TVGL; reload(TVGL)

# Run TVGL with the same alpha / beta as the forward-backward estimator `tglfb`.
# The data is reordered so that the last axis of X comes first and then stacked row-wise;
# X.shape[0] is passed as the second positional argument
# (presumably the number of samples per time step -- TODO confirm against TVGL's signature).
thetaSet, empCovSet, status, gvx = TVGL.TVGL(
    np.vstack(X.transpose(2,0,1)), X.shape[0],
    lamb=tglfb.alpha, beta=tglfb.beta, indexOfPenalty=1, verbose=False)

Use l-1 penalty function
10
lambda = 0.9773705360453293, beta = 2.5


In [22]:
# `n_iter_` attribute of `gvx` (the fourth value returned by TVGL.TVGL), for comparison with tglfb.n_iter_.
gvx.n_iter_

52

In [24]:
# First value returned by TVGL.TVGL: a list of arrays (see output).
# NOTE(review): this prints every matrix in full; consider showing a slice or the shape instead.
thetaSet

[array([[0.86479843, 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.76847489, 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.76836952, 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.80576534, 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.84808489,
         0.        , 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.76312566, 0.        , 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.85336229, 0.        , 0.        , 0.        ],
        [0.        , 0.    

In [23]:
# Same metrics as computed for `tglfb`, now for the TVGL result
# (the list is converted to a single array before the comparison with `data.thetas`).
utils.structure_error(data.thetas, np.array(thetaSet), no_diagonal=0, thresholding=0, eps=1e-5)

{'accuracy': 0.5,
 'average_precision': 0.5555555555555556,
 'balanced_accuracy': 0.5833333333333334,
 'dor': 0.0,
 'f1': 0.2857142857142857,
 'fall_out': 0.0,
 'false_omission_rate': 0.5555555555555556,
 'fdr': 0.0,
 'fn': 500,
 'fp': 0,
 'miss_rate': 0.8333333333333334,
 'nlr': 0.8333333333333334,
 'npv': 0.4444444444444444,
 'plr': 0,
 'precision': 1.0,
 'prevalence': 0.6,
 'recall': 0.16666666666666666,
 'specificity': 1.0,
 'tn': 400,
 'tp': 100}

In [10]:
# Check the forward-backward estimate with regain's `positive_definite` helper.
from regain.utils import positive_definite
positive_definite(tglfb.precision_)

True

### Bayesian optimisation
Since we have many hyper-parameters, we rely on a Bayesian optimisation procedure to select the best ones, treating the scoring function of our algorithm as a black box for the Gaussian process underlying the Bayesian optimisation.

Such procedure is performed via the `scikit-optimize` package.

In [15]:
# Re-import the modules involved in the search (presumably to pick up local edits).
from regain import utils
reload(utils)
from regain import prox
reload(prox)
from regain.forward_backward import time_graph_lasso_
reload(time_graph_lasso_)
from skopt import searchcv
reload(searchcv)

# Search space: only `beta` is tuned, with a log-uniform prior between 0.1 and 10.
# Other candidate dimensions, currently disabled:
#   'alpha': Real(1e-1, 1, prior='uniform')
#   'time_norm': Categorical([1, 2])
#   'eps': Categorical([0.5, 0.7, 0.8, 0.9])
domain = dict(beta=Real(1e-1, 1e1, prior='log-uniform'))

# Estimator handed to the search; `beta=1` here is the value the search varies.
mdl = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=0,
    tol=1e-2,
    max_iter=200,
    gamma=1,
    alpha='max',
    beta=1,
    time_norm=1,
    time_on_axis='last',
    eps=0.9,
    delta=1e-5,
    choose='gamma')

# Five random splits, each holding out 20% of the data.
cv = ShuffleSplit(n_splits=5, test_size=0.2)

bscv = searchcv.BayesSearchCV(
    estimator=mdl,
    search_spaces=domain,
    n_iter=100,
    cv=cv,
    verbose=1,
    n_jobs=1,
    iid=True,
    n_points=3,
    error_score=-3e5)

# callback handler
def on_step(optim_result):
    """Print the best cross-validation score found so far.

    Passed as ``callback`` to ``bscv.fit``. ``optim_result`` is whatever the search
    hands to the callback; it is not inspected here. The score is read from the
    notebook-level ``bscv`` object instead.

    Always returns None. The disabled variant below would return True once the
    score reaches a threshold.
    """
    print("best score: %s" % bscv.best_score_)
#     if bscv.best_score_ >= 0.98:
#         print('Interrupting!')
#         return True

# Run the search on the full data array X; on_step prints the best score seen so far.
bscv.fit(X, callback=on_step)
# mdl.fit(data_grid)

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   27.8s finished


best score: -149.86907082331908
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   21.6s finished


best score: -149.03756300100068
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   25.1s finished


best score: -149.03756300100068
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   23.8s finished


best score: -149.03756300100068
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   29.1s finished


best score: -149.03756300100068
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   20.3s finished


best score: -149.03756300100068
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   22.7s finished


best score: -149.03756300100068
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   28.3s finished


best score: -149.03756300100068
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   27.0s finished


best score: -145.66565643010628
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   27.5s finished


best score: -145.66565643010628
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.1s finished


best score: -145.66565643010628
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   21.6s finished


best score: -145.4031372874342
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   29.3s finished


best score: -145.4031372874342
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.2s finished


best score: -145.4031372874342
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   24.2s finished


best score: -145.20749661564454
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   29.7s finished


best score: -145.20749661564454
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   28.9s finished


best score: -145.20749661564454
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   17.3s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.5s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   28.9s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   28.3s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   29.3s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.6s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.4s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   28.8s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   22.6s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.1s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.2s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   29.8s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   29.8s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.1s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   30.1s finished


best score: -142.84519173384504
Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:   28.8s finished


best score: -142.84519173384504
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    9.9s finished


best score: -142.84519173384504


BayesSearchCV(cv=ShuffleSplit(n_splits=5, random_state=None, test_size=0.2, train_size=None),
       error_score=-300000.0,
       estimator=TimeGraphLassoForwardBackward(alpha='max', assume_centered=False, beta=1,
               choose='gamma', compute_objective=True, delta=1e-05,
               eps=0.9, gamma=1, lamda=1, lamda_criterion='b',
               max_iter=200, time_norm=1, time_on_axis='last', tol=0.01,
               verbose=0),
       fit_params=None, iid=True, n_iter=100, n_jobs=1, n_points=3,
       optimizer_kwargs=None, pre_dispatch='2*n_jobs', random_state=None,
       refit=True, return_train_score=False, scoring=None,
       search_spaces={'beta': Real(low=0.1, high=10.0, prior='log-uniform', transform='identity')},
       verbose=1)

In [19]:
# Hyper-parameter values selected by the Bayesian search.
bscv.best_params_

{'beta': 0.5977079011073209}

In [17]:
# Rebind `mdl` from the estimator passed into the search to the search's best estimator.
mdl = bscv.best_estimator_

In [20]:
# Structure-recovery metrics for the selected model against `data.thetas`.
# Note the settings differ from the earlier calls: thresholding=1 and eps=1e-8 here.
utils.structure_error(data.thetas, mdl.precision_, no_diagonal=0, thresholding=1, eps=1e-8)

{'accuracy': 0.646,
 'average_precision': 0.9252520246848773,
 'balanced_accuracy': 0.5625,
 'dor': 8.309941520467834,
 'f1': 0.7686274509803921,
 'fall_out': 0.855,
 'false_omission_rate': 0.17142857142857143,
 'fdr': 0.36774193548387096,
 'fn': 12,
 'fp': 342,
 'miss_rate': 0.02,
 'nlr': 0.13793103448275865,
 'npv': 0.8285714285714286,
 'plr': 1.1461988304093567,
 'precision': 0.632258064516129,
 'prevalence': 0.6,
 'recall': 0.98,
 'specificity': 0.145,
 'tn': 58,
 'tp': 588}

In [17]:
# Score of the selected model on the full data array X (the same array the search was fitted on).
mdl.score(X)

-35.837641703814484

In [18]:
# Estimated precision matrices of the selected model.
# NOTE(review): this prints the whole array; consider showing a slice or the shape instead.
mdl.precision_

array([[[ 1.10743819,  0.        ,  0.68071616, -0.05748638,
          0.94732079,  0.        ,  0.20918433,  0.        ,
          1.78611225,  1.85242287],
        [ 0.        ,  1.07193912, -0.50367029,  1.92016201,
          0.21529756,  1.59502318,  2.31934029,  0.42497577,
          0.31269722,  1.75750906],
        [ 0.68071616, -0.50367029,  1.06188303,  1.68245203,
         -0.72654564,  3.31609044,  0.70047032,  1.23391116,
          0.55243893,  1.10080506],
        [-0.05748638,  1.92016201,  1.68245203,  1.13201969,
          4.03359096,  0.66731923, -0.58454924,  0.        ,
          1.06881747,  0.        ],
        [ 0.94732079,  0.21529756, -0.72654564,  4.03359096,
          1.16540114,  0.77208652,  1.10715018,  0.        ,
         -0.14574355,  0.2445326 ],
        [ 0.        ,  1.59502318,  3.31609044,  0.66731923,
          0.77208652,  1.0764689 ,  0.20839135, -1.00066514,
          1.02100579,  1.43229819],
        [ 0.20918433,  2.31934029,  0.70047032, -0.5

### GridSearchCV
For hyper-parameter tuning, one may alternatively fix a grid of parameter values and select the best combination.
For this we can use `GridSearchCV` from the `scikit-learn` library.

In [None]:
# Candidate values: three log-spaced points between 1e-2 and 1 for each of alpha, beta
# and gamma, combined with both time norms (3 * 3 * 3 * 2 = 54 combinations).
param_grid = dict(
    alpha=np.logspace(-2, 0, 3),
    beta=np.logspace(-2, 0, 3),
    gamma=np.logspace(-2, 0, 3),
    time_norm=[1, 2])

mdl = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=0, time_on_axis='last')

# Two random splits, each holding out 20% of the data.
cv = ShuffleSplit(2, test_size=0.2)
ltgl = GridSearchCV(mdl, param_grid, cv=cv, verbose=1)
# Fit on X, the array every other cell in this notebook uses with time_on_axis='last'.
# The earlier `data_grid` was never defined (its assignment was commented out), so the
# original `ltgl.fit(data_grid)` raised NameError.
ltgl.fit(X)