# Forward-backward splitting for time-varying graphical lasso
This notebook shows how to minimise the time-varying graphical lasso with element-wise penalty norms across time-points.

First of all, as always, let's create a bunch of data.
For this task, we generate eah variable to change according to a certain behaviour which can be described as evolution via tigonometric functions, such as `sin` and `cos`.

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from scipy.spatial.distance import squareform
from regain import datasets, utils

from sklearn.datasets import load_iris
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, ShuffleSplit

from skopt.searchcv import BayesSearchCV
from skopt.space import Real, Categorical, Integer

In [2]:
# np.random.seed(7)

# fs = 10e3
# N = 100
# amp = 2*np.sqrt(2)
# freq = 1.0
# noise_power = 0.001 * fs / 2
# time = np.arange(N) / fs
# z = amp*np.sin(2*np.pi*freq*time)
# z += np.random.normal(scale=np.sqrt(noise_power), size=time.shape)
# plt.plot(z);

In [3]:
# T = 4

# x = np.tile(np.linspace(0, T-1, T), (n_interactions, 1))
# zz = amp * signal.square(2 * np.pi * freq * x + phase, duty=.5)
# plt.plot(x.T, zz.T);

Generate the data starting from the inverse covariance matrices.

In [4]:
np.random.seed(7)

n_samples = 100
n_dim_obs = 10
T = 10

reload(datasets)
data = datasets.make_dataset(n_samples=n_samples, n_dim_obs=n_dim_obs, n_dim_lat=0, T=T,
                             time_on_axis='last',
                             mode='sin', shape='square', closeness=2.4, normalize=1)

# plt.step(np.array([squareform(y, checks=None) for y in data.thetas]), '-|');
# plt.savefig("/home/fede/Dropbox/Latent variables networks/forward backward time varying graphical lasso/smooth_signal.pdf")

### Let's run 

In [5]:
X = data.data
X_tr, X_ts = train_test_split(X)

In [6]:
from regain import update_rules; reload(update_rules);
from regain.forward_backward import time_graph_lasso_; reload(time_graph_lasso_)

tglfb = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=0, gamma=1, alpha='max', beta=1.5, eps=.9, delta=.00001, choose='gamma',
    lamda=1, tol=1e-2,
    time_norm=1, max_iter=200, time_on_axis='last').fit(X)

In [7]:
tglfb.n_iter_

130

In [8]:
utils.structure_error(data.thetas, tglfb.precision_, no_diagonal=0, thresholding=1, eps=1e-5)

{'accuracy': 0.664,
 'average_precision': 0.9355807255228603,
 'balanced_accuracy': 0.585,
 'dor': 11.493827160493826,
 'f1': 0.7777777777777777,
 'fall_out': 0.81,
 'false_omission_rate': 0.13636363636363635,
 'fdr': 0.35526315789473684,
 'fn': 12,
 'fp': 324,
 'miss_rate': 0.02,
 'nlr': 0.10526315789473684,
 'npv': 0.8636363636363636,
 'plr': 1.2098765432098764,
 'precision': 0.6447368421052632,
 'prevalence': 0.6,
 'recall': 0.98,
 'specificity': 0.19,
 'tn': 76,
 'tp': 588}

In [241]:
import sys; sys.path.append("/home/fede/src/TVGL")
import inferGraphL1; reload(inferGraphL1)
import TVGL; reload(TVGL)
thetaSet, empCovSet, status, gvx = TVGL.TVGL(
    np.vstack(X.transpose(2,0,1)), X.shape[0],
    lamb=tglfb.alpha, beta=tglfb.beta, indexOfPenalty=1, verbose=True)

Use l-1 penalty function
10
lambda = 1.3773705360453294, beta = 1
Distributed ADMM (16 processors)
Iteration 1
  r: 0.7234802350294842
  e_pri: 0.030767965693643255
  s: 12.050617227209594
  e_dual: 0.04025832691051104
Iteration 2
  r: 1.006297146544303
  e_pri: 0.03378169590090212
  s: 5.0687060352172635
  e_dual: 0.041456626786197964
Iteration 3
  r: 1.007733044724051
  e_pri: 0.03532169184335626
  s: 2.647133936423939
  e_dual: 0.04256741780002595
Iteration 4
  r: 0.8882457223708263
  e_pri: 0.03614127892484979
  s: 1.4748141206775707
  e_dual: 0.0434814482750229
Iteration 5
  r: 0.7260282410604402
  e_pri: 0.03657725897522541
  s: 0.8717684270282581
  e_dual: 0.0441799685376243
Iteration 6
  r: 0.5660178699335849
  e_pri: 0.036808159917789116
  s: 0.5610100053611744
  e_dual: 0.04468031438251593
Iteration 7
  r: 0.4318629150959403
  e_pri: 0.03693105053388051
  s: 0.3888977780725584
  e_dual: 0.04501653639339101
Iteration 8
  r: 0.33200926730834357
  e_pri: 0.03699718726591823
  s:

In [242]:
gvx.n_iter_

52

In [236]:
utils.structure_error(data.thetas, np.array(thetaSet), no_diagonal=0, thresholding=1, eps=1e-5)

{'accuracy': 0.5,
 'average_precision': 0.5555555555555556,
 'balanced_accuracy': 0.5833333333333334,
 'dor': 0.0,
 'f1': 0.2857142857142857,
 'fall_out': 0.0,
 'false_omission_rate': 0.5555555555555556,
 'fdr': 0.0,
 'fn': 500,
 'fp': 0,
 'miss_rate': 0.8333333333333334,
 'nlr': 0.8333333333333334,
 'npv': 0.4444444444444444,
 'plr': 0,
 'precision': 1.0,
 'prevalence': 0.6,
 'recall': 0.16666666666666666,
 'specificity': 1.0,
 'tn': 400,
 'tp': 100}

In [10]:
from regain.utils import positive_definite
positive_definite(tglfb.precision_)

True

In [13]:
import sys; sys.path.append("/home/fede/src/TVGL")
import TVGL

def hallac_results(data_grid, K, K_obs, ells, beta, alpha, penalty=2):
#     with suppress_stdout():
    tic = time.time()
    thetaSet, empCovSet, status, gvx = TVGL.TVGL(
        np.vstack(data_grid.transpose(2,0,1)), data_grid.shape[0], lamb=alpha, beta=beta,
        indexOfPenalty=penalty)
    tac = time.time()

    if status != "Optimal":
        print ("not converged")
    precisions = np.array(thetaSet)
    ss = utils.structure_error(K, precisions)
    MSE_observed = None
    MSE_precision = utils.error_norm(K, precisions)
    MSE_latent = None
    mean_rank_error = None

    res = dict(n_dim_obs=K.shape[1],
               time=tac-tic,
               iterations=gvx.n_iter_,
               MSE_precision=MSE_precision,
               MSE_observed=MSE_observed,
               MSE_latent=MSE_latent,
               mean_rank_error=mean_rank_error,
               note=status,
               estimator=gvx)
    res = dict(res, **ss)

    return res

### BayesOptimisation
Since we have lots of hyper-parameters, we rely on a Bayesian optimisation procedure in order to select the best hyper-parameters, treating the scoring function of our algorithm as a black-box for the gaussian process underlying the Bayesian optimisation.

Such procedure is performed via the `scikit-optimize` package.

In [None]:
from regain import utils; reload(utils)
from regain import prox; reload(prox);
reload(time_graph_lasso_)

from skopt import searchcv; reload(searchcv)

domain = {'alpha': Real(1e-1, 1e2, prior='log-uniform'),
          'beta': Real(1e-1, 1e2, prior='log-uniform'),
#           'time_norm': Categorical([1, 2])
          'eps': Real(0.1, 0.9, prior='uniform')
         }

mdl = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=0, tol=1e-2, max_iter=300, gamma=1, alpha='max', beta=1, time_norm=1,
    time_on_axis='last', eps=0.9, delta=1e-5, choose='gamma')
    
cv = ShuffleSplit(7, test_size=0.2)
    
bscv = searchcv.BayesSearchCV(
    mdl, domain, n_iter=200, cv=cv, verbose=1, n_jobs=-1, iid=True, n_points=3,
    error_score=-np.inf)

# callback handler
def on_step(optim_result):
    score = bscv.best_score_
    print("best score: %s" % score)
#     if score >= 0.98:
#         print('Interrupting!')
#         return True

bscv.fit(X, callback=on_step)
# mdl.fit(data_grid)

Fitting 7 folds for each of 3 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  12 out of  21 | elapsed:    1.7s remaining:    1.3s


best score: -66.22080876097317
Fitting 7 folds for each of 3 candidates, totalling 21 fits


[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:    4.8s finished
  tolerance = delta * squared_norm(iter_diff) / (gamma * lamda)
  subgrad = (x_hat - K) / gamma
  res_norm = norm(K - k_previous) / gamma
[Parallel(n_jobs=-1)]: Done  12 out of  21 | elapsed:    6.5s remaining:    4.9s


In [218]:
utils.structure_error(data.thetas, mdl.precision_, no_diagonal=0, thresholding=1, eps=1e-8)

{'accuracy': 0.55696,
 'average_precision': 0.6493819873359272,
 'balanced_accuracy': 0.5080440514116944,
 'dor': 1.1194835525548184,
 'f1': 0.6843365253077975,
 'fall_out': 0.8196351325935678,
 'false_omission_rate': 0.5516596540439458,
 'fdr': 0.4206157706785059,
 'fn': 2360,
 'fp': 8716,
 'miss_rate': 0.1642767645830433,
 'nlr': 0.9108024580688646,
 'npv': 0.4483403459560542,
 'plr': 1.0196283714345935,
 'precision': 0.579384229321494,
 'prevalence': 0.57464,
 'recall': 0.8357232354169567,
 'specificity': 0.1803648674064322,
 'tn': 1918,
 'tp': 12006}

In [217]:
mdl = ltgl.best_estimator_

In [132]:
mdl.score(data_grid)

-648.3521475107405

In [113]:
ltgl.cv_results_

defaultdict(list,
            {'mean_fit_time': [59.08424797058105,
              66.17720727920532,
              66.97275977134704,
              64.39573440551757,
              61.84775342941284,
              59.56634469032288,
              63.95092988014221,
              2507.8452457904814,
              65.97609577178955,
              66.9068528175354,
              712.355920791626,
              1271.9847212791442,
              1444.4501823425294,
              1413.6207597732543,
              1554.165789794922,
              1777.535684633255,
              2018.0050109863282,
              1872.2741337776183,
              2012.9741005420685,
              1825.9705711841584,
              2219.35692782402,
              1189.926883840561,
              1956.9852410316466,
              1975.1635250091554,
              2616.8063933849335,
              1731.3013670444489,
              2541.313151407242,
              1935.2838866233826,
              1868.329342603683

In [125]:
ltgl.best_params_

{'alpha': 2.1945375330861676, 'beta': 0.17319499438515196}

In [115]:
mdl.precision_

array([[[ 1.69545767, -0.06819919,  0.05624769, ...,  0.00866332,
         -0.06240769,  0.09895316],
        [-0.06819919,  1.59819469, -0.08103202, ..., -0.00466499,
          0.        , -0.08148264],
        [ 0.05624769, -0.08103202,  1.41736224, ...,  0.07821061,
          0.08692673,  0.        ],
        ...,
        [ 0.00866332, -0.00466499,  0.07821061, ...,  1.58992992,
         -0.03837632, -0.0036484 ],
        [-0.06240769,  0.        ,  0.08692673, ..., -0.03837632,
          1.12858971,  0.        ],
        [ 0.09895316, -0.08148264,  0.        , ..., -0.0036484 ,
          0.        ,  1.73790145]],

       [[ 1.63205302,  0.02653864,  0.01943224, ...,  0.        ,
          0.        ,  0.02423671],
        [ 0.02653864,  1.16472631, -0.02983537, ..., -0.00466499,
          0.08179029,  0.        ],
        [ 0.01943224, -0.02983537,  1.09253107, ...,  0.07670778,
          0.02475099, -0.07210559],
        ...,
        [ 0.        , -0.00466499,  0.07670778, ...,  

### GridSearchCV
As for the hyper-parameters tuning, one may choose to fix a grid of parameters and select the best ones.
For this we can use `GridSearchCV`, from the `scikit-learn` library.

In [None]:
# data_grid = np.array(data.data).transpose(1,2,0)
param_grid=dict(alpha=np.logspace(-2,0,3), beta=np.logspace(-2,0,3), gamma=np.logspace(-2, 0, 3),
               time_norm=[1, 2])

mdl = time_graph_lasso_.TimeGraphLassoForwardBackward(
    verbose=0, time_on_axis='last')
    
cv = ShuffleSplit(2, test_size=0.2)
ltgl = GridSearchCV(mdl, param_grid, cv=cv, verbose=1)
ltgl.fit(data_grid)