# Cross-validation
This notebook shows an example on how it is possible to cross-validate parameters fot LTGL.

<font color='red'><b>Note</b></font>: at the moment we rely on `GPyOpt` for Bayesian optimisation. 
If you don't have this package installed, you can choose a grid of parameters and use `GridSearchCV` from `scikit-learn`.

In [1]:
import numpy as np
import pandas as pd
import time

from sklearn.model_selection import GridSearchCV, ShuffleSplit

from regain import datasets, model_selection, utils
from regain.admm.latent_time_graph_lasso_ import LatentTimeGraphLasso

np.random.seed(20)

## 1. Prepare data set
Let's prepare a synthetic dataset which we can work on.

In [2]:
# setting 1
alpha = 0.45
tau = 3
beta = 50
eta = 10

n_samples = 100
n_dim_lat = 20
T = 10
n_dim_obs = 100

data = datasets.generate_dataset(
    mode='norm', n_samples=n_samples, n_dim_lat=n_dim_lat, n_dim_obs=n_dim_obs,  T=T, epsilon=1e-1,
    proportional=True, degree=2, keep_sparsity=True)

## 2.1 Cross-validation via GridSearchCV

In [5]:
data_grid = np.array(data.data).transpose(1,2,0)
param_grid=dict(tau=[1, 3], alpha=[.45, 1], beta=[20, 50], eta=[5, 10])

mdl = LatentTimeGraphLasso(
    bypass_transpose=False, assume_centered=0, verbose=0, rtol=1e-5, tol=1e-5,
    max_iter=250, rho=1./ np.sqrt(data_grid.shape[0]))
    
cv = ShuffleSplit(10, test_size=0.2)
ltgl = GridSearchCV(mdl, param_grid, cv=cv, verbose=2)
ltgl.fit(data_grid)

Fitting 10 folds for each of 16 candidates, totalling 160 fits
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.9s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.9s remaining:    0.0s


[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.2s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.1s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.2s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.2s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.4s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.1s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] ................ alpha=0.45, beta=20, eta=5, tau=1, total=   5.1s
[CV] alpha=0.45, beta=20, eta=5, tau=1 ...............................
[CV] .

[CV] ................ alpha=0.45, beta=50, eta=5, tau=3, total=   8.3s
[CV] alpha=0.45, beta=50, eta=10, tau=1 ..............................
[CV] ............... alpha=0.45, beta=50, eta=10, tau=1, total=   7.3s
[CV] alpha=0.45, beta=50, eta=10, tau=1 ..............................
[CV] ............... alpha=0.45, beta=50, eta=10, tau=1, total=   7.2s
[CV] alpha=0.45, beta=50, eta=10, tau=1 ..............................
[CV] ............... alpha=0.45, beta=50, eta=10, tau=1, total=   7.3s
[CV] alpha=0.45, beta=50, eta=10, tau=1 ..............................
[CV] ............... alpha=0.45, beta=50, eta=10, tau=1, total=   7.4s
[CV] alpha=0.45, beta=50, eta=10, tau=1 ..............................
[CV] ............... alpha=0.45, beta=50, eta=10, tau=1, total=   7.3s
[CV] alpha=0.45, beta=50, eta=10, tau=1 ..............................
[CV] ............... alpha=0.45, beta=50, eta=10, tau=1, total=   7.2s
[CV] alpha=0.45, beta=50, eta=10, tau=1 ..............................
[CV] .

[CV] .................. alpha=1, beta=20, eta=10, tau=3, total=   6.6s
[CV] alpha=1, beta=20, eta=10, tau=3 .................................
[CV] .................. alpha=1, beta=20, eta=10, tau=3, total=   6.6s
[CV] alpha=1, beta=20, eta=10, tau=3 .................................
[CV] .................. alpha=1, beta=20, eta=10, tau=3, total=   6.7s
[CV] alpha=1, beta=50, eta=5, tau=1 ..................................
[CV] ................... alpha=1, beta=50, eta=5, tau=1, total=   6.7s
[CV] alpha=1, beta=50, eta=5, tau=1 ..................................
[CV] ................... alpha=1, beta=50, eta=5, tau=1, total=   6.6s
[CV] alpha=1, beta=50, eta=5, tau=1 ..................................
[CV] ................... alpha=1, beta=50, eta=5, tau=1, total=   6.6s
[CV] alpha=1, beta=50, eta=5, tau=1 ..................................
[CV] ................... alpha=1, beta=50, eta=5, tau=1, total=   6.6s
[CV] alpha=1, beta=50, eta=5, tau=1 ..................................
[CV] .

[Parallel(n_jobs=1)]: Done 160 out of 160 | elapsed: 18.4min finished


GridSearchCV(cv=ShuffleSplit(n_splits=10, random_state=None, test_size=0.2, train_size=None),
       error_score='raise',
       estimator=LatentTimeGraphLasso(alpha=1.0, assume_centered=0, beta=1.0,
           bypass_transpose=False, compute_objective=True, eta=1.0,
           max_iter=250, mode='cd', phi='laplacian', psi='laplacian',
           rho=0.10000000000000001, rtol=1e-05, tau=1.0, tol=1e-05,
           update_rho_options={}, verbose=0),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'alpha': [0.45, 1], 'beta': [20, 50], 'eta': [5, 10], 'tau': [1, 3]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=2)

## 2.2 skopt

In [3]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.datasets import load_iris
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split

In [None]:
data_grid = np.array(data.data).transpose(1,2,0)

domain = {'alpha': Real(1e-1, 1e0, prior='log-uniform'),
          'tau': Real(1e-1, 1e+1, prior='log-uniform'),
          'beta': Integer(1, 60),
          'eta': Integer(1, 10)}

mdl = LatentTimeGraphLasso(
    bypass_transpose=False, assume_centered=0, verbose=0, rtol=1e-5, tol=1e-5,
    max_iter=250, rho=1./ np.sqrt(data_grid.shape[0]))
    
cv = ShuffleSplit(10, test_size=0.2)
    
ltgl = BayesSearchCV(
    mdl, domain, n_iter=32, cv=cv)

ltgl.fit(data_grid)

## 3. Score

In [34]:
utils.structure_error(data.thetas, ltgl.best_estimator_.precision_)

{'accuracy': 0.97464,
 'dor': 5301.725257323832,
 'f1': 0.5236664162283996,
 'fall_out': 0.00010407993338884263,
 'false_omission_rate': 0.025619700596373078,
 'fdr': 0.007122507122507123,
 'fn': 2526,
 'fp': 10,
 'miss_rate': 0.6443877551020408,
 'nlr': 0.6444548299178108,
 'npv': 0.9743802994036269,
 'plr': 3416.7224489795917,
 'precision': 0.9928774928774928,
 'prevalence': 0.0392,
 'recall': 0.3556122448979592,
 'specificity': 0.9998959200666112,
 'tn': 96070,
 'tp': 1394}

In [35]:
utils.error_norm_time(data.thetas, ltgl.best_estimator_.precision_)

1.1729257196869867

In [36]:
utils.error_rank(data.ells, ltgl.best_estimator_.latent_)

10.699999999999999