# Cross-validation
This notebook shows how to cross-validate the hyperparameters of the latent time graphical lasso (LTGL).

<font color='red'><b>Note</b></font>: at the moment we rely on `scikit-optimize` for Bayesian optimisation. 
If you don't have this package installed, you can choose a grid of parameters and use `GridSearchCV` from `scikit-learn`.

In [None]:
import numpy as np
import pandas as pd
import time

from sklearn.model_selection import GridSearchCV, ShuffleSplit

from regain import datasets, utils
from regain.covariance.latent_time_graphical_lasso_ import LatentTimeGraphicalLasso

# Seed NumPy's global random number generator.
# NOTE(review): the data-preparation cell below calls np.random.seed(0) before
# any visible random draw, so this value of 20 is effectively overridden.
np.random.seed(20)

## 1. Prepare data set
Let's prepare a synthetic dataset which we can work on.

In [None]:
# Re-seed NumPy's global RNG right before generating the synthetic data set.
np.random.seed(0)

# Hyperparameter "setting 1". These names are only defined in this cell;
# they are not passed to the data generator below.
alpha, tau, beta, eta = 0.45, 3, 50, 10

# Problem sizes, forwarded by keyword to `datasets.make_dataset`.
n_samples, n_dim_lat = 100, 20
T, n_dim_obs = 10, 100

# Collect the generator arguments in one place so they are easy to scan.
generator_options = dict(
    n_samples=n_samples,
    n_dim_lat=n_dim_lat,
    n_dim_obs=n_dim_obs,
    T=T,
    epsilon=1e-1,
    proportional=True,
    degree=2,
    keep_sparsity=True,
    update_ell='l2',
    update_theta='l2',
    normalize_starting_matrices=True,
)

# `data` is read by the later cells through `data.data`, `data.thetas`
# and `data.ells`.
data = datasets.make_dataset(**generator_options)

## 2.1 Cross-validation via GridSearchCV

In [None]:
# Move the first axis of `data.data` to the end, matching the estimator's
# `time_on_axis='last'` setting (presumably the first axis is time — confirm
# against `datasets.make_dataset`).
data_grid = np.array(data.data).transpose(1, 2, 0)

# Exhaustive grid: two candidate values per hyperparameter (16 combinations).
param_grid = dict(tau=[1, 3], alpha=[.45, 1], beta=[20, 50], eta=[5, 10])

# `rho` is scaled by the size of the first axis of the transposed array.
mdl = LatentTimeGraphicalLasso(
    time_on_axis='last', assume_centered=False, verbose=0, rtol=1e-5, tol=1e-5,
    max_iter=250, rho=1. / np.sqrt(data_grid.shape[0]))

# 10 random 80/20 splits. The explicit `random_state` pins the folds so that
# re-running this cell (or running cells out of order) yields the same splits
# instead of depending on the current state of NumPy's global RNG.
cv = ShuffleSplit(10, test_size=0.2, random_state=0)

ltgl = GridSearchCV(mdl, param_grid, cv=cv, verbose=2)
ltgl.fit(data_grid)

## 2.2 Cross-validation via BayesSearchCV (scikit-optimize)

In [None]:
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.datasets import load_iris
from sklearn.svm import SVC 
from sklearn.model_selection import train_test_split

In [None]:
# Recomputed here so this cell does not depend on the GridSearchCV cell having
# been run. The first axis of `data.data` is moved to the end, matching
# `time_on_axis='last'` below.
data_grid = np.array(data.data).transpose(1, 2, 0)

# Search space: log-uniform priors for the real-valued penalties `alpha` and
# `tau`, integer ranges for `beta` and `eta`.
domain = {'alpha': Real(1e-1, 1e0, prior='log-uniform'),
          'tau': Real(1e-1, 1e+1, prior='log-uniform'),
          'beta': Integer(1, 60),
          'eta': Integer(1, 10)}

# `rho` is scaled by the size of the first axis of the transposed array.
mdl = LatentTimeGraphicalLasso(
    time_on_axis='last', assume_centered=False, verbose=0, rtol=1e-5, tol=1e-5,
    max_iter=250, rho=1. / np.sqrt(data_grid.shape[0]))

# 10 random 80/20 splits. Both the splitter and the optimiser receive an
# explicit `random_state`; otherwise they draw from NumPy's global RNG and the
# outcome changes depending on whether the optional GridSearchCV cell was
# executed beforehand.
cv = ShuffleSplit(10, test_size=0.2, random_state=0)

ltgl = BayesSearchCV(
    mdl, domain, n_iter=32, cv=cv, random_state=0)

ltgl.fit(data_grid)

## 3. Score

In [None]:
# Compare the generated `data.thetas` with the `precision_` of the best
# estimator found by whichever search cell was run last (`ltgl`).
# NOTE(review): presumably reports support-recovery metrics — confirm in `regain.utils`.
utils.structure_error(data.thetas, ltgl.best_estimator_.precision_)

In [None]:
# Same pair of inputs as the structure error (`data.thetas` vs. the best
# estimator's `precision_`), passed to `utils.error_norm_time`.
# NOTE(review): presumably a norm of the estimation error across time — confirm in `regain.utils`.
utils.error_norm_time(data.thetas, ltgl.best_estimator_.precision_)

In [None]:
# Compare the generated `data.ells` with the `latent_` attribute of the best
# estimator found by the last search that was run.
# NOTE(review): presumably measures the rank mismatch of the latent component — confirm in `regain.utils`.
utils.error_rank(data.ells, ltgl.best_estimator_.latent_)