# [Dev notebook]
## How does STGL work with TICC data?

In [None]:
%matplotlib inline
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from sklearn.utils.extmath import squared_norm
from sklearn.covariance import empirical_covariance
from sklearn.cluster.hierarchical import AgglomerativeClustering
from scipy import linalg
from sklearn.metrics import v_measure_score
from sklearn.model_selection import StratifiedShuffleSplit

from regain.utils import structure_error, error_norm_time, normalize_matrix

In [None]:
# `imp` is deprecated and no longer ships with Python 3.12+;
# importlib.reload is the drop-in replacement.
from importlib import reload
from regain.covariance import kernel_time_graphical_lasso_, kernel_latent_time_graphical_lasso_
from regain.datasets import kernels
reload(kernels)

# Settings passed to kernels.make_ticc_dataset_v3 in the next cell.
n_dim = 10
w_size = 1
n_samples = 50
clusters=(0,0,0,0,1,1,1,1,0,0,0,0)

In [None]:
# Generate one dataset with the settings chosen above and unpack the
# observations (X) and their labels (y).
data = kernels.make_ticc_dataset_v3(
    clusters=clusters, n_samples=n_samples, n_dim=n_dim, w_size=w_size)
X = data.X
y = data.y

In [None]:
# # print the inv matrix
# res = sns.clustermap(data.inv, row_cluster=False, col_cluster=False)
# ax = res.ax_heatmap
# for i in range(n_samples * len(clusters)):
#     ax.axvline((i + 1) * n_dim)
#     ax.axhline((i + 1) * n_dim)

In [None]:
# One empirical covariance per distinct value of y.
cov = np.asarray(
    [empirical_covariance(X[y == label]) for label in np.unique(y)])
n_groups = cov.shape[0]

# Symmetric matrix holding the norm of the difference between every pair of
# covariances; the diagonal is still zero at this point.
kernel = np.zeros((n_groups, n_groups))
for row, col in zip(*np.triu_indices(n_groups, k=1)):
    kernel[row, col] = kernel[col, row] = np.linalg.norm(cov[row] - cov[col])

# Put the absolute row sums on the diagonal.
mm = np.abs(kernel).sum(axis=1)
kernel += np.eye(n_groups) * mm

# NOTE(review): the return value is only displayed, never assigned;
# presumably normalize_matrix modifies `kernel` in place - confirm.
normalize_matrix(kernel)

STGL

In [None]:
# Reload both estimator modules so that source edits are picked up without
# restarting the kernel.
reload(kernel_time_graphical_lasso_)
reload(kernel_latent_time_graphical_lasso_)

In [None]:
from regain.norm import l1_od_norm

# STGL estimator using the kernel computed above; it is only built here,
# not fitted.
mdl = kernel_time_graphical_lasso_.SimilarityTimeGraphicalLasso(
    alpha=0.1,
    beta=1,
    psi='l1',
    kernel=kernel,
    max_iter=500,
)

SLTGL

In [None]:
# let's use latent variable as well
eta = 0.25
n_times = np.unique(y).size

# Tridiagonal kernel: ones on the main diagonal, eta on the first
# super- and sub-diagonal, zeros elsewhere.
kernel_phi = np.eye(n_times) + eta * (
    np.eye(n_times, k=1) + np.eye(n_times, k=-1))

# SLTGL estimator; only built here, not fitted.
mdl = kernel_latent_time_graphical_lasso_.SimilarityLatentTimeGraphicalLasso(
    alpha=1,
    beta=2,
    tau=0.9,
    kernel_phi=kernel_phi,
)

In [None]:
# from sklearn.gaussian_process import kernels as skkernels
# rbf = kernels.RBF

# mdl = kernel_time_graphical_lasso_.KernelTimeGraphicalLasso(psi='l1', max_iter=500, alpha=0.1, beta=1, kernel=rbf).fit(X, y)

# from regain.norm import l1_od_norm
# mdl.similarity_matrix = kernel_time_graphical_lasso_.precision_similarity(mdl.precision_, l1_od_norm)

In [None]:
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.base import clone
from regain.covariance import time_graphical_lasso_, latent_time_graphical_lasso_

In [None]:
# Base estimators; the cells below work on clones of these objects.
# Only the STGL estimator is fitted right away.
stgl = kernel_time_graphical_lasso_.SimilarityTimeGraphicalLasso(
    alpha=1, beta=2,
).fit(X, y)
sltgl = kernel_latent_time_graphical_lasso_.SimilarityLatentTimeGraphicalLasso(
    alpha=1, beta=2, tau=0.9,
)
clust = AgglomerativeClustering()

In [None]:
from ticc import TICC_solver
# `imp` is deprecated and no longer ships with Python 3.12+;
# importlib.reload is the drop-in replacement.
from importlib import reload
reload(TICC_solver)

# Fit TICC with as many clusters as there are distinct values in `clusters`.
ticc = TICC_solver.TICC(number_of_clusters=np.unique(clusters).size, window_size=1)
cluster_assignment, cluster_MRFs = ticc.fit(X)

In [None]:
from sklearn.metrics import v_measure_score
# V-measure of the TICC cluster assignment against the cluster ids stored in
# the dataset.
v_measure_score(data.id_cluster, cluster_assignment)

In [None]:
# STGL followed by agglomerative clustering, both cloned from the base
# estimators; the number of clusters is the number of distinct values in
# `clusters`.
pipe_stgl = Pipeline(steps=[
    ('stgl', clone(stgl)),
    ('clust', clone(clust)),
])
pipe_stgl.set_params(clust__n_clusters=np.unique(clusters).size)
labels_pred = pipe_stgl.fit_predict(X, y)

In [None]:
# Each predicted label is repeated n_samples times before scoring against
# data.id_cluster (presumably one predicted label per time point - TODO confirm).
v_measure_score(data.id_cluster, np.repeat(labels_pred, n_samples))

In [None]:
from time import time

In [None]:
# Benchmark STGL, SLTGL, TGL + similarity, LTGL + similarity and TICC on
# freshly generated datasets. For every value in `partitions`, `n_splits`
# random datasets are generated and each method is fitted and scored on them.
partitions = [10, 50, 100, 200, 500, 1000]  # passed as n_samples to the generator
n_splits = 10  # random datasets generated per entry of `partitions`

n_dim = 50
w_size = 1
n_samples = 1000  # not used by the loop below: datasets are built with n_samples=i


def _record_run(results, method, n, model, v_measure, struct_err, norm_err, elapsed):
    """Append the outcome of one fitted method to ``results[(method, n)]``.

    Parameters
    ----------
    results : dict
        Maps ``(method, n)`` to a dict of lists, one list per recorded quantity
        ('model', 'v_measure', 'structure_error', 'error_norm', 'time').
    method : str
        Name of the method.
    n : int
        Value of ``n_samples`` the dataset was generated with.
    model : object
        Fitted estimator or pipeline.
    v_measure, struct_err, norm_err : object
        Values returned by v_measure_score, structure_error and
        error_norm_time for this run.
    elapsed : float
        Measured fitting time in seconds.
    """
    run = results.setdefault((method, n), {})
    for key, value in (('model', model),
                       ('v_measure', v_measure),
                       ('structure_error', struct_err),
                       ('error_norm', norm_err),
                       ('time', elapsed)):
        run.setdefault(key, []).append(value)


# Only `vs` is filled in this cell; `ne` and `se` stay empty.
vs, ne, se = {}, {}, {}
for i in partitions:
    for j in range(n_splits):
        # Random experiment design: 2 to 11 candidate clusters, 15 segments.
        n_clusts = np.random.randint(10) + 2
        clusters = np.random.choice(list(range(n_clusts)), size=15)
        n_true_clusters = np.unique(clusters).size
        data = kernels.make_ticc_dataset_v3(
            clusters=clusters,
            w_size=w_size, n_dim=n_dim, n_samples=i)
        X, y = data.X, data.y

        n_times = np.unique(y).size
        thetas_true = data.precs
        thetas_true_sparse = data.sparse_precs
        labels_true = data.id_cluster

        # Ground truth looked up once per entry of y.
        thetas_true_sparse_rep = np.array([thetas_true_sparse[l] for l in y])
        thetas_true_rep = np.array([thetas_true[l] for l in y])

        # NOTE(review): TICC is scored against thetas_true_sparse_rep, while
        # the other four methods are scored against thetas_true_sparse even
        # though their estimates are expanded with `for l in y`. Confirm which
        # ground truth structure_error should receive.

        # STGL
        pipe_stgl = Pipeline([('stgl', clone(stgl)), ('clust', clone(clust))])
        tic = time()
        labels_pred = pipe_stgl.set_params(clust__n_clusters=n_true_clusters).fit_predict(X, y)
        tac = time() - tic
        labels_pred = np.repeat(labels_pred, i)

        mdl = pipe_stgl['stgl']
        obs_precs = np.array([mdl.get_observed_precision()[l] for l in y])
        obs_precs_sparse = np.array([mdl.get_precision()[l] for l in y])

        _record_run(
            vs, 'stgl', i, pipe_stgl,
            v_measure_score(labels_true, labels_pred),
            structure_error(thetas_true_sparse, obs_precs_sparse, no_diagonal=True),
            error_norm_time(thetas_true_rep, obs_precs),
            tac)

        # SLTGL
        pipe_sltgl = Pipeline([('sltgl', clone(sltgl)), ('clust', clone(clust))])
        eta = 0.25
        kernel_phi = np.eye(n_times)
        np.fill_diagonal(kernel_phi[:, 1:], eta)
        np.fill_diagonal(kernel_phi[1:], eta)
        pipe_sltgl.set_params(sltgl__kernel_phi=kernel_phi, clust__n_clusters=n_true_clusters)
        tic = time()
        labels_pred = pipe_sltgl.fit_predict(X, y)
        tac = time() - tic
        labels_pred = np.repeat(labels_pred, i)

        mdl = pipe_sltgl['sltgl']
        obs_precs = np.array([mdl.get_observed_precision()[l] for l in y])
        obs_precs_sparse = np.array([mdl.get_precision()[l] for l in y])

        _record_run(
            vs, 'sltgl', i, pipe_sltgl,
            v_measure_score(labels_true, labels_pred),
            structure_error(thetas_true_sparse, obs_precs_sparse, no_diagonal=True),
            error_norm_time(thetas_true_rep, obs_precs),
            tac)

        # TGL + similarity (the timing covers the TGL fit only)
        tgl = time_graphical_lasso_.TimeGraphicalLasso(alpha=1, beta=2)
        tic = time()
        tgl.fit(X, y)
        tac = time() - tic
        psi = kernel_time_graphical_lasso_.check_norm_prox(tgl.psi)[0]
        tgl_prec_sims = kernel_time_graphical_lasso_.precision_similarity(tgl.get_observed_precision(), psi)
        labels_pred = clust.set_params(n_clusters=n_true_clusters).fit_predict(tgl_prec_sims)
        labels_pred = np.repeat(labels_pred, i)
        mdl = tgl
        obs_precs = np.array([mdl.get_observed_precision()[l] for l in y])
        obs_precs_sparse = np.array([mdl.get_precision()[l] for l in y])

        _record_run(
            vs, 'tgl', i, mdl,
            v_measure_score(labels_true, labels_pred),
            structure_error(thetas_true_sparse, obs_precs_sparse, no_diagonal=True),
            error_norm_time(thetas_true_rep, obs_precs),
            tac)

        # LTGL + similarity (the timing covers the LTGL fit only)
        ltgl = latent_time_graphical_lasso_.LatentTimeGraphicalLasso(alpha=1, beta=2, tau=0.9, eta=0.25)
        tic = time()
        ltgl.fit(X, y)
        tac = time() - tic
        psi = kernel_latent_time_graphical_lasso_.check_norm_prox(ltgl.psi)[0]
        ltgl_prec_sims = kernel_time_graphical_lasso_.precision_similarity(ltgl.get_observed_precision(), psi)
        labels_pred = clust.set_params(n_clusters=n_true_clusters).fit_predict(ltgl_prec_sims)
        labels_pred = np.repeat(labels_pred, i)
        mdl = ltgl

        obs_precs = np.array([mdl.get_observed_precision()[l] for l in y])
        obs_precs_sparse = np.array([mdl.get_precision()[l] for l in y])

        _record_run(
            vs, 'ltgl', i, mdl,
            v_measure_score(labels_true, labels_pred),
            structure_error(thetas_true_sparse, obs_precs_sparse, no_diagonal=True),
            error_norm_time(thetas_true_rep, obs_precs),
            tac)

        # TICC
        ticc = TICC_solver.TICC(number_of_clusters=n_true_clusters, window_size=1)
        tic = time()
        cluster_assignment, cluster_MRFs = ticc.fit(X)
        tac = time() - tic

        # One estimate per entry of cluster_assignment, looked up by cluster.
        obs_precs = np.array([cluster_MRFs[l] for l in cluster_assignment])
        obs_precs_sparse = obs_precs

        _record_run(
            vs, 'ticc', i, ticc,
            v_measure_score(labels_true, cluster_assignment),
            structure_error(thetas_true_sparse_rep, obs_precs_sparse, no_diagonal=True),
            error_norm_time(thetas_true_rep, obs_precs),
            tac)

In [None]:
# Debug: display the sparse ground-truth precisions left over from the last
# iteration of the benchmark loop.
list(thetas_true_sparse)

In [None]:
# Debug: shape obtained when thetas_true_sparse is indexed by every entry of
# labels_pred.
np.array([thetas_true_sparse[l] for l in labels_pred]).shape

In [None]:
# Flatten the benchmark results into one long table and plot each metric.
# `vs` is a dict keyed by (method, n_samples) whose values hold one list per
# recorded quantity, so it is walked with .items() rather than indexed as an
# array; `ne` and `se` are never filled by the benchmark loop.
d = []
for (method, index), runs in vs.items():
    index = int(index)
    for col in range(len(runs['v_measure'])):
        d.append(dict(sample=index, col=col, method=method,
                      val=runs['v_measure'][col], type='v_scores'))
        d.append(dict(sample=index, col=col, method=method,
                      val=runs['error_norm'][col], type='norm_errors'))
        d.append(dict(sample=index, col=col, method=method,
                      val=runs['structure_error'][col]['balanced_accuracy'],
                      type='structure_errors'))

A = pd.DataFrame(d)

# One panel per metric, one line per method.
metric_types = ['v_scores', 'norm_errors', 'structure_errors']
fig, axes = plt.subplots(
    1, len(metric_types), figsize=(6 * len(metric_types), 4), squeeze=False)
for ax, metric in zip(axes[0], metric_types):
    sns.pointplot(x='sample', y='val', hue='method',
                  data=A[A['type'] == metric], ax=ax)
    ax.set(title=metric, xlabel='sample', ylabel='val')

# `plt` already is matplotlib.pyplot, so `plt.pyplot` does not exist; save
# through the figure handle instead.
fig.savefig("error.png")

In [None]:
# thetas_true = np.array([np.array(data.precs)[x].mean(axis=0) for x in 
#                         [np.arange(i*(len(clusters) * (n_samples) // n_times),
#                                    (i+1)*(len(clusters) * (n_samples) // n_times))  for i in range(len(clusters) * (n_samples) // n_times - 1)]])
# thetas_true_sparse= np.array([np.array(data.sparse_precs)[x].mean(axis=0) for x in [np.arange(i*(len(clusters) * (n_samples) // n_times),(i+1)*(len(clusters) * (n_samples) // n_times)) 
#  for i in range(len(clusters) * (n_samples) // n_times - 1)]])

In [None]:
# Inspect the state left over from the last iteration of the benchmark loop:
# labels_true, labels_pred, thetas_true(_sparse) and mdl are whatever that
# loop assigned last.
print(labels_true)
print(labels_pred)

# Scores of that last model against the ground truth.
print(v_measure_score(labels_true, labels_pred))
print(structure_error(thetas_true_sparse, mdl.get_precision(), no_diagonal=True))
print(error_norm_time(thetas_true, mdl.get_observed_precision()))

makeTICC v3 + STGL, SLTGL, TICC, TGL + similarity, LTGL + similarity

cluster representative + STGL, SLTGL, TICC, TGL + similarity, LTGL + similarity

Plot with MCC, V-measure and norm error, plus 6 ROC-curve plots using the 10 splits, and plot_precision_recall