In [1]:
from regain.datasets.kernels import make_cluster_representative
from regain.datasets.ising import ising_theta_generator, ising_sampler

from itertools import chain, combinations

import numpy as np
from scipy import linalg
from scipy.spatial.distance import squareform
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets.base import Bunch

from regain.norm import l1_od_norm
from regain.utils import is_pos_def

In [2]:
def make_cluster_representative(
        n_dim=10, degree=2, n_clusters=3, T=15, n_samples=100, repetitions=False,
        cluster_series=None, shuffle=False):
    """Based on the cluster representative, generate similar graphs."""
    import networkx as nx
    adjacencies = []
    if cluster_series is not None:
        n_clusters = np.unique(cluster_series).size

    for i in range(n_clusters):
        adjacencies.append(ising_theta_generator(p=n_dim, T=1, time_on_axis='first',responses=[-1, 1])[0])
    pos = np.arange(0, T, T // (n_clusters + 1))
    pos = list(pos) + [T - 1]

    if cluster_series is None:
        cluster_series = np.tile(range(n_clusters), (len(pos) // n_clusters) + 1)[:len(pos)]
        if shuffle:
            np.random.shuffle(cluster_series)
        # print(pos)
        # print(cluster_series)
        # pos = np.arange(0, T, T // (clusters + 1))
        # pos = list(pos) + [T - 1]
    else:
        assert len(cluster_series) == len(pos)
    #     a = np.where(cluster_series[:-1] != cluster_series[1:])[0] + 1
    #     T = len(cluster_series) # overwrites T
    #     pos = np.concatenate(([0], a, [T-1]))

    thetas = []
    for i in range(len(pos) - 1):
        # last one is always a representative
        how_many = int(pos[i + 1]) - int(pos[i]) - 1
        new_list = [adjacencies[cluster_series[i]]]
        target = adjacencies[cluster_series[i + 1]]

        for i in range(how_many):
            new = new_list[-1].copy()
            diffs = (new != 0).astype(int) - (target != 0).astype(int)
            diff = np.where(diffs != 0)
            if diff == ():
                break
            if (i == 0):
                edges_per_change = int(
                    (np.nonzero(diffs)[0].shape[0] / 2) // (how_many + 1))
                if edges_per_change == 0:
                    edges_per_change += 1
            ixs = np.arange(diff[0].shape[0])
            np.random.shuffle(ixs)

            xs = diff[0][ixs[:edges_per_change]]
            ys = diff[1][ixs[:edges_per_change]]
            for j in range(xs.shape[0]):
                if diffs[xs[j], ys[j]] == -1:
                    new[xs[j], ys[j]] = np.random.choice([-1,1])
                    new[ys[j], xs[j]] = new[xs[j], ys[j]]
                else:
                    new[xs[j], ys[j]] = 0
                    new[ys[j], xs[j]] = 0
            new_list.append(new)

        thetas += new_list
    thetas.append(target)
    X = np.vstack(
        [
            ising_sampler(t,thresholds=np.zeros(n_samples), n=n_samples, responses=[-1,1])
            for t in thetas
        ])
    y = np.repeat(np.arange(len(thetas)), n_samples)

    distances = squareform(
        [l1_od_norm(t1 - t2) for t1, t2 in combinations(thetas, 2)])
    distances /= np.max(distances)
    labels_pred = AgglomerativeClustering(
        n_clusters=n_clusters, affinity='precomputed',
        linkage='complete').fit_predict(distances)

    id_cluster = np.repeat(labels_pred, n_samples)
    data = Bunch(
        X=X, y=y, id_cluster=labels_pred, samples_cluster=id_cluster, precs=np.array(thetas),
        thetas=np.array(thetas), sparse_precs=np.array(thetas),
        cluster_reps=adjacencies, cluster_series=cluster_series)
    return data

In [4]:
res = make_cluster_representative(n_samples=1000)

In [5]:
from importlib import reload
import regain.generalized_temporal_linear_model.ising; reload(regain.generalized_temporal_linear_model.ising)
from regain.generalized_temporal_linear_model.ising import SimilarityTemporalIsingModel

In [9]:
mdl = SimilarityTemporalIsingModel(alpha=0.25,n_clusters=3, beta=12, psi='l1')
mdl.fit(res['X'], res['y'])

(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)



(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)
(15, 10, 10)



SimilarityTemporalIsingModel(alpha=0.25, assume_centered=False, beta=12,
               compute_objective=True, eps=1e-06, init='empirical',
               ker_param=1, kernel=None, max_iter=100, max_iter_ext=100,
               n_clusters=3, psi='l1', return_history=False, rho=1.0,
               rtol=0.0001, tol=0.0001, update_rho_options=None,
               verbose=False)

In [10]:
labels_pred = AgglomerativeClustering(
                    n_clusters=3, affinity='precomputed',
                    linkage='complete').fit_predict(mdl.similarity_matrix_)

In [None]:
res['id_cluster']

In [None]:
labels_pred

In [11]:
from sklearn.metrics import v_measure_score
v_measure_score(res['id_cluster'], labels_pred)

0.009672771338712492

In [12]:
from regain.utils import structure_error

structure_error(res['thetas'], mdl.precision_, no_diagonal=True)

{'tp': 652,
 'tn': 208,
 'fp': 206,
 'fn': 284,
 'precision': 0.7599067599067599,
 'recall': 0.6965811965811965,
 'f1': 0.7268673355629877,
 'accuracy': 0.5733333333333334,
 'false_omission_rate': 0.5772357723577236,
 'fdr': 0.2400932400932401,
 'npv': 0.42276422764227645,
 'prevalence': 0.624,
 'miss_rate': 0.3034188034188034,
 'fall_out': 0.4975845410628019,
 'specificity': 0.5024154589371981,
 'plr': 1.3999253174010455,
 'nlr': 0.6039201183431953,
 'dor': 2.3180637221386573,
 'balanced_accuracy': 0.5994983277591973,
 'average_precision': 0.7127001529242163,
 'mcc': 0.5215597203322369}