# Kernel Latent Variable Time-varying Graphical Lasso
Temporal dependencies that go beyond the 1-Markovian (consecutive time points only) assumption!

In [21]:
%matplotlib inline
from __future__ import print_function, division

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from itertools import product
from functools import partial
from sklearn.datasets import make_sparse_spd_matrix
from sklearn.datasets.base import Bunch
from sklearn.utils.extmath import squared_norm
from sklearn.covariance import GraphLasso, empirical_covariance
from sklearn.datasets.base import Bunch
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.gaussian_process import kernels

from regain import prox; reload(prox)
from regain.covariance import time_graph_lasso_; reload(time_graph_lasso_);
from regain.covariance import latent_time_graph_lasso_; reload(latent_time_graph_lasso_);
import time

from regainpr.bayesian import wishart_process_; reload(wishart_process_)
from regain.bayesian import stats; reload(stats)

from regainpr.covariance import kernel_time_graphical_lasso_;
reload(kernel_time_graphical_lasso_);
from regainpr.covariance import kernel_latent_time_graphical_lasso_;
reload(kernel_latent_time_graphical_lasso_);

from regainpr import datasets; reload(datasets);
from regainpr import utils; reload(utils);

In [30]:
import seaborn as sns
# Heatmap of the kernel used to generate the data, evaluated on the
# integer grid 0..9 (passed as a column vector).
time_grid = np.arange(10)[:, None]
generating_kernel = kernels.ExpSineSquared(length_scale=2, periodicity=np.pi)
sns.heatmap(generating_kernel(time_grid))

In [90]:
# Setting 1: one dataset generated with mode='sin' and shape='smooth'.
n_samples = 100
T = 10
n_dim_obs = 5

# Key under which the dataset is stored: (observed dimension, T).
k = (n_dim_obs, T)

np.random.seed(0)

reload(datasets)
dataset_options = dict(
    mode='sin', shape='smooth',
    update_theta='l2', normalize_starting_matrices=False,
    n_samples=n_samples, n_dim_lat=0, n_dim_obs=n_dim_obs, T=T,
    epsilon=1e-1, proportional=True, degree=2, keep_sparsity=True)
data = {k: datasets.make_dataset(**dataset_options)}

In [42]:
# Setting 2: sample from GPs; several datasets are generated with the same
# configuration.
n_samples = 100
T = 10
n_dim = 10
n_repetitions = 10  # number of datasets to generate

np.random.seed(0)

reload(datasets)
# Fix: the observed dimension is taken from `n_dim` defined in this cell.
# The original passed `dim`, which this cell never defines, so it only
# worked when that name had leaked into the kernel from somewhere else.
data = [datasets.make_dataset(
    mode='gp', n_samples=n_samples, n_dim_lat=1, n_dim_obs=n_dim, T=T, epsilon=0.4)
    for rep in range(n_repetitions)]

In [44]:
# Info on the data set: fraction of non-zero entries in the `thetas` of the
# first dataset, per time point and overall.
K = data[0].thetas
nonzero_fraction_per_time = [
    (theta != 0).sum() / float(theta.size) for theta in K]
print ("Percentual of non-zero components at each time: {}".format(
    nonzero_fraction_per_time))

# Fix: normalise by the actual number of entries of K. The original divided
# by n_dim_obs ** 2 * T, where n_dim_obs is set by the "setting 1" cell and
# need not match the dimension of the dataset inspected here.
print ("Percentual of total non-zero components: {}".format(
    (K != 0).sum() / float(K.size)))

Percentual of non-zero components at each time: [0.72, 0.68, 0.7, 0.66, 0.7, 0.66, 0.66, 0.72, 0.68, 0.74]
Percentual of total non-zero components: 0.692


In [45]:
# Empty results table: rows are (method, time) pairs, columns are
# (score, dim) pairs.
idx = pd.IndexSlice

n_times = [T]
n_dims = range(10)
methods = ['TGL', 'KTGL-exp', 'KTGL-rbf', 'LTGL', 'KLTGL-exp', 'KLTGL-rbf', 'WP']

# Score names, kept in sorted order.
scores = [
    "MSE_precision", "MSE_observed", "MSE_latent", 'estimator', "mean_rank_error",
    'time', 'iterations', 'precision', 'recall', 'accuracy', 'balanced_accuracy',
    'f1', 'npv', 'prevalence', 'miss_rate', 'likelihood',
    'specificity', 'plr', 'nlr']
scores.sort()

rows = pd.MultiIndex.from_product([methods, n_times], names=('method', 'time'))
cols = pd.MultiIndex.from_product([scores, n_dims], names=('score', 'dim'))

dff = pd.DataFrame(index=rows, columns=cols)

In [104]:
%run kltgl_performance.py

In [105]:
# beta = 0.5
# rng = np.full(T-1, 1000)
# kernel_phi = np.diag(rng,1) +np.diag(rng,-1) + np.eye(T)
# kernel_phi = np.full((T ,T), 1000)

from kltgl_performance import *

In [None]:
# Evaluate the methods on `data` for the given `scores`.
# `run_results` comes from the `kltgl_performance` star import; presumably it
# fills `dff` in place, since its return value is not kept — TODO confirm.
run_results(data, dff, scores)

Start with: dim=10, T=10 (it 4)
starting KLTGL - rbf ...

In [100]:
# Display the results table (rows: method/time, columns: score/dim).
dff

Unnamed: 0_level_0,score,MSE_latent,MSE_latent,MSE_latent,MSE_latent,MSE_latent,MSE_latent,MSE_latent,MSE_latent,MSE_latent,MSE_latent,...,time,time,time,time,time,time,time,time,time,time
Unnamed: 0_level_1,dim,0,1,2,3,4,5,6,7,8,9,...,0,1,2,3,4,5,6,7,8,9
method,time,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
TGL,10,,,,,,,,,,,...,768.107,747.888,714.045,729.131,751.498,72.1115,82.4011,64.7267,66.352,74.2699
KTGL-exp,10,,,,,,,,,,,...,888.629,817.166,885.807,911.11,853.956,89.3559,91.8826,91.4578,75.0154,85.6782
KTGL-rbf,10,,,,,,,,,,,...,884.727,941.035,895.528,964.712,896.624,92.6371,96.3485,76.3457,96.8005,101.958
LTGL,10,0.846779,0.18046,1.1252,0.0115461,0.849721,0.035268,0.0373468,0.00803797,0.964676,0.020255,...,1178.57,1198.19,1117.18,1242.43,1202.65,178.818,155.573,195.261,135.768,137.24
KLTGL-exp,10,0.00809063,0.535109,0.8695,0.869703,0.0208067,0.00538949,0.00677294,0.00689713,0.00883654,0.0139776,...,1606.58,2060.0,2044.6,2030.52,279.841,376.548,307.998,290.409,248.869,394.067
KLTGL-rbf,10,0.246064,0.419639,1.17388,0.996487,0.0199635,0.00527615,0.331384,0.0069021,0.686457,0.0652385,...,1534.41,2010.62,2115.32,2059.04,386.333,436.92,308.832,316.601,418.809,405.123
WP,10,,,,,,,,,,,...,241.967,249.101,249.43,245.571,248.164,268.528,254.855,256.784,252.193,250.345


In [93]:
# For every score, collapse its 'dim' columns into a row-wise mean and
# standard deviation (two summary columns per score).
dff_mean_std = pd.DataFrame()
for s in scores:
    score_columns = dff[s]
    dff_mean_std["%s_mean" % s] = score_columns.mean(axis=1)
    dff_mean_std["%s_std" % s] = score_columns.std(axis=1)

In [94]:
# Display the per-method mean/std summary built from `dff`.
dff_mean_std

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE_latent_mean,MSE_latent_std,MSE_observed_mean,MSE_observed_std,MSE_precision_mean,MSE_precision_std,accuracy_mean,accuracy_std,balanced_accuracy_mean,balanced_accuracy_std,...,precision_mean,precision_std,prevalence_mean,prevalence_std,recall_mean,recall_std,specificity_mean,specificity_std,time_mean,time_std
method,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
TGL,10,,,,,0.242236,0.050977,0.7278,0.043761,0.534103,0.01889,...,0.7319,0.044495,0.7178,0.042567,0.979539,0.010445,0.088666,0.040322,69.684961,6.812494
KTGL-exp,10,,,,,0.14161,0.029601,0.722,0.042489,0.510413,0.018288,...,0.722226,0.042577,0.7178,0.042567,0.995769,0.008009,0.025056,0.044563,87.781408,7.851088
KTGL-rbf,10,,,,,0.218617,0.046398,0.7188,0.042155,0.511533,0.01834,...,0.722774,0.042462,0.7178,0.042567,0.986817,0.017716,0.036248,0.051258,93.177345,8.269476
LTGL,10,0.468205,0.475809,0.311364,0.076213,0.5386,0.342668,0.6882,0.058099,0.570052,0.042048,...,0.757029,0.040608,0.7178,0.042567,0.841561,0.14004,0.298544,0.191793,164.887269,21.668368
KLTGL-exp,10,0.151853,0.265568,0.161338,0.036177,0.245507,0.215056,0.7142,0.031923,0.51357,0.018043,...,0.723566,0.043352,0.7178,0.042567,0.979192,0.042786,0.047947,0.064876,321.72704,47.821437
KLTGL-rbf,10,0.439789,0.448872,0.30313,0.081935,0.514439,0.312534,0.7028,0.031919,0.519443,0.019372,...,0.726736,0.041823,0.7178,0.042567,0.942501,0.054767,0.096384,0.085991,379.77974,43.927944
WP,10,,,,,1.310017,0.289704,0.7178,0.042567,0.5,0.0,...,0.7178,0.042567,0.7178,0.042567,1.0,0.0,0.0,0.0,253.941942,6.560123


In [37]:
# Keep the columns whose 'dim' level equals n_dim_obs, then the rows whose
# 'time' level equals T; the result is indexed by method only.
selected_dim = dff.xs(n_dim_obs, level='dim', axis=1)
mm = selected_dim.xs(T, level='time')
# mm['likelihood']
mm

score,MSE_latent,MSE_observed,MSE_precision,accuracy,balanced_accuracy,estimator,f1,iterations,likelihood,mean_rank_error,miss_rate,nlr,npv,plr,precision,prevalence,recall,specificity,time
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
TGL,,,0.441912,0.728,0.598078,"TimeGraphLasso(alpha=0.6, assume_centered=0, b...",0.826531,68,-5513.8,,0.0635838,0.244798,0.645161,1.26498,0.739726,0.692,0.936416,0.25974,0.128443
KTGL-exp,,,0.268091,0.736,0.593049,"KernelTimeGraphicalLasso(alpha=0.6, assume_cen...",0.835,71,-5472.83,,0.0346821,0.157089,0.73913,1.23882,0.735683,0.692,0.965318,0.220779,0.246496
KTGL-rbf,,,0.365332,0.724,0.59699,"KernelTimeGraphicalLasso(alpha=0.6, assume_cen...",0.823077,64,-5411.06,,0.0722543,0.271394,0.621212,1.26436,0.739631,0.692,0.927746,0.266234,0.215958
LTGL,0.172769,0.435454,0.555427,0.746,0.675944,"LatentTimeGraphLasso(alpha=0.6, assume_centere...",0.823856,61,-5528.62,4.1,0.141618,0.286964,0.608,1.69475,0.792,0.692,0.858382,0.493506,0.227341
KLTGL-exp,0.038413,0.247017,0.286924,0.748,0.619736,"KernelLatentTimeGraphicalLasso(alpha=0.6, assu...",0.839695,192,-5472.96,3.6,0.0462428,0.16185,0.733333,1.33526,0.75,0.692,0.953757,0.285714,1.28166
KLTGL-rbf,0.0288049,0.345412,0.366312,0.738,0.617915,"KernelLatentTimeGraphicalLasso(alpha=0.6, assu...",0.830968,185,-5412.0,2.2,0.0693642,0.227278,0.661972,1.33942,0.750583,0.692,0.930636,0.305195,1.27938
WP,,,1.3773,0.692,0.5,"WishartProcess(assume_centered=False, burn_in=...",0.817967,1000,-27117.7,,0.0,0.0,1.0,1.0,0.692,0.692,1.0,0.0,258.963


In [38]:
# Subset of score columns that is exported to the LaTeX results table.
tmp = mm[["f1", "accuracy", "mean_rank_error", "MSE_precision", "time"]]

In [41]:
import os
from decimal import Decimal
def highlight_max(s):
    r'''
    Wrap the best value(s) of a Series in a LaTeX ``\textbf{...}`` command.

    "Best" is the maximum for the Series named "f1" or "accuracy" and the
    minimum for any other Series. Ties are all highlighted.

    Parameters
    ----------
    s : pandas.Series
        Column of scores; ``s.name`` selects between maximum and minimum.

    Returns
    -------
    pandas.Series
        Object-dtype copy of ``s`` in which every entry equal to the best
        value is replaced by the string ``\textbf{<value>}``. Float values
        are rendered with three decimals, other values are inserted as is.
        The input Series is left untouched.
    '''
    best = s.max() if s.name in ["f1", "accuracy"] else s.min()
    is_best = s == best

    # Floats (including numpy floating scalars) get a fixed three-decimal
    # rendering; anything else, e.g. an iteration count, is used verbatim.
    if isinstance(best, (float, np.floating)):
        best_text = '%.3f' % best
    else:
        best_text = best

    # Explicit object dtype so that a string can be stored next to numbers.
    s_ = s.astype(object)
    # Raw string on purpose: in a regular literal '\t' is a TAB character,
    # which turned the LaTeX command into "<TAB>extbf{...}".
    s_[is_best] = r'\textbf{%s}' % best_text
    return s_

# Write the highlighted table as LaTeX; missing values are shown as "-".
# NOTE(review): `path` is an absolute, user-specific directory — this cell
# will not run unchanged on another machine.
path = "/home/fede/Dropbox (DIBRIS)/projects/graphical models in time/kernel LTGL/exps"
results_file = os.path.join(path, "results.tex")
latex_table = tmp.apply(highlight_max).fillna("-")
latex_table.to_latex(
    results_file, float_format=lambda x: '%.3f' % x, escape=False)

In [30]:
# Fit a kernel time graphical lasso (no kernel argument passed) and score it
# on the same data it was fitted on.
# NOTE(review): `kernel_time_graph_lasso_` and `data_grid` are not defined in
# the visible cells (the imports bring in `kernel_time_graphical_lasso_`) —
# confirm this cell still runs on a fresh kernel.
initial_rho = 1. / np.sqrt(data_grid.shape[0])
mdl = kernel_time_graph_lasso_.KernelTimeGraphLasso(
    time_on_axis='last', assume_centered=0, verbose=0,
    tol=1e-5, rtol=1e-5, max_iter=500,
    rho=initial_rho, update_rho_options=dict(mu=5))
mdl.fit(data_grid)
mdl.score(data_grid)

-6648.427421079336

In [31]:
# Stack the per-group sample matrices of `data_list` into one design matrix
# and label every row with the index of the group it came from.
X = np.vstack(data_list)
# Fix: np.repeat builds the label vector correctly even when the groups have
# different numbers of rows. The original np.array([...]).flatten() only
# produced a flat integer vector when all groups had the same size.
y = np.repeat(np.arange(len(data_list)), [x.shape[0] for x in data_list])

In [37]:
reload(prox)
reload(time_graph_lasso_)
reload(kernel_time_graph_lasso_);

from functools import partial

from sklearn.model_selection import StratifiedKFold

from skopt.optimizer import optimizer; reload(optimizer)
from skopt import searchcv; reload(searchcv)
from skopt.space import Categorical
# include below until https://github.com/scikit-optimize/scikit-optimize/issues/718 is resolved
class BayesSearchCV(searchcv.BayesSearchCV):
    """Temporary local subclass of skopt's ``BayesSearchCV``.

    The only addition is a ``_run_search`` method whose body just raises.
    Presumably the method merely has to exist for the class to be usable
    with the installed scikit-learn and is never actually called — TODO
    confirm against the linked skopt issue.
    """
    # Stub: always raises, pointing the user to a newer skopt release.
    def _run_search(self, x): raise BaseException('Use newer skopt')

# Model whose `alpha` and `length_scale` are tuned below; the kernel is an
# ExpSineSquared with its periodicity fixed to pi.
# NOTE(review): `kernel_time_graph_lasso_` and `data_grid` are not defined in
# the visible cells — confirm they exist on a fresh kernel.
periodic_kernel = partial(kernels.ExpSineSquared, periodicity=np.pi)
mdl = kernel_time_graph_lasso_.NewKernelTimeGraphLasso(
    alpha=0.5, psi='laplacian',
    kernel=periodic_kernel, length_scale=2,
    assume_centered=0, verbose=0, tol=1e-5, rtol=1e-5, max_iter=500,
    rho=1. / np.sqrt(data_grid.shape[0]), update_rho_options=dict(mu=5))

# Both hyper-parameters are searched on a log-uniform range [1e-4, 1e+1].
search_spaces = {
    'alpha': (1e-4, 1e+1, 'log-uniform'),
    'length_scale': (1e-4, 1e+1, 'log-uniform'),
    # 'kernel': Categorical([partial(kernels.ExpSineSquared, periodicity=np.pi),
    #                        kernels.RBF]),  # categorical parameter
}
cross_validation = StratifiedKFold(3)
bscv = BayesSearchCV(
    mdl, search_spaces=search_spaces, n_iter=50, n_points=3,
    cv=cross_validation)

bscv.fit(X, y)
bscv.score(X, y)

-6664.273982392405

In [38]:
# Hyper-parameters selected by the Bayesian search.
bscv.best_params_

{'alpha': 0.5792194050871016, 'length_scale': 0.38781388320536275}

In [None]:
reload(wishart_process_)
# Fit a Wishart process on `data_list`.
# NOTE(review): `data_list` is not defined in the visible cells — confirm it
# exists on a fresh kernel.
wp = wishart_process_.WishartProcess(time_on_axis='first', verbose=True).fit(data_list)

# Likelihood evaluated at the fitted `D_map` attribute.
wp.likelihood(wp.D_map)

# Largest value among the log-likelihoods kept after burn-in.
wp.loglikes_after_burnin.max()

# Set before calling score(); presumably score() relies on the stored
# precision — TODO confirm.
wp.store_precision = True

wp.score(data_list)

In [None]:
# Per-method scores: columns with 'dim' == n_dim_obs, rows with 'time' == T.
mm = dff.xs(n_dim_obs, level='dim', axis=1).xs(T, level='time')
mm

In [None]:
# Element-wise absolute difference between the precision matrices estimated
# by two fitted models stored in the 'estimator' score.
# NOTE(review): the keys used here (dim 100, method 'KTGL') do not appear in
# the `n_dims` / `methods` lists defined when `dff` is created in this
# notebook — presumably they belong to an older layout of `dff`; verify.
np.abs(dff.estimator[100]['TGL'][10].precision_ - dff.estimator[100]['KTGL'][10].precision_)

In [None]:
from decimal import Decimal
# MSE_precision of every method, three decimals each, joined with " & ".
' & '.join('%.3f' % Decimal(value) for value in mm['MSE_precision'])

In [None]:
# Save every score except the fitted 'estimator' objects.
scores_to_save = [name for name in scores if name != 'estimator']
dff[scores_to_save].to_pickle("dff_setting_1.pkl")

In [None]:
def _matrix_ranks(matrices):
    """Return the list of numerical ranks of the given matrices."""
    return [np.linalg.matrix_rank(matrix) for matrix in matrices]


# NOTE(review): l4..l6 are computed with exactly the same expressions as
# l1..l3, and the method names used as keys are not in the `methods` list of
# this notebook — presumably `mm` came from different experiments; verify.
l1 = _matrix_ranks(mm.estimator['LTGL ($\ell_2^2$)'].latent_)
l2 = _matrix_ranks(mm.estimator['LTGL ($\ell_1$)'].latent_)
l3 = _matrix_ranks(mm.estimator['LVGLASSO'].L)

l4 = _matrix_ranks(mm.estimator['LTGL ($\ell_2^2$)'].latent_)
l5 = _matrix_ranks(mm.estimator['LTGL ($\ell_1$)'].latent_)
l6 = _matrix_ranks(mm.estimator['LVGLASSO'].L)

In [None]:
# Load the six rank lists from "ells.pkl", replacing any l1..l6 already
# defined in the session.
# NOTE(review): if utils.load_pickle unpickles the file, only use it on
# files you trust.
l1,l2,l3,l4,l5,l6 = utils.load_pickle(filename="ells.pkl")

In [None]:
import collections
import matplotlib.pyplot as plt

def _plot_rank_histogram(ax, rank_lists, labels, colors, offsets, alpha, width=0.24):
    """Draw side-by-side relative-frequency bars of rank values on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    rank_lists : sequence of sequences
        One collection of rank values per method.
    labels, colors, offsets : sequences
        Legend label, bar colour and horizontal shift for each entry of
        ``rank_lists`` (matched by position).
    alpha : float
        Opacity of the bars.
    width : float, optional
        Width of each bar.
    """
    for ranks, label, color, offset in zip(rank_lists, labels, colors, offsets):
        counter = collections.Counter(ranks)
        # list(...) so that this also works when keys()/values() are views.
        positions = np.array(list(counter.keys())) + offset
        # Fix: every histogram is normalised by the length of its own list
        # (the original divided l2/l3 by len(l1) and l5/l6 by len(l4)).
        frequencies = np.array(list(counter.values())) / float(len(ranks))
        ax.bar(positions, frequencies, alpha=alpha, width=width, label=label,
               color=color, edgecolor='k')


f, (ax1, ax2) = plt.subplots(2, 1, sharey=False, figsize=(10, 5), dpi=600)

colors = ['white', 'lightblue', 'C7']
alpha = 0.95
# Raw strings: same text as before, without relying on '\e' not being an escape.
labels = [r'LTGL ($\ell_2^2$)', r'LTGL ($\ell_1$)', 'LVGLASSO']
offsets = [0, 0.25, -0.25]

# (axes, rank lists, x-axis label, position of the red dashed reference line)
panels = [
    (ax1, [l1, l2, l3], r'ranks of L obtained with ($p_2$)', 20),
    (ax2, [l4, l5, l6], r'ranks of L obtained with ($p_1$)', 5),
]
for ax, rank_lists, xlabel, reference_rank in panels:
    _plot_rank_histogram(ax, rank_lists, labels, colors, offsets, alpha)
    ax.set_xticks(range(0, 30, 2))
    ax.axvline(reference_rank, c='r', ls='--')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('frequency')
    ax.xaxis.label.set_size(15)
    ax.yaxis.label.set_size(15)

# A single legend on the top panel; both panels share labels and colours.
ax1.legend(loc='upper left', fontsize='x-large')
plt.tight_layout()
plt.show()

In [None]:
import matplotlib
# Save the current figure `f` as a PDF with a transparent background and a
# tight bounding box.
f.savefig("ranks_distribution_vertical.pdf", dpi=600, transparent=True, bbox_inches='tight')

In [None]:
import collections
import matplotlib.pyplot as plt

def _plot_rank_traces(ax, rank_lists, labels, colors, shifts, alpha):
    """Plot each list of ranks against its (slightly shifted) index on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    rank_lists : sequence of sequences
        One collection of rank values per method.
    labels, colors, shifts : sequences
        Legend label, line colour and horizontal shift for each entry of
        ``rank_lists`` (matched by position).
    alpha : float
        Opacity of the lines.
    """
    for ranks, label, color, shift in zip(rank_lists, labels, colors, shifts):
        # x positions follow the length of the list being plotted (the
        # original always used the length of the first list).
        ax.plot(np.arange(len(ranks)) + shift, ranks,
                alpha=alpha, label=label, color=color)


f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 2.6), dpi=600)

colors = ['white', 'lightblue', 'C7']
alpha = 0.5
# Raw strings: same text as before, without relying on '\e' not being an escape.
labels = [r'LTGL ($\ell_2^2$)', r'LTGL ($\ell_1$)', 'LVGLASSO']
shifts = [0, .2, .4]

# The unused collections.Counter(...) assignments of the original were
# removed: nothing in this cell read them.
# NOTE(review): the y axis shows rank values although it is labelled
# 'frequency', and the first colour is 'white' — presumably leftovers from
# the bar-chart version of this figure; confirm before publishing.
_plot_rank_traces(ax1, [l1, l2, l3], labels, colors, shifts, alpha)
ax1.axhline(20, c='r', ls='--')
ax1.set_xlabel(r'ranks of L obtained with ($p_2$)')
ax1.set_ylabel('frequency')
ax1.xaxis.label.set_size(15)
ax1.yaxis.label.set_size(15)

_plot_rank_traces(ax2, [l4, l5, l6], labels, colors, shifts, alpha)
ax2.set_xlabel(r'ranks of L obtained with ($p_1$)')
ax2.xaxis.label.set_size(15)
ax2.axhline(5, c='r', ls='--')

ax1.legend(loc='best', fontsize='large')
plt.tight_layout()
plt.show()