# Kernel Time-varying Graphical Lasso
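More than 1-Markovian! A kernel matrix over the time points replaces the single consecutive-time penalty, so each time point can be coupled with more than its immediate neighbour. This notebook compares the kernel variant (KTGL) with the standard time-varying graphical lasso (TGL) on synthetic data.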

In [1]:
%matplotlib inline
from __future__ import print_function, division

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from itertools import product
from sklearn.datasets import make_sparse_spd_matrix
from sklearn.datasets.base import Bunch
from sklearn.utils.extmath import squared_norm
from sklearn.covariance import GraphLasso, empirical_covariance
from sklearn.datasets.base import Bunch
from sklearn.model_selection import GridSearchCV, ShuffleSplit

from regain import prox; reload(prox)
from regain.covariance import time_graph_lasso_; reload(time_graph_lasso_);
from regain import utils; reload(utils)
import time

In [29]:
from regainpr.covariance import time_graph_lasso_kernel_; reload(time_graph_lasso_kernel_);
from regainpr import datasets; reload(datasets);

In [30]:
def tgl_results(data_grid, K, **params):
    """Fit a ``TimeGraphLasso`` on ``data_grid`` and score it against ``K``.

    Parameters
    ----------
    data_grid : array
        Data handed to ``fit`` and ``score``. The estimator is built with
        ``time_on_axis='last'``, so time is presumably the last axis
        (TODO confirm); ``data_grid.shape[0]`` is used to scale ``rho``.
    K : array
        Reference precision matrices the estimate is compared with.
        ``K.shape[1]`` is reported as ``n_dim_obs``.
    **params
        Extra estimator parameters (e.g. ``alpha``, ``beta``) applied through
        ``set_params`` before fitting.

    Returns
    -------
    dict
        ``n_dim_obs``, ``time`` (seconds spent in ``set_params`` + ``fit``),
        ``iterations``, ``F1score``, ``MSE_precision``, ``likelihood`` and
        ``estimator``, plus every entry returned by ``utils.structure_error``.
    """
    mdl = time_graph_lasso_.TimeGraphLasso(
        time_on_axis='last', assume_centered=0, verbose=0, rtol=1e-5, tol=1e-5,
        max_iter=500, rho=1. / np.sqrt(data_grid.shape[0]))

    tic = time.time()
    ll = mdl.set_params(**params).fit(data_grid)
    tac = time.time()

    # The structure metrics are computed once and reused for the F1 score;
    # the original evaluated structure_error twice on identical arguments.
    ss = utils.structure_error(K, ll.precision_)

    res = dict(n_dim_obs=K.shape[1],
               time=tac - tic,
               iterations=ll.n_iter_,
               F1score=ss['f1'],
               MSE_precision=utils.error_norm(K, ll.precision_),
               likelihood=mdl.score(data_grid),
               estimator=ll)
    # Entries of `ss` take precedence on key clashes, as in dict(res, **ss).
    res.update(ss)
    return res

def ktgl_results(data_grid, K, **params):
    """Fit a ``KernelTimeGraphLasso`` on ``data_grid`` and score it against ``K``.

    Parameters
    ----------
    data_grid : array
        Data handed to ``fit`` and ``score``. The estimator is built with
        ``time_on_axis='last'``, so time is presumably the last axis
        (TODO confirm); ``data_grid.shape[0]`` is used to scale ``rho``.
    K : array
        Reference precision matrices the estimate is compared with.
        ``K.shape[1]`` is reported as ``n_dim_obs``.
    **params
        Extra estimator parameters (e.g. ``alpha``, ``kernel``) applied through
        ``set_params`` before fitting.

    Returns
    -------
    dict
        ``n_dim_obs``, ``time`` (seconds spent in ``set_params`` + ``fit``),
        ``iterations``, ``F1score``, ``MSE_precision``, ``likelihood`` and
        ``estimator``, plus every entry returned by ``utils.structure_error``.
    """
    mdl = time_graph_lasso_kernel_.KernelTimeGraphLasso(
        time_on_axis='last', assume_centered=0, verbose=0, rtol=1e-5, tol=1e-5,
        max_iter=500, rho=1. / np.sqrt(data_grid.shape[0]))

    tic = time.time()
    ll = mdl.set_params(**params).fit(data_grid)
    tac = time.time()

    # The structure metrics are computed once and reused for the F1 score;
    # the original evaluated structure_error twice on identical arguments.
    ss = utils.structure_error(K, ll.precision_)

    res = dict(n_dim_obs=K.shape[1],
               time=tac - tic,
               iterations=ll.n_iter_,
               F1score=ss['f1'],
               MSE_precision=utils.error_norm(K, ll.precision_),
               likelihood=mdl.score(data_grid),
               estimator=ll)
    # Entries of `ss` take precedence on key clashes, as in dict(res, **ss).
    res.update(ss)
    return res

In [31]:
# Setting 1: hyper-parameters, problem size and the synthetic dataset.
# NOTE: `alpha` and `beta` are both reassigned further down before the
# estimators are fitted; the trailing comments record other values that were tried.
alpha = 0.45 #0.0025
beta = 0 # 1000

n_samples = 100
T = 10
n_dim_obs = 100

# Key under which the single generated dataset is stored in `data`.
k = (n_dim_obs, T)

# Seed NumPy's global generator before generating the data
# (presumably make_dataset draws from np.random — TODO confirm).
np.random.seed(20)

# Pick up any edits to the datasets module, then build one dataset per
# (dim, T) key; only dim = n_dim_obs is generated here.
reload(datasets)
data = {(dim, T) : datasets.make_dataset(
    update_theta='l2', normalize_starting_matrices=True,
    n_samples=n_samples, n_dim_lat=0, n_dim_obs=dim,  T=T, epsilon=1e-1,
    proportional=True, degree=2, keep_sparsity=True)
    for dim in [n_dim_obs]}

In [32]:
# `thetas` of the generated dataset; the fitting loop later passes the same
# attribute to the scoring helpers as the reference `K`.
K = data[k].thetas
# Number of non-zero entries in each element of K.
print ([(i!=0).sum() for i in K])

[480, 480, 480, 480, 480, 480, 480, 480, 480, 480]


In [33]:
# Count of exactly-zero entries in `thetas`, divided by n_dim_obs**2 * T
# (true division, because of the __future__ import in the first cell).
(data[k].thetas == 0).sum() / (n_dim_obs ** 2 * T)

0.952

In [34]:
# np.linalg.norm of the difference between consecutive entries of `thetas` ...
print([np.linalg.norm(data[k].thetas[i] - data[k].thetas[i+1]) for i in range(T-1)])
# ... and of `ells` (the dataset was generated with n_dim_lat=0).
print([np.linalg.norm(data[k].ells[i] - data[k].ells[i+1]) for i in range(T-1)])

[0.08693145744887006, 0.0911228876833933, 0.0913770764418117, 0.08856328541201983, 0.08873366888607306, 0.09016180776209934, 0.08849098911088601, 0.08821535520955946, 0.0874366725232461]
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [35]:
# Prepare an empty DataFrame for the results:
# rows are (method, time) pairs, columns are (score, dim) pairs.
n_dims = [n_dim_obs]
n_times = [T]
methods = ['TGL', 'KTGL']
# The fitting loop fills each row with `[res[x] for x in scores]`, so every
# name listed here must be a key of the dict returned by the scoring helpers.
scores = sorted([
    "MSE_precision", 'estimator',
    'time', 'iterations', 'precision', 'recall', 'accuracy', 'balanced_accuracy',
    'f1', 'npv', 'prevalence', 'miss_rate', 'likelihood',
    'specificity', 'plr', 'nlr'])

cols = pd.MultiIndex.from_product([scores, n_dims], names=('score', 'dim'))
rows = pd.MultiIndex.from_product([methods, n_times], names=('method', 'time'))

dff = pd.DataFrame(columns=cols, index=rows)
# Shorthand used to build multi-level .loc selections.
idx = pd.IndexSlice

In [72]:
# RBF kernel evaluated on the time indices 0..T-1 (as a column vector).
# NOTE(review): `kernel` is reassigned by the tridiagonal construction in the
# next cell, and the fit cell's execution count follows that one, so this RBF
# matrix does not appear to be the one passed to the estimator.
from sklearn.gaussian_process import kernels
rbf = kernels.RBF(length_scale=0.5)
kernel = rbf(np.arange(T)[:,None])
kernel

array([[1.00000000e+00, 1.35335283e-01, 3.35462628e-04, 1.52299797e-08,
        1.26641655e-14, 1.92874985e-22, 5.38018616e-32, 2.74878501e-43,
        2.57220937e-56, 4.40853133e-71],
       [1.35335283e-01, 1.00000000e+00, 1.35335283e-01, 3.35462628e-04,
        1.52299797e-08, 1.26641655e-14, 1.92874985e-22, 5.38018616e-32,
        2.74878501e-43, 2.57220937e-56],
       [3.35462628e-04, 1.35335283e-01, 1.00000000e+00, 1.35335283e-01,
        3.35462628e-04, 1.52299797e-08, 1.26641655e-14, 1.92874985e-22,
        5.38018616e-32, 2.74878501e-43],
       [1.52299797e-08, 3.35462628e-04, 1.35335283e-01, 1.00000000e+00,
        1.35335283e-01, 3.35462628e-04, 1.52299797e-08, 1.26641655e-14,
        1.92874985e-22, 5.38018616e-32],
       [1.26641655e-14, 1.52299797e-08, 3.35462628e-04, 1.35335283e-01,
        1.00000000e+00, 1.35335283e-01, 3.35462628e-04, 1.52299797e-08,
        1.26641655e-14, 1.92874985e-22],
       [1.92874985e-22, 1.26641655e-14, 1.52299797e-08, 3.35462628e-04,
   

In [92]:
# Tridiagonal T x T kernel: ones on the main diagonal and `beta` on the first
# super- and sub-diagonal. The `beta` set here is also the value the fitting
# loop passes to TGL.
beta = 0.5
# NOTE: despite its name, `rng` is a constant vector of length T-1, not a random generator.
rng = np.full(T-1, beta)

kernel = np.diag(rng,1) +np.diag(rng,-1) + np.eye(T)

kernel

array([[1. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0.5, 1. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0.5, 1. , 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0.5, 1. , 0.5, 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 1. , 0.5, 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0.5, 1. , 0.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0.5, 1. , 0.5, 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0.5, 1. , 0.5, 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 1. , 0.5],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 1. ]])

In [93]:
# Setting 1: fit TGL and KTGL on the datasets in `data` and store their scores in `dff`.
alpha = 0.361 #289 #0.0025
# beta = kernel[0,1]
# (left disabled: `beta` keeps the value assigned when the tridiagonal kernel
# was built, which is the same number as kernel[0,1].)

# At most the first five datasets are processed, in sorted key order.
# NOTE(review): the loop rebinds the notebook-level `k`, and `res` is first the
# dataset and then, inside the body, the result dict of each method.
for i, (k, res) in enumerate(sorted(data.items())[:5]):
    dim = k[0]  # not referenced again within this cell
    print("Start with: dim=%d, T=%d (it %d)" % (k[0],k[1], i))
    data_list = res.data
    K = res.thetas
    # Move the first axis of the stacked data to the last position; the helpers
    # build their estimators with time_on_axis='last' (presumably axis 0 of
    # `data_list` is time — TODO confirm).
    data_grid = np.array(data_list).transpose(1,2,0)  # to use it later for grid search

    # '\r' with end='' lets the next message overwrite this one on the same line.
    print("starting TGL ...\r", end='')
    res = tgl_results(data_grid, K, beta=beta, alpha=alpha)
    # Row (method, T), all score columns for this dim; values follow the order of `scores`.
    dff.loc[idx['TGL', k[1]], idx[:, k[0]]] = [res[x] for x in scores]
    
    print("starting KTGL ...\r", end='')
    res = ktgl_results(data_grid, K, kernel=kernel, alpha=alpha)
    dff.loc[idx['KTGL', k[1]], idx[:, k[0]]] = [res[x] for x in scores]

Start with: dim=100, T=10 (it 0)
starting KTGL ...

In [94]:
# Keep the columns for dim == n_dim_obs and the rows for time == T, which
# leaves one row per method and one column per score.
mm = dff.xs(n_dim_obs, level='dim', axis=1).xs(T, level='time')
# mm['likelihood']
mm

score,MSE_precision,accuracy,balanced_accuracy,estimator,f1,iterations,likelihood,miss_rate,nlr,npv,plr,precision,prevalence,recall,specificity,time
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
TGL,0.116255,0.12632,0.538563,"TimeGraphLasso(alpha=0.361, assume_centered=0,...",0.0985183,106,-91038.0,0.00541667,0.0656231,0.996702,1.08406,0.051826,0.048,0.994583,0.082542,2.17929
KTGL,0.116254,0.1266,0.53871,"KernelTimeGraphLasso(alpha=0.361, assume_cente...",0.0985468,119,-91038.1,0.00541667,0.0653901,0.996714,1.08441,0.0518417,0.048,0.994583,0.0828361,4.08452


In [104]:
# Element-wise absolute difference between the precision_ arrays of the TGL and
# KTGL estimators stored in `dff`. The literals 100 and 10 select the dim and
# time labels; they equal n_dim_obs and T as set earlier in the notebook.
np.abs(dff.estimator[100]['TGL'][10].precision_ - dff.estimator[100]['KTGL'][10].precision_)

array([[[7.67158676e-06, 5.10945376e-06, 1.63747381e-05, ...,
         2.45893531e-05, 6.87941458e-06, 1.74204470e-05],
        [5.10945376e-06, 3.24269850e-06, 0.00000000e+00, ...,
         5.57347990e-05, 2.97723100e-05, 2.35970008e-06],
        [1.63747381e-05, 0.00000000e+00, 3.73031631e-06, ...,
         3.08820034e-05, 5.78244088e-06, 2.52879074e-05],
        ...,
        [2.45893531e-05, 5.57347990e-05, 3.08820034e-05, ...,
         2.35548626e-05, 2.48699872e-05, 4.15581528e-07],
        [6.87941458e-06, 2.97723100e-05, 5.78244088e-06, ...,
         2.48699872e-05, 1.44069894e-05, 4.90696665e-07],
        [1.74204470e-05, 2.35970008e-06, 2.52879074e-05, ...,
         4.15581528e-07, 4.90696665e-07, 2.34622589e-05]],

       [[6.48759374e-06, 3.99265242e-06, 4.90111504e-06, ...,
         2.35680591e-05, 2.05623719e-05, 1.91715804e-06],
        [3.99265242e-06, 7.21150680e-06, 3.80772351e-05, ...,
         1.85119035e-05, 1.94628003e-05, 1.80587662e-05],
        [4.90111504e-06, 

In [None]:
# Render the MSE_precision value of each method with three decimals, joined
# by ' & ' (presumably to paste as cells of a LaTeX table row).
from decimal import Decimal
' & '.join(['%.3f' % Decimal(i) for i in mm['MSE_precision']])

In [None]:
# Pickle every score column except 'estimator', so the fitted model objects
# are not written to "dff_setting_1.pkl".
dff[[s for s in scores if s != 'estimator']].to_pickle("dff_setting_1.pkl")

In [None]:
# Matrix rank of every element of the latent component of three estimators.
# NOTE(review): the keys 'LTGL ($\ell_2^2$)', 'LTGL ($\ell_1$)' and 'LVGLASSO'
# are not among the methods ('TGL', 'KTGL') used to build `dff`/`mm` in this
# notebook, so these lookups presumably fail here; the cell looks carried over
# from a latent-variable experiment — confirm before running.
l1 = ([np.linalg.matrix_rank(r) for r in mm.estimator['LTGL ($\ell_2^2$)'].latent_])
l2 = ([np.linalg.matrix_rank(r) for r in mm.estimator['LTGL ($\ell_1$)'].latent_])
l3 = ([np.linalg.matrix_rank(r) for r in mm.estimator['LVGLASSO'].L])

# NOTE(review): l4-l6 repeat the expressions for l1-l3 verbatim, so they hold
# the same values unless `mm` is changed in between these two groups.
l4 = ([np.linalg.matrix_rank(r) for r in mm.estimator['LTGL ($\ell_2^2$)'].latent_])
l5 = ([np.linalg.matrix_rank(r) for r in mm.estimator['LTGL ($\ell_1$)'].latent_])
l6 = ([np.linalg.matrix_rank(r) for r in mm.estimator['LVGLASSO'].L])

In [None]:
# Rebind l1..l6 to the six values stored in "ells.pkl", replacing anything
# computed for them earlier.
# NOTE(review): utils.load_pickle presumably unpickles the file; only load
# pickles that come from a trusted source.
l1,l2,l3,l4,l5,l6 = utils.load_pickle(filename="ells.pkl")

In [None]:
import collections
import matplotlib.pyplot as plt

colors = ['white', 'lightblue', 'C7']
# Bar opacity. Deliberately not named `alpha`, so the regularisation
# hyper-parameter `alpha` used by the estimators is not overwritten.
bar_alpha = 0.95
bar_width = 0.24

# (legend label, fill colour, horizontal offset) for each method. The offsets
# place the three bars belonging to one rank value next to each other.
bar_styles = [
    (r'LTGL ($\ell_2^2$)', colors[0], 0.0),
    (r'LTGL ($\ell_1$)', colors[1], 0.25),
    ('LVGLASSO', colors[2], -0.25),
]


def _rank_frequencies(ranks):
    """Return the distinct values of `ranks` and their relative frequencies.

    Both results are float arrays ordered by increasing rank. The counts are
    divided by ``len(ranks)``, i.e. by the length of the list being counted.
    """
    counts = collections.Counter(ranks)
    distinct = sorted(counts)
    positions = np.array(distinct, dtype=float)
    frequencies = np.array([counts[r] for r in distinct], dtype=float) / len(ranks)
    return positions, frequencies


def _draw_rank_histogram(ax, rank_lists, reference_rank, xlabel):
    """Draw one grouped histogram of ranks per method on `ax`.

    `rank_lists` holds one list of ranks per entry of `bar_styles`;
    `reference_rank` is marked with a dashed red vertical line.
    """
    for (label, color, offset), ranks in zip(bar_styles, rank_lists):
        positions, frequencies = _rank_frequencies(ranks)
        ax.bar(positions + offset, frequencies, alpha=bar_alpha, width=bar_width,
               label=label, color=color, edgecolor='k')

    ax.set_xticks(range(0, 30, 2))
    ax.axvline(reference_rank, c='r', ls='--')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('frequency')
    ax.xaxis.label.set_size(15)
    ax.yaxis.label.set_size(15)


f, (ax1, ax2) = plt.subplots(2, 1, sharey=False, figsize=(10, 5), dpi=600)

# The reference lines at 20 and 5 are presumably the true latent ranks of the
# two problems (p_2 and p_1) — TODO confirm.
_draw_rank_histogram(ax1, (l1, l2, l3), 20, r'ranks of L obtained with ($p_2$)')
_draw_rank_histogram(ax2, (l4, l5, l6), 5, r'ranks of L obtained with ($p_1$)')

# A single legend on the top panel serves both panels.
ax1.legend(loc='upper left', fontsize='x-large')
plt.tight_layout()
plt.show()

In [None]:
# Write the figure currently bound to `f` to a PDF with a transparent
# background and a tight bounding box.
# NOTE(review): `matplotlib` itself is not referenced in this cell; the import
# looks unnecessary.
import matplotlib
f.savefig("ranks_distribution_vertical.pdf", dpi=600, transparent=True, bbox_inches='tight')

In [None]:
import collections
import matplotlib.pyplot as plt

colors = ['white', 'lightblue', 'C7']
# Line opacity. Deliberately not named `alpha`, so the regularisation
# hyper-parameter `alpha` used by the estimators is not overwritten.
line_alpha = 0.5

# (legend label, line colour, horizontal shift) for each method. The small
# shifts keep coinciding traces from hiding each other.
# NOTE(review): the first colour is 'white', which is invisible on a white
# background — confirm this is intended.
line_styles = [
    (r'LTGL ($\ell_2^2$)', colors[0], 0.0),
    (r'LTGL ($\ell_1$)', colors[1], 0.2),
    ('LVGLASSO', colors[2], 0.4),
]


def _draw_rank_traces(ax, rank_lists, reference_rank, xlabel):
    """Plot each list of ranks against its position in the list on `ax`.

    `rank_lists` holds one list of ranks per entry of `line_styles`;
    `reference_rank` is marked with a dashed red horizontal line. Each trace
    takes its x positions from its own length.
    """
    for (label, color, shift), ranks in zip(line_styles, rank_lists):
        ax.plot(np.arange(len(ranks)) + shift, ranks,
                alpha=line_alpha, label=label, color=color)

    ax.axhline(reference_rank, c='r', ls='--')
    # NOTE(review): the x axis is the position in the list; this label acts as
    # a caption identifying the panel.
    ax.set_xlabel(xlabel)
    ax.xaxis.label.set_size(15)


f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(10, 2.6), dpi=600)

# The reference lines at 20 and 5 are presumably the true latent ranks of the
# two problems (p_2 and p_1) — TODO confirm.
_draw_rank_traces(ax1, (l1, l2, l3), 20, r'ranks of L obtained with ($p_2$)')
_draw_rank_traces(ax2, (l4, l5, l6), 5, r'ranks of L obtained with ($p_1$)')

# The y axis (shared by both panels) shows the rank values themselves, so it
# is labelled 'rank' rather than 'frequency'.
ax1.set_ylabel('rank')
ax1.yaxis.label.set_size(15)

ax1.legend(loc='best', fontsize='large')
plt.tight_layout()
plt.show()