In [1]:
import sys
import time
import numpy as np
import pandas as pd
import itertools
import plotly.express as px

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn.covariance import empirical_covariance

sys.path.append('..')
from gglasso.solver.single_admm_solver import ADMM_SGL
from gglasso.solver.single_admm_solver import block_SGL
from gglasso.helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
from gglasso.helper.model_selection import single_grid_search
from gglasso.benchmarks import models_to_dict, sklearn_time_benchmark, admm_time_benchmark

from regain.covariance import GraphicalLasso as rg_GL

In [2]:
p = 100
N = 200
Omega_0 = np.eye(p)
lambda1 = 0.01
tol_list = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]
enet_list = [1]

Sigma, Theta = group_power_network(p, K=5, M=2)  # Theta is true precision matrix

S, samples = sample_covariance_matrix(Sigma, N)

S = S[0, :, :]
Theta = Theta[0, :, :]  # true precision matrix

# Model solution Z

In [3]:
start = time.time()
Z = rg_GL(alpha=lambda1, max_iter=50000, tol = 1e-10).fit(samples[0,:,:].T)
end = time.time()

hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
Z_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds)
Z = Z.precision_
print(Z_time)

00:00:00.18


In [4]:
models = models_to_dict(models=["sklearn", "regain"], tol_list=tol_list, rtol_list=tol_list, enet_list=enet_list, 
                        max_iter=50000)

# Solution Z_i

In [5]:
%%capture
sk_time, sk_accuracy = sklearn_time_benchmark(models, X=samples[0, :, :].T, Z=Z)
admm_time, admm_accuracy = admm_time_benchmark(S=S, Omega_0=Omega_0, Z=Z, lambda1=lambda1, 
                                               method_list=["single", "block"],
                                               stop_list=['boyd', 'kkt'], 
                                               tol_list=tol_list, 
                                               rtol_list=tol_list)

In [11]:
sk_accuracy

{'0_0_precision_sklearn_tol_0.1_enet_1': 0.5604009872265182,
 '0_0_precision_sklearn_tol_0.01_enet_1': 0.5603329305371371,
 '0_0_precision_sklearn_tol_0.001_enet_1': 0.5603362459788321,
 '0_0_precision_sklearn_tol_0.0001_enet_1': 0.5603362459788321,
 '0_0_precision_sklearn_tol_1e-05_enet_1': 0.560336144162861,
 '0_0_precision_sklearn_tol_1e-06_enet_1': 0.56033614697553,
 '0_0_precision_sklearn_tol_1e-07_enet_1': 0.56033614697553,
 '0_0_precision_sklearn_tol_1e-08_enet_1': 0.56033614697553,
 '0_0_precision_regain_tol_0.1_rtol_0.1': 0.2871162306138639,
 '0_0_precision_regain_tol_0.1_rtol_0.01': 0.2871162306138639,
 '0_0_precision_regain_tol_0.1_rtol_0.001': 0.2871162306138639,
 '0_0_precision_regain_tol_0.1_rtol_0.0001': 0.2871162306138639,
 '0_0_precision_regain_tol_0.1_rtol_1e-05': 0.2871162306138639,
 '0_0_precision_regain_tol_0.1_rtol_1e-06': 0.2871162306138639,
 '0_0_precision_regain_tol_0.1_rtol_1e-07': 0.2871162306138639,
 '0_0_precision_regain_tol_0.1_rtol_1e-08': 0.2871162306138

# Data formating

In [7]:
df_sk = pd.DataFrame(data={'name': sk_time.keys(), 'time': sk_time.values(), "accuracy": sk_accuracy.values()})
df_admm = pd.DataFrame(data={'name': admm_time.keys(), 'time': admm_time.values(), "accuracy": admm_accuracy.values()})
df = pd.concat([df_sk, df_admm])
df.head()

ValueError: arrays must all be same length

In [8]:
df['split'] = df['name'].str.split('_')
df[["iter", "method", "tol_str", "tol", "rtol_str", "rtol"]] = pd.DataFrame(df['split'].tolist(), index=df['split'].index)
df = df.drop(["name", 'split', "tol_str", "rtol_str"], axis=1)
df.head()

NameError: name 'df' is not defined

Here in sklearn, "rtol" corresponds to "enet_tol", see [sklearn.covariance.graphical_lasso](https://scikit-learn.org/stable/modules/generated/sklearn.covariance.graphical_lasso.html)

In [9]:
fig = px.line(df, x="tol", y="accuracy", color= "method", 
              log_y = True,
              labels={
                     "tol": "Tolerance rate",
                     "accuracy": "Log_distance",
                     "method": "method"
                 },
              template = "plotly_white",
              title="Log-distance between Z and Z' with respect to ADMM convergence rates")
fig.show()

NameError: name 'df' is not defined