In [393]:
# !pip install numba==0.48
# !pip install regain
# !pip install plotly

In [422]:
%matplotlib inline

import sys
import time
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn.covariance import empirical_covariance

sys.path.append('..')
from gglasso.solver.single_admm_solver import ADMM_SGL
#from helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix

from regain.covariance import GraphicalLasso as rg_GL
#from regain.covariance.graphical_lasso_ import GraphicalLasso
# from regain.covariance import LatentTimeGraphicalLasso
# from regain.covariance import latent_graphical_lasso_, latent_time_graphical_lasso_

# Sample data

In [423]:
true_cov = np.array([[0.8, 0.0, 0.2, 0.0],
                     [0.0, 0.4, 0.0, 0.0],
                     [0.2, 0.0, 0.3, 0.1],
                     [0.0, 0.0, 0.1, 0.7]])

In [424]:
np.random.seed(243)
X = np.random.multivariate_normal(mean=[0, 0, 0, 0], cov=true_cov, size=100)

X_trans = X.T  #Regain and Sklearn transpose data during training
X_cov = np.cov(X_trans) #input for ADMM

X_cov.shape, X_trans.shape

((4, 4), (4, 100))

X is a randomly generated data which can serve as an input to sklearn and regain methods.
ADMM works with covarince matrix, so we have to extract it first.

# Model solution Z

In [425]:
Z = sk_GL(max_iter=1000, tol = 1e-10).fit(X)
Z.get_params()

{'alpha': 0.01,
 'assume_centered': False,
 'enet_tol': 0.0001,
 'max_iter': 1000,
 'mode': 'cd',
 'tol': 1e-10,
 'verbose': False}

In [426]:
Z.precision_

array([[ 1.53284181,  0.2366127 , -0.59705507,  0.22230787],
       [ 0.2366127 ,  2.26999954,  0.22522913,  0.05631177],
       [-0.59705507,  0.22522913,  3.0262301 , -0.28105847],
       [ 0.22230787,  0.05631177, -0.28105847,  1.61698823]])

# Import sklearn

In [427]:
tol_list = [1e-4, 1e-5, 1e-6, 1e-7]
# tol_list = np.linspace(1e-10, 1e-4, num=100)
# tol_list = [1e-1, 1e-2]
cov_list = []
precision_list = []
time_list = []
for tol in tol_list:
    start = time.time()
    rg = sk_GL(tol=tol, max_iter = 1000).fit(X)
    end = time.time()
    cov_list.append(rg.covariance_)
    precision_list.append(rg.precision_)
    time_list.append(end-start)

In [428]:
accuracy_list = []
for i in range(0, len(precision_list)):
    accuracy = 1 - np.linalg.norm(Z.precision_ - np.array(precision_list[i]))
    accuracy_list.append(accuracy)

In [429]:
method_list = ["sklearn"]*len(accuracy_list)

In [430]:
str_tol = [str(x) for x in tol_list]

In [431]:
df_sk = pd.DataFrame(data={'time': time_list, 
                           'accuracy': accuracy_list,
                           'method':method_list,
                            'tol_rate': str_tol})
df_sk.head()

Unnamed: 0,time,accuracy,method,tol_rate
0,0.004803,0.999993,sklearn,0.0001
1,0.002603,0.999993,sklearn,1e-05
2,0.001518,0.999993,sklearn,1e-06
3,0.002371,1.0,sklearn,1e-07


# Import Regain GL

In [432]:
tol_list = [1e-4, 1e-5, 1e-6, 1e-7]
# tol_list = np.linspace(1e-10, 1e-4, num=100)
# tol_list = [1e-1, 1e-2]
cov_list = []
precision_list = []
time_list = []
for tol in tol_list:
    start = time.time()
    rg = rg_GL(alpha=0.01, rho=1.,tol=tol, rtol=1e-7, max_iter=1000, verbose=False).fit(X)
    end = time.time()
    cov_list.append(rg.covariance_)
    precision_list.append(rg.precision_)
    time_list.append(end-start)

In [433]:
accuracy_list = []
for i in range(0, len(precision_list)):
    accuracy = 1 - np.linalg.norm(Z.precision_ - np.array(precision_list[i]))
    accuracy_list.append(accuracy)

In [434]:
method_list = ["regain"]*len(accuracy_list)

In [435]:
df_rg = pd.DataFrame(data={'time': time_list, 
                           'accuracy': accuracy_list,
                           'method': method_list,
                            'tol_rate': str_tol})
df_rg.head()

Unnamed: 0,time,accuracy,method,tol_rate
0,0.004609,0.997916,regain,0.0001
1,0.003277,0.999697,regain,1e-05
2,0.003674,0.999976,regain,1e-06
3,0.003002,0.999999,regain,1e-07


# Import ADMM SGL

In [436]:
# Omega_0 = np.eye(len(X_cov))
Omega_0 = np.eye(len(X_cov))
lambda1 = 0.01

In [437]:
tol_list = [1e-4, 1e-5, 1e-6, 1e-7]
# tol_list = [1e-1, 1e-2]
cov_list = []
precision_list = []
time_list = []
for tol in tol_list:
    start = time.time()
#     sol, info = ADMM_SGL(X_cov, lambda1, Omega_0, max_iter=1000, tol = tol, verbose = False, latent = False)
    sol, info = ADMM_SGL(X_cov, lambda1, Omega_0, max_iter=1000, tol = tol, verbose = False, latent = False)
    end = time.time()
    cov_list.append(sol['Omega'])
    precision_list.append(sol['Theta'])
    time_list.append(end-start)

ADMM terminated after 54 iterations with accuracy 0.0009581011918730356
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 77 iterations with accuracy 0.00012422398836316805
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 89 iterations with accuracy 4.2885779226089456e-05
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 92 iterations with accuracy 3.2875226093742374e-05
ADMM status: {'dual optimal', 'primal optimal'}


In [438]:
accuracy_list = []
for i in range(0, len(precision_list)):
    accuracy = 1 - np.linalg.norm(Z.precision_ - np.array(precision_list[i]))
    accuracy_list.append(accuracy)

In [439]:
method_list = ["admm"]*len(accuracy_list)

In [440]:
df_admm = pd.DataFrame(data={'time': time_list, 
                             'accuracy': accuracy_list,
                             'method':method_list,
                            'tol_rate': str_tol})
df_admm.head()

Unnamed: 0,time,accuracy,method,tol_rate
0,0.010292,0.947838,admm,0.0001
1,0.010935,0.954838,admm,1e-05
2,0.00891,0.955482,admm,1e-06
3,0.017366,0.95556,admm,1e-07


# Visualization

In [441]:
frames = [df_sk, df_rg, df_admm]

df = pd.concat(frames)
df

Unnamed: 0,time,accuracy,method,tol_rate
0,0.004803,0.999993,sklearn,0.0001
1,0.002603,0.999993,sklearn,1e-05
2,0.001518,0.999993,sklearn,1e-06
3,0.002371,1.0,sklearn,1e-07
0,0.004609,0.997916,regain,0.0001
1,0.003277,0.999697,regain,1e-05
2,0.003674,0.999976,regain,1e-06
3,0.003002,0.999999,regain,1e-07
0,0.010292,0.947838,admm,0.0001
1,0.010935,0.954838,admm,1e-05


In [442]:
#normalised accuracy [0,1]
norm_ac = np.array(df['accuracy'])
df['accuracy'] = (norm_ac - np.min(norm_ac))/np.ptp(norm_ac)
df

Unnamed: 0,time,accuracy,method,tol_rate
0,0.004803,0.999865,sklearn,0.0001
1,0.002603,0.999865,sklearn,1e-05
2,0.001518,0.999865,sklearn,1e-06
3,0.002371,1.0,sklearn,1e-07
0,0.004609,0.960045,regain,0.0001
1,0.003277,0.994192,regain,1e-05
2,0.003674,0.999543,regain,1e-06
3,0.003002,0.999983,regain,1e-07
0,0.010292,0.0,admm,0.0001
1,0.010935,0.13421,admm,1e-05


In [443]:
# fig = px.line(df, x="time", y="accuracy", color= "method",text="tol_rate", title='Perfomance comparison')
fig = px.line(df, x="time", y="accuracy", color= "method", title='Perfomance comparison')
fig.show()

# Import LatentTimeGraphicalLasso

In [416]:
# np.random.seed(243)

# tau = 0.1 #mu1 in ADMM_SGL
# alpha = 0.1 # lambda in ADMM_SGL

# sol = latent_graphical_lasso_.latent_graphical_lasso(
#     true_cov, alpha=alpha, tau=tau, tol=1e-5, rtol=1e-5,
#     rho=1. / true_cov.shape[0], verbose=0, max_iter=500)

# prec, cov, n_iter = sol[0], sol[1], sol[2]