In [306]:
%matplotlib inline

import sys
import time
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn.covariance import empirical_covariance

sys.path.append('..')
from gglasso.solver.single_admm_solver import ADMM_SGL
from gglasso.solver.single_admm_solver import block_SGL
from helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
from helper.model_selection import single_grid_search


from regain.covariance import GraphicalLasso as rg_GL

# Sampling from power networks

In [307]:
p = 100
N = 200
Sigma, Theta = group_power_network(p, K=5, M=2) #Theta is true precision matrix

S, samples = sample_covariance_matrix(Sigma, N)

S = S[0,:,:]
Theta = Theta[0,:,:] #true precision matrix

samples.shape

(5, 100, 200)

In [308]:
lambda1 = 0.01
Omega_0 = np.eye(p)

n_iter = 50000
tol_list = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]

## Model solution Z

In [309]:
start = time.time()
model = rg_GL(alpha=lambda1, max_iter=n_iter, tol = 1e-10).fit(samples[0,:,:].T)
end = time.time()

hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
Z_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds)
print(Z_time)

00:00:00.21


In [310]:
Z = model.precision_

In [311]:
start = time.time()
sol, info = ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, 
                     tol = 1e-10, rtol = 1e-10, stopping_criterion = "boyd",
                     verbose = False, latent = False)
end = time.time()

hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
Z_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds)
print(Z_time)

ADMM terminated after 176 iterations with accuracy 4.7601370439699934e-07
ADMM status: {'primal optimal', 'dual optimal'}
00:00:00.41


In [312]:
Z1 = sol["Theta"]

In [313]:
np.linalg.norm(Z1-Z)

0.0010338222877164305

In [314]:
# [1e-1, 5e-2, 25e-3, 1e-2,5e-3, 25e-4, 1e-3, 5e-4]

## Sklearn

In [315]:
cov_list = []
precision_list = []
time_list = []
for tol in tol_list:
    start = time.time()
    Z_n = sk_GL(alpha=lambda1, max_iter=n_iter, tol = tol).fit(samples[0,:,:].T)
    end = time.time()
    
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)
    time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
    
    cov_list.append(Z_n.covariance_)
    precision_list.append(Z_n.precision_)
    
accuracy_list = []
for i in range(0, len(precision_list)):
    accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
    accuracy_list.append(accuracy)

In [316]:
str_tol = [str(x) for x in tol_list]
method_list = ["sklearn"]*len(accuracy_list)

df_sk = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_sk

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.07,0.011971,sklearn,0.1,0.1
1,00:00:00.06,0.001343,sklearn,0.01,0.01
2,00:00:00.05,0.001343,sklearn,0.001,0.001
3,00:00:00.08,0.001173,sklearn,0.0001,0.0001
4,00:00:00.08,0.001173,sklearn,1e-05,1e-05
5,00:00:00.15,0.001037,sklearn,1e-06,1e-06
6,00:00:00.33,0.001028,sklearn,1e-07,1e-07
7,00:00:00.45,0.001028,sklearn,1e-08,1e-08


## Regain

In [317]:
cov_list = []
precision_list = []
time_list = []
for tol in tol_list:
    start = time.time()
    Z_n = rg_GL(alpha=lambda1, max_iter=n_iter, tol = tol, rtol = tol).fit(samples[0,:,:].T)
    end = time.time()
    
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)
    time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
    
    cov_list.append(Z_n.covariance_)
    precision_list.append(Z_n.precision_)

accuracy_list = []
for i in range(0, len(precision_list)):
    accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
    accuracy_list.append(accuracy)

In [318]:
str_tol = [str(x) for x in tol_list]
method_list = ["regain"]*len(accuracy_list)

df_rg = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_rg

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.02,6.982505,regain,0.1,0.1
1,00:00:00.02,6.982505,regain,0.01,0.01
2,00:00:00.06,1.188609,regain,0.001,0.001
3,00:00:00.08,0.106799,regain,0.0001,0.0001
4,00:00:00.14,0.010227,regain,1e-05,1e-05
5,00:00:00.17,0.000165,regain,1e-06,1e-06
6,00:00:00.21,0.000918,regain,1e-07,1e-07
7,00:00:00.25,0.001016,regain,1e-08,1e-08


## ADMM 

We run the algorithm 2 times because on the first iteration time costly numba initialization happens.

In [319]:
for i in [0,1]:
    cov_list = []
    precision_list = []
    time_list = []
    for tol in tol_list:
        start = time.time()
        sol, info = ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol = tol, rtol = tol, stopping_criterion = "boyd",
                             verbose = False, latent = False)
        end = time.time()

        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    accuracy_list = []
    for i in range(0, len(precision_list)):
        accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
        accuracy_list.append(accuracy)

ADMM terminated after 1 iterations with accuracy 2.626743753886294
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 2.626743753886294
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 2.626743753886294
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 8 iterations with accuracy 0.4452724731385852
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 28 iterations with accuracy 0.0494413429160997
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 56 iterations with accuracy 0.004783179246036787
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 85 iterations with accuracy 0.0004928442712129888
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 115 iterations with accuracy 4.916706498800297e-05
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 2.62674375388

In [320]:
str_tol = [str(x) for x in tol_list]
method_list = ["admm"]*len(accuracy_list)

df_admm = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_admm

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.01,9.607049,admm,0.1,0.1
1,00:00:00.00,9.607049,admm,0.01,0.01
2,00:00:00.00,9.607049,admm,0.001,0.001
3,00:00:00.02,3.590272,admm,0.0001,0.0001
4,00:00:00.06,0.558221,admm,1e-05,1e-05
5,00:00:00.14,0.059364,admm,1e-06,1e-06
6,00:00:00.22,0.007172,admm,1e-07,1e-07
7,00:00:00.31,0.001644,admm,1e-08,1e-08


## kkt_ADMM

We run the algorithm 2 times because on the first iteration time costly numba initialization happens.

In [321]:
for i in [0,1]:
    cov_list = []
    precision_list = []
    time_list = []
    for tol in tol_list:
        start = time.time()
        sol, info = ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol = tol, stopping_criterion = "kkt",
                             verbose = False, latent = False)
        end = time.time()

        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    accuracy_list = []
    for i in range(0, len(precision_list)):
        accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
        accuracy_list.append(accuracy)

ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.1
ADMM terminated after 2 iterations with accuracy 0.09435681699767277
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.01
ADMM terminated after 14 iterations with accuracy 0.009754336202586257
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.001
ADMM terminated after 38 iterations with accuracy 0.0009699014638720622
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.0001
ADMM terminated after 67 iterations with accuracy 9.32258353556232e-05
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 1e-05
ADMM terminated after 96 iterations with accuracy 9.822358674532827e-06
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optima

In [322]:
str_tol = [str(x) for x in tol_list]
method_list = ["kkt_admm"]*len(accuracy_list)

df_kkt = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_kkt

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.01,7.971542,kkt_admm,0.1,0.1
1,00:00:00.07,1.935066,kkt_admm,0.01,0.01
2,00:00:00.20,0.245395,kkt_admm,0.001,0.001
3,00:00:00.35,0.025737,kkt_admm,0.0001,0.0001
4,00:00:00.38,0.003667,kkt_admm,1e-05,1e-05
5,00:00:00.61,0.001294,kkt_admm,1e-06,1e-06
6,00:00:00.71,0.001053,kkt_admm,1e-07,1e-07
7,00:00:00.78,0.00103,kkt_admm,1e-08,1e-08


## Block ADMM

In [323]:
for i in [0,1]:
    cov_list = []
    precision_list = []
    time_list = []
    for tol in tol_list:
        start = time.time()
        sol = block_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol = tol, rtol = tol, stopping_criterion = "boyd",
                             verbose = False)
        end = time.time()

        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    accuracy_list = []
    for i in range(0, len(precision_list)):
        accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
        accuracy_list.append(accuracy)

ADMM terminated after 1 iterations with accuracy 2.626743753886294
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 2.626743753886294
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 2.626743753886294
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 8 iterations with accuracy 0.4452724731385852
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 28 iterations with accuracy 0.0494413429160997
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 56 iterations with accuracy 0.004783179246036787
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 85 iterations with accuracy 0.0004928442712129888
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 115 iterations with accuracy 4.916706498800297e-05
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 2.62674375388

In [324]:
str_tol = [str(x) for x in tol_list]
method_list = ["block_admm"]*len(accuracy_list)

df_block = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_block

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.01,9.607049,block_admm,0.1,0.1
1,00:00:00.01,9.607049,block_admm,0.01,0.01
2,00:00:00.01,9.607049,block_admm,0.001,0.001
3,00:00:00.02,3.590272,block_admm,0.0001,0.0001
4,00:00:00.09,0.558221,block_admm,1e-05,1e-05
5,00:00:00.16,0.059364,block_admm,1e-06,1e-06
6,00:00:00.21,0.007172,block_admm,1e-07,1e-07
7,00:00:00.30,0.001644,block_admm,1e-08,1e-08


## Block kkt_ADMM

In [325]:
for i in [0,1]:
    cov_list = []
    precision_list = []
    time_list = []
    for tol in tol_list:
        start = time.time()
        sol = block_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol = tol, stopping_criterion = "kkt",
                             verbose = False)
        end = time.time()

        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    accuracy_list = []
    for i in range(0, len(precision_list)):
        accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
        accuracy_list.append(accuracy)

ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.1
ADMM terminated after 2 iterations with accuracy 0.09435681699767277
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.01
ADMM terminated after 14 iterations with accuracy 0.009754336202586257
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.001
ADMM terminated after 38 iterations with accuracy 0.0009699014638720622
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.0001
ADMM terminated after 67 iterations with accuracy 9.32258353556232e-05
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 1e-05
ADMM terminated after 96 iterations with accuracy 9.822358674532827e-06
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optima

In [326]:
str_tol = [str(x) for x in tol_list]
method_list = ["kkt_block_admm"]*len(accuracy_list)

df_block_kkt = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_block_kkt

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.02,7.971542,kkt_block_admm,0.1,0.1
1,00:00:00.07,1.935066,kkt_block_admm,0.01,0.01
2,00:00:00.20,0.245395,kkt_block_admm,0.001,0.001
3,00:00:00.27,0.025737,kkt_block_admm,0.0001,0.0001
4,00:00:00.47,0.003667,kkt_block_admm,1e-05,1e-05
5,00:00:00.58,0.001294,kkt_block_admm,1e-06,1e-06
6,00:00:00.70,0.001053,kkt_block_admm,1e-07,1e-07
7,00:00:00.81,0.00103,kkt_block_admm,1e-08,1e-08


## Data formating

In [327]:
frames = [df_sk, df_rg, df_admm, df_kkt, df_block, df_block_kkt]

df = pd.concat(frames)

df["str_tol"].replace({"0.0001": "1e-04"}, inplace=True)
dist = np.array(df['distance'])
df['log_distance'] = np.log(dist)

In [328]:
df.sort_values(by=['log_distance'], ascending = False)

Unnamed: 0,time,distance,method,tol_rate,str_tol,log_distance
2,00:00:00.01,9.607049,block_admm,0.001,0.001,2.262497
2,00:00:00.00,9.607049,admm,0.001,0.001,2.262497
1,00:00:00.00,9.607049,admm,0.01,0.01,2.262497
0,00:00:00.01,9.607049,admm,0.1,0.1,2.262497
0,00:00:00.01,9.607049,block_admm,0.1,0.1,2.262497
1,00:00:00.01,9.607049,block_admm,0.01,0.01,2.262497
0,00:00:00.01,7.971542,kkt_admm,0.1,0.1,2.075878
0,00:00:00.02,7.971542,kkt_block_admm,0.1,0.1,2.075878
0,00:00:00.02,6.982505,regain,0.1,0.1,1.943408
1,00:00:00.02,6.982505,regain,0.01,0.01,1.943408


## Tolerance plot

In [329]:
fig = px.line(df, x="str_tol", y="distance", color= "method", log_y = True,
              labels={
                     "str_tol": "Tolerance rate",
                     "distance": "Log_distance",
                     "method": "method"
                 },
              template = "plotly_white",
              title="Log-distance between Z and Z' with respect to ADMM convergence rates")
fig.show()

In [330]:
df = df.reset_index(drop=True)

for i, row in df.iterrows():
    sec_time = df['time'][i][-5:]
    df.at[i,'sec_time'] = sec_time
df['sec_time'] = pd.to_numeric(df['sec_time'])
df.head()

Unnamed: 0,time,distance,method,tol_rate,str_tol,log_distance,sec_time
0,00:00:00.07,0.011971,sklearn,0.1,0.1,-4.425309,0.07
1,00:00:00.06,0.001343,sklearn,0.01,0.01,-6.612788,0.06
2,00:00:00.05,0.001343,sklearn,0.001,0.001,-6.612788,0.05
3,00:00:00.08,0.001173,sklearn,0.0001,0.0001,-6.748085,0.08
4,00:00:00.08,0.001173,sklearn,1e-05,1e-05,-6.748085,0.08


## Time plot

In [331]:
fig = px.line(df, x="sec_time", y="distance", color= "method", log_y = True, 
#               text = "str_tol",
        labels={"sec_time": "Time, s",
                "distance": "Log_distance",
                "method": "method"
                 },
        template = "plotly_white",
              title='ADMM perfomance comparison')
fig.update_traces(textposition='top center')
fig.show()

In [332]:
# fig_1 = px.scatter(x=a['time'], y=a['accuracy'], color= "method")
# fig_1.show()

fig_1 = px.scatter(df, x="sec_time", 
                   y='distance', color="method", 
#                    text = "sec_time", 
                   log_y = True, template = "plotly_white",
                   text = "str_tol",
                 size='sec_time',
                  title='ADMM perfomance comparison')
fig_1.update_traces(textposition='top center')
fig_1.show()