In [6]:
%matplotlib inline

import sys
import time
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn.covariance import empirical_covariance

sys.path.append('..')
from gglasso.solver.single_admm_solver import ADMM_SGL
from helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
from helper.model_selection import single_grid_search


from regain.covariance import GraphicalLasso as rg_GL

# Sampling from power networks

In [92]:
# Problem size: p variables, N samples per network instance.
p, N = 100, 200

# Theta is the true precision matrix that belongs to Sigma.
Sigma, Theta = group_power_network(p, K=5, M=2)
S, samples = sample_covariance_matrix(Sigma, N)

# Keep only the first instance along the leading axis for the benchmark.
S = S[0, :, :]
Theta = Theta[0, :, :]

samples.shape

(5, 100, 200)

In [93]:
# Regularisation strengths and stopping tolerances swept by every solver.
lambda_list = [0.1, 0.05, 0.025, 0.01, 0.001]
tol_list = [float(f"1e-{k}") for k in range(1, 9)]  # 1e-1, 1e-2, ..., 1e-8

# Iteration cap handed to every solver, and the ADMM starting point.
n_iter = 1_000_000
Omega_0 = np.eye(p)

# Shared accumulators; each solver section fills them and the following
# cell turns them into a DataFrame.
cov_list, precision_list, time_list, str_tol, str_lambda = ([] for _ in range(5))

## Model solution Z

In [94]:
# Reference fit with regain at alpha=0.001 and very tight tolerances.
# time.perf_counter() is the high-resolution, monotonic clock intended for
# measuring short durations; time.time() can be coarse or jump when the
# system clock is adjusted, which distorts sub-second timings.
start = time.perf_counter()
model = rg_GL(alpha=0.001, max_iter=n_iter, tol = 1e-10, rtol = 1e-10).fit(samples[0,:,:].T)
end = time.perf_counter()

# Render the elapsed seconds as HH:MM:SS.ss.
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
Z_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds)
print(Z_time)

00:00:01.39


In [95]:
# Same reference fit with scikit-learn's GraphicalLasso (alpha=0.001, tol=1e-10).
# time.perf_counter() is the high-resolution, monotonic clock intended for
# measuring short durations; time.time() can be coarse or jump when the
# system clock is adjusted, which distorts sub-second timings.
start = time.perf_counter()
model_1 = sk_GL(alpha=0.001, max_iter=n_iter, tol = 1e-10).fit(samples[0,:,:].T)
end = time.perf_counter()

# Render the elapsed seconds as HH:MM:SS.ss (this overwrites the previous Z_time).
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
Z_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds)
print(Z_time)

00:00:04.51


In [96]:
# Sanity check: norm of the difference between the regain and sklearn
# precision estimates fitted above.
np.linalg.norm(np.subtract(model.precision_, model_1.precision_))

6.814286449561827e-07

In [69]:
# Z is the reference ("model") solution: the precision matrix estimated by
# the regain fit above (alpha=0.001, tol=rtol=1e-10). Every solver result
# below is compared against it.
# NOTE(review): Z is fitted for alpha=0.001 only, yet the sweeps below also
# measure the distance to Z for the other values in lambda_list; those
# distances mix solver error with the effect of a different penalty -
# confirm this is intended.
Z = model.precision_

## Sklearn

In [70]:
# Start from empty accumulators so that re-running this cell does not append
# a second copy of the results (the other solver sections reset them too).
cov_list.clear()
precision_list.clear()
time_list.clear()
str_tol.clear()
str_lambda.clear()

# Fit scikit-learn's GraphicalLasso for every (lambda, tolerance) pair.
for lambda1 in lambda_list:
    for tol in tol_list:
        # perf_counter() is the high-resolution monotonic clock meant for
        # timing short runs; time.time() may be coarse or jump.
        start = time.perf_counter()
        Z_n = sk_GL(alpha=lambda1, max_iter=n_iter, tol = tol).fit(samples[0,:,:].T)
        end = time.perf_counter()

        # Store the elapsed time as an HH:MM:SS.ss string.
        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(Z_n.covariance_)
        precision_list.append(Z_n.precision_)
        str_tol.append(tol)
        str_lambda.append(lambda1)

# Distance of every estimate to the reference solution Z
# (np.linalg.norm default: Frobenius norm for 2-D arrays).
accuracy_list = [np.linalg.norm(Z - np.array(precision)) for precision in precision_list]

In [71]:
# Tolerances and lambdas are stored as strings (presumably so the plots
# treat them as discrete labels).
str_tol = list(map(str, str_tol))
str_lambda = list(map(str, str_lambda))
method_list = ["sklearn" for _ in str_tol]

# One row per (lambda, tolerance) run of the sklearn solver.
df_sk = pd.DataFrame(
    {
        "time": time_list,
        "distance": accuracy_list,
        "method": method_list,
        "str_tol": str_tol,
        "str_lambda": str_lambda,
    }
)
df_sk.head()

Unnamed: 0,time,distance,method,str_tol,str_lambda
0,00:00:00.06,21.340258,sklearn,0.1,0.1
1,00:00:00.05,21.340258,sklearn,0.01,0.1
2,00:00:00.03,21.340258,sklearn,0.001,0.1
3,00:00:00.03,21.340258,sklearn,0.0001,0.1
4,00:00:00.03,21.340249,sklearn,1e-05,0.1


## Regain

In [72]:
# Reset the shared accumulators before collecting the regain results.
cov_list.clear()
precision_list.clear()
time_list.clear()
str_tol.clear()
str_lambda.clear()

# Fit regain's GraphicalLasso for every (lambda, tolerance) pair; the same
# value is used for the absolute (tol) and relative (rtol) tolerance.
for lambda1 in lambda_list:
    for tol in tol_list:
        # perf_counter() is the high-resolution monotonic clock meant for
        # timing short runs; time.time() may be coarse or jump.
        start = time.perf_counter()
        Z_n = rg_GL(alpha=lambda1, max_iter=n_iter, tol = tol, rtol = tol).fit(samples[0,:,:].T)
        end = time.perf_counter()

        # Store the elapsed time as an HH:MM:SS.ss string.
        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(Z_n.covariance_)
        precision_list.append(Z_n.precision_)
        str_tol.append(tol)
        str_lambda.append(lambda1)

# Distance of every estimate to the reference solution Z
# (np.linalg.norm default: Frobenius norm for 2-D arrays).
accuracy_list = [np.linalg.norm(Z - np.array(precision)) for precision in precision_list]

In [74]:
# Tolerances and lambdas are stored as strings (presumably so the plots
# treat them as discrete labels).
str_tol = list(map(str, str_tol))
str_lambda = list(map(str, str_lambda))
method_list = ["regain" for _ in str_tol]

# One row per (lambda, tolerance) run of the regain solver.
df_rg = pd.DataFrame(
    {
        "time": time_list,
        "distance": accuracy_list,
        "method": method_list,
        "str_tol": str_tol,
        "str_lambda": str_lambda,
    }
)
df_rg.head()

Unnamed: 0,time,distance,method,str_tol,str_lambda
0,00:00:00.02,14.630542,regain,0.1,0.1
1,00:00:00.07,20.233617,regain,0.01,0.1
2,00:00:00.05,21.274986,regain,0.001,0.1
3,00:00:00.04,21.333838,regain,0.0001,0.1
4,00:00:00.05,21.339807,regain,1e-05,0.1


## ADMM 

We run the algorithm twice because the first run includes the time-consuming numba initialization; only the timings from the second run are kept.

In [76]:
# Two full passes over the grid: the first one absorbs numba's one-off
# compilation cost, and because the accumulators are reset at the start of
# each pass only the results of the second pass survive.
# The pass counter has its own name so that no inner loop can shadow it.
for _pass in (0, 1):
    cov_list.clear()
    precision_list.clear()
    time_list.clear()
    str_tol.clear()
    str_lambda.clear()

    for lambda1 in lambda_list:
        for tol in tol_list:
            # perf_counter() is the high-resolution monotonic clock meant for
            # timing short runs; time.time() may be coarse or jump.
            start = time.perf_counter()
            sol, info = ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol = tol, rtol = tol, stopping_criterion = "boyd",
                                 verbose = False, latent = False)
            end = time.perf_counter()

            # Store the elapsed time as an HH:MM:SS.ss string.
            hours, rem = divmod(end-start, 3600)
            minutes, seconds = divmod(rem, 60)
            time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

            cov_list.append(sol['Omega'])
            precision_list.append(sol['Theta'])
            str_tol.append(tol)
            str_lambda.append(lambda1)

# Distances are only needed for the pass that is kept, so they are computed
# once, after the loop (np.linalg.norm default: Frobenius norm for 2-D arrays).
accuracy_list = [np.linalg.norm(Z - np.array(theta)) for theta in precision_list]

ADMM terminated after 1 iterations with accuracy 3.2099886409999954
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 3.2099886409999954
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 3.2099886409999954
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 4 iterations with accuracy 0.38311298616210876
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 8 iterations with accuracy 0.038023766074266226
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 13 iterations with accuracy 0.003776741297339342
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 18 iterations with accuracy 0.00047437386203548254
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 24 iterations with accuracy 4.4068740016855213e-05
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 2.83967

ADMM terminated after 115 iterations with accuracy 4.916706498800297e-05
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 3.248743417519767
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 3.248743417519767
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 1 iterations with accuracy 3.248743417519767
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 11 iterations with accuracy 0.4955710500523605
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 73 iterations with accuracy 0.049427256982825
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 204 iterations with accuracy 0.005008818669662736
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 361 iterations with accuracy 0.0005039602989126968
ADMM status: {'primal optimal', 'dual optimal'}
ADMM terminated after 522 iterations with accuracy 4.9814731

In [77]:
# Tolerances and lambdas are stored as strings (presumably so the plots
# treat them as discrete labels).
str_tol = list(map(str, str_tol))
str_lambda = list(map(str, str_lambda))
method_list = ["admm" for _ in str_tol]

# One row per (lambda, tolerance) run of ADMM with the "boyd" stopping rule.
df_admm = pd.DataFrame(
    {
        "time": time_list,
        "distance": accuracy_list,
        "method": method_list,
        "str_tol": str_tol,
        "str_lambda": str_lambda,
    }
)
df_admm.head()

Unnamed: 0,time,distance,method,str_tol,str_lambda
0,00:00:00.01,22.124192,admm,0.1,0.1
1,00:00:00.02,22.124192,admm,0.01,0.1
2,00:00:00.01,22.124192,admm,0.001,0.1
3,00:00:00.01,21.570271,admm,0.0001,0.1
4,00:00:00.02,21.371407,admm,1e-05,0.1


## kkt_ADMM

We run the algorithm twice because the first run includes the time-consuming numba initialization; only the timings from the second run are kept.

In [78]:
# Two full passes over the grid; the accumulators are reset at the start of
# each pass, so only the results of the second pass survive (the first pass
# is the warm-up described above this cell).
# The pass counter has its own name so that no inner loop can shadow it.
for _pass in (0, 1):
    cov_list.clear()
    precision_list.clear()
    time_list.clear()
    str_tol.clear()
    str_lambda.clear()

    for lambda1 in lambda_list:
        for tol in tol_list:
            # perf_counter() is the high-resolution monotonic clock meant for
            # timing short runs; time.time() may be coarse or jump.
            start = time.perf_counter()
            sol, info = ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol = tol, stopping_criterion = "kkt",
                                 verbose = False, latent = False)
            end = time.perf_counter()

            # Store the elapsed time as an HH:MM:SS.ss string.
            hours, rem = divmod(end-start, 3600)
            minutes, seconds = divmod(rem, 60)
            time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

            cov_list.append(sol['Omega'])
            precision_list.append(sol['Theta'])
            str_tol.append(tol)
            str_lambda.append(lambda1)

# Distances are only needed for the pass that is kept, so they are computed
# once, after the loop (np.linalg.norm default: Frobenius norm for 2-D arrays).
accuracy_list = [np.linalg.norm(Z - np.array(theta)) for theta in precision_list]

ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.1
ADMM terminated after 3 iterations with accuracy 0.06720379682797549
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.01
ADMM terminated after 6 iterations with accuracy 0.008592385736205157
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.001
ADMM terminated after 10 iterations with accuracy 0.0009683881026447461
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.0001
ADMM terminated after 15 iterations with accuracy 9.236659578782124e-05
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 1e-05
ADMM terminated after 21 iterations with accuracy 8.309968864225947e-06
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optima

ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 0.0001
ADMM terminated after 15 iterations with accuracy 9.236659578782124e-05
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 1e-05
ADMM terminated after 21 iterations with accuracy 8.309968864225947e-06
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 1e-06
ADMM terminated after 27 iterations with accuracy 8.031337523030521e-07
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 1e-07
ADMM terminated after 33 iterations with accuracy 7.992731610779944e-08
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dual optimal'}
Try to change the tolerance value 1e-08
ADMM terminated after 39 iterations with accuracy 8.057907998962317e-09
ADMM status: {'primal and dual optimal'}
ADMM is only {'primal and dua

In [79]:
# Tolerances and lambdas are stored as strings (presumably so the plots
# treat them as discrete labels).
str_tol = list(map(str, str_tol))
str_lambda = list(map(str, str_lambda))
method_list = ["kkt_admm" for _ in str_tol]

# One row per (lambda, tolerance) run of ADMM with the "kkt" stopping rule.
df_kkt = pd.DataFrame(
    {
        "time": time_list,
        "distance": accuracy_list,
        "method": method_list,
        "str_tol": str_tol,
        "str_lambda": str_lambda,
    }
)
df_kkt.head()

Unnamed: 0,time,distance,method,str_tol,str_lambda
0,00:00:00.02,21.71224,kkt_admm,0.1,0.1
1,00:00:00.04,21.4249,kkt_admm,0.01,0.1
2,00:00:00.06,21.351891,kkt_admm,0.001,0.1
3,00:00:00.12,21.34135,kkt_admm,0.0001,0.1
4,00:00:00.09,21.340326,kkt_admm,1e-05,0.1


## Data formatting

In [80]:
# Stack the per-solver result tables into one long table.
frames = [df_sk, df_rg, df_admm, df_kkt]

df = pd.concat(frames)

# Relabel the tolerance "0.0001" as "1e-04". The result is assigned back to
# the column: calling replace(..., inplace=True) on df["str_tol"] is chained
# in-place mutation, which pandas deprecates and which leaves df unchanged
# when Copy-on-Write is active.
df["str_tol"] = df["str_tol"].replace({"0.0001": "1e-04"})

# Natural logarithm of the distance to the reference solution.
dist = np.array(df['distance'])
df['log_distance'] = np.log(dist)

In [81]:
# Show the runs ordered from the largest to the smallest log-distance.
df.sort_values("log_distance", ascending=False)

Unnamed: 0,time,distance,method,str_tol,str_lambda,log_distance
0,00:00:00.01,2.212419e+01,admm,0.1,0.1,3.096672
2,00:00:00.01,2.212419e+01,admm,0.001,0.1,3.096672
1,00:00:00.02,2.212419e+01,admm,0.01,0.1,3.096672
10,00:00:00.01,2.182596e+01,admm,0.001,0.05,3.083100
8,00:00:00.01,2.182596e+01,admm,0.1,0.05,3.083100
...,...,...,...,...,...,...
39,00:00:00.42,6.877157e-05,regain,1e-08,0.001,-9.584720
37,00:00:00.85,4.736031e-05,sklearn,1e-06,0.001,-9.957726
39,00:00:03.56,2.019598e-05,kkt_admm,1e-08,0.001,-10.810027
38,00:00:01.05,8.828683e-06,sklearn,1e-07,0.001,-11.637505


## Tolerance plot

In [None]:
# Distance to the reference solution against the solver tolerance, one colour
# per method. line_group splits each method into one line per lambda;
# without it plotly joins the rows of all lambda values into a single
# polyline and draws a spurious segment from the last tolerance of one
# lambda back to the first tolerance of the next.
fig = px.line(df, x="str_tol", y="distance", color= "method", line_group="str_lambda", log_y = True,
              labels={
                     "str_tol": "Tolerance rate",
                     "distance": "Log_distance",
                     "method": "method"
                 },
              template = "plotly_white",
              title="Log-distance between Z and Z' with respect to ADMM convergence rates")
fig.show()

In [83]:
# The concatenated table repeats the per-solver row labels; renumber them.
df = df.reset_index(drop=True)

# 'time' holds "HH:MM:SS.ss" strings. Convert all three fields to seconds:
# keeping only the last five characters (the "SS.ss" part) would silently
# drop the minutes and hours of any run lasting 60 seconds or more.
time_parts = df["time"].str.split(":", expand=True).astype(float)
df["sec_time"] = time_parts[0] * 3600 + time_parts[1] * 60 + time_parts[2]
df.head()

Unnamed: 0,time,distance,method,str_tol,str_lambda,log_distance,sec_time
0,00:00:00.06,21.340258,sklearn,0.1,0.1,3.060595,0.06
1,00:00:00.05,21.340258,sklearn,0.01,0.1,3.060595,0.05
2,00:00:00.03,21.340258,sklearn,0.001,0.1,3.060595,0.03
3,00:00:00.03,21.340258,sklearn,0.0001,0.1,3.060595,0.03
4,00:00:00.03,21.340249,sklearn,1e-05,0.1,3.060595,0.03


## Time plot

In [None]:
# Distance to the reference solution against run time, one colour per method,
# each point labelled with its tolerance. line_group splits each method into
# one line per lambda; without it plotly joins the rows of all lambda values
# into a single polyline and draws spurious segments between them.
fig = px.line(df, x="sec_time", y="distance", color= "method", line_group="str_lambda", log_y = True, text = "str_tol",
        labels={"sec_time": "Time, s",
                "distance": "Log_distance",
                "method": "method"
                 },
        template = "plotly_white",
              title='ADMM perfomance comparison')
fig.update_traces(textposition='top center')
fig.show()

In [None]:
# Scatter view of the same comparison: run time against distance (log axis),
# coloured by method; marker size is taken from the run time and every
# marker is labelled with its tolerance.
fig_1 = px.scatter(
    df,
    x="sec_time",
    y='distance',
    color="method",
    size='sec_time',
    text="str_tol",
    log_y=True,
    template="plotly_white",
    title='ADMM perfomance comparison',
)
fig_1.update_traces(textposition='top center')
fig_1.show()