In [2]:
%matplotlib inline

import sys
import time
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn.covariance import empirical_covariance

sys.path.append('..')
from gglasso.solver.single_admm_solver import ADMM_SGL
from helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
from helper.model_selection import single_grid_search


from regain.covariance import GraphicalLasso as rg_GL

# Sampling from power networks

In [3]:
# Problem size: p variables, N observations per instance.
p, N = 1000, 2000

# Generate the group power network; Theta is the true precision matrix.
Sigma, Theta = group_power_network(p, K=5, M=2)

# Draw samples from Sigma together with the matching S matrices
# (presumably empirical covariances -- confirm against the helper).
S, samples = sample_covariance_matrix(Sigma, N)

# Only the first instance along the leading axis is used from here on.
S = S[0]
Theta = Theta[0]  # true precision matrix

samples.shape

(5, 1000, 2000)

In [4]:
# Settings shared by every solver benchmarked below.
lambda1 = 0.01   # regularization parameter handed to each solver
n_iter = 50000   # passed to each solver as max_iter

# Tolerances swept by the benchmark, from loosest to tightest.
tol_list = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]

# Identity matrix used as the starting point of the ADMM solvers.
Omega_0 = np.identity(p)

## Model solution Z

In [5]:
# Fit scikit-learn's GraphicalLasso with a very tight tolerance and time it.
# samples[0] is (p, N); the transpose passes it to fit() as (N, p).
start = time.time()
model = sk_GL(alpha=lambda1, max_iter=n_iter, tol=1e-10)
model.fit(samples[0].T)
end = time.time()

# Render the elapsed wall-clock time as HH:MM:SS.ss.
elapsed = end - start
hours, rem = divmod(elapsed, 3600)
minutes, seconds = divmod(rem, 60)
Z_time = f"{int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}"
print(Z_time)

00:02:13.01


In [6]:
# Precision matrix of the tight-tolerance scikit-learn fit above; every solver
# estimate below is compared against this reference solution Z.
Z = model.precision_

## Sklearn

In [7]:
# Sweep the tolerance grid with scikit-learn's GraphicalLasso, keeping the
# formatted wall-clock time and the fitted matrices for every tolerance.
cov_list, precision_list, time_list = [], [], []
for tol in tol_list:
    start = time.time()
    Z_n = sk_GL(alpha=lambda1, max_iter=n_iter, tol=tol).fit(samples[0].T)
    end = time.time()

    # Elapsed time rendered as HH:MM:SS.ss.
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    time_list.append(f"{int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")

    cov_list.append(Z_n.covariance_)
    precision_list.append(Z_n.precision_)

# Frobenius distance between the reference Z and each estimated precision matrix.
accuracy_list = [np.linalg.norm(Z - np.array(estimate)) for estimate in precision_list]

In [8]:
# One row per tolerance for the scikit-learn runs.
str_tol = list(map(str, tol_list))          # tolerance as a categorical label
method_list = len(accuracy_list) * ["sklearn"]

sk_columns = {
    'time': time_list,
    'distance': accuracy_list,
    'method': method_list,
    'tol_rate': tol_list,
    'str_tol': str_tol,
}
df_sk = pd.DataFrame(data=sk_columns)
df_sk

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:12.43,0.01796094,sklearn,0.1,0.1
1,00:00:13.80,0.0006475742,sklearn,0.01,0.01
2,00:00:11.11,0.0006475742,sklearn,0.001,0.001
3,00:00:14.43,0.0002295058,sklearn,0.0001,0.0001
4,00:00:20.41,9.810737e-05,sklearn,1e-05,1e-05
5,00:00:37.77,8.313535e-06,sklearn,1e-06,1e-06
6,00:00:36.11,1.094786e-06,sklearn,1e-07,1e-07
7,00:00:47.52,1.616953e-07,sklearn,1e-08,1e-08


## Regain

In [9]:
# Sweep the tolerance grid with regain's GraphicalLasso, keeping the
# formatted wall-clock time and the fitted matrices for every tolerance.
cov_list, precision_list, time_list = [], [], []
for tol in tol_list:
    start = time.time()
    Z_n = rg_GL(alpha=lambda1, max_iter=n_iter, tol=tol).fit(samples[0].T)
    end = time.time()

    # Elapsed time rendered as HH:MM:SS.ss.
    hours, rem = divmod(end - start, 3600)
    minutes, seconds = divmod(rem, 60)
    time_list.append(f"{int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")

    cov_list.append(Z_n.covariance_)
    precision_list.append(Z_n.precision_)

# Frobenius distance between the reference Z and each estimated precision matrix.
accuracy_list = [np.linalg.norm(Z - np.array(estimate)) for estimate in precision_list]

In [10]:
# One row per tolerance for the regain runs.
str_tol = list(map(str, tol_list))          # tolerance as a categorical label
method_list = len(accuracy_list) * ["regain"]

rg_columns = {
    'time': time_list,
    'distance': accuracy_list,
    'method': method_list,
    'tol_rate': tol_list,
    'str_tol': str_tol,
}
df_rg = pd.DataFrame(data=rg_columns)
df_rg

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.51,33.475813,regain,0.1,0.1
1,00:00:00.69,28.27673,regain,0.01,0.01
2,00:00:02.84,3.347774,regain,0.001,0.001
3,00:00:05.01,0.311195,regain,0.0001,0.0001
4,00:00:07.15,0.055765,regain,1e-05,1e-05
5,00:00:09.43,0.010816,regain,1e-06,1e-06
6,00:00:10.25,0.006158,regain,1e-07,1e-07
7,00:00:10.53,0.005353,regain,1e-08,1e-08


## ADMM 

We run the algorithm twice because the first run includes the costly numba initialization; only the results of the second run are kept.

In [11]:
# Two identical passes: the result lists are reset at the top of each pass, so
# only the measurements of the second pass are left when the loop finishes.
for run in range(2):
    cov_list, precision_list, time_list = [], [], []
    for tol in tol_list:
        start = time.time()
        sol, info = ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol=tol,
                             verbose=False, latent=False)
        end = time.time()

        # Elapsed time rendered as HH:MM:SS.ss.
        hours, rem = divmod(end - start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append(f"{int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    # Frobenius distance between the reference Z and each returned Theta.
    accuracy_list = [np.linalg.norm(Z - np.array(estimate)) for estimate in precision_list]

ADMM terminated after 1 iterations with accuracy 7.201949322409392
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 1 iterations with accuracy 7.201949322409392
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 1 iterations with accuracy 7.201949322409392
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 1 iterations with accuracy 7.201949322409392
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 3 iterations with accuracy 2.8805271712760705
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 9 iterations with accuracy 0.38510134274143787
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 17 iterations with accuracy 0.04725723902173184
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 23 iterations with accuracy 0.011602738425873471
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 1 iterations with accuracy 7.201949322409392
ADM

In [12]:
# One row per tolerance for the ADMM runs.
str_tol = list(map(str, tol_list))          # tolerance as a categorical label
method_list = len(accuracy_list) * ["admm"]

admm_columns = {
    'time': time_list,
    'distance': accuracy_list,
    'method': method_list,
    'tol_rate': tol_list,
    'str_tol': str_tol,
}
df_admm = pd.DataFrame(data=admm_columns)
df_admm

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.30,14.037935,admm,0.1,0.1
1,00:00:00.26,14.037935,admm,0.01,0.01
2,00:00:00.27,14.037935,admm,0.001,0.001
3,00:00:00.27,14.037935,admm,0.0001,0.0001
4,00:00:00.60,6.810916,admm,1e-05,1e-05
5,00:00:01.64,1.268499,admm,1e-06,1e-06
6,00:00:02.99,0.183346,admm,1e-07,1e-07
7,00:00:04.02,0.054975,admm,1e-08,1e-08


## kkt_ADMM

We run the algorithm twice because the first run includes the costly numba initialization; only the results of the second run are kept.

In [13]:
# NOTE(review): kkt_ADMM_SGL does not appear in the import cell of this
# notebook -- confirm where it is defined and import it explicitly.
#
# Two identical passes: the result lists are reset at the top of each pass, so
# only the measurements of the second pass are left when the loop finishes.
for run in range(2):
    cov_list, precision_list, time_list = [], [], []
    for tol in tol_list:
        start = time.time()
        sol, info = kkt_ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, eps_admm=tol,
                                 verbose=False, latent=False)
        end = time.time()

        # Elapsed time rendered as HH:MM:SS.ss.
        hours, rem = divmod(end - start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append(f"{int(hours):0>2}:{int(minutes):0>2}:{seconds:05.2f}")

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    # Frobenius distance between the reference Z and each returned Theta.
    accuracy_list = [np.linalg.norm(Z - np.array(estimate)) for estimate in precision_list]

ADMM terminated after 2 iterations with accuracy 0.07689794887452671
ADMM status: optimal
ADMM terminated after 8 iterations with accuracy 0.00924577452541569
ADMM status: optimal
ADMM terminated after 17 iterations with accuracy 0.0008594751116571462
ADMM status: optimal
ADMM terminated after 27 iterations with accuracy 9.764438701732223e-05
ADMM status: optimal
ADMM terminated after 42 iterations with accuracy 9.390545098642266e-06
ADMM status: optimal
ADMM terminated after 58 iterations with accuracy 9.972106138606664e-07
ADMM status: optimal
ADMM terminated after 75 iterations with accuracy 9.318130929419711e-08
ADMM status: optimal
ADMM terminated after 92 iterations with accuracy 8.710984052941861e-09
ADMM status: optimal
ADMM terminated after 2 iterations with accuracy 0.07689794887452671
ADMM status: optimal
ADMM terminated after 8 iterations with accuracy 0.00924577452541569
ADMM status: optimal
ADMM terminated after 17 iterations with accuracy 0.0008594751116571462
ADMM statu

In [14]:
# One row per tolerance for the kkt_ADMM runs.
str_tol = list(map(str, tol_list))          # tolerance as a categorical label
method_list = len(accuracy_list) * ["kkt_admm"]

kkt_columns = {
    'time': time_list,
    'distance': accuracy_list,
    'method': method_list,
    'tol_rate': tol_list,
    'str_tol': str_tol,
}
df_kkt = pd.DataFrame(data=kkt_columns)
df_kkt

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.99,9.486796,kkt_admm,0.1,0.1
1,00:00:02.92,1.649652,kkt_admm,0.01,0.01
2,00:00:05.85,0.183346,kkt_admm,0.001,0.001
3,00:00:09.46,0.027849,kkt_admm,0.0001,0.0001
4,00:00:14.27,0.003112,kkt_admm,1e-05,1e-05
5,00:00:19.71,0.000333,kkt_admm,1e-06,1e-06
6,00:00:25.46,3.1e-05,kkt_admm,1e-07,1e-07
7,00:00:33.46,3e-06,kkt_admm,1e-08,1e-08


## Data formatting

In [15]:
# Z_frame = pd.DataFrame({"time": Z_time, 
#                         "distance": 0,
#                         "method":["Z"], 
#                         "tol_rate": 1e-10,
#                         "str_tol": "1e-10"})

In [74]:
# Stack the per-method result frames into one long table. The original row
# labels (0..7 per method) are kept, so the index contains duplicates here.
frames = [df_sk, df_rg, df_admm, df_kkt]
df = pd.concat(frames)

# Relabel the one tolerance whose str() form is not in scientific notation.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on the df["str_tol"] selection is deprecated in recent pandas and does not
# modify df under Copy-on-Write.
df["str_tol"] = df["str_tol"].replace({"0.0001": "1e-04"})

# The zero-padded HH:MM:SS.ss strings sort chronologically as plain text.
df.sort_values(by=['time', 'distance'])

Unnamed: 0,time,distance,method,tol_rate,str_tol
1,00:00:00.26,14.03794,admm,0.01,0.01
2,00:00:00.27,14.03794,admm,0.001,0.001
3,00:00:00.27,14.03794,admm,0.0001,0.0001
0,00:00:00.30,14.03794,admm,0.1,0.1
0,00:00:00.51,33.47581,regain,0.1,0.1
4,00:00:00.60,6.810916,admm,1e-05,1e-05
1,00:00:00.69,28.27673,regain,0.01,0.01
0,00:00:00.99,9.486796,kkt_admm,0.1,0.1
5,00:00:01.64,1.268499,admm,1e-06,1e-06
2,00:00:02.84,3.347774,regain,0.001,0.001


In [75]:
# Natural log of the distance to the reference solution, stored as a new column.
dist = df['distance'].to_numpy()
df['log_distance'] = np.log(dist)

In [76]:
# Show the rows ordered from the largest to the smallest log-distance.
df.sort_values(by=['log_distance'], ascending = False)

Unnamed: 0,time,distance,method,tol_rate,str_tol,log_distance
0,00:00:00.51,33.47581,regain,0.1,0.1,3.510823
1,00:00:00.69,28.27673,regain,0.01,0.01,3.342039
1,00:00:00.26,14.03794,admm,0.01,0.01,2.641763
3,00:00:00.27,14.03794,admm,0.0001,0.0001,2.641763
0,00:00:00.30,14.03794,admm,0.1,0.1,2.641763
2,00:00:00.27,14.03794,admm,0.001,0.001,2.641763
0,00:00:00.99,9.486796,kkt_admm,0.1,0.1,2.249901
4,00:00:00.60,6.810916,admm,1e-05,1e-05,1.918527
2,00:00:02.84,3.347774,regain,0.001,0.001,1.208296
1,00:00:02.92,1.649652,kkt_admm,0.01,0.01,0.500564


In [167]:
# Distance to the reference solution per tolerance, one line per method.
# log_y=True draws the raw distance on a logarithmic axis, so the axis is
# labelled as a distance on a log scale rather than as a log-distance value.
axis_labels = {
    "str_tol": "Tolerance rate",
    "distance": "Distance (log scale)",
    "method": "method",
}
fig = px.line(df, x="str_tol", y="distance", color="method", log_y=True,
              labels=axis_labels,
              template="plotly_white",
              title="Log-distance between Z and Z' with respect to ADMM convergence rates")
fig.show()

In [79]:
# Replace the duplicated row labels inherited from the concatenated frames
# with a fresh 0..n-1 index (the old labels are dropped, not kept as a column).
df = df.reset_index(drop=True)

In [100]:
# Display the combined results table.
df

Unnamed: 0,time,distance,method,tol_rate,str_tol,log_distance,sec_time
0,00:00:12.43,0.01796094,sklearn,0.1,0.1,-4.019556,12.43
1,00:00:13.80,0.0006475742,sklearn,0.01,0.01,-7.342277,13.8
2,00:00:11.11,0.0006475742,sklearn,0.001,0.001,-7.342277,11.11
3,00:00:14.43,0.0002295058,sklearn,0.0001,0.0001,-8.379582,14.43
4,00:00:20.41,9.810737e-05,sklearn,1e-05,1e-05,-9.229448,20.41
5,00:00:37.77,8.313535e-06,sklearn,1e-06,1e-06,-11.697626,37.77
6,00:00:36.11,1.094786e-06,sklearn,1e-07,1e-07,-13.724952,36.11
7,00:00:47.52,1.616953e-07,sklearn,1e-08,1e-08,-15.637552,47.52
8,00:00:00.51,33.47581,regain,0.1,0.1,3.510823,0.51
9,00:00:00.69,28.27673,regain,0.01,0.01,3.342039,0.69


In [103]:
# Convert the "HH:MM:SS.ss" strings into elapsed seconds as floats.
# Slicing the last five characters would keep only the seconds field and drop
# the minutes and hours, under-reporting any run of a minute or longer.
# Parsing the whole string as a timedelta keeps all three fields, and being
# vectorized it does not rely on the index being unique.
df['sec_time'] = pd.to_timedelta(df['time']).dt.total_seconds()
df.head()

Unnamed: 0,time,distance,method,tol_rate,str_tol,log_distance,sec_time
0,00:00:12.43,0.017961,sklearn,0.1,0.1,-4.019556,12.43
1,00:00:13.80,0.000648,sklearn,0.01,0.01,-7.342277,13.8
2,00:00:11.11,0.000648,sklearn,0.001,0.001,-7.342277,11.11
3,00:00:14.43,0.00023,sklearn,0.0001,0.0001,-8.379582,14.43
4,00:00:20.41,9.8e-05,sklearn,1e-05,1e-05,-9.229448,20.41


In [165]:
# Distance to the reference solution against run time, one line per method;
# every point is annotated with the tolerance that produced it.
# log_y=True draws the raw distance on a logarithmic axis, so the axis is
# labelled as a distance on a log scale rather than as a log-distance value.
axis_labels = {
    "sec_time": "Time, s",
    "distance": "Distance (log scale)",
    "method": "method",
}
fig = px.line(df, x="sec_time", y="distance", color="method", log_y=True, text="str_tol",
              labels=axis_labels,
              template="plotly_white",
              title='ADMM performance comparison')
fig.update_traces(textposition='top center')
fig.show()

In [157]:
# Scatter version of the time/distance comparison: marker size grows with the
# run time and every point is annotated with the tolerance that produced it.
# The distance is drawn on a logarithmic axis (log_y=True).
fig_1 = px.scatter(df, x="sec_time", y="distance", color="method",
                   log_y=True, template="plotly_white",
                   text="str_tol",
                   size="sec_time",
                   labels={"sec_time": "Time, s",
                           "distance": "Distance (log scale)",
                           "method": "method"},
                   title='ADMM performance comparison')
fig_1.update_traces(textposition='top center')
fig_1.show()

In [116]:
import plotly.graph_objects as go

# Log-distance against run time, drawn with graph_objects.
# One trace is added per method so that a line only connects points of the
# same solver and the legend shows the method name; a single trace over the
# whole frame would join the last point of one method to the first of the next.
fig = go.Figure()
for method_name, method_df in df.groupby('method', sort=False):
    fig.add_trace(go.Scatter(x=method_df['sec_time'],
                             y=method_df['log_distance'],
                             text=method_df['str_tol'],   # tolerance label per point
                             textposition='top right',
                             mode='lines+markers+text',
                             marker=dict(size=8),
                             line=dict(width=1, dash='dash'),
                             name=method_name))

fig.update_layout(plot_bgcolor='white',
                  xaxis_title='Time, s',
                  yaxis_title='log(distance)')

fig.show()