In [1]:
%matplotlib inline

import sys
import time
import numpy as np
import pandas as pd
import plotly.express as px

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn.covariance import empirical_covariance

sys.path.append('..')
from gglasso.solver.single_admm_solver import ADMM_SGL
from gglasso.solver.single_admm_solver import kkt_ADMM_SGL
from helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
from helper.model_selection import single_grid_search


from regain.covariance import GraphicalLasso as rg_GL
#from regain.covariance.graphical_lasso_ import GraphicalLasso
# from regain.covariance import LatentTimeGraphicalLasso
# from regain.covariance import latent_graphical_lasso_, latent_time_graphical_lasso_

# Sampling from power networks

In [75]:
p = 100
N = 200
Sigma, Theta = group_power_network(p, K=5, M=2) #Theta is true precision matrix

S, samples = sample_covariance_matrix(Sigma, N)

S = S[0,:,:]
Theta = Theta[0,:,:] #true precision matrix

samples.shape, Z.shape

((5, 100, 200), (100, 100))

In [76]:
lambda1 = 0.01
Omega_0 = np.eye(p)

n_iter = 50000
tol_list = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]

## Model solution Z

In [90]:
start = time.time()
model = sk_GL(alpha=lambda1, max_iter=n_iter, tol = 1e-10).fit(samples[0,:,:].T)
end = time.time()

hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
Z_time = "{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds)
print(Z_time)

00:00:03.10


In [81]:
Z = model.precision_

## Sklearn

In [82]:
cov_list = []
precision_list = []
time_list = []
for tol in tol_list:
    start = time.time()
    Z_n = sk_GL(alpha=lambda1, max_iter=n_iter, tol = tol).fit(samples[0,:,:].T)
    end = time.time()
    
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)
    time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
    
    cov_list.append(Z_n.covariance_)
    precision_list.append(Z_n.precision_)
    
accuracy_list = []
for i in range(0, len(precision_list)):
    accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
    accuracy_list.append(accuracy)

In [83]:
str_tol = [str(x) for x in tol_list]
method_list = ["sklearn"]*len(accuracy_list)

df_sk = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_sk

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.08,0.013193,sklearn,0.1,0.1
1,00:00:00.06,0.013193,sklearn,0.01,0.01
2,00:00:00.05,0.000861,sklearn,0.001,0.001
3,00:00:00.05,0.000861,sklearn,0.0001,0.0001
4,00:00:00.55,8.7e-05,sklearn,1e-05,1e-05
5,00:00:00.77,4e-05,sklearn,1e-06,1e-06
6,00:00:00.71,3.5e-05,sklearn,1e-07,1e-07
7,00:00:00.83,3.5e-05,sklearn,1e-08,1e-08


## Regain

In [84]:
cov_list = []
precision_list = []
time_list = []
for tol in tol_list:
    start = time.time()
    Z_n = rg_GL(alpha=lambda1, max_iter=n_iter, tol = tol).fit(samples[0,:,:].T)
    end = time.time()
    
    hours, rem = divmod(end-start, 3600)
    minutes, seconds = divmod(rem, 60)
    time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))
    
    cov_list.append(Z_n.covariance_)
    precision_list.append(Z_n.precision_)

accuracy_list = []
for i in range(0, len(precision_list)):
    accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
    accuracy_list.append(accuracy)

In [85]:
str_tol = [str(x) for x in tol_list]
method_list = ["regain"]*len(accuracy_list)

df_rg = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_rg

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.02,7.021403,regain,0.1,0.1
1,00:00:00.01,7.021403,regain,0.01,0.01
2,00:00:00.08,1.3425,regain,0.001,0.001
3,00:00:00.07,0.129316,regain,0.0001,0.0001
4,00:00:00.12,0.014125,regain,1e-05,1e-05
5,00:00:00.16,0.002363,regain,1e-06,1e-06
6,00:00:00.17,0.001365,regain,1e-07,1e-07
7,00:00:00.17,0.00119,regain,1e-08,1e-08


## ADMM 

We run the algorithm 2 times because on the first iteration time costly numba initialization happens.

In [86]:
for i in [0,1]:
    cov_list = []
    precision_list = []
    time_list = []
    for tol in tol_list:
        start = time.time()
        sol, info = ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, tol = tol, verbose = False, latent = False)
        end = time.time()

        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    accuracy_list = []
    for i in range(0, len(precision_list)):
        accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
        accuracy_list.append(accuracy)

ADMM terminated after 1 iterations with accuracy 2.6899834676528016
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 1 iterations with accuracy 2.6899834676528016
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 1 iterations with accuracy 2.6899834676528016
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 8 iterations with accuracy 0.46502616533481667
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 29 iterations with accuracy 0.05050064025498946
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 57 iterations with accuracy 0.005571403508857344
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 76 iterations with accuracy 0.0013900768953370266
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 81 iterations with accuracy 0.0009703962175316557
ADMM status: {'dual optimal', 'primal optimal'}
ADMM terminated after 1 iterations with accuracy 2.6899834

In [87]:
str_tol = [str(x) for x in tol_list]
method_list = ["admm"]*len(accuracy_list)

df_admm = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_admm

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.00,10.070153,admm,0.1,0.1
1,00:00:00.01,10.070153,admm,0.01,0.01
2,00:00:00.00,10.070153,admm,0.001,0.001
3,00:00:00.02,3.878694,admm,0.0001,0.0001
4,00:00:00.07,0.604315,admm,1e-05,1e-05
5,00:00:00.15,0.073558,admm,1e-06,1e-06
6,00:00:00.19,0.018767,admm,1e-07,1e-07
7,00:00:00.25,0.013147,admm,1e-08,1e-08


## kkt_ADMM

We run the algorithm 2 times because on the first iteration time costly numba initialization happens.

In [88]:
for i in [0,1]:
    cov_list = []
    precision_list = []
    time_list = []
    for tol in tol_list:
        start = time.time()
        sol, info = kkt_ADMM_SGL(S, lambda1, Omega_0, max_iter=n_iter, eps_admm = tol, verbose = False, latent = False)
        end = time.time()

        hours, rem = divmod(end-start, 3600)
        minutes, seconds = divmod(rem, 60)
        time_list.append("{:0>2}:{:0>2}:{:05.2f}".format(int(hours),int(minutes),seconds))

        cov_list.append(sol['Omega'])
        precision_list.append(sol['Theta'])

    accuracy_list = []
    for i in range(0, len(precision_list)):
        accuracy = np.linalg.norm(Z - np.array(precision_list[i]))
        accuracy_list.append(accuracy)

ADMM terminated after 2 iterations with accuracy 0.09582974100748616
ADMM status: optimal
ADMM terminated after 15 iterations with accuracy 0.00906007096761962
ADMM status: optimal
ADMM terminated after 40 iterations with accuracy 0.0009340077761587818
ADMM status: optimal
ADMM terminated after 70 iterations with accuracy 9.773553109816599e-05
ADMM status: optimal
ADMM terminated after 102 iterations with accuracy 9.920788868339118e-06
ADMM status: optimal
ADMM terminated after 135 iterations with accuracy 9.712746514452672e-07
ADMM status: optimal
ADMM terminated after 168 iterations with accuracy 9.653152359718536e-08
ADMM status: optimal
ADMM terminated after 201 iterations with accuracy 9.675222750367183e-09
ADMM status: optimal
ADMM terminated after 2 iterations with accuracy 0.09582974100748616
ADMM status: optimal
ADMM terminated after 15 iterations with accuracy 0.00906007096761962
ADMM status: optimal
ADMM terminated after 40 iterations with accuracy 0.0009340077761587818
ADMM

In [89]:
str_tol = [str(x) for x in tol_list]
method_list = ["kkt_admm"]*len(accuracy_list)

df_kkt = pd.DataFrame(data={'time': time_list, 
                           'distance': accuracy_list,
                           'method': method_list,
                           'tol_rate': tol_list,
                            'str_tol': str_tol})
df_kkt

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.01,8.402418,kkt_admm,0.1,0.1
1,00:00:00.07,1.95298,kkt_admm,0.01,0.01
2,00:00:00.19,0.258434,kkt_admm,0.001,0.001
3,00:00:00.32,0.028812,kkt_admm,0.0001,0.0001
4,00:00:00.42,0.002976,kkt_admm,1e-05,1e-05
5,00:00:00.59,0.000293,kkt_admm,1e-06,1e-06
6,00:00:00.73,2.9e-05,kkt_admm,1e-07,1e-07
7,00:00:00.88,3e-06,kkt_admm,1e-08,1e-08


## Data formating

In [119]:
Z_frame = pd.DataFrame({"time": Z_time, 
                        "distance": 0,
                        "method":["Z"], 
                        "tol_rate": 1e-10,
                        "str_tol": "1e-10"})

In [120]:
frames = [df_sk, df_rg, df_admm, df_kkt, Z_frame]

df = pd.concat(frames)

df["str_tol"].replace({"0.0001": "1e-04"}, inplace=True)
df.sort_values(by=['time', 'distance'])

Unnamed: 0,time,distance,method,tol_rate,str_tol
0,00:00:00.00,10.070153,admm,0.1,0.1
2,00:00:00.00,10.070153,admm,0.001,0.001
1,00:00:00.01,7.021403,regain,0.01,0.01
0,00:00:00.01,8.402418,kkt_admm,0.1,0.1
1,00:00:00.01,10.070153,admm,0.01,0.01
3,00:00:00.02,3.878694,admm,0.0001,0.0001
0,00:00:00.02,7.021403,regain,0.1,0.1
2,00:00:00.05,0.000861,sklearn,0.001,0.001
3,00:00:00.05,0.000861,sklearn,0.0001,0.0001
1,00:00:00.06,0.013193,sklearn,0.01,0.01


In [116]:
norm_ac = np.array(df['distance'])
df['accuracy'] = (norm_ac - np.min(norm_ac))/np.ptp(norm_ac)
df['accuracy'] = 1 - df['accuracy']

# dist = np.array(df['distance'])
# df['accuracy'] = np.log(dist)

In [117]:
df.sort_values(by=['accuracy'], ascending = False)

Unnamed: 0,time,distance,method,tol_rate,str_tol,accuracy
0,00:00:00.00,10.070153,admm,0.1,0.1,2.309576
1,00:00:00.01,10.070153,admm,0.01,0.01,2.309576
2,00:00:00.00,10.070153,admm,0.001,0.001,2.309576
0,00:00:00.01,8.402418,kkt_admm,0.1,0.1,2.128519
0,00:00:00.02,7.021403,regain,0.1,0.1,1.948963
1,00:00:00.01,7.021403,regain,0.01,0.01,1.948963
3,00:00:00.02,3.878694,admm,0.0001,0.0001,1.355498
1,00:00:00.07,1.95298,kkt_admm,0.01,0.01,0.669356
2,00:00:00.08,1.3425,regain,0.001,0.001,0.294534
4,00:00:00.07,0.604315,admm,1e-05,1e-05,-0.50366


In [118]:
fig = px.line(df, x="str_tol", y="accuracy", color= "method",
              labels={
                     "str_tol": "Tolerance rate",
                     "accuracy": "Accuracy",
                     "method": "method"
                 },
              title='Accuracy and tollerance rate comparison')
fig.show()

In [53]:
a = df.loc[df["method"] == "sklearn"]
a

Unnamed: 0,time,distance,method,tol_rate,str_tol,accuracy
0,00:00:00.07,11.77091,sklearn,0.1,0.1,0.336372
1,00:00:00.04,11.77091,sklearn,0.01,0.01,0.336372
2,00:00:00.06,11.770039,sklearn,0.001,0.001,0.336421
3,00:00:00.07,11.770039,sklearn,0.0001,0.0001,0.336421
4,00:00:00.92,11.770009,sklearn,1e-05,1e-05,0.336423
5,00:00:00.85,11.770003,sklearn,1e-06,1e-06,0.336423
6,00:00:00.88,11.770002,sklearn,1e-07,1e-07,0.336423
7,00:00:00.92,11.770002,sklearn,1e-08,1e-08,0.336423


In [72]:
pd.to_datetime(a['time']).astype(int)

0    1615334400070000000
1    1615334400040000000
2    1615334400060000000
3    1615334400070000000
4    1615334400920000000
5    1615334400850000000
6    1615334400880000000
7    1615334400920000000
Name: time, dtype: int64

In [73]:
px.line(a, x=pd.to_datetime(a['time']).astype(int), y="accuracy", color= "method",
              title='Accuracy and tollerance rate comparison')

In [74]:
# fig_1 = px.scatter(x=a['time'], y=a['accuracy'], color= "method")
# fig_1.show()

fig_1 = px.scatter(df, x=pd.to_datetime(df['time']).astype(int), 
                   y='accuracy', color="method",
                 size='accuracy')
fig_1.show()