In [None]:
cd /Users/oleg.vlasovetc/Public/GGLasso

In [None]:
import sys
import time
import numpy as np
import pandas as pd
import itertools
import plotly.express as px
import plotly.graph_objects as go
import datetime
from tqdm import trange

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn.covariance import empirical_covariance
from sklearn import set_config
# Make scikit-learn estimator reprs list every parameter instead of only the
# non-default ones; str(sk_model) is later used as a progress-bar label.
# NOTE(review): presumably this also affects the regain estimator repr -- confirm.
set_config(print_changed_only=False)

from gglasso.solver.single_admm_solver import ADMM_SGL
from gglasso.solver.single_admm_solver import block_SGL
from gglasso.helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
from gglasso.helper.model_selection import single_grid_search
from benchmarks import models_to_dict, sklearn_time_benchmark, admm_time_benchmark, model_solution, benchmark_parameters
from benchmarks import network_generation, hamming_distance, hamming_dict, time_benchmark, dict_to_dataframe, dict_shape
from benchmarks import plot_log_distance, drop_duplicates, sparsity_benchmark

from regain.covariance import GraphicalLasso as rg_GL

In [None]:
# Problem sizes to benchmark: p = number of features, N = number of samples.
# The two lists are paired element-wise by zip() below.
# Alternative (smaller) grid kept for reference:
# p_list=[100, 300, 1000]
# N_list=[300, 600, 2000]
# NOTE(review): later cells index frames[100], frames[300] and frames[1000];
# with the active grid below those keys are presumably absent -- confirm which
# grid is intended before running the whole notebook.
p_list = [10000]
N_list = [20000]

# One entry per (p, N) pair, filled from network_generation's three return values.
S_dict, X_dict, Theta_dict = {}, {}, {}

for p, N in zip(p_list, N_list):
    S, X, Theta = network_generation(p, N, K=1, M=10)

    size_key = (p, N)
    S_dict[size_key] = S
    X_dict[size_key] = X
    Theta_dict[size_key] = Theta

# Sanity check: report the shapes of everything that was generated.
print("\n Shape of S_i:", dict_shape(S_dict))
print("\n Shape of X_i:", dict_shape(X_dict))
print("\n Shape of Theta_i:", dict_shape(Theta_dict))

In [None]:
# Fetch the parameter sets for the three solver families (sklearn, regain, ADMM);
# they are forwarded unchanged to time_benchmark() in the next cell.
sk_params, rg_params, admm_params = benchmark_parameters()

In [None]:
# Accumulators merged across all generated problem sizes.
time_dict, accuracy_dict, Z_dict = {}, {}, {}

# X_dict and S_dict were filled in the same order, so their values pair up per problem size.
for X, S in zip(X_dict.values(), S_dict.values()):
    run_times, run_accuracies, run_precisions = time_benchmark(
        X=X,
        S=S,
        Z_model="sklearn",
        lambda1=0.1,
        n_iter=5,
        sk_params=sk_params,
        rg_params=rg_params,
        admm_params=admm_params,
    )

    # Each call returns three dicts; fold them into the global accumulators.
    time_dict.update(run_times)
    accuracy_dict.update(run_accuracies)
    Z_dict.update(run_precisions)

In [None]:
# Compare the generated matrices (Theta_dict) with the solver outputs (Z_dict).
# NOTE(review): presumably a Hamming distance between sparsity patterns, with
# t_rounding as the threshold below which entries count as zero -- confirm
# against benchmarks.hamming_dict.
sparsity = hamming_dict(Theta_dict=Theta_dict, Z_dict=Z_dict, t_rounding=1e-4)

In [None]:
# Combine runtimes, accuracies and sparsity results into one table,
# de-duplicate it, and preview the first rows.
df = drop_duplicates(
    dict_to_dataframe(times=time_dict, acc_dict=accuracy_dict, spars_dict=sparsity)
)
df.head()

In [None]:
# Plot the benchmark table and display the figure.
# NOTE(review): upper_bound / lower_bound presumably restrict the plotted
# accuracy range -- confirm against benchmarks.plot_log_distance.
fig = plot_log_distance(df, upper_bound=0.01, lower_bound=0.0001)
fig.show()

In [None]:
# Build the sparsity benchmark results from the combined table; the result is
# indexed by integer keys in the following cells.
frames = sparsity_benchmark(df)

In [None]:
# NOTE(review): presumably `frames` is keyed by the number of features p; key 100
# may be missing when only p_list=[10000] was benchmarked -- confirm.
frames[100]

In [None]:
# NOTE(review): presumably `frames` is keyed by the number of features p; key 300
# may be missing when only p_list=[10000] was benchmarked -- confirm.
frames[300]

In [None]:
# NOTE(review): presumably `frames` is keyed by the number of features p; key 1000
# may be missing when only p_list=[10000] was benchmarked -- confirm.
frames[1000]

In [None]:
# Scalability benchmark: for a growing problem size (p features, N samples),
# time each solver and store the mean runtime in `time_dict` under the key
# "<method>_p_<p>_N_<N>".

# Size grid: p and N are advanced together (paired by zip below).
p_start = 100
p_end = 10000
p_step = 1000
N_start = 200
N_end = 20000
N_step = 2000

# Solver settings shared by all methods.
lambda1 = 0.01
max_iter = 50000
tol = 1e-4
enet = 1e-2
n_iter = 2  # number of timed repetitions per solver and problem size
rtol = 1e-4


def _mean_runtime(run_once, n_iter, desc):
    """Return the mean wall-clock duration in seconds of `n_iter` calls to `run_once`.

    Parameters
    ----------
    run_once : callable
        Zero-argument callable performing one solver run.
    n_iter : int
        Number of timed repetitions.
    desc : str
        Label shown on the tqdm progress bar.
    """
    durations = []
    for _ in trange(n_iter, desc=desc, leave=True):
        # perf_counter is monotonic and high-resolution, unlike time.time().
        started = time.perf_counter()
        run_once()
        durations.append(time.perf_counter() - started)
    return np.mean(durations)


time_dict = dict()
sk_model = sk_GL(alpha=lambda1, max_iter=max_iter, tol=tol, enet_tol=enet, assume_centered=False)
rg_model = rg_GL(alpha=lambda1, max_iter=max_iter, tol=tol, rtol=rtol, assume_centered=False)

for p, N in zip(range(p_start, p_end, p_step), range(N_start, N_end, N_step)):
    try:
        start = time.perf_counter()
        S, X, Theta = network_generation(p, N, K=1, M=10)
        end = time.perf_counter()
        Omega_0 = np.eye(len(S))
    except Exception as err:
        # Catch Exception only, so that a kernel interrupt still stops the benchmark.
        print("power network cannot be generated, increase M")
        print("  reason:", repr(err))
        break
    else:
        print("Power network generation time in seconds: ", end - start)

    size_suffix = "_p_" + str(p) + "_N_" + str(N)

    # (method name, progress-bar label, one solver run). The callables are
    # executed within this iteration, so they see the current X, S and Omega_0.
    solver_runs = [
        ("sklearn", str(sk_model), lambda: sk_model.fit(X)),
        ("regain", str(rg_model), lambda: rg_model.fit(X)),
        ("single-admm", "single-admm" + size_suffix,
         lambda: ADMM_SGL(S, lambda1=lambda1, Omega_0=Omega_0, max_iter=max_iter,
                          tol=tol, rtol=rtol, stopping_criterion="boyd")),
        ("block-admm", "block-admm" + size_suffix,
         lambda: block_SGL(S, lambda1=lambda1, Omega_0=Omega_0, max_iter=max_iter,
                           tol=tol, rtol=rtol, stopping_criterion="boyd")),
    ]

    failures = []
    for method, desc, run_once in solver_runs:
        try:
            time_dict[method + size_suffix] = _mean_runtime(run_once, n_iter, desc)
            failures.append(False)
        except Exception as err:
            # A failing solver must not abort the others; report why it failed.
            failures.append(True)
            print(method + " kernel has died")
            print("  reason:", repr(err))

    # Stop growing the problem once no solver can handle the current size.
    if all(failures):
        print("all kernels have died")
        break

In [None]:
# Show the mean runtimes collected by the scalability benchmark
# (keys have the form "<method>_p_<p>_N_<N>").
time_dict

In [None]:
# Build the tidy table the plot needs from `time_dict`, whose keys have the
# form "<method>_p_<p>_N_<N>" and whose values are mean runtimes in seconds.
# (Without this step `time_df` is undefined on a fresh kernel.)
time_records = []
for key, seconds in time_dict.items():
    method, _, sizes = key.partition("_p_")
    p_text, _, N_text = sizes.partition("_N_")
    time_records.append({"method": method, "p": int(p_text), "N": int(N_text), "time": seconds})

time_df = pd.DataFrame(time_records, columns=["method", "p", "N", "time"])

# One trace per method: runtime on the x-axis, number of features on the y-axis,
# with the sample size N printed next to each marker.
fig = px.scatter(
    time_df,
    x="time",
    y="p",
    text="N",
    color="method",
    labels={
        "time": "Time, s",
        "p": "Number of features, p",
        "method": "method",
    },
    template="plotly_white",
    title="Scalability plot",
)

fig.update_traces(mode='markers+lines', marker_line_width=1, marker_size=10)
fig.show()