In [None]:
# Switch the working directory to the parent of the current one.
# NOTE(review): this is not idempotent -- every re-execution of this cell climbs
# one more level, so it must run exactly once per kernel session.
# Presumably done so the local `benchmarks`, `plots` and `utils` modules imported
# below can be found -- TODO confirm.
import os
os.chdir(os.path.pardir)

import numpy as np
import pandas as pd
import time

from gglasso.solver.single_admm_solver import ADMM_SGL
from gglasso.solver.single_admm_solver import block_SGL
from gglasso.helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
from gglasso.helper.model_selection import single_grid_search
from benchmarks import models_to_dict, sklearn_time_benchmark, admm_time_benchmark, model_solution, benchmark_parameters
from benchmarks import time_benchmark, sparsity_benchmark
from benchmarks import sk_scaling, single_scaling, block_scaling

from plots import plot_accuracy, plot_scalability
from utils import network_generation, dict_shape, hamming_dict
from utils import benchmarks_dataframe, best_time_dataframe, drop_acc_duplicates

from regain.covariance import GraphicalLasso as rg_GL

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn import set_config
set_config(print_changed_only=False)

In [None]:
# NOTE(review): disabled experiment kept for reference. It builds synthetic
# datasets with regain's `make_dataset` for every (dimension, T) combination.
# If it is re-enabled, `product` has to be imported first
# (e.g. `from itertools import product`); no such import is visible in this notebook.
# from regain import utils
# from sklearn import datasets
# from sklearn.utils import Bunch
# from regain.datasets import datasets
# from regain.datasets import make_dataset

# # prepare data
# n_times = [20, 50, 100]
# n_dims = np.sqrt(np.logspace(2, 5, 10)).astype(int)

# n_samples = 200
# n_dim_lat = 2

# np.random.seed(42)
# with utils.suppress_stdout():
#     data = {
#         (dim, T): make_dataset(
#             mode='ma', n_samples=n_samples, 
#             n_dim_lat=n_dim_lat, n_dim_obs=dim,
#             T=T, epsilon=1e-2)
#         for dim, T in (product(n_dims, n_times))
#     }

In [None]:
# Generated problems, keyed by the (p, N) pair they were created for.
S_dict = dict()
X_dict = dict()
Theta_dict = dict()

# Larger problem sizes, disabled to keep the run short:
# p_list=[100, 500, 1000, 2500, 5000, 10000]
# N_list=[200, 1000, 2000, 5000, 10000, 20000]
p_list = [100]
N_list = [200]

print(" Power network generation ".center(40, '-'))

# p_list and N_list are consumed pairwise: the i-th p goes with the i-th N.
for p, N in zip(p_list, N_list):
    try:
        # perf_counter is a monotonic clock intended for measuring short intervals.
        start = time.perf_counter()
        S, X, Theta = network_generation(p, N, K=1, M=2)
        end = time.perf_counter()
    except Exception as err:
        # Catch Exception rather than everything, so a kernel interrupt
        # (KeyboardInterrupt) still stops the cell, and show the real cause.
        print("Power network cannot be generated")
        print("Tip: increase the number of sub-blocks M")
        print("Reason: %r" % (err,))
        # Stop at the first failure; later sizes are not attempted.
        break

    print("p: %5d, N : %5d, Time : %5.4f" % (p, N, end - start))

    S_dict[p, N] = S
    X_dict[p, N] = X
    Theta_dict[p, N] = Theta

In [None]:
# Print the dict_shape summary of each generated collection, one line per collection.
shape_reports = (
    ("\n Shape of S_i:", S_dict),
    ("\n Shape of X_i:", X_dict),
    ("\n Shape of Theta_i:", Theta_dict),
)
for caption, collection in shape_reports:
    print(caption, dict_shape(collection))

In [None]:
# Build the three solver settings that the speed benchmark below receives
# as its sk_params / rg_params / admm_params arguments.
sk_params, rg_params, admm_params = benchmark_parameters(
    S_dict=S_dict,
    sk_tol_list=[0.5],
    enet_list=[0.5],
)

## Speed benchmark

In [None]:
# Accumulators merged from the three dictionaries each time_benchmark call returns.
time_dict, accuracy_dict, Z_dict = {}, {}, {}

# X_dict and S_dict are walked in parallel, one generated problem per iteration.
for X, S in zip(X_dict.values(), S_dict.values()):
    times, accs, precs = time_benchmark(
        X=X, S=S, Z_model="sklearn", lambda1=0.1, n_iter=5,
        sk_params=sk_params, rg_params=rg_params, admm_params=admm_params,
    )

    # Fold this problem's results into the notebook-wide accumulators.
    for target, chunk in ((time_dict, times), (accuracy_dict, accs), (Z_dict, precs)):
        target.update(chunk)

In [None]:
# Compare the benchmark solutions in Z_dict against the reference matrices in
# Theta_dict (presumably a Hamming-type distance, judging by the helper's name -- confirm).
sparsity = hamming_dict(
    Theta_dict=Theta_dict,
    Z_dict=Z_dict,
    t_rounding=1e-4,
)

In [None]:
# Assemble the benchmark table and pass it through drop_acc_duplicates in one step,
# so `df` is only ever bound to the final frame.
df = drop_acc_duplicates(
    benchmarks_dataframe(times=time_dict, acc_dict=accuracy_dict, spars_dict=sparsity)
)
df.head()

In [None]:
# Draw the accuracy figure for the benchmark table, using the given bounds.
fig = plot_accuracy(
    df,
    upper_bound=0.01,
    lower_bound=0.0001,
)
fig.show()

In [None]:
# NOTE(review): disabled draft. For p == 100 it collects, per method, the
# smallest recorded time among runs whose accuracy is at most `accur`.
# loop this over p
# df_p = df[df.p==100]
# accur = 1e-3
# res_p = dict()
# for method in df_p.method.unique():
#     tmp = df_p[(df_p.method == method) & (df_p.accuracy <= accur)]
#     res_p[method] = tmp.time.min()
# res_p

## Sparsity benchmark

In [None]:
# Derive the sparsity results from the benchmark table; the cells below look
# entries up by integer key (100, 300, 1000).
frames = sparsity_benchmark(df)

In [None]:
# Show the entry stored under key 100 (presumably the p=100 problem -- confirm).
frames[100]

In [None]:
# NOTE(review): p_list in the generation cell is currently [100]. If `frames` is
# keyed by p (as these lookups suggest), key 300 is presumably absent on a fresh
# run and this cell raises -- confirm, then guard or remove it.
frames[300]

In [None]:
# NOTE(review): p_list in the generation cell is currently [100]. If `frames` is
# keyed by p (as these lookups suggest), key 1000 is presumably absent on a fresh
# run and this cell raises -- confirm, then guard or remove it.
frames[1000]

## Scalability benchmark

In [None]:
# Settings shared by every solver in the scalability loop.
lambda1 = 0.01      # passed as `alpha` to the sklearn / regain estimators and as lambda1 to the ADMM helpers
max_iter = 50000    # iteration cap handed to each solver
n_iter = 1          # forwarded to the *_scaling helpers (presumably timed repetitions -- confirm)
tol = rtol = 1e-10  # forwarded to the ADMM helpers as tol and rtol

In [None]:
# Mean run time per solver and problem, keyed "<solver>_p_<p>_N_<N>".
scale_time_dict = dict()
# Failure flags as returned by sk_scaling; once a flag is set, that solver is
# skipped for all remaining problems.
sk = False
rg = False

for X, S in zip(X_dict.values(), S_dict.values()):
    # Sizes used for the "p" and "N" parts of the result key and in the messages.
    p_dim, n_obs = len(X[0]), len(X)
    key = "p_" + str(p_dim) + "_N_" + str(n_obs)

    # Reset every solver's timings to NaN for this problem. A solver that is
    # skipped or fails then records NaN instead of silently reusing the timings
    # of the previous problem (or raising NameError on the very first one).
    sk_list = [np.nan]
    rg_list = [np.nan]
    single_list = [np.nan]
    block_list = [np.nan]

    # sklearn model
    if not sk:
        sk_model = sk_GL(alpha=lambda1, max_iter=max_iter, assume_centered=False)
        sk_list, sk = sk_scaling(X, sk_model, n_iter)
    else:
        print("sklearn kernel has died at p={}".format(p_dim))

    # regain model (timed with the same helper as sklearn)
    if not rg:
        rg_model = rg_GL(alpha=lambda1, max_iter=max_iter, assume_centered=False, init=np.eye(S.shape[0]))
        rg_list, rg = sk_scaling(X, rg_model, n_iter)
    else:
        print("regain kernel has died at p={}".format(p_dim))

    # ADMM models, both started from the identity matrix
    Omega_0 = np.eye(len(S))
    try:
        single_list = single_scaling(S, lambda1, Omega_0, max_iter, tol, rtol, n_iter)
    except Exception as err:
        # Exception (not a bare except) keeps kernel interrupts working.
        print("single ADMM kernel has died at p={}".format(p_dim))
        print("Reason: {!r}".format(err))
    try:
        block_list = block_scaling(S, lambda1, Omega_0, max_iter, tol, rtol, n_iter)
    except Exception as err:
        print("block ADMM kernel has died at p={}".format(p_dim))
        print("Reason: {!r}".format(err))

    scale_time_dict["sklearn_" + key] = np.mean(sk_list)
    scale_time_dict["regain_" + key] = np.mean(rg_list)
    scale_time_dict["single-admm_" + key] = np.mean(single_list)
    scale_time_dict["block-admm_" + key] = np.mean(block_list)

In [None]:
# Turn the per-solver timings collected in scale_time_dict into a table and preview it.
time_df = best_time_dataframe(scale_time_dict)
time_df.head()

In [None]:
# Plot the scalability timings from time_df and display the figure.
fig = plot_scalability(time_df)
fig.show()