In [None]:
import os
# Switch the working directory to the parent of the current one before the
# remaining imports run (presumably so the project-local modules imported
# below resolve from there -- TODO confirm).
# NOTE: os.path.pardir is the relative path "..", so re-running this cell moves
# the working directory up one more level each time; run it once per kernel.
os.chdir(os.path.pardir)

import numpy as np
import pandas as pd
import time
import itertools
from itertools import product

from regain.covariance import GraphicalLasso as rg_GL
from regain import datasets, utils

from sklearn.covariance import GraphicalLasso as sk_GL
from sklearn import set_config

from regain_benchmark import regain_time
from sklearn_benchmark import sklearn_time
from gglasso_benchmark import gglasso_time
# from gglasso.solver.single_admm_solver import ADMM_SGL
# from gglasso.solver.single_admm_solver import block_SGL
# from gglasso.helper.data_generation import time_varying_power_network, group_power_network, sample_covariance_matrix
# from gglasso.helper.model_selection import single_grid_search

from utils import network_generation, model_solution, benchmark_parameters 
from utils import sparsity_benchmark, dict_shape, hamming_dict
from utils import benchmarks_dataframe, best_time_dataframe, drop_acc_duplicates

from plots import plot_accuracy, plot_scalability, plot_lambdas

# Make scikit-learn estimator reprs list every parameter, not only the ones
# that differ from their defaults.
set_config(print_changed_only=False)
# Do not truncate the columns of displayed DataFrames.
pd.set_option('display.max_columns', None)

## Data

### Power networks

In [None]:
# Results of network_generation, keyed by the (p, N) pair they were generated
# for. Each call returns three objects that are stored as S, X and Theta
# (presumably empirical covariance, samples and true precision matrix --
# TODO confirm against utils.network_generation).
S_dict = {}
X_dict = {}
Theta_dict = {}

# Larger problem sizes, kept for reference:
# p_list=[100, 500, 1000, 2500, 5000, 10000]
# N_list=[200, 1000, 2000, 5000, 10000, 20000]
p_list = [100, 200]
N_list = [200, 400]

print(" Power network generation ".center(40, '-'))

# p_list and N_list are consumed pairwise: the i-th p goes with the i-th N.
for p, N in zip(p_list, N_list):
    try:
        start = time.perf_counter()
        S, X, Theta = network_generation(p, N, K=1, M=2)
        end = time.perf_counter()
    except Exception as exc:
        # Catch only ordinary errors: a bare `except:` would also swallow
        # KeyboardInterrupt / SystemExit and make the cell impossible to stop.
        print("Power network cannot be generated")
        print("Tip: increase the number of sub-blocks M")
        # Report the actual cause instead of hiding it behind the generic tip.
        print("Reason: %s: %s" % (type(exc).__name__, exc))
        break

    print("p: %5d, N : %5d, Time : %5.4f" % (p, N, end - start))

    S_dict[p, N] = S
    X_dict[p, N] = X
    Theta_dict[p, N] = Theta

In [None]:
# Report the shapes stored in each of the three result dictionaries, as
# summarised by utils.dict_shape.
for label, matrices in (
    ("\n Shape of S_i:", S_dict),
    ("\n Shape of X_i:", X_dict),
    ("\n Shape of Theta_i:", Theta_dict),
):
    print(label, dict_shape(matrices))

### Latent variables data from <em>regain</em>

In [None]:
# Fixed settings shared by every simulated dataset.
n_samples = 200
n_dim_lat = 2

# Simulation grid: every observed dimension in n_dims is paired with every
# value in n_times.
n_times = [20, 50, 100]
# 10 log-spaced values between 1e2 and 1e5, square-rooted and truncated to
# integers, i.e. dimensions from 10 up to 316.
n_dims = np.sqrt(np.logspace(2, 5, 10)).astype(int)

# Seed NumPy's global RNG before generating (presumably make_dataset draws
# from it, which would make the data reproducible -- TODO confirm).
np.random.seed(42)
with utils.suppress_stdout():
    # One dataset per (observed dimension, T) combination, keyed by that pair.
    data = {
        (n_obs, n_steps): datasets.make_dataset(
            mode='ma',
            n_samples=n_samples,
            n_dim_lat=n_dim_lat,
            n_dim_obs=n_obs,
            T=n_steps,
            epsilon=1e-2,
        )
        for n_obs, n_steps in product(n_dims, n_times)
    }

In [None]:
# Take the 'data' entry of every generated dataset (in generation order) and
# flatten one level: X_lat becomes a flat list holding the items obtained by
# iterating over each 'data' entry.
X_lat = [
    sample
    for dataset in data.values()
    for sample in dataset['data']
]

In [None]:
# Empirical covariance matrix of every element of X_lat.
# NOTE(review): np.cov defaults to rowvar=True, i.e. it treats each ROW as a
# variable. If the elements of X_lat are laid out as (n_samples, n_features),
# rowvar=False would be needed here -- TODO confirm the layout.
S_lat = [np.cov(sample) for sample in X_lat]

### Real soil data

In [None]:
# Location of the 88-soils metadata table. The default is a machine-specific
# absolute path; set the SOIL_METADATA_PATH environment variable to point at
# the file on any other machine instead of editing this cell.
soil_metadata_path = os.environ.get(
    "SOIL_METADATA_PATH",
    '/Users/oleg.vlasovetc/Public/GGLasso/data/soil/88soils_rounded_metadata.txt',
)
# The first column of the file is used as the index of the resulting frame.
mapping = pd.read_table(soil_metadata_path, index_col=0)

In [None]:
# Preview the first rows of the soil metadata table loaded above.
mapping.head()

### Hyperparameters

In [None]:
# Parameter sets handed to the sklearn, regain and gglasso benchmark helpers
# below, plus the list of lambda values every benchmark iterates over
# (presumably the l1 regularisation strengths -- see utils.benchmark_parameters).
sk_params, rg_params, gglasso_params, lambda_list = benchmark_parameters()

### Model solution

In [None]:
# Solve every (dataset, lambda) combination once with the "sklearn" model and
# store the solution and its run time under keys of the form
# "p_<number of columns of X>_l1_<lambda>". model_Z_dict is later passed as
# `Z=` to the benchmark helpers.
model_time_dict = {}
model_Z_dict = {}

# NOTE: the loop variable `l1` remains in the notebook namespace after this
# cell and is read by the GGLasso / Sklearn / Regain cells, so it must keep
# this name.
for X, l1 in product(X_dict.values(), lambda_list):
    model = "sklearn"
    Z, Z_time, info = model_solution(model=model, X=X, lambda1=l1)

    key = "_".join(("p", str(X.shape[1]), "l1", str(l1)))
    model_time_dict[key] = Z_time
    model_Z_dict[key] = Z

# Printed once after the loop, so it reports `info` of the last solve only.
print("Model solution({0}): {1}".format(model,info))

### GGLasso

In [None]:
# Shared, initially empty accumulators that the GGLasso, sklearn and regain
# benchmark cells all write into (four distinct dict objects).
time_dict, accuracy_dict, Z_dict, trace_dict = {}, {}, {}, {}

In [None]:
# Benchmark GGLasso on every generated problem. X_dict and S_dict are filled
# in the same loop, so zipping their values pairs each X with its own S.
for X, S in zip(X_dict.values(), S_dict.values()):
    # Identity matrix with one row/column per row of S, passed as Omega_0.
    Omega_0 = np.eye(len(S))
    gg_time, gg_accuracy, Z_gg = gglasso_time(
        S=S,
        Omega_0=Omega_0,
        Z=model_Z_dict,
        lambda_list=lambda_list,
        n_iter=1 + 1,
        gglasso_params=gglasso_params,
    )

    # Merge this problem's results into the shared accumulators.
    time_dict.update(gg_time)
    accuracy_dict.update(gg_accuracy)
    Z_dict.update(Z_gg)

    # NOTE(review): `l1` is not assigned anywhere in this loop. It is whatever
    # value the "Model solution" cell left in the notebook namespace, so every
    # entry records the same lambda (and the cell fails with NameError if that
    # cell was not run). Presumably the lambda belonging to `key` was meant --
    # TODO confirm and derive it per entry.
    for key, item in Z_gg.items():
        trace_dict[key] = {"Z": item, "X": X, "S": S, "l1": l1}  # add time for each lambda

### Sklearn

In [None]:
# Benchmark scikit-learn on every generated problem. X_dict and S_dict are
# filled in the same loop, so zipping their values pairs each X with its own S.
for X, S in zip(X_dict.values(), S_dict.values()):
    sk_time, sk_accuracy, Z_sk = sklearn_time(
        X=X,
        Z=model_Z_dict,
        sk_params=sk_params,
        lambda_list=lambda_list,
        n_iter=2,
    )

    # Merge this problem's results into the shared accumulators.
    time_dict.update(sk_time)
    accuracy_dict.update(sk_accuracy)
    Z_dict.update(Z_sk)

    # NOTE(review): `l1` is not assigned anywhere in this loop. It is whatever
    # value the "Model solution" cell left in the notebook namespace, so every
    # entry records the same lambda. Presumably the lambda belonging to `key`
    # was meant -- TODO confirm and derive it per entry.
    for key, item in Z_sk.items():
        trace_dict[key] = {"Z": item, "X": X, "S": S, "l1": l1}

### Regain

In [None]:
# Benchmark regain on every generated problem. X_dict and S_dict are filled
# in the same loop, so zipping their values pairs each X with its own S.
for X, S in zip(X_dict.values(), S_dict.values()):
    rg_time, rg_accuracy, Z_rg = regain_time(
        X=X,
        Z=model_Z_dict,
        rg_params=rg_params,
        lambda_list=lambda_list,
        n_iter=2,
    )

    # Merge this problem's results into the shared accumulators.
    time_dict.update(rg_time)
    accuracy_dict.update(rg_accuracy)
    Z_dict.update(Z_rg)

    # NOTE(review): `l1` is not assigned anywhere in this loop. It is whatever
    # value the "Model solution" cell left in the notebook namespace, so every
    # entry records the same lambda. Presumably the lambda belonging to `key`
    # was meant -- TODO confirm and derive it per entry.
    for key, item in Z_rg.items():
        trace_dict[key] = {"Z": item, "X": X, "S": S, "l1": l1}

### Sparsity

In [None]:
# Compare the estimates collected in Z_dict against the matrices in Theta_dict
# using utils.hamming_dict. t_rounding=1e-4 is presumably the magnitude below
# which an entry is treated as zero -- TODO confirm against the helper.
sparsity = hamming_dict(Theta_dict=Theta_dict, Z_dict=Z_dict, t_rounding=1e-4)

### Visualization

In [None]:
# Combine timings, accuracies and sparsity results into one frame and pass it
# through drop_acc_duplicates (presumably removing rows with duplicated
# accuracy -- see utils), then preview the first rows.
df = drop_acc_duplicates(
    benchmarks_dataframe(
        times=time_dict,
        acc_dict=accuracy_dict,
        spars_dict=sparsity,
    )
)
df.head()

In [None]:
# Figure produced by plots.plot_lambdas from the benchmark frame, with the
# bounds 1e-4 (lower) and 1e-2 (upper) passed through to the helper.
fig = plot_lambdas(
    df,
    upper_bound=1e-2,
    lower_bound=1e-4,
)
fig.show()

In [None]:
# Figure produced by plots.plot_accuracy from the benchmark frame, with the
# bounds 1e-6 (lower) and 1e-2 (upper) passed through to the helper.
fig = plot_accuracy(
    df,
    upper_bound=1e-2,
    lower_bound=1e-6,
)
fig.show()

In [None]:
# Result of utils.sparsity_benchmark on the benchmark frame; the following
# cells index it with 100, 200 and 1000.
frames = sparsity_benchmark(df)

In [None]:
# Entry for key 100 (matches p=100 in p_list; presumably frames is keyed by p).
frames[100]

In [None]:
# Entry for key 200 (matches p=200 in p_list; presumably frames is keyed by p).
frames[200]

In [None]:
# NOTE(review): 1000 is not in the active p_list ([100, 200]); it only appears
# in the commented-out larger list. If frames is keyed by p, this lookup will
# fail on a fresh run with the current settings -- TODO confirm, then remove
# this cell or restore the larger p_list.
frames[1000]