In [1]:
import gglasso
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import scipy as sp
import matplotlib.pyplot as plt

from numpy.linalg import matrix_rank
from matplotlib.pyplot import figure
from scipy import stats
from scipy.linalg import eigh
from numpy import genfromtxt

from gglasso.solver.admm_solver import ADMM_MGL
from gglasso.problem import glasso_problem

from gglasso.helper.data_generation import generate_precision_matrix, group_power_network, sample_covariance_matrix
from gglasso.helper.basic_linalg import adjacency_matrix
from gglasso.helper.data_generation import time_varying_power_network, sample_covariance_matrix
from gglasso.helper.experiment_helper import lambda_grid, discovery_rate, error
from gglasso.helper.utils import get_K_identity
from gglasso.helper.experiment_helper import plot_evolution, plot_deviation, surface_plot, single_heatmap_animation
from gglasso.helper.model_selection import aic, ebic, K_single_grid

### Read data

In [2]:
# Read the 50 per-subject CSV matrices (corr0.csv ... corr49.csv) and stack
# them along a new leading axis.
sub_corr = np.array([
    genfromtxt("/storage/groups/bds01/datasets/brains/corr_matrices/corr{0}.csv".format(i), delimiter=',')
    for i in range(50)
])
sub_corr.shape

(50, 436, 436)

### SGL

In [13]:
# Ten log-spaced candidate values for lambda1, descending from 10**-0.9 to 10**-1.5.
lambda1_range = np.logspace(start=-0.9, stop=-1.5, num=10)
lambda1_range

array([0.12589254, 0.10797752, 0.09261187, 0.07943282, 0.06812921,
       0.05843414, 0.05011872, 0.04298662, 0.03686945, 0.03162278])

In [16]:
# NOTE(review): N is taken from the matrix dimension (sub_corr.shape[1]);
# confirm this is the value K_single_grid expects for its N argument.
N = sub_corr.shape[1]

# Grid search over lambda1_range on the first 10 matrices, selecting by eBIC.
grid_kwargs = dict(method='eBIC', gamma=0.3, latent=False, use_block=True)
est_uniform, est_indv, statistics = K_single_grid(sub_corr[:10, :], lambda1_range, N, **grid_kwargs)


------------Range search for instance 0------------
ADMM terminated after 73 iterations with status: optimal.
ADMM terminated after 54 iterations with status: optimal.
ADMM terminated after 56 iterations with status: optimal.
ADMM terminated after 57 iterations with status: optimal.
ADMM terminated after 60 iterations with status: optimal.
ADMM terminated after 66 iterations with status: optimal.
ADMM terminated after 73 iterations with status: optimal.
ADMM terminated after 82 iterations with status: optimal.
ADMM terminated after 94 iterations with status: optimal.
ADMM terminated after 82 iterations with status: optimal.
------------Range search for instance 1------------
ADMM terminated after 84 iterations with status: optimal.
ADMM terminated after 65 iterations with status: optimal.
ADMM terminated after 64 iterations with status: optimal.
ADMM terminated after 68 iterations with status: optimal.
ADMM terminated after 73 iterations with status: optimal.
ADMM terminated after 81 i

In [17]:
# Show the statistics object returned by the K_single_grid call above.
statistics

{'BIC': array([[[ 105228.30686567],
         [ 107219.82072956],
         [ 110437.51028301],
         [ 115398.04282336],
         [ 121293.23949615],
         [ 127563.68468055],
         [ 134299.07985907],
         [ 140852.2257635 ],
         [ 147655.75421168],
         [ 154947.18619038]],
 
        [[  62330.44803786],
         [  59905.94030433],
         [  59682.48476706],
         [  59255.10868659],
         [  60266.30536546],
         [  62754.16956879],
         [  65253.23739328],
         [  68673.08358251],
         [  72246.72383698],
         [  74844.20283737]],
 
        [[  39364.59220797],
         [  35448.43470329],
         [  32025.87862695],
         [  28904.30660338],
         [  28202.70437599],
         [  28455.7879491 ],
         [  28861.90010454],
         [  31113.54614812],
         [  33910.7477674 ],
         [  37000.09015761]],
 
        [[  90549.35892741],
         [  90090.32827863],
         [  90028.81901017],
         [  92032.86890086]

In [20]:
# Scratch check: build the output directory path for a given suffix.
b = "100"
path_template = "/storage/groups/bds01/datasets/brains/est_uniform{0}/"
a = path_template.format(b)
a

'/storage/groups/bds01/datasets/brains/est_uniform100/'

In [5]:
N = sub_corr.shape[1]

# Problem over the first 10 matrices, without latent variables and without
# rescaling the input.
# NOTE(review): reg_params['lambda1'] receives the whole lambda1_range array
# here; confirm that a range (rather than a single value) is intended.
P = glasso_problem(
    sub_corr[:10, :],
    N,
    reg_params={'lambda1': lambda1_range},
    latent=False,
    do_scaling=False,
)
print(P)

 
GROUP GRAPHICAL LASSO PROBLEM 
Regularization parameters:
{'lambda1': array([0.17782794, 0.16259647, 0.14866962, 0.13593564, 0.12429236,
       0.11364637, 0.10391223, 0.09501185, 0.08687381, 0.07943282]), 'lambda2': 0.01, 'mu1': None}


In [6]:
# eBIC weight and the lambda1 grid handed to model selection.
gamma = 0.3
modelselect_params = dict(lambda1_range=lambda1_range)

In [7]:
# Run eBIC-based model selection over the grid in modelselect_params.
P.model_selection(
    modelselect_params=modelselect_params,
    method='eBIC',
    gamma=gamma,
)

# Author's note: after model selection, reg_params holds the best values found.
print(P.reg_params)

KeyboardInterrupt: 

In [None]:
# Inspect the precision matrix stored on the problem's solution object.
P.solution.precision_

In [None]:
# Tabulate the sparsity ('SP') and lambda ('LAMBDA') entries of the
# model-selection statistics, one row per grid point.
sgl_stats = pd.DataFrame({
    'SP': list(P.modelselect_stats["SP"]),
    'lambda': list(P.modelselect_stats["LAMBDA"]),
})

# Look up the row for the selected lambda. Exact `==` on floats silently returns
# an empty frame if the grid value differs in the last bits, so compare with a
# tolerance instead.
sgl_stats[np.isclose(sgl_stats["lambda"], 0.14237976633439411)]

The optimal lambda is 0.142, and the corresponding sparsity level is 8.5%.

### SGL + low rank

In [None]:
N = sub_corr.shape[1]

# Single candidate values for the lambda1 and mu1 grids used below.
# A finer mu1 grid tried earlier: np.logspace(0.8, 0.9, 30)
opt_lambda = [0.142379]
opt_mu = [7.94]

In [None]:
# Single-matrix problem with a latent (low-rank) component, on the first matrix.
# reg_params takes single values under the keys 'lambda1' and 'mu1'; the
# '*_range' keys and list-valued grids belong to modelselect_params, which is
# passed to model_selection() separately.
P_low = glasso_problem(
    sub_corr[0, :],
    N,
    reg_params={'lambda1': opt_lambda[0], 'mu1': opt_mu[0]},
    latent=True,
    do_scaling=False,
)
print(P_low)

In [None]:
# Grids for the low-rank model selection: one lambda1 and one mu1 candidate.
modelselect_params = dict(lambda1_range=opt_lambda, mu1_range=opt_mu)

In [None]:
# eBIC model selection (gamma = 0.3) over the grids in modelselect_params.
P_low.model_selection(
    modelselect_params=modelselect_params,
    method='eBIC',
    gamma=0.3,
)

print(P_low.reg_params)

In [None]:
# Show the statistics recorded on P_low by the model-selection run.
P_low.modelselect_stats

# 10 Samples

### SGL over separate 10 samples

In [None]:
N = sub_corr.shape[1]

# Ten log-spaced lambda1 candidates, descending from 10**-0.3 to 10**-1.
lambda1_range = np.logspace(start=-0.3, stop=-1, num=10)

In [None]:
# Separate single graphical lasso fits for the first 10 matrices.
# The slice is [:10] (not [:9]) so that it really covers the 10 samples this
# section is about, matching the earlier sub_corr[:10, :] runs.
est_uniform, est_indv, statistics = K_single_grid(
    sub_corr[:10, :],
    lambda1_range,
    N,
    method='eBIC',
    gamma=gamma,
    latent=False,
    use_block=True,
)

In [None]:
# Show the statistics returned by the K_single_grid run above.
statistics

In [None]:
# Pull the "Theta" entry out of the uniform estimate.
# Presumably the stacked precision matrices, one per instance — TODO confirm.
Theta = est_uniform["Theta"]

Theta.shape

In [None]:
# Side-by-side heatmaps: each input matrix next to its estimated Theta.
K = Theta.shape[0]

# Styling shared by both panels, so they stay on the same colour scale.
heatmap_kwargs = dict(center=0, vmin=-0.5, vmax=0.5, square=True, cbar=False,
                      cmap="coolwarm", xticklabels=False, yticklabels=False)

for i in range(K):
    fig, axes = plt.subplots(1, 2, sharex=True, figsize=(30, 30))

    ax = sns.heatmap(sub_corr[i, :], ax=axes[0], **heatmap_kwargs)
    ax.set_title('Covariance')

    ax = sns.heatmap(Theta[i, :], ax=axes[1], **heatmap_kwargs)
    ax.set_title('Inverse Covariance')

    # To save the figure, call fig.savefig(...) here, before it is shown and closed.

    # plt.show() renders under the notebook's inline backend, whereas fig.show()
    # only emits a "non-GUI backend" warning there. Closing the figure releases
    # its memory, which matters when one large figure is created per iteration.
    plt.show()
    plt.close(fig)

## The patterns are somewhat similar, which means we can solve the group graphical lasso problem

### SGL + low-rank over separate 10 samples

In [None]:
N = sub_corr.shape[1]
gamma = 0.3

# Small hand-picked grids for the lambda1 and mu1 search.
lambda1_range = [0.17, 0.14237977, 0.05]
mu1_range = [5, 7.94, 10]

In [None]:
# Separate latent-variable (sparse + low-rank) fits for the first 10 matrices.
# The slice is [:10] (not [:9]) so that it really covers the 10 samples this
# section is about.
low_est_uniform, low_est_indv, low_statistics = K_single_grid(
    sub_corr[:10, :],
    lambda1_range,
    N,
    mu_range=mu1_range,
    method='eBIC',
    gamma=gamma,
    latent=True,
    use_block=True,
)

In [None]:
# Show the statistics returned by the latent-variable K_single_grid run.
low_statistics

The solver always picks the largest mu, up to the point where the rank of the low-rank component becomes 0. Can we therefore conclude that there are no latent variables?

In [None]:
# Pull the "Theta" and "L" entries out of the latent-variable uniform estimate.
# Presumably the sparse and low-rank components, one matrix per instance — TODO confirm.
Theta = low_est_uniform["Theta"]
L = low_est_uniform["L"]

Theta.shape, L.shape

In [None]:
# Three-panel heatmaps per instance: input matrix, estimated Theta, and L.
K = Theta.shape[0]

# Styling shared by all panels, so they stay on the same colour scale.
heatmap_kwargs = dict(center=0, vmin=-0.5, vmax=0.5, square=True, cbar=False,
                      cmap="coolwarm", xticklabels=False, yticklabels=False)

for i in range(K):
    fig, axes = plt.subplots(1, 3, sharex=True, figsize=(30, 30))

    ax = sns.heatmap(sub_corr[i, :], ax=axes[0], **heatmap_kwargs)
    ax.set_title('Covariance')

    ax = sns.heatmap(Theta[i, :], ax=axes[1], **heatmap_kwargs)
    ax.set_title('Inverse Covariance')

    ax = sns.heatmap(L[i, :], ax=axes[2], **heatmap_kwargs)
    ax.set_title('Low-rank')

    # To save the figure, call
    # fig.savefig("plots/SGL_plots/SGL_lowrank_heatmap{0}.png".format(i))
    # here, before it is shown and closed.

    # plt.show() renders under the notebook's inline backend, whereas fig.show()
    # only emits a "non-GUI backend" warning there. Closing the figure releases
    # its memory, which matters when one large figure is created per iteration.
    plt.show()
    plt.close(fig)

In [None]:
# NOTE(review): this shows `statistics` (from the non-latent K_single_grid run),
# not `low_statistics` from the latent run in this section — confirm which is intended.
statistics

## The low-rank matrix is empty, so there are no latent confounders!

### MGL (Group Graphical Lasso)

In [None]:
# lambda1_range = [0.17, 0.14237977, 0.1]
lambda1_range = np.logspace(-0.8, -1.1, 5)
lambda2_range = np.logspace(-0.8, -1.1, 5)
N = sub_corr.shape[1]

In [None]:
# Group Graphical Lasso problem over the first 10 matrices. The slice is [:10]
# (not [:9]) so that it really covers the 10 samples this section is about.
# reg_params takes single starting values under the keys 'lambda1'/'lambda2';
# the '*_range' grids belong in modelselect_params and go to model_selection().
P_10 = glasso_problem(
    sub_corr[:10, :],
    N,
    reg_params={'lambda1': lambda1_range[0], 'lambda2': lambda2_range[0]},
    latent=False,
    do_scaling=False,
)
print(P_10)

In [None]:
# eBIC weight and the two penalty grids handed to model selection.
gamma = 0.3
modelselect_params = dict(lambda1_range=lambda1_range, lambda2_range=lambda2_range)

In [None]:
# Show the grids that will be passed to model selection.
modelselect_params

In [None]:
# Run eBIC-based model selection over the lambda1/lambda2 grid.
P_10.model_selection(
    modelselect_params=modelselect_params,
    method='eBIC',
    gamma=gamma,
)

# Author's note: after model selection, reg_params holds the best values found.
print(P_10.reg_params)

In [None]:
# Scratch check: a 10-point version of the same log-spaced range.
# The result is only displayed, not assigned.
np.logspace(-0.8, -1.1, 10)

In [None]:
# Keep the model-selection statistics of P_10 for the shape checks and surface plot below.
statistics_10 = P_10.modelselect_stats

In [None]:
# Solve the problem; presumably uses the values currently in P_10.reg_params — TODO confirm.
P_10.solve()

In [None]:
# Shape of the precision array stored on P_10's solution.
P_10.solution.precision_.shape

In [None]:
# Side-by-side heatmaps: each input matrix next to the corresponding slice of
# P_10's estimated precision array.
K = P_10.solution.precision_.shape[0]

# Styling shared by both panels, so they stay on the same colour scale.
heatmap_kwargs = dict(center=0, vmin=-0.5, vmax=0.5, square=True, cbar=False,
                      cmap="coolwarm", xticklabels=False, yticklabels=False)

for i in range(K):
    fig, axes = plt.subplots(1, 2, sharex=True, figsize=(30, 30))

    ax = sns.heatmap(sub_corr[i, :], ax=axes[0], **heatmap_kwargs)
    ax.set_title('Covariance')

    ax = sns.heatmap(P_10.solution.precision_[i, :], ax=axes[1], **heatmap_kwargs)
    ax.set_title('Inverse Covariance')

    # plt.show() renders under the notebook's inline backend, whereas fig.show()
    # only emits a "non-GUI backend" warning there. Closing the figure releases
    # its memory, which matters when one large figure is created per iteration.
    plt.show()
    plt.close(fig)

In [None]:
# Compare the shapes of the 'L1' and 'L2' entries with the 'BIC' entry for key 0.3.
# Presumably the lambda1/lambda2 grids and the eBIC values for gamma = 0.3 — TODO confirm.
statistics_10['L1'].shape, statistics_10['L2'].shape, statistics_10['BIC'][0.3].shape

In [None]:
# Shape of the 'BIC' entry for key 0.3 (the same value appears in the preceding cell's tuple).
statistics_10['BIC'][0.3].shape

In [None]:
#plot for GGL
fig = surface_plot(statistics_10['L1'], statistics_10['L2'], statistics_10['BIC'][0.7], name = 'eBIC')
fig.savefig("plots/GGL_l1_l2_10.png")