In [None]:
import gglasso
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt

from numpy import genfromtxt
from matplotlib.pyplot import figure
from datetime import datetime
from gglasso.problem import glasso_problem
from gglasso.helper.model_selection import K_single_grid
from gglasso.helper.experiment_helper import surface_plot, lambda_grid

In [None]:
def read_sub_corr(K: int, path: str) -> np.ndarray:
    """Load the first ``K`` comma-separated matrices addressed by a path template.

    Parameters
    ----------
    K : int
        Number of files to read. The indices ``0, 1, ..., K-1`` are substituted
        into ``path`` one after another.
    path : str
        ``str.format`` template with one positional placeholder for the file
        index, e.g. ``"../data/sub_corr50/sub_corr{0}.csv"``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` built from the parsed files in index order; when all files
        have the same shape this is a stack along a new leading axis.
    """
    # The parameters used to be declared as `K=int, path=str`, i.e. the type
    # objects were default *values*. Those defaults could never work
    # (`range(0, int)` raises TypeError), so they are now proper annotations
    # and both arguments are required.
    matrices = [genfromtxt(path.format(i), delimiter=',') for i in range(K)]

    return np.array(matrices)

In [None]:
def plot_solution(solution: np.ndarray, show_output: bool = True, name: str = "solution",
                  covariance=None):
    """Save side-by-side heatmaps of covariance and solution matrices, one figure per instance.

    For every index ``i`` along the first axis of ``solution`` a figure with two
    panels is created: ``covariance[i]`` on the left (titled 'Covariance') and
    ``solution[i]`` on the right (titled 'Inverse Covariance'). Each figure is
    written to ``../../plots/{name}_heatmap{i}.png``.

    Parameters
    ----------
    solution : numpy.ndarray
        Stack of matrices indexed along axis 0.
    show_output : bool, default True
        If False, each figure is closed right after it has been saved, so it is
        not rendered and does not accumulate in memory. The original
        implementation accepted this flag but never used it.
    name : str, default "solution"
        Prefix of the output file names. (The previous default was the type
        object ``str``, which produced files named ``<class 'str'>_heatmap…``.)
    covariance : numpy.ndarray, optional
        Stack of matrices for the left panel, indexed like ``solution``. When
        omitted, the notebook-level variable ``sub_corr`` is used, which is what
        the original implementation always did implicitly.
    """
    # Fall back to the notebook global so existing calls behave as before.
    if covariance is None:
        covariance = sub_corr

    K = solution.shape[0]

    # Both panels share the same colour scale and layout options.
    heatmap_kwargs = dict(center=0, vmin=-0.5, vmax=0.5, square=True, cbar=False,
                          cmap="coolwarm", xticklabels=False, yticklabels=False)

    for i in range(K):
        fig, axes = plt.subplots(1, 2, sharex=True, figsize=(30, 30))

        ax = sns.heatmap(covariance[i, :], ax=axes[0], **heatmap_kwargs)
        ax.set_title('Covariance')

        ax = sns.heatmap(solution[i, :], ax=axes[1], **heatmap_kwargs)
        ax.set_title('Inverse Covariance')

        fig.savefig("../../plots/{0}_heatmap{1}.png".format(name, i))

        if not show_output:
            plt.close(fig)

### Read data

#### Remove outliers

The outliers were identified by manually inspecting the SGL solutions for all samples.

In [None]:
# Indices of all 950 samples and of the samples flagged as outliers.
corr_all_ix = np.arange(0, 950)
outliers_ix = [96, 144, 210, 522]

# Keep every index that is not an outlier (boolean mask, original order preserved).
corr_filtered_ix = corr_all_ix[~np.isin(corr_all_ix, outliers_ix)]
corr_filtered_ix.shape

In [None]:
# Root directory that the cells below prefix to "corr_matrices/corr{i}.csv".
# NOTE(review): absolute, cluster-specific path — presumably only resolves where
# this filesystem is mounted; consider making it configurable.
storage_dir = "/lustre/groups/bds01/datasets/brains/"

In [None]:
# Read the correlation matrix of every outlier sample and stack them into one array.
outliers = np.array([
    genfromtxt(storage_dir + "corr_matrices/corr{0}.csv".format(ix), delimiter=',')
    for ix in outliers_ix
])
outliers.shape

#### HMGU cluster

In [None]:
# Read the correlation matrices of the first 50 non-outlier samples from the
# cluster storage and stack them into one array.
corr = np.array([
    genfromtxt(storage_dir + "corr_matrices/corr{0}.csv".format(ix), delimiter=',')
    for ix in corr_filtered_ix[:50]
])
corr.shape

#### On premises

In [None]:
# Index window [start, stop) of the locally stored sample files.
start = 0
stop = 50

# Parse one CSV per index from the local data directory and stack the results.
sub_corr = np.array([
    genfromtxt("../data/sub_corr50/sub_corr{0}.csv".format(idx), delimiter=',')
    for idx in range(start, stop)
])
sub_corr.shape

In [None]:
# Save heatmap figures for the first two matrices under the "SGL" file prefix.
# NOTE(review): `solution` is a slice of `sub_corr` itself, and plot_solution also
# draws `sub_corr[i]` in its left panel, so both panels show the same matrix here
# even though the right one is titled 'Inverse Covariance' — confirm this is intended.
plot_solution(solution=sub_corr[:2,:], name="SGL")

### Group GL

# GGL

In [None]:
# N is passed as the second positional argument to glasso_problem below.
# NOTE(review): presumably the sample size per instance; 436 is kept as a
# commented-out alternative — confirm which value is correct.
# N = 436
N = 5
# gamma is handed to P_50.model_selection together with method='eBIC'.
gamma = 0.3

# Earlier log-spaced grid definitions, kept for reference; the hard-coded
# lists below are the values actually used.
# lambda1_range = np.logspace(0.5, -5, 5)
# lambda2_range = np.logspace(0.1, -3, 3)

# Regularization grids, five values each, in decreasing order.
lambda1_range = [0.89442719, 0.28284271, 0.08944272, 0.02828427, 0.00894427]
lambda2_range = [0.31622777, 0.1, 0.03162278, 0.01, 0.00316228]

In [None]:
# Set up the graphical-lasso problem on the stacked correlation matrices,
# without latent variables and without rescaling the input.
# NOTE(review): reg_params is given the '*_range' grid keys here; confirm against
# the gglasso documentation whether reg_params expects these or scalar
# 'lambda1'/'lambda2' entries.
P_50 = glasso_problem(
    corr,
    N,
    reg_params=dict(lambda1_range=lambda1_range, lambda2_range=lambda2_range),
    latent=False,
    do_scaling=False,
)

In [None]:
# Grids of regularization values handed to P_50.model_selection.
modelselect_params = dict(lambda1_range=lambda1_range, lambda2_range=lambda2_range)

In [None]:
# Run model selection over the grids in modelselect_params using the eBIC
# criterion with the gamma defined above. Later cells read P_50.solution and
# P_50.modelselect_stats, which are presumably populated by this call.
P_50.model_selection(modelselect_params = modelselect_params, method = 'eBIC', gamma = gamma)

In [None]:
# Take the `precision_` attribute of the problem's solution object and
# display its shape as the cell output.
sol = P_50.solution.precision_
sol.shape

In [None]:
# Dump the model-selection statistics as plain text for later inspection.
with open('statistics_GGL.txt', mode='w') as stats_file:
    stats_file.write(str(P_50.modelselect_stats) + "\n")

In [None]:
# Shorthand for the model-selection statistics; the plotting cell below reads
# its 'L1', 'L2' and 'BIC' entries.
stats = P_50.modelselect_stats

In [None]:
# Plot the eBIC surface over the (lambda1, lambda2) grid for GGL.
# The surface is indexed with the same `gamma` that model selection was run
# with; the previous hard-coded key 0.7 showed a different criterion than the
# one that actually picked the model (gamma = 0.3).
fig = surface_plot(stats['L1'], stats['L2'], stats['BIC'][gamma], name = 'eBIC')
fig.show()

# fig.savefig("../../plots/eBIC.png")

## Never reach optimum