In [None]:
import gglasso
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import os
import matplotlib.pyplot as plt

from numpy import genfromtxt
from matplotlib.pyplot import figure
from datetime import datetime
from gglasso.helper.model_selection import K_single_grid

In [None]:
def calculate_edge_probablity(data, indices):
    """Estimate how often each matrix entry is non-zero across selected matrices.

    Parameters
    ----------
    data : numpy.ndarray of shape (K, p, q)
        Stack of K matrices, indexed along the first axis.
    indices : sized iterable of int
        Positions along the first axis of ``data`` to include.

    Returns
    -------
    pandas.DataFrame of shape (p, q)
        Entry ``[r, c]`` is the fraction of the selected matrices whose
        ``[r, c]`` entry differs from zero.

    Raises
    ------
    ValueError
        If ``indices`` is empty (the fraction would be 0/0).
    """
    n_selected = len(indices)
    if n_selected == 0:
        raise ValueError("indices must contain at least one matrix index")

    # Size the accumulator from the data rather than hard-coding 436 x 436,
    # so the function works for matrices of any dimension.
    P = np.zeros(data.shape[1:])

    for i in indices:
        # 1 where the entry is non-zero, 0 elsewhere.
        P = P + (data[i] != 0).astype(int)

    return pd.DataFrame(P / n_selected)

### Read data

#### Remove outliers

The outliers were identified by manually inspecting the SGL solutions of all samples.

In [None]:
# All sample indices 0..949 and the hand-picked outlier samples to exclude.
corr_all_ix = np.arange(0, 950)
outliers_ix = [96, 144, 210, 522]

# Keep every index that is not listed as an outlier.
keep_mask = ~np.isin(corr_all_ix, outliers_ix)
corr_filtered_ix = corr_all_ix[keep_mask]
corr_filtered_ix.shape

In [None]:
# Root directory of the brain dataset; the correlation matrices are read from
# its "corr_matrices/" subfolder by the loading cells of this notebook.
# NOTE(review): other cells hard-code "/storage/groups/bds01/datasets/brains/"
# instead of this "/lustre/..." root — confirm which mount is current.
storage_dir = "/lustre/groups/bds01/datasets/brains/"

In [None]:
# Read the correlation matrix of every outlier sample and stack them into one array.
outliers = np.array([
    genfromtxt(storage_dir + "corr_matrices/corr{0}.csv".format(sample_ix), delimiter=',')
    for sample_ix in outliers_ix
])
outliers.shape

In [None]:
# Draw the outlier correlation matrices on a 2x2 grid and save the figure.
fig, axes = plt.subplots(2, 2, sharex=True, figsize=(30,30))

# Settings shared by every panel: symmetric colour scale clipped to
# [-0.5, 0.5], square cells, no colour bar and no tick labels.
heatmap_style = dict(center=0, vmin=-0.5, vmax=0.5, square=True, cbar=False,
                     cmap="coolwarm", xticklabels=False, yticklabels=False)

# axes.flat walks the grid row by row, so panel order matches outliers_ix.
# zip stops at the shorter input: at most four matrices are drawn.
for ax, matrix, sample_ix in zip(axes.flat, outliers, outliers_ix):
    sns.heatmap(matrix, ax=ax, **heatmap_style)
    ax.set_title('outlier_{0}'.format(sample_ix))

fig.savefig("../../plots/outliers_heatmap.png")

### HMGU cluster

In [None]:
# On cloud: load only the first ten non-outlier correlation matrices.
corr = np.array([
    genfromtxt(storage_dir + "corr_matrices/corr{0}.csv".format(sample_ix), delimiter=',')
    for sample_ix in corr_filtered_ix[:10]
])
corr.shape

### On premises

In [None]:
# Half-open range of local sub-correlation files to load.
start = 0
stop = 50

# On premises: read the files relative to this notebook and stack them.
sub_corr = np.array([
    genfromtxt("../../data/sub_corr50/sub_corr{0}.csv".format(file_ix), delimiter=',')
    for file_ix in range(start, stop)
])
sub_corr.shape

### Single GL

### On premises

In [None]:
# Regularisation grids for the on-premises run:
# five log-spaced values from 10**0 down to 10**-1 for each parameter.
lambda1_range = np.logspace(start=0, stop=-1, num=5)
mu1_range = np.logspace(start=0, stop=-1, num=5)

In [None]:
# N is forwarded to K_single_grid as its third argument. It is taken from the
# second axis of sub_corr, i.e. the side length of each loaded matrix.
# NOTE(review): K_single_grid presumably expects the number of observations
# behind each matrix here, not the matrix dimension — confirm.
N = sub_corr.shape[1]

In [None]:
# Grid search over lambda1_range x mu1_range with eBIC model selection.
# Only the first matrix of sub_corr is passed (the [:1] slice keeps the
# leading axis, so the input is still a stack of matrices).
est_uniform, est_indv, statistics = K_single_grid(
    sub_corr[:1],
    lambda1_range,
    N,
    mu_range=mu1_range,
    method='eBIC',
    gamma=0.3,
    latent=True,
    use_block=True,
)

In [None]:
# Number of estimated matrices: length of the first axis of the "Theta" entry
# of the uniform estimate. Used as the loop bound when exporting to CSV.
K = est_uniform["Theta"].shape[0]

In [None]:
# Dump the matrices into CSV files: for each index, Theta<i>.csv then L<i>.csv.
export_targets = (
    ("Theta", "/storage/groups/bds01/datasets/brains/est_uniform_latent50/Theta{0}.csv"),
    ("L", "/storage/groups/bds01/datasets/brains/est_uniform_latent50/L{0}.csv"),
)

for idx in range(K):
    for key, path_template in export_targets:
        np.savetxt(path_template.format(idx), est_uniform[key][idx],
                   delimiter=",", header='')

In [None]:
# !jupyter nbconvert --to script --no-prompt SGL_latent.ipynb

In [None]:
# Persist the text representation of the grid-search statistics.
stats_path = '/storage/groups/bds01/datasets/brains/statistics_SGL_latent50.txt'
with open(stats_path, 'w') as out_file:
    out_file.write(str(statistics) + "\n")

### HMGU cluster

In [None]:
# Regularisation grids for the cluster run:
# four log-spaced lambda1 values from 10**-0.9 down to 10**-1.5, and
# mu1 values from 10.75 down to 6.25 in steps of 0.5 (descending order).
lambda1_range = np.logspace(start=-0.9, stop=-1.5, num=4)
mu1_ascending = np.arange(6.25, 11, 0.5)
mu1_range = mu1_ascending[::-1]

In [None]:
# K: number of loaded correlation matrices.
# N: one entry per matrix, each equal to the matrix side length.
# NOTE(review): K_single_grid presumably expects sample sizes in N rather
# than the matrix dimension — confirm.
K = corr.shape[0]
N = [corr.shape[1] for _ in range(K)]

In [None]:
# Time the grid search on the first two correlation matrices.
start_time = datetime.now()

low_est_uniform, low_est_indv, low_statistics = K_single_grid(
    corr[:2],
    lambda1_range,
    N[:2],
    mu_range=mu1_range,
    method='eBIC',
    gamma=0.3,
    latent=True,
    use_block=True,
)

end_time = datetime.now()
run_time = end_time - start_time

# Store the elapsed wall-clock time with the statistics and report it.
low_statistics['time'] = run_time
print("--- TIME: {0} ---".format(run_time))

### Read solution

In [None]:
# Re-load the 50 stored estimates from disk, reading Theta<i>.csv and then
# L<i>.csv for each index i.
loaded_pairs = [
    (
        genfromtxt("/storage/groups/bds01/datasets/brains/est_uniform_latent50/Theta{0}.csv".format(i), delimiter=','),
        genfromtxt("/storage/groups/bds01/datasets/brains/est_uniform_latent50/L{0}.csv".format(i), delimiter=','),
    )
    for i in range(50)
]

# Stack the per-index matrices into two arrays.
Theta = np.array([theta for theta, _ in loaded_pairs])
L = np.array([low_rank for _, low_rank in loaded_pairs])
Theta.shape, L.shape

In [None]:
# Number of loaded solutions (length of the first axis of Theta); used as the
# loop bound for the per-sample plots.
K = Theta.shape[0]

### Plot the solution and low rank

In [None]:
# For every sample, save a three-panel figure showing the input matrix, the
# estimated Theta and the estimated L side by side.

# Settings shared by every panel: symmetric colour scale clipped to
# [-0.5, 0.5], square cells, no colour bar and no tick labels.
heatmap_style = dict(center=0, vmin=-0.5, vmax=0.5, square=True, cbar=False,
                     cmap="coolwarm", xticklabels=False, yticklabels=False)

for i in range(0, K):
    fig, axes = plt.subplots(1, 3, sharex=True, figsize=(30,30))

    panels = (
        ('Covariance', sub_corr[i]),
        ('Inverse Covariance', Theta[i]),
        ('Low-rank', L[i]),
    )
    for ax, (title, matrix) in zip(axes, panels):
        sns.heatmap(matrix, ax=ax, **heatmap_style)
        ax.set_title(title)

    fig.savefig("plots/SGL_plots/SGL_lowrank_heatmap{0}.png".format(i))

    # Release the figure once it is on disk. Without this, every iteration
    # leaves a 30x30-inch figure open and memory grows with K.
    plt.close(fig)