In [None]:
import gglasso
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import scipy as sp
import matplotlib.pyplot as plt
import os

from numpy.linalg import matrix_rank
from matplotlib.pyplot import figure
from scipy import stats
from scipy.linalg import eigh
from numpy import genfromtxt

from gglasso.solver.admm_solver import ADMM_MGL
from gglasso.problem import glasso_problem

from gglasso.helper.data_generation import generate_precision_matrix, group_power_network, sample_covariance_matrix
from gglasso.helper.basic_linalg import adjacency_matrix
from gglasso.helper.data_generation import time_varying_power_network, sample_covariance_matrix
from gglasso.helper.experiment_helper import lambda_grid, discovery_rate, error
from gglasso.helper.utils import get_K_identity
from gglasso.helper.experiment_helper import plot_evolution, plot_deviation, surface_plot, single_heatmap_animation
from gglasso.helper.model_selection import aic, ebic, K_single_grid

### Read data

In [None]:
# Subject range to load: files corr{start}.csv ... corr{stop - 1}.csv.
start = 0
stop = 2

corr_path = "/storage/groups/bds01/datasets/brains/corr_matrices/corr{0}.csv"

# Read one comma-separated matrix per subject and stack them along a new leading axis.
sub_corr = np.array([genfromtxt(corr_path.format(i), delimiter=',')
                     for i in range(start, stop)])
sub_corr.shape

In [None]:
# Small two-point lambda grid for a quick end-to-end test run.
lambda1_range = np.logspace(-0.9, -1.5, 2)

# NOTE(review): N is taken from the matrix dimension and passed as the third argument
# of K_single_grid — confirm that this is the value the solver expects there.
N = sub_corr.shape[1]


est_uniform, est_indv, statistics = K_single_grid(sub_corr, lambda1_range, N,
                                                  method = 'eBIC', gamma = 0.3,
                                                  latent = False, use_block = True)


# Tag appended to the output directory names and to the statistics file of this run.
K = "test"

uniform_dir = "/storage/groups/bds01/datasets/brains/est_uniform{0}/".format(K)
individ_dir = "/storage/groups/bds01/datasets/brains/est_individ{0}/".format(K)

# exist_ok=True keeps the cell re-runnable; os.mkdir raises FileExistsError on a second run.
os.makedirs(uniform_dir, exist_ok=True)
os.makedirs(individ_dir, exist_ok=True)

# dump matrices into csv
# `row` is the position inside the estimates, `subject` is the id used in the file name.
# The two only coincide when start == 0, so they are tracked separately.
for row, subject in enumerate(range(start, stop)):
    np.savetxt(os.path.join(uniform_dir, "est_uniform{0}.csv".format(subject)), est_uniform["Theta"][row],
               delimiter=",", header='')
    np.savetxt(os.path.join(individ_dir, "est_individ{0}.csv".format(subject)), est_indv["Theta"][row],
               delimiter=",", header='')

# Written relative to the current working directory of the kernel.
with open("statistics{0}.txt".format(K), 'w') as f:
    print(statistics, file=f)

### SGL

In [None]:
# Ten log-spaced candidates for lambda1, from 10**-0.9 down to 10**-1.5.
lambda1_range = np.logspace(start=-0.9, stop=-1.5, num=10)
lambda1_range

In [None]:
N = sub_corr.shape[1]


# Run K_single_grid over lambda1_range on at most the first 10 matrices of sub_corr,
# with method 'eBIC' and gamma = 0.3. The three returned objects overwrite the
# est_uniform / est_indv / statistics of the earlier test run.
est_uniform, est_indv, statistics = K_single_grid(sub_corr[:10,:], lambda1_range, N, 
                                                  method = 'eBIC', gamma = 0.3, 
                                                  latent = False, use_block = True)


In [None]:
# Display the third object returned by the K_single_grid call above.
statistics

In [None]:
N = sub_corr.shape[1]

# NOTE(review): 'lambda1' receives the whole grid (an array) here — presumably a single
# value is expected and it is replaced by model selection later; confirm against the gglasso docs.
# NOTE(review): sub_corr[:10,:] is a stack of matrices, not a single one — confirm that this
# is the intended input for the section titled "SGL".
P = glasso_problem(sub_corr[:10,:], N, reg_params = {'lambda1': lambda1_range}, latent = False, do_scaling = False)
print(P)

In [None]:
# Grid handed to P.model_selection below, and the gamma used with method 'eBIC'.
modelselect_params = {'lambda1_range': lambda1_range}
gamma = 0.3

In [None]:
# Model selection over modelselect_params using eBIC with the gamma set above.
P.model_selection(modelselect_params = modelselect_params, method = 'eBIC', gamma = gamma)

# regularization parameters are set to the best ones found during model selection
print(P.reg_params)

In [None]:
# Display the precision_ attribute of the fitted solution.
# NOTE(review): this prints the whole array; `.shape` or a slice may be easier to read.
P.solution.precision_

In [None]:
# One row per entry of the model-selection statistics: the "SP" value and its "LAMBDA" value.
sgl_stats = pd.DataFrame({'SP': list(P.modelselect_stats["SP"]), 'lambda': list(P.modelselect_stats["LAMBDA"])})

# Exact `==` on floats misses the row as soon as the grid value differs in the last bits
# (different numpy version or platform), so the selected lambda is matched with a tolerance.
# Neighbouring grid points differ by roughly 17%, far more than the default tolerance.
sgl_stats[np.isclose(sgl_stats["lambda"], 0.14237976633439411)]

The optimal lambda is 0.142, and the corresponding sparsity level is 8.5%.

### SGL + low rank

In [None]:
# Single-value grids: the (rounded) lambda selected in the SGL section and one fixed mu.
opt_lambda = [0.142379]
# mu1_range = np.logspace(0.8, 0.9, 30)
opt_mu = [7.94]

N = sub_corr.shape[1]

In [None]:
# Problem on the first matrix only (sub_corr[0,:]) with latent = True.
# NOTE(review): reg_params uses the key 'mu1_range' and list values; presumably the
# expected key is 'mu1' with a scalar — confirm against the gglasso docs.
P_low = glasso_problem(sub_corr[0,:], N, reg_params = {'lambda1': opt_lambda, 'mu1_range': opt_mu},
                       latent = True, do_scaling = False)
print(P_low)

In [None]:
# Overwrites the modelselect_params of the SGL section with the single-value grids.
modelselect_params = {'lambda1_range': opt_lambda, 'mu1_range': opt_mu}

In [None]:
# Model selection on the single-point grid, then show the resulting parameters.
P_low.model_selection(modelselect_params = modelselect_params, method = 'eBIC', gamma = 0.3)

print(P_low.reg_params)

In [None]:
# Display the statistics recorded on P_low by the model selection above.
P_low.modelselect_stats

# 10 Samples

### SGL over 10 separate samples

In [None]:
# New 10-point lambda grid, from 10**-0.3 down to 10**-1; replaces the earlier lambda1_range.
lambda1_range = np.logspace(-0.3, -1, 10)
N = sub_corr.shape[1]

In [None]:
# NOTE(review): the slice [:9] selects at most 9 matrices, while the section title says 10 — confirm.
# `gamma` is the value assigned in an earlier cell; this cell does not set it.
est_uniform, est_indv, statistics = K_single_grid(sub_corr[:9, :], lambda1_range, N,
                                                  method = 'eBIC', gamma = gamma, 
                                                  latent = False, use_block = True)

In [None]:
# Display the statistics returned by the K_single_grid call above.
statistics

In [None]:
# The "Theta" entry of est_uniform from the grid run above; plotted in the next cell.
Theta = est_uniform["Theta"]

Theta.shape

In [None]:
# One figure per matrix in Theta: the input matrix on the left, the estimate on the right.
K = Theta.shape[0]


for i in range(K):
    fig, axes = plt.subplots(1, 2, sharex=True, figsize=(30,30))

    ax = sns.heatmap(sub_corr[i, :], ax=axes[0], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Covariance')

    ax = sns.heatmap(Theta[i, :], ax=axes[1], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Inverse Covariance')

    # fig.savefig("plots/SGL_plots/SGL_lowrank_heatmap{0}.png".format(i))

    # fig.show() only works with a GUI backend; on the notebook's inline backend it just
    # emits a warning. plt.show() renders the figure, and close() releases its memory so
    # the 30x30 inch figures do not pile up across iterations.
    plt.show()
    plt.close(fig)

## The patterns are somewhat similar, which means we can solve a group graphical lasso problem

### SGL + low-rank over 10 separate samples

In [None]:
# Four log-spaced lambda values, from 10**-0.9 down to 10**-1.5.
lambda1_range = np.logspace(-0.9, -1.5, 4)
# mu1_range = np.linspace(8.75, 6.25, 5)
# mu values 6.25, 6.75, ..., 10.75, reversed so the grid runs from largest to smallest.
mu1_range = np.arange(6.25, 11,  0.5)[::-1]
gamma = 0.3

N = sub_corr.shape[1]

In [None]:
# Same grid search as in the SGL section, now with latent = True and a mu grid.
# Runs on all of sub_corr, not on a slice.
low_est_uniform, low_est_indv, low_statistics = K_single_grid(sub_corr, lambda1_range, N,
                                                  mu_range = mu1_range,
                                                  method = 'eBIC', gamma = gamma, 
                                                  latent = True, use_block = True)

In [None]:
# Display the "Theta" entry of the second object returned by the latent grid run.
low_est_indv["Theta"]

In [None]:
# Display the statistics of the latent grid run.
low_statistics

The solver always picks the largest mu, until the rank becomes 0. Can we therefore conclude that there are no latent variables?

In [None]:
# "Theta" and "L" entries of the latent grid result; both are plotted in the next cell.
# Theta replaces the variable of the same name from the SGL section.
Theta = low_est_uniform["Theta"]
L = low_est_uniform["L"]

Theta.shape, L.shape

In [None]:
# One figure per matrix in Theta: input matrix, estimate, and the matching slice of L.
K = Theta.shape[0]


for i in range(K):
    fig, axes = plt.subplots(1, 3, sharex=True, figsize=(30,30))

    ax = sns.heatmap(sub_corr[i, :], ax=axes[0], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Covariance')

    ax = sns.heatmap(Theta[i, :], ax=axes[1], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Inverse Covariance')

    ax = sns.heatmap(L[i, :], ax=axes[2], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Low-rank')

    # fig.savefig("plots/SGL_plots/SGL_lowrank_heatmap{0}.png".format(i))

    # fig.show() only works with a GUI backend; on the notebook's inline backend it just
    # emits a warning. plt.show() renders the figure, and close() releases its memory so
    # the 30x30 inch figures do not pile up across iterations.
    plt.show()
    plt.close(fig)

In [None]:
# NOTE(review): `statistics` comes from the earlier non-latent grid run; the latent run
# stored its result in `low_statistics` — confirm which one is meant to be shown here.
statistics

## The low-rank matrix is empty, so there are no latent confounders!

### MGL (Group Graphical Lasso)

In [None]:
# lambda1_range = [0.17, 0.14237977, 0.1]
# Two identical 5-point log-spaced grids, from 10**-0.8 down to 10**-1.1.
lambda1_range = np.logspace(-0.8, -1.1, 5)
lambda2_range = np.logspace(-0.8, -1.1, 5)
N = sub_corr.shape[1]

In [None]:
# NOTE(review): the slice [:9] selects at most 9 matrices although the variable is named P_10 — confirm.
# NOTE(review): reg_params is given the '*_range' keys and arrays; presumably the expected
# keys are 'lambda1' / 'lambda2' with scalars — confirm against the gglasso docs.
P_10 = glasso_problem(sub_corr[:9,:], N, reg_params = {'lambda1_range': lambda1_range, 'lambda2_range': lambda2_range}, 
                      latent = False, do_scaling = False)
print(P_10)

In [None]:
# Two-dimensional grid for P_10.model_selection, and the gamma used with method 'eBIC'.
modelselect_params = {'lambda1_range': lambda1_range, 'lambda2_range': lambda2_range}
gamma = 0.3

In [None]:
# Display the grid that will be searched.
modelselect_params

In [None]:
# Model selection over the lambda1 / lambda2 grid using eBIC with the gamma set above.
P_10.model_selection(modelselect_params = modelselect_params, method = 'eBIC', gamma = gamma)

# regularization parameters are set to the best ones found during model selection
print(P_10.reg_params)

In [None]:
# Scratch check: a 10-point grid over the same exponent range; the result is only displayed, not stored.
np.logspace(-0.8, -1.1, 10)

In [None]:
# Keep the model-selection statistics of P_10 for the shape checks and the surface plot below.
statistics_10 = P_10.modelselect_stats

In [None]:
# Solve P_10 with its current reg_params (printed after model selection above).
P_10.solve()

In [None]:
# Shape of the estimated precision array; its first axis is used as K in the next cell.
P_10.solution.precision_.shape

In [None]:
# One figure per matrix of the P_10 solution: input matrix on the left, estimate on the right.
K = P_10.solution.precision_.shape[0]


for i in range(K):
    fig, axes = plt.subplots(1, 2, sharex=True, figsize=(30,30))

    ax = sns.heatmap(sub_corr[i, :], ax=axes[0], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Covariance')

    ax = sns.heatmap(P_10.solution.precision_[i, :], ax=axes[1], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Inverse Covariance')

    # fig.show() only works with a GUI backend; on the notebook's inline backend it just
    # emits a warning. plt.show() renders the figure, and close() releases its memory so
    # the 30x30 inch figures do not pile up across iterations.
    plt.show()
    plt.close(fig)

In [None]:
# Shapes of the 'L1' and 'L2' entries and of the 'BIC' entry stored under key 0.3.
statistics_10['L1'].shape, statistics_10['L2'].shape, statistics_10['BIC'][0.3].shape

In [None]:
# Same 'BIC' shape as in the previous cell, shown on its own.
statistics_10['BIC'][0.3].shape

In [None]:
#plot for GGL
# Plot the eBIC surface for the same gamma that was passed to P_10.model_selection.
# The previous hard-coded key 0.7 showed a different criterion from the one that picked the model.
fig = surface_plot(statistics_10['L1'], statistics_10['L2'], statistics_10['BIC'][gamma], name = 'eBIC')

# savefig does not create missing directories.
os.makedirs("plots", exist_ok=True)
fig.savefig("plots/GGL_l1_l2_10.png")

### Read solution

In [None]:
# Input matrices and previously saved estimates, one CSV file per subject id 0..950.
corr_template = "/storage/groups/bds01/datasets/brains/corr_matrices/corr{0}.csv"
sol_template = "/storage/groups/bds01/datasets/brains/est_uniform/est_uniform{0}.csv"

corr, sol = [], []

for i in range(951):
    corr.append(genfromtxt(corr_template.format(i), delimiter=','))
    sol.append(genfromtxt(sol_template.format(i), delimiter=','))

In [None]:
# Turn the two lists of per-subject matrices into arrays with the subject on the first axis.
# Re-running this cell is harmless: np.array on an array just copies it.
sol = np.array(sol)
corr = np.array(corr)
corr.shape, sol.shape

In [None]:
# Subject-level tables for the two splits.
df_train = pd.read_csv("/storage/groups/bds01/datasets/brains/train.csv", sep=',')
df_test = pd.read_csv("/storage/groups/bds01/datasets/brains/submission_valid.csv", sep=',')

# Train rows first, then validation rows; ignore_index=True renumbers the rows 0..n-1.
# NOTE(review): later cells use these row numbers to index `sol`, which assumes the row
# order matches the numbering of the corr/est_uniform files — confirm.
df = pd.concat([df_train, df_test], ignore_index=True)

In [None]:
# Number of rows and columns of the combined table.
df.shape

In [None]:
# Count of missing values in the sex column; such rows end up in neither index array below.
df["sex_f0_m1"].isna().sum()

In [None]:
# reset_index() turns the row labels into an explicit "index" column next to the sex code.
sex = df["sex_f0_m1"].reset_index()

is_male = sex["sex_f0_m1"] == 1.0
is_female = sex["sex_f0_m1"] == 0.0

# Row numbers of the male (code 1) and female (code 0) subjects.
male_ixs = np.array(sex.loc[is_male, "index"])
female_ixs = np.array(sex.loc[is_female, "index"])

male_ixs.shape, female_ixs.shape

In [None]:
def calculate_edge_probablity(data, indices):
    """Return, per matrix entry, the share of the selected matrices in which it is non-zero.

    Parameters
    ----------
    data : array-like of shape (K, p, q)
        Stack of K matrices. An entry counts as an edge when it is != 0.
    indices : sequence of int
        Positions along the first axis of `data` to include. Repeated positions
        are counted once per occurrence.

    Returns
    -------
    pandas.DataFrame of shape (p, q)
        Entry (r, c) is the fraction of the selected matrices with a non-zero (r, c) entry.

    Raises
    ------
    ValueError
        If `indices` is empty, because the fraction would be 0/0.
    """
    data = np.asarray(data)
    n_selected = len(indices)

    if n_selected == 0:
        raise ValueError("indices must contain at least one entry")

    # The matrix size is taken from the data instead of a fixed 436 x 436.
    counts = np.zeros(data.shape[1:], dtype=int)

    # Accumulate one matrix at a time: fancy-indexing data[indices] would copy the
    # whole selection, which is large for hundreds of subjects.
    for i in indices:
        counts += data[i] != 0

    return pd.DataFrame(counts / n_selected)

In [None]:
# Per-entry share of non-zero estimates, computed separately for the male and female rows.
P_male = calculate_edge_probablity(data=sol, indices=male_ixs)
P_female = calculate_edge_probablity(data=sol, indices=female_ixs)

In [None]:
# Zero out every entry whose edge probability is below 0.8; the rest keep their value.
P_male = P_male.mask(P_male < 0.8, 0)
P_female = P_female.mask(P_female < 0.8, 0)

In [None]:
# Side-by-side heatmaps of the thresholded edge probabilities for the two groups.
# NOTE(review): the colour scale (center=0, vmin=-0.5, vmax=0.5) is the one used for the
# correlation plots; the values here lie in [0, 1], so everything above 0.5 shares one
# colour — confirm this is intended.
fig, axes = plt.subplots(1, 2, sharex=True, figsize=(30,30))

ax = sns.heatmap(P_male, ax=axes[0], center=0, vmin = -0.5, vmax = 0.5,
                 square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
ax.set_title("Male's connectivity network")

ax = sns.heatmap(P_female, ax=axes[1], center=0, vmin = -0.5, vmax = 0.5,
                 square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
ax.set_title("Female's connectivity network")

# Save before showing, so the file is written while the figure is still open.
fig.savefig("/mnt/home/icb/oleg.vlasovetc/brain_challenge/f-threshold-select/plots/connectivity_heatmap.png")

# fig.show() needs a GUI backend and only warns on the notebook's inline backend.
plt.show()

In [None]:
# True wherever the thresholded probability is non-zero, i.e. wherever the edge was kept.
a = P_male.astype(bool)
b = P_female.astype(bool)

# Element-wise agreement: True where both groups keep the edge or both drop it.
a == b

In [None]:
# Heatmap of the element-wise agreement between the male and female edge masks.
plt.figure(figsize=(16,16))
ax = sns.heatmap(a == b, center=0, vmin = -0.5, vmax = 0.5, square = True, cbar = False, 
                 cmap = "coolwarm", xticklabels=False, yticklabels=False)

In [None]:
# `P_df` is not assigned in any visible cell above, so on a fresh kernel this cell
# raised NameError.
# NOTE(review): it is assumed here to be the edge probability over all subjects in
# `sol` — confirm, or replace with the intended frame.
P_df = calculate_edge_probablity(data=sol, indices=range(sol.shape[0]))

plt.figure(figsize=(16,16))
ax = sns.heatmap(P_df, center=0, vmin = -0.5, vmax = 0.5,
                 square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)

In [None]:
# First (at most) five estimates, used below to try out the edge counting by hand.
test = sol[:5, :]
test.shape

In [None]:
# Count, per entry, in how many of the `test` matrices the entry is non-zero.
K = test.shape[0]
empty = np.zeros((436, 436))

for i in range(K):
    # 0/1 indicator of the non-zero entries of the i-th matrix.
    x = (test[i] != 0).astype(int)
    empty += x

In [None]:
# Share of the `test` matrices in which each entry is non-zero.
# Computed directly from `test` rather than as `empty / K`: dividing the running value
# in place gave a different (wrong) result every time this cell was re-run.
empty = (test != 0).mean(axis=0)

In [None]:
# Display the resulting matrix.
empty

In [None]:
# Write one PNG per subject: input matrix on the left, stored estimate on the right.
K = sol.shape[0]

plot_dir = "/storage/groups/bds01/datasets/brains/plots/SGL_plots"
# savefig does not create missing directories.
os.makedirs(plot_dir, exist_ok=True)

for i in range(K):
    fig, axes = plt.subplots(1, 2, sharex=True, figsize=(30,30))

    ax = sns.heatmap(corr[i, :], ax=axes[0], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Covariance')

    ax = sns.heatmap(sol[i, :], ax=axes[1], center=0, vmin = -0.5, vmax = 0.5,
                     square = True, cbar = False, cmap = "coolwarm", xticklabels=False, yticklabels=False)
    ax.set_title('Inverse Covariance')

    fig.savefig(os.path.join(plot_dir, "SGL_heatmap{0}.png".format(i)))

    # Close each figure once it is on disk. Otherwise every 30x30 inch figure stays
    # in memory and is also rendered inline when the cell finishes.
    plt.close(fig)