In [None]:
import os
# os.chdir(os.path.pardir)

import numpy as np
import pandas as pd

import networkx as nx
import matplotlib.pyplot as plt

from GGLasso.gglasso.helper.data_generation import group_power_network, sample_covariance_matrix
from GGLasso.gglasso.helper.basic_linalg import adjacency_matrix
from GGLasso.gglasso.helper.utils import sparsity, zero_replacement, normalize, log_transform

from GGLasso.gglasso.problem import glasso_problem

from tempfile import TemporaryFile

# Import preprocessed soil data

In [None]:
# Load the preprocessed soil table from a comma-separated file.
soil = pd.read_csv('soil_116.csv', sep=',')
# Preview the first rows of the table.
soil.head()

In [None]:
# Convert the loaded table into a plain NumPy array for the numerical steps below.
X_soil = np.array(soil)
# Display the array dimensions as a sanity check.
X_soil.shape

# Log-transform

In [None]:
# Apply the GGLasso `normalize` helper to the raw array.
# NOTE(review): elsewhere in this notebook the helper input is described as a
# (p, N) DataFrame, but X_soil is passed here as a NumPy array and is only
# transposed in the next cell -- confirm this is the orientation and type that
# `normalize` expects.
X = normalize(X_soil)

Note: X is scaled by its geometric mean and should be a DataFrame of shape (p, N), i.e. p variables in rows and N samples in columns.

In [None]:
# Transpose the normalized array, wrap it in a DataFrame and log-transform it.
# Gotcha: X is overwritten with a transposed, transformed version of itself, so
# re-running this cell on its own feeds already-transformed data back in.
# Re-run the normalize cell first to restore the expected input.
X = log_transform(pd.DataFrame(X.T))

# Calculate covariance and scale to correlations

In [None]:
# Empirical covariance matrix. np.cov treats each row of its input as one
# variable (rowvar defaults to True); bias=True normalizes by N instead of N - 1.
S0 = np.cov(X.values, bias = True)

In [None]:
# Scale covariances to correlations: entry (i, j) of S0 is divided by the
# product of the standard deviations of variables i and j.
std_dev = np.sqrt(np.diag(S0))
scale = np.outer(std_dev, std_dev)

S = S0 / scale

# GGLasso problem

Hyperparameters are taken from the experiments with [SpiecEasi](https://github.com/zdk123/SpiecEasi)

In [None]:
# Set up the Graphical Lasso problem on the correlation matrix S.
# X.shape[1] is passed as the second positional argument (presumably the sample
# size N, since X is laid out as (p, N) -- verify against glasso_problem).
# latent = True enables the latent-variable variant, which uses mu1 in addition
# to lambda1. The reg_params given here are starting values only; the model
# selection below overwrites them with the best ones found.
# do_scaling = False: S has already been scaled to correlations above.
P = glasso_problem(S, X.shape[1], reg_params = {'lambda1': 0.05, 'mu1': 0.05}, latent = True, do_scaling = False)
# Print a summary of the problem instance.
print(P)

`gamma` is the eBIC parameter and should lie between 0 and 1. The larger `gamma` is, the more eBIC tends to pick sparse solutions. The default is 0.1; here we use `gamma = 1`.

In [None]:
# Alternative grid (not used): lambda1_range = np.logspace(0, -3, 20)
# Grid of 50 lambda1 values, decreasing from 1.14221314 to 0.01142213 with a
# constant ratio between consecutive entries (log-spaced over two decades).
lambda1_range = [1.14221314, 1.03975454, 0.94648667, 0.86158509, 0.78429934, 0.71394626, 0.64990398,
                 0.59160641, 0.53853823, 0.49023037, 0.44625582, 0.40622586, 0.36978666, 0.33661612, 
                 0.30642104, 0.27893451, 0.25391358, 0.23113707, 0.21040365, 0.19153006, 0.17434947,
                 0.15871000, 0.14447343, 0.13151390, 0.11971686, 0.10897804, 0.09920251, 0.09030386,
                 0.08220344, 0.07482964, 0.06811729, 0.06200704, 0.05644490, 0.05138169, 0.04677266,
                 0.04257707, 0.03875783, 0.03528118, 0.03211639, 0.02923549, 0.02661302, 0.02422578,
                 0.02205268, 0.02007452, 0.01827380, 0.01663460, 0.01514245, 0.01378414, 0.01254768, 0.01142213]
# Only the lambda1 grid is supplied explicitly; no mu1 grid is passed here.
modelselect_params = {'lambda1_range': lambda1_range}

# Select the model by eBIC. gamma = 1 is the upper end of the recommended
# [0, 1] range, i.e. the setting that most strongly favours sparse solutions.
P.model_selection(modelselect_params = modelselect_params, method = 'eBIC', gamma = 1)

# regularization parameters are set to the best ones found during model selection
print(P.reg_params)

Optimal parameters found: lambda1 = 1.14221314, mu1 = 1

In [None]:
# Keep the estimated precision matrix; it is saved to disk in a later cell.
sol = P.solution.precision_
# Populate P.solution.adjacency_ from the fitted solution.
P.solution.calc_adjacency()

# Draw the graph given by the adjacency matrix using a spring layout.
draw_options = {
    "node_color": "darkblue",
    "edge_color": "darkblue",
    "font_color": 'white',
    "with_labels": True,
}

plt.figure()
G1 = nx.from_numpy_array(P.solution.adjacency_)
nx.draw_spring(G1, **draw_options)

In [None]:
# Persist the selected precision matrix in NumPy's binary format.
# np.save appends the ".npy" extension, so this writes "optimal_sol.npy"
# relative to the current working directory.
np.save("optimal_sol", sol)