In [None]:
# !jupyter labextension install jupyterlab-plotly

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px

from latentcor import get_tps, latentcor
from gglasso.problem import glasso_problem
from matplotlib import pyplot as plt

from vis import plot_heatmap

In [None]:
def PCA(X, L, inverse=True):
    """Project the rows of ``X`` onto the non-negligible eigenvectors of ``L``.

    Parameters
    ----------
    X : pandas.DataFrame
        Data table; ``X.values`` is multiplied by the loadings, so it needs as
        many columns as ``L`` has rows.
    L : array-like, square
        Matrix decomposed with ``np.linalg.eigh`` (which treats it as
        symmetric/Hermitian).
    inverse : bool, default True
        If True each kept eigenvector is scaled by ``1 / sqrt(eigenvalue)``,
        otherwise by ``sqrt(eigenvalue)``.

    Returns
    -------
    zu : numpy.ndarray
        ``X.values @ loadings``.
    loadings : numpy.ndarray
        Scaled eigenvectors, one column per kept eigenvalue.
    eigenvalues : numpy.ndarray
        Kept eigenvalues (> 1e-9) in descending order, squeezed and rounded to
        three decimals.
    """
    sig, V = np.linalg.eigh(L)

    # sort eigenvalues in descending order
    sig = sig[::-1]
    V = V[:, ::-1]

    # 1-D positions of the eigenvalues to keep. np.argwhere would return a
    # (k, 1) column here; np.diag of that 2-D array extracts a single element
    # instead of building a diagonal matrix, so every eigenvector ended up
    # scaled by the factor of the largest eigenvalue only.
    ind = np.flatnonzero(sig > 1e-9)

    if inverse:
        scale = np.sqrt(1 / sig[ind])
    else:
        scale = np.sqrt(sig[ind])

    # column-wise scaling, equivalent to V[:, ind] @ np.diag(scale)
    loadings = V[:, ind] * scale

    # compute the projection
    zu = X.values @ loadings

    return zu, loadings, np.round(sig[ind].squeeze(), 3)

### Import data

In [None]:
# Load the clr-transformed Atacama soil microbiome table; the first column
# of the file becomes the index.
acm = pd.read_csv(
    '~/GGLasso/data/soil/processed/acm_clr.tsv',
    index_col=0,
    sep='\t',
)

# Table dimensions (annotated as "(p, N)" by the author).
# NOTE(review): acm is later joined with meta on its index "by sample id",
# which suggests rows are samples — confirm the orientation.
acm.shape

In [None]:
# Covariates. Row 0 of the file is compared against 'categorical' below, i.e.
# it holds a per-column type label rather than a sample.
meta = pd.read_csv('~/GGLasso/data/soil/processed/acm_meta.tsv', sep='\t', index_col = 0)

# keep only the columns whose type label is not 'categorical'
meta = meta.loc[:, meta.iloc[0, :] != 'categorical']

# Drop the type-label row by position BEFORE coercing to numbers. Previously
# the coercion turned that row into all-NaN, dropna(how='all') removed it, and
# the subsequent iloc[1:] then discarded the first real sample as well.
meta = meta.iloc[1:]

# non-numeric entries become NaN; rows without any numeric value are removed
meta = meta.apply(pd.to_numeric, errors='coerce')
meta = meta.dropna(how='all')

print(meta.shape)

meta.head()

In [None]:
# Attach the covariates to the abundance table on the shared index
# (sample id), then discard every row that still has a missing value.
df = acm.join(meta).dropna(axis=0)

# Sanity check: evaluates to False when no missing value is left.
df.isnull().sum().any()

In [None]:
# Find every column whose variance is exactly zero, report each one, and
# remove them from df in place.
constant_columns = [name for name in df.columns if df[name].var() == 0]

for name in constant_columns:
    print("'{0}' covariate has been dropped".format(name))

df.drop(columns=constant_columns, inplace=True)

In [None]:
# Dimensions of the joined table at this point in the notebook.
df.shape

### Latent correlation

In [None]:
# Row and column counts of the joined table.
N = df.shape[0]
p = df.shape[1]

# Per-column variable types as returned by latentcor's get_tps; passed to
# latentcor() as `tps` further down.
clean_types = get_tps(df)

In [None]:
# Independent copy of df, so that changes made to vis_df leave df untouched.
vis_df = df.copy()

In [None]:
# Give the long feature IDs short numbered aliases ("ASV_1", "ASV_2", ...).
# id_dict maps each alias back to the original identifier.
id_dict = dict()
alias_for = dict()

i = 1
for col in vis_df.columns:
    # only 32-character column names are treated as ASV identifiers
    if len(col) != 32:
        continue

    asv_name = "ASV_{0}".format(i)
    id_dict[asv_name] = col
    # keep the first alias assigned to a name, as a rename applied
    # immediately on first sight would
    alias_for.setdefault(col, asv_name)
    i += 1

# apply all aliases in a single in-place rename
vis_df.rename(columns=alias_for, inplace=True)

In [None]:
# The table is passed with shape (N, p), together with the column types
# computed earlier.
lat_cor = latentcor(
    vis_df,
    tps=clean_types,
    method='original',
    use_nearPD=False,
)

In [None]:
# 'R' entry of the latentcor result; used below as the latent correlation matrix.
S = lat_cor['R']

In [None]:
# Blank out the upper triangle (diagonal included) so that every pair of
# variables is drawn only once.
mask = np.triu(np.ones_like(S, dtype=bool))
rLT = S.mask(mask)
axis_labels = rLT.columns.values

heat = go.Heatmap(
    z=rLT,
    x=axis_labels,
    y=axis_labels,
    colorscale='RdBu_r',
    # fixed color domain [-1, 1]
    zmin=-1,
    zmax=1,
    # one-pixel gap between bricks in both directions
    xgap=1,
    ygap=1,
)

title = 'Latent Correlation Matrix'

layout = go.Layout(
    title_text=title,
    title_x=0.5,
    height=2400,
    width=2400,
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange='reversed',
)

fig = go.Figure(layout=layout, data=[heat])

# fig.write_image("../plots/png/latent_corr.png")
# fig.write_html("../plots/html/latent_corr.html")

In [None]:
# Axis labels for the heatmaps: the last 14 columns of S keep their names,
# all columns before them are labelled by their integer position.
# NOTE(review): 14 is hard-coded — confirm it equals the number of covariate
# columns that survive the cleaning steps.
n_meta = 14
meta_ticks = np.array(S.columns[-n_meta:])
bug_ticks = np.arange(len(S.columns[:-n_meta]))
ticks = np.hstack((bug_ticks, meta_ticks))

In [None]:
# Write the matrix to CSV without its row index.
# NOTE(review): absolute, machine-specific path — this cell fails on any
# machine without this directory; consider a path relative to the repository.
latent_corr_csv = "/Users/oleg.vlasovetc/Desktop/acm_latent_corr.csv"
S.to_csv(latent_corr_csv, index=False)

### SGL + low-rank: species and covariates together

In [None]:
# Set up the graphical-lasso problem on the latent correlation matrix with
# the latent (low-rank) option switched on and input scaling switched off.
P_SGL_L = glasso_problem(
    S.values,
    N,
    latent=True,
    do_scaling=False,
)
print(P_SGL_L)

In [None]:
# Candidate values handed to model selection: a single lambda1 and a
# decreasing list of mu1 values.
modelselect_params = {
    'lambda1_range': [0.5],
    'mu1_range': [15, 10, 5, 3.7, 3, 2.25, 2.1, 2, 1.7, 1.6],
}

# alternative grid kept for reference:
# lambda1_range = [0.05, 0.025, 0.01]
# mu1_range = [5, 3.7, 3, 2.25, 2.1, 2, 1.7, 1.6]

# the same list objects under their stand-alone names
lambda1_range = modelselect_params['lambda1_range']
mu1_range = modelselect_params['mu1_range']

In [None]:
# Run model selection over the grid using the eBIC criterion (gamma = 0.25),
# then print the regularization parameters stored on the problem.
P_SGL_L.model_selection(
    modelselect_params=modelselect_params,
    method='eBIC',
    gamma=0.25,
)
print(P_SGL_L.reg_params)

In [None]:
# Display the model-selection statistics stored on the problem object.
P_SGL_L.modelselect_stats

In [None]:
# Precision matrix of the solution for the unweighted problem.
Theta_SGL_L = P_SGL_L.solution.precision_

In [None]:
# Low-rank component of the same solution.
# NOTE: the name L is rebound later in the notebook to another problem's low-rank part.
L = P_SGL_L.solution.lowrank_

In [None]:
# One label per row of L: "L1", "L2", ...
l_names = ["L{0}".format(position) for position in range(1, L.shape[0] + 1)]

In [None]:
# Low-rank matrix as a frame with the L1, L2, ... column labels.
a = pd.DataFrame(data=L, columns=l_names)

# Correlation matrix with its row labels replaced by a default integer index,
# so that it lines up row-by-row with `a`.
b = S.reset_index(drop=True)

# Side-by-side table: low-rank columns first, correlation columns after.
c = pd.concat([a, b], axis='columns')

In [None]:
# Small comparison table: the first two and the last two columns of c.
leading_pair = c.iloc[:, :2]
trailing_pair = c.iloc[:, -2:]
test = pd.concat([leading_pair, trailing_pair], axis=1)

In [None]:
# Pairwise scatter plots of the selected columns. plotly.express is already
# imported as px in the notebook's import cell, so it is not re-imported here,
# and the title is set once (in update_layout) instead of being passed to
# scatter_matrix and immediately overwritten.
fig = px.scatter_matrix(test)
fig.update_traces(diagonal_visible=False)

fig.update_layout(
    margin=dict(t=100, r=100, b=100, l=100),
    width=1000,
    height=1000,
    title='Low-rank vectors vs. Correlation vectors between the covariates and ASVs',
    title_x=0.5,
)

fig.write_image("../plots/png/scatter.png")
fig.write_html("../plots/html/scatter.html")

In [None]:
# Project the samples onto the scaled eigenvectors of the low-rank matrix.
pca_result = PCA(vis_df, L, inverse=True)
proj, loadings, eigv = pca_result

# Numerical rank of the low-rank matrix.
r = np.linalg.matrix_rank(L)

### Solve SGL and SGL+low-rank with weighted solver

#### SGL with a mask of absolute correlation values

In [None]:
# Element-wise absolute value of the correlation matrix, used as lambda1 mask.
lambda1_mask_corr = np.abs(S.values)

In [None]:
# Labelled version of the mask for plotting.
df_mask_corr = pd.DataFrame(
    data=lambda1_mask_corr,
    index=S.index,
    columns=vis_df.columns,
)

In [None]:
# Image view of the absolute-correlation mask.
fig = px.imshow(df_mask_corr, color_continuous_scale='RdBu_r')
fig.update_layout(
    title='Absolute correlation lambda-mask matrix: weights before the penalization term',
    title_x=0.5,
    width=1000,
    height=1000,
    margin=dict(t=100, r=100, b=100, l=100),
)

# fig.write_image("../plots/png/lambda_mask_corr.png")
# fig.write_html("../plots/html/lambda_mask_corr.html")

In [None]:
# Log-spaced grids: 8 lambda1 values from 10**0 down to 10**-2 and
# 6 mu1 values from 10**1.5 down to 10**0.
lambda1_range = np.logspace(start=0, stop=-2, num=8)
mu1_range = np.logspace(start=1.5, stop=0, num=6)

In [None]:
# Fresh problem instance for the run weighted by the absolute-correlation mask.
P_corr = glasso_problem(
    S.values,
    N,
    latent=True,
    do_scaling=False,
)
print(P_corr)

In [None]:
# Model selection weighted by the absolute-correlation mask. The mask is the
# array built earlier as `lambda1_mask_corr`; the previously referenced name
# `lambda1_mask_abs` is never defined in this notebook and raised a NameError.
modelselect_params = {
    'lambda1_range': lambda1_range,
    'mu1_range': mu1_range,
    'lambda1_mask': lambda1_mask_corr,
}

P_corr.model_selection(modelselect_params=modelselect_params, method='eBIC', gamma=0.25)
print(P_corr.reg_params)

In [None]:
# Precision matrix of the solution obtained with the absolute-correlation mask.
Theta_corr = P_corr.solution.precision_

### SGL with an explicit mask

In [None]:
# (p, p) weight matrix filled with a small positive constant rather than
# zeros (author's note: a small constant is added so ADMM could converge).
shape_meta = (p, p)
mask = np.full(shape_meta, 0.01)

# Heavily penalize the species-species block: raise its weights from 0.01
# by (1 - 0.01).
n_bugs = len(bug_ticks)
mask[:n_bugs, :n_bugs] += 1 - 0.01

# the mask is used under this name in the model-selection parameters
lambda1_mask_exp = mask

In [None]:
# Labelled version of the explicit mask for plotting.
df_mask_exp = pd.DataFrame(
    data=lambda1_mask_exp,
    index=S.index,
    columns=vis_df.columns,
)

In [None]:
# Image view of the explicit mask with the two weight levels annotated.
fig = px.imshow(df_mask_exp, color_continuous_scale='RdBu_r')
fig.update_layout(margin = dict(t=100,r=100,b=100,l=100), width = 1000, height = 1000,
                 title='Lambda-mask matrix: weights before the penalization term', title_x=0.5)

# Raw strings: "\l" is not a valid escape sequence, so the non-raw literals
# triggered an invalid-escape warning. The text itself is unchanged.
fig.add_annotation(text=r"$\lambda=1$",
                  xref="paper", yref="paper", font=dict(color='yellow',size=155),
                  x=0.5, y=0.5, showarrow=False)
fig.add_annotation(text=r"$\lambda=0.01$",
                  xref="paper", yref="paper", font=dict(color='yellow',size=155),
                  x=0.5, y=0.05, showarrow=False)

fig.update_coloraxes(showscale=False)

# fig.write_image("../plots/png/lambda_mask.png")
# fig.write_html("../plots/html/lambda_mask.html")

In [None]:
# Fresh problem instance for the run weighted by the explicit mask.
P_exp = glasso_problem(
    S.values,
    N,
    latent=True,
    do_scaling=False,
)
print(P_exp)

In [None]:
# Same log-spaced grids as for the absolute-correlation run, now combined
# with the explicit mask.
lambda1_range = np.logspace(start=0, stop=-2, num=8)
mu1_range = np.logspace(start=1.5, stop=0, num=6)

modelselect_params = dict(
    lambda1_range=lambda1_range,
    mu1_range=mu1_range,
    lambda1_mask=lambda1_mask_exp,
)

In [None]:
# eBIC model selection (gamma = 0.25) for the explicit-mask problem, followed
# by the regularization parameters stored on the problem.
P_exp.model_selection(
    modelselect_params=modelselect_params,
    method='eBIC',
    gamma=0.25,
)

print(P_exp.reg_params)

In [None]:
# Precision matrix of the explicit-mask solution.
Theta_exp = P_exp.solution.precision_
# NOTE: rebinds L — from here on L is the low-rank part of P_exp, no longer
# that of P_SGL_L.
L = P_exp.solution.lowrank_

### Explicit mask, no low-rank component (optimal)

In [None]:
# Explicit mask without a latent (low-rank) component: only lambda1 is tuned.
P_exp_no = glasso_problem(
    S.values,
    N,
    latent=False,
    do_scaling=False,
)
print(P_exp_no)

modelselect_params = dict(
    lambda1_range=lambda1_range,
    lambda1_mask=lambda1_mask_exp,
)

P_exp_no.model_selection(
    modelselect_params=modelselect_params,
    method='eBIC',
    gamma=0.25,
)

print(P_exp_no.reg_params)

Theta_exp_no = P_exp_no.solution.precision_

### Absolute-correlation mask, no low-rank component

In [None]:
# Absolute-correlation mask without a latent (low-rank) component.
P_abs_no = glasso_problem(S.values, N, latent=False, do_scaling=False)
# print the problem created in this cell (previously printed P_exp_no)
print(P_abs_no)

# The absolute-correlation mask is the array built earlier as
# `lambda1_mask_corr`; `lambda1_mask_abs` is never defined in this notebook
# and raised a NameError.
modelselect_params = {'lambda1_range': lambda1_range, 'lambda1_mask': lambda1_mask_corr}

P_abs_no.model_selection(modelselect_params=modelselect_params, method='eBIC', gamma=0.25)

print(P_abs_no.reg_params)
Theta_abs_no = P_abs_no.solution.precision_

### Save plots

In [None]:

def plot_heatmap(cov, precision, mask, low_rank=None, low=False):
    """Draw the given matrices side by side as seaborn heatmaps.

    With ``low=True`` a 2x2 grid is produced (covariance, inverse covariance,
    low-rank solution, mask). Otherwise a single row of three panels is
    produced (covariance, inverse covariance, mask) and ``low_rank`` is not
    used. Tick labels are read from the notebook-level ``ticks`` array, and
    only the first panel keeps its y ticks.

    Note: this definition replaces the ``plot_heatmap`` imported from ``vis``
    in the notebook's import cell.

    Parameters
    ----------
    cov, precision, mask : 2-D array-like
        Matrices passed to ``sns.heatmap``.
    low_rank : 2-D array-like, optional
        Low-rank matrix, drawn only when ``low`` is True.
    low : bool
        Selects the 2x2 layout that includes the low-rank panel.

    Returns
    -------
    The created matplotlib figure.
    """
    # subplot margins common to both layouts
    margins = dict(left=0.125, right=0.9, bottom=0.1, top=0.9)
    fontsize = 56

    # styling shared by every panel; the color range is clipped to [-0.5, 0.5]
    heatmap_style = dict(
        cmap="coolwarm",
        cbar=False,
        vmin=-0.5,
        vmax=0.5,
        linewidth=.5,
        square=True,
        xticklabels=ticks,
        yticklabels=ticks,
    )

    if low:
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(90, 35))

        # negative wspace pulls the two columns of the grid together
        plt.subplots_adjust(wspace=-0.6, hspace=0.5, **margins)

        # NOTE(review): joining through get_shared_y_axes() is deprecated in
        # recent Matplotlib releases — confirm the installed version supports it.
        ax1.get_shared_y_axes().join(ax2)
        ax3.get_shared_y_axes().join(ax4)

        panels = [
            (cov, ax1, 'Covariance'),
            (precision, ax2, 'Inverse covariance'),
            (low_rank, ax3, 'Low-rank solution'),
            (mask, ax4, 'Mask'),
        ]
    else:
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(90, 35))

        plt.subplots_adjust(wspace=0.5, hspace=0.5, **margins)

        # NOTE(review): see the deprecation remark in the branch above.
        ax1.get_shared_y_axes().join(ax2, ax3)

        panels = [
            (cov, ax1, 'Covariance'),
            (precision, ax2, 'Inverse covariance'),
            (mask, ax3, 'Mask'),
        ]

    for position, (matrix, axis, caption) in enumerate(panels):
        drawn = sns.heatmap(matrix, ax=axis, **heatmap_style)
        drawn.set_ylabel('')
        drawn.set_xlabel(caption, fontsize=fontsize)
        # every panel except the first has its y ticks removed
        if position > 0:
            drawn.set_yticks([])

    return fig

In [None]:
# Each figure takes its low-rank matrix and mask directly from the problem it
# belongs to. Previously the absolute-correlation figure was drawn with the
# global L, which by this point holds the low-rank part of P_exp, not P_corr.

# explicit mask, with low-rank component
fig = plot_heatmap(cov=S, precision=Theta_exp, low_rank=P_exp.solution.lowrank_,
                   mask=lambda1_mask_exp, low=True)
fig.savefig("../plots/png/low_rank_exp.png")

# absolute-correlation mask, with low-rank component
fig = plot_heatmap(cov=S, precision=Theta_corr, low_rank=P_corr.solution.lowrank_,
                   mask=abs(S.values), low=True)
fig.savefig("../plots/png/low_rank_abscorr.png")


# explicit mask, no low-rank component
fig = plot_heatmap(cov=S, precision=Theta_exp_no, mask=lambda1_mask_exp, low=False)
fig.savefig("../plots/png/exp_no.png")

# absolute-correlation mask, no low-rank component
fig = plot_heatmap(cov=S, precision=Theta_abs_no, mask=abs(S.values), low=False)
fig.savefig("../plots/png/abscorr_no.png")