In [None]:
import os
os.chdir('/container/mount/point')

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

from scipy.cluster.hierarchy import linkage, leaves_list

from gglasso.helper.utils import sparsity
from gglasso.helper.model_selection import ebic

from utils.helper import transform_features, scale_array_by_diagonal
from utils.stability_selection import subsample
from utils.solver import ADMM_single

### Import American Gut Project data

In [201]:
matched_df = pd.read_csv('data/AGP/matched_df_W.csv', sep=',', index_col=0)

f_taxa_smoker = pd.read_csv('data/AGP/tax_table_smoker.csv', sep=',', index_col=0)
f_count_smoker = pd.read_csv('data/AGP/otu_table_smoker.csv', sep=',', index_col=0)  # N x p

f_taxa_non_smoker = pd.read_csv('data/AGP/tax_table_non_smoker.csv', sep=',', index_col=0)
f_count_non_smoker = pd.read_csv('data/AGP/otu_table_non_smoker.csv', sep=',', index_col=0)  # N x p

print("Bacterial families for both cases are the same:", (f_count_smoker.columns == f_count_non_smoker.columns).all())
family_names = f_count_smoker.columns
names_dict = {index: (f_taxa_smoker.loc[index, "Family"] if pd.notna(f_taxa_smoker.loc[index, "Family"]) else f"f_unknown_{index}") for index in family_names.map(int)}

Bacterial families for both cases are the same: True


### Perform Subsampling

In [202]:
level = "family"
model = "sparse"
N = 10  # Number of subsamples
lambda1_range = np.logspace(0, -2, 10)

# Process both smoker and non-smoker cases
subsampling_results = dict()

for case, f_count in zip(["smoker", "non_smoker"], [f_count_non_smoker, f_count_smoker]):
    print(f"Processing {case} case")
    
    # Select count table according to selected taxonomic level
    control_counts = f_count.copy().T
    p, n = control_counts.shape
    print(f"Level {level}: \n Shape(p,N): {control_counts.shape}")

    control_ids = list(map(str, matched_df[matched_df["W"] == 1].index))

    control_clr_counts = transform_features(control_counts, transformation="clr")
    subsamples = subsample(control_clr_counts.values, N)

    for i in range(subsamples.shape[2]):  # Loop through each subsample
        subsample_slice = subsamples[:, :, i]  # Access the i-th subsample
        print(f"Subsample {i+1}:")
        print(subsample_slice.shape)

    print(f"Print subsamples object: {subsamples.shape}")

    subsampling_results[case] = {
        "control_counts": control_counts,
        "control_clr_counts": control_clr_counts,
        "subsamples": subsamples
    }

Processing smoker case
Level family: 
 Shape(p,N): (40, 234)
Subsample 1:
(40, 152)
Subsample 2:
(40, 152)
Subsample 3:
(40, 152)
Subsample 4:
(40, 152)
Subsample 5:
(40, 152)
Subsample 6:
(40, 152)
Subsample 7:
(40, 152)
Subsample 8:
(40, 152)
Subsample 9:
(40, 152)
Subsample 10:
(40, 152)
Print subsamples object: (40, 152, 10)
Processing non_smoker case
Level family: 
 Shape(p,N): (40, 234)
Subsample 1:
(40, 152)
Subsample 2:
(40, 152)
Subsample 3:
(40, 152)
Subsample 4:
(40, 152)
Subsample 5:
(40, 152)
Subsample 6:
(40, 152)
Subsample 7:
(40, 152)
Subsample 8:
(40, 152)
Subsample 9:
(40, 152)
Subsample 10:
(40, 152)
Print subsamples object: (40, 152, 10)


### Empirical correlation

In [203]:
corr_dict = {"smoker": dict(), "non_smoker": dict()}

for case, f_count in zip(["smoker", "non_smoker"], [f_count_non_smoker, f_count_smoker]):
    print(f"Processing {case} case")
    
    # Select count table according to selected taxonomic level
    control_counts = f_count.copy().T
    p, n = control_counts.shape
    print(f"Level {level}: \n Shape(p,N): {control_counts.shape}")

    control_ids = list(map(str, matched_df[matched_df["W"] == 1].index))

    control_clr_counts = transform_features(control_counts, transformation="clr")
    subsamples = subsample(control_clr_counts.values, N)

    for i in range(subsamples.shape[2]):  # Loop through each subsample
        subsample_slice = subsamples[:, :, i]  # Access the i-th subsample
        #print(f"Subsample {i+1}:")
        #print(subsample_slice.shape)

    print(f"Print subsamples shape: {subsamples.shape}")

    S0 = np.cov(control_clr_counts, bias=True)
    S = scale_array_by_diagonal(S0)
    if p != S.shape[0]:
        raise Exception("Check covariance shape!")
    
    corr_dict[case] = S

Processing smoker case
Level family: 
 Shape(p,N): (40, 234)
Print subsamples shape: (40, 152, 10)
Processing non_smoker case
Level family: 
 Shape(p,N): (40, 234)
Print subsamples shape: (40, 152, 10)


In [204]:
### Hierarchical clustering of the correlation matrix
corr_mat = corr_dict["smoker"]

linkage_counts = linkage(corr_mat, method='average')
ordered_indices = leaves_list(linkage_counts)

for case, f_count in zip(["smoker", "non_smoker"], [f_count_non_smoker, f_count_smoker]):
    print(f"Processing {case} case")

    S = corr_dict[case]

    S_hier = pd.DataFrame(S, index=family_names, columns=family_names)
    S_hier = S_hier.iloc[ordered_indices, ordered_indices]

    fig = px.imshow(S_hier,
                color_continuous_scale='RdBu_r',
                labels={'color': 'Value'},
                title=f'Empirical correlation: p={p}, n={n} ({case})',
                color_continuous_midpoint=0)

    # Customize layout for better visuals (optional)
    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in S_hier.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in S_hier.columns.map(int)])
    )

    fig.show()

Processing smoker case


Processing non_smoker case


## SPIEC-EASI Solution

(corresponding code in R)

### Sparse part

In [205]:
se_Theta_smoker = pd.read_csv('data/AGP/theta_smoker.csv', sep=',', index_col=0)
se_Theta_nonsmoker = pd.read_csv('data/AGP/theta_non_smoker.csv', sep=',', index_col=0)

for case, se_Theta in zip(["smoker", "non_smoker"], [se_Theta_smoker, se_Theta_nonsmoker]):
    control_counts = subsampling_results[case]["control_counts"]
    se_Theta.index, se_Theta.columns = family_names, family_names
    se_Theta = se_Theta.iloc[ordered_indices, ordered_indices]

    SP = np.round(sparsity(se_Theta), 4)

    fig = px.imshow(se_Theta,
                    color_continuous_scale='RdBu_r',
                    labels={'color': 'Value'},
                    title=f'SPIEC-EASI: Theta, SP={SP} ({case})',
                    color_continuous_midpoint=0)

    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in se_Theta.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in se_Theta.columns.map(int)])
    )

    fig.show()


### Low-rank part

In [206]:
se_L_smoker = pd.read_csv('data/AGP/low_rank_smoker.csv', sep=',', index_col=0)
se_L_nonsmoker = pd.read_csv('data/AGP/low_rank_non_smoker.csv', sep=',', index_col=0)

for case, se_L in zip(["smoker", "non_smoker"], [se_L_smoker, se_L_nonsmoker]):
    rank_se_L = np.linalg.matrix_rank(se_L)
    print(f"Rank of the low-rank matrix from SPIEC-EASI ({case}): {rank_se_L}")

    se_L.index, se_L.columns = family_names, family_names
    se_L = se_L.iloc[ordered_indices, ordered_indices]

    fig = px.imshow(se_L,
                    color_continuous_scale='RdBu_r',
                    labels={'color': 'Value'},
                    title=f'SPIEC-EASI: L (rank={rank_se_L}) ({case})',
                    color_continuous_midpoint=0)

    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in se_L.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in se_L.columns.map(int)])
    )

    fig.show()

Rank of the low-rank matrix from SPIEC-EASI (smoker): 10


Rank of the low-rank matrix from SPIEC-EASI (non_smoker): 10


### Likelihood (Omega=S-L)

In [207]:
for case, se_Theta in zip(["smoker", "non_smoker"], [se_Theta_smoker, se_Theta_nonsmoker]):
    se_Theta.index, se_Theta.columns = family_names, family_names
    se_Theta = se_Theta.iloc[ordered_indices, ordered_indices]

    se_L = pd.read_csv(f'data/AGP/low_rank_{case}.csv', sep=',', index_col=0)
    se_L.index, se_L.columns = family_names, family_names
    se_L = se_L.iloc[ordered_indices, ordered_indices]

    se_Omega = se_Theta - se_L

    fig = px.imshow(se_Omega,
                    color_continuous_scale='RdBu_r',
                    labels={'color': 'Value'},
                    title=f'SPIEC-EASI: Omega ({case})',
                    color_continuous_midpoint=0)

    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in se_Omega.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in se_Omega.columns.map(int)])
    )

    fig.show()


## GGLasso solution

In [157]:
### lambda path from SPIEC-EASI
lambda1_range = [3.02721826, 2.37563971, 1.86430695, 1.46303347, 1.14813010, 0.90100654, 0.70707387, 0.55488327, 0.43545018, 0.34172387, 
                 0.26817121, 0.21045003, 0.16515276, 0.12960528, 0.10170903, 0.07981718, 0.06263733, 0.04915527, 0.03857509, 0.03027218]

rank_range = [2, 3, 4, 5, 6, 7, 8, 9, 10]
verbose = False
tol, rtol = 1e-5, 1e-5
beta = 0.05
mu1 = 1

low_rank_dict = {"smoker": dict(), "non_smoker": dict()}

for case in ["smoker", "non_smoker"]:
    print(f"Processing {case} case")

    control_counts = subsampling_results[case]["control_counts"]
    control_clr_counts = subsampling_results[case]["control_clr_counts"]
    subsamples = subsampling_results[case]["subsamples"]

    n_samples = control_clr_counts.shape[1]

    for r in rank_range:
        print(f"--------------     Rank: {r} --------------")

        sol_dict = dict()

        for lambda1 in lambda1_range:
            print(f"-------------- Lambda: {lambda1} --------------")

            sub_sol_dict = dict()

            ### Loop through each subsample
            for i in range(N):
                subsample_slice = subsamples[:, :, i]
                print(f"Subsample {i+1}:")

                S0 = np.cov(subsample_slice, bias=True)
                S = scale_array_by_diagonal(S0)

                if p != S.shape[0]:
                    raise Exception("Check covariance shape!")

                ### Solve the sparse problem
                sub_sol, _ = ADMM_single(S=S, lambda1=lambda1, mu1=mu1, r=r, Omega_0=np.eye(p), 
                                verbose=verbose, latent=True, tol=tol, rtol=rtol)

                G = sub_sol['Theta'].astype(bool).astype(int)

                sub_sol_dict[f"subsample_{i+1}"] = {"Theta": sub_sol['Theta'],
                                                    "L": sub_sol['L'],
                                                    "Omega": sub_sol['Omega'],
                                                    "X": sub_sol["X"], "S": S,
                                                    "adjacency_matrix": G}

            # Store subsample solutions in sol_dict[lambda1]
            sol_dict[lambda1] = {"sub_samples": sub_sol_dict}

            estimates = list()

            for i in range(N):
                ### adjacency matrix (like in the paper) or estimate (like in R)?
                estimate = sol_dict[lambda1]["sub_samples"][f'subsample_{i+1}']["adjacency_matrix"]

                ### psi_Lambda_st in the paper notation
                estimates.append(estimate)

            ### theta_b_st_hat in the paper notation
            edge_average = np.mean(estimates, axis=0)

            ### the variance of a Bernoulli distribution (ksi_b_st in the paper notation)
            edge_instability = 2 * edge_average * (1 - edge_average)

            ### D_b in the paper notation
            total_instability = 2 * np.sum(edge_instability) / (p * (p - 1))

            # Add instability measure to sol_dict[lambda1]
            sol_dict[lambda1]["D_b"] = total_instability

        # Collect all instabilities for current mu1
        instabilities = [sol_dict[lambda1]["D_b"] for lambda1 in sol_dict if "D_b" in sol_dict[lambda1]]

        # Find the index of the largest lambda where total_instability <= stars_thresh
        indices = np.where(np.array(instabilities) <= beta)[0]

        if len(indices) > 0:
            opt_index = np.max(indices)
        else:
            opt_index = indices[0]  # pick the only one

        lambda_star = lambda1_range[opt_index]

        print(f"Optimal lambda: {lambda_star}")

        ### Do the refit of the model on the exactly the same empirical covariance matrix as SPIEC-EASI
        S = corr_dict[case]
        # S0 = np.cov(control_clr_counts, bias=True)
        # S = scale_array_by_diagonal(S0)
        # if p != S.shape[0]:
        #     raise Exception("Check covariance shape!")
        

        sol, _ = ADMM_single(S=S, lambda1=lambda_star, mu1=mu1, r=r, Omega_0=np.eye(p),
                        verbose=verbose, latent=True, tol=tol, rtol=rtol)
        
        print(f"Rank of the low-rank matrix: {np.linalg.matrix_rank(sol['L'])}")


        ebic_result = ebic(S, sol['Theta'], N=n_samples, gamma=0.5)

        sol_dict["refit"] = {"Theta": sol['Theta'], "Omega": sol['Omega'], "X": sol['X'],
                            "L": sol["L"], "S": S, "lambda_star": lambda_star, "eBIC": ebic_result,
                            "adjacency_matrix": sol['Theta'].astype(bool).astype(int)}

        low_rank_dict[case][r] = sol_dict

Processing smoker case
--------------     Rank: 2 --------------
-------------- Lambda: 3.02721826 --------------
Subsample 1:
ADMM terminated after 25 iterations with status: optimal.
Subsample 2:
ADMM terminated after 25 iterations with status: optimal.
Subsample 3:
ADMM terminated after 26 iterations with status: optimal.
Subsample 4:
ADMM terminated after 26 iterations with status: optimal.
Subsample 5:
ADMM terminated after 26 iterations with status: optimal.
Subsample 6:
ADMM terminated after 27 iterations with status: optimal.
Subsample 7:
ADMM terminated after 27 iterations with status: optimal.
Subsample 8:
ADMM terminated after 26 iterations with status: optimal.
Subsample 9:
ADMM terminated after 25 iterations with status: optimal.
Subsample 10:
ADMM terminated after 26 iterations with status: optimal.
-------------- Lambda: 2.37563971 --------------
Subsample 1:
ADMM terminated after 23 iterations with status: optimal.
Subsample 2:
ADMM terminated after 23 iterations with s

### Empirical covariance

In [212]:
selected_rank = 10

for case in ["smoker", "non_smoker"]:
    gg_S = pd.DataFrame(low_rank_dict[case][selected_rank]["refit"]["S"], index=family_names, columns=family_names)
    gg_S = gg_S.iloc[ordered_indices, ordered_indices]

    fig = px.imshow(gg_S,
                    color_continuous_scale='RdBu_r',
                    labels={'color': 'Value'},
                    title=f'GGLasso: S_hat ({case})',
                    color_continuous_midpoint=0)

    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_S.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_S.columns.map(int)])
    )

    fig.show()


### Sparse part

In [213]:
selected_rank = 10

for case in ["smoker", "non_smoker"]:
    gg_Theta = pd.DataFrame(low_rank_dict[case][selected_rank]["refit"]["Theta"], index=family_names, columns=family_names)
    gg_Theta = gg_Theta.iloc[ordered_indices, ordered_indices]
    SP = np.round(sparsity(gg_Theta), 4)

    fig = px.imshow(gg_Theta,
                    color_continuous_scale='RdBu_r',
                    labels={'color': 'Value'},
                    title=f'GGLasso: Theta, SP={SP} ({case})',
                    zmin=-1, zmax=1,
                    color_continuous_midpoint=0)

    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_Theta.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_Theta.columns.map(int)])
    )

    fig.show()


### Low-rank part

In [214]:
selected_rank = 10

for case in ["smoker", "non_smoker"]:
    gg_L = pd.DataFrame(low_rank_dict[case][selected_rank]["refit"]["L"], index=family_names, columns=family_names)
    gg_L = gg_L.iloc[ordered_indices, ordered_indices]
    rank_gg_L = np.linalg.matrix_rank(gg_L)
    print(f"Rank of the low-rank matrix from GGLasso ({case}): {rank_gg_L}")

    fig = px.imshow(gg_L,
                    color_continuous_scale='RdBu_r',  # Adjust the color scale
                    labels={'color': 'Value'},          # Label for color bar
                    title=f'GGLasso: L (rank={rank_gg_L}) ({case})',
                    color_continuous_midpoint=0)

    # Customize layout for better visuals (optional)
    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_L.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_L.columns.map(int)])
    )

    fig.show()


Rank of the low-rank matrix from GGLasso (smoker): 10


Rank of the low-rank matrix from GGLasso (non_smoker): 10


### Likelihood

In [215]:
for case in ["smoker", "non_smoker"]:
    gg_Omega = pd.DataFrame(low_rank_dict[case][selected_rank]["refit"]["Omega"], index=family_names, columns=family_names)

    fig = px.imshow(gg_Omega,
                    color_continuous_scale='RdBu_r',  
                    labels={'color': 'Value'},
                    title=f'GGLasso: Omega ({case})',
                    zmin=-1, zmax=1,
                    color_continuous_midpoint=0)

    # Customize layout for better visuals (optional)
    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_Omega.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_Omega.columns.map(int)])
    )

    fig.show()

In [216]:
for case in ["smoker", "non_smoker"]:
    gg_S = pd.DataFrame(low_rank_dict[case][selected_rank]["refit"]["S"], index=family_names, columns=family_names)
    gg_Omega = pd.DataFrame(low_rank_dict[case][selected_rank]["refit"]["Omega"], index=family_names, columns=family_names)

    I = gg_Omega.dot(gg_S)

    fig = px.imshow(I,
                    color_continuous_scale='RdBu_r',  
                    labels={'color': 'Value'},
                    title=f'GGLasso: Omega x S_hat ({case})',
                    color_continuous_midpoint=0)

    # Customize layout for better visuals (optional)
    fig.update_layout(
        width=850,
        height=850,
        xaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_Omega.index.map(int)]),
        yaxis=dict(tickmode='array', tickvals=list(range(len(names_dict))), ticktext=[names_dict[idx] for idx in gg_Omega.columns.map(int)])
    )

    fig.show()

In [175]:
for case in ["smoker", "non_smoker"]:
    ranks = list(low_rank_dict[case].keys())
    sparsities = [sparsity(low_rank_dict[case][rank]["refit"]["Theta"]) for rank in ranks]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=ranks, y=sparsities, mode='lines+markers', name=f'Sparsity ({case})'))

    fig.update_layout(
        title=f'Sparsity vs Rank ({case})',
        xaxis_title='Rank',
        yaxis_title='Sparsity',
        template='plotly_white'
    )

    fig.show()

    for key in low_rank_dict[case].keys():
        print(f"Case: {case}, Rank: {key}")
        print(low_rank_dict[case][key]["refit"]["eBIC"])

Case: smoker, Rank: 2
9495.495216659821
Case: smoker, Rank: 3
9563.352478919613
Case: smoker, Rank: 4
9628.149991294586
Case: smoker, Rank: 5
9729.759572497198
Case: smoker, Rank: 6
9848.735316611022
Case: smoker, Rank: 7
10016.372557973547
Case: smoker, Rank: 8
10525.578827209194
Case: smoker, Rank: 9
10733.087098956574
Case: smoker, Rank: 10
10911.493729619573


Case: non_smoker, Rank: 2
9804.053772739013
Case: non_smoker, Rank: 3
9951.007001392712
Case: non_smoker, Rank: 4
10318.997218918852
Case: non_smoker, Rank: 5
10368.097968798967
Case: non_smoker, Rank: 6
11052.380656889622
Case: non_smoker, Rank: 7
11466.931314924661
Case: non_smoker, Rank: 8
11658.787989949113
Case: non_smoker, Rank: 9
12539.298517160623
Case: non_smoker, Rank: 10
12572.558322527371


### SVD of both solutions

In [176]:
for case in ["smoker", "non_smoker"]:
    se_L = pd.read_csv(f'data/AGP/low_rank_{case}.csv', sep=',', index_col=0)
    gg_L = pd.DataFrame(low_rank_dict[case][selected_rank]["refit"]["L"], index=family_names, columns=family_names)

    # Perform Singular Value Decomposition (SVD) on both matrices
    U_se, s_se, Vt_se = np.linalg.svd(se_L)
    U_gg, s_gg, Vt_gg = np.linalg.svd(gg_L)

    # Compare eigenvalues
    print(f"Eigenvalues of se_L ({case}):")
    print(s_se)
    print(f"\nEigenvalues of gg_L ({case}):")
    print(s_gg)
    print(f"\nDifference between SE and GG ({case}):")
    print(np.linalg.norm(se_L.values - gg_L.values))
    
    # Plot the spectrum of eigenvalues
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=list(range(len(s_se))), y=s_se, mode='lines+markers', name=f'SE Eigenvalues ({case})'))
    fig.add_trace(go.Scatter(x=list(range(len(s_gg))), y=s_gg, mode='lines+markers', name=f'GG Eigenvalues ({case})'))

    fig.update_layout(
        title=f'Eigenvalue Spectrum Comparison ({case})',
        xaxis_title='Eigenvalue Index',
        yaxis_title='Eigenvalue',
        template='plotly_white'
    )

    fig.show()

Eigenvalues of se_L (smoker):
[4.81950257e-01 3.17211505e-01 2.39490164e-01 2.17153912e-01
 1.40416947e-01 9.41347012e-02 7.24883264e-02 6.73928338e-02
 3.08306066e-02 5.36903359e-03 4.28057817e-16 3.82752158e-16
 3.57328332e-16 3.34684189e-16 3.26563259e-16 2.95731216e-16
 2.82656519e-16 2.80128464e-16 2.65834802e-16 2.57032684e-16
 2.28009502e-16 2.26526398e-16 2.08508392e-16 1.83219080e-16
 1.76174948e-16 1.66337058e-16 1.61784510e-16 1.49742126e-16
 1.26011131e-16 1.11057731e-16 1.06098174e-16 9.51511228e-17
 9.26487331e-17 8.72575925e-17 5.73626500e-17 5.40587300e-17
 3.95695689e-17 2.57510519e-17 2.32278107e-17 1.59413943e-17]

Eigenvalues of gg_L (smoker):
[2.09826932e+00 1.31029128e+00 1.05315455e+00 1.01085471e+00
 8.68014879e-01 7.24686964e-01 5.96101775e-01 4.97170752e-01
 2.12852647e-01 1.11316091e-01 1.54238415e-16 1.54238415e-16
 1.54238415e-16 1.54238415e-16 1.54238415e-16 1.54238415e-16
 1.54238415e-16 1.54238415e-16 1.54238415e-16 1.54238415e-16
 1.54238415e-16 1.54238

Eigenvalues of se_L (non_smoker):
[4.86107316e-01 2.58969619e-01 2.45004652e-01 1.29174767e-01
 8.11749284e-02 5.67155277e-02 5.34183753e-02 3.03558096e-02
 2.61841384e-02 8.02153982e-03 4.73086299e-16 4.31750028e-16
 4.25039652e-16 3.86996688e-16 3.56778755e-16 3.40659694e-16
 3.26065441e-16 3.08015914e-16 2.85426458e-16 2.78331056e-16
 2.59240545e-16 2.46870231e-16 2.38536530e-16 2.28439343e-16
 2.05338178e-16 1.80378509e-16 1.68985546e-16 1.48929517e-16
 1.24268233e-16 1.21063045e-16 1.11132283e-16 1.06250611e-16
 9.31854329e-17 8.77561739e-17 6.30947167e-17 5.94350977e-17
 4.57180903e-17 4.18045432e-17 2.03577125e-17 1.06546310e-17]

Eigenvalues of gg_L (non_smoker):
[2.64653789e+00 1.71736165e+00 1.36126432e+00 1.13475226e+00
 1.08423325e+00 9.12400545e-01 7.11934763e-01 3.62149972e-01
 2.34200845e-01 2.55305077e-02 1.90076194e-16 1.90076194e-16
 1.90076194e-16 1.90076194e-16 1.90076194e-16 1.90076194e-16
 1.90076194e-16 1.90076194e-16 1.90076194e-16 1.90076194e-16
 1.90076194e-16