In [None]:
import os
import pandas as pd # type: ignore
import numpy as np # type: ignore
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pickle

from statsmodels.stats.multitest import multipletests
from tqdm.notebook import tqdm
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri, FloatVector


# Convert pandas.DataFrames to R dataframes automatically.
pandas2ri.activate()

try:
    os.chdir('/container/mount/point')
except FileNotFoundError:
    print("Warning: Directory '/container/mount/point' does not exist.")


from utils.helper import r_to_pandas, check_samples_overlap, generate_taxa_dict, calculate_obs_stat_and_pvalues
from utils.helper import perform_bh_correction_and_filter

In [2]:
def calculate_unadjusted_p_values(test_stats, obs_stat, test_type):
    """
    Calculate unadjusted p-values for observed test statistics using permutation test results.

    Parameters
    ----------
    test_stats : pd.DataFrame
        DataFrame of test statistics from permutations (rows: taxa/features, columns: permutations).
    obs_stat : pd.Series or pd.DataFrame
        Observed test statistics (index: taxa/features).
    test_type : str
        Type of test to perform. Currently supports "two-sided".

    Returns
    -------
    p_value : pd.DataFrame
        DataFrame of unadjusted p-values (index: taxa/features, column: "p-value").
    """
    if test_type == "two-sided":
        # two-sided test with absolute value tests both directions
        # calculate the proportion of |T_rand| >= |T_obs|
        p_value = test_stats.abs().ge(obs_stat.abs().values, axis="rows")
        p_value.index = obs_stat.index
        p_value = p_value.mean(axis="columns").sort_values().to_frame()
        p_value.columns = ["p-value"]

    return p_value

In [97]:
def min_unadjusted_p_values(test_stats):
    """
    Calculate the minimum unadjusted p-value for each permutation by treating each column as the observed statistic.

    For each permutation (column), treats its values as the observed statistics,
    compares against all other permutations, and computes the minimum p-value across all taxa/features.

    Parameters
    ----------
    test_stats : pd.DataFrame
        DataFrame of test statistics from permutations (rows: taxa/features, columns: permutations).

    Returns
    -------
    min_p_values : np.ndarray
        Array of minimum unadjusted p-values for each permutation.
    """
    min_p_values = []

    for i in test_stats.columns:
        # Choose random statistic as "observed" statistic
        obs_stat = test_stats.loc[:, i]

        # Select random statistics except the observed one
        t = test_stats.loc[:, test_stats.columns != i]

        # Select statistics greater or equal to the observed one
        t_comp = t.abs().ge(obs_stat.abs().values, axis="rows")

        # Calculate unadjusted p-values
        t_comp = t_comp.mean(axis="columns")

        # Save min p-value among all species
        min_p_values.append(min(t_comp))
        
    min_p_values = np.array(min_p_values)
    
    return min_p_values

In [107]:
def adjusted_p_values(min_p_values, p_value):
    """
    Calculate adjusted p-values using the distribution of minimum unadjusted p-values.

    For each observed p-value, computes the proportion of min_p_values less than or equal to it.

    Parameters
    ----------
    min_p_values : np.ndarray or list
        Array or list of minimum unadjusted p-values from permutations.
    p_value : pd.DataFrame
        DataFrame of observed unadjusted p-values (column: 'p-value').

    Returns
    -------
    adj_p_values : list
        List of adjusted p-values for each observed statistic.
    """
    adj_p_values = []
    p = p_value.shape[0]

    for i in range(p):
        adj = np.mean(min_p_values <= p_value[i])
        adj_p_values.append(adj)
    
    return adj_p_values

In [5]:
def perform_bh_correction_and_filter(stats, alpha=0.05):
    """
    Perform Benjamini-Hochberg correction on p-values and filter significant features.

    Parameters:
    - taxa (DataFrame): DataFrame containing taxonomic information.
    - stats (DataFrame): DataFrame containing statistical results with 'p_value' column.
    - alpha (float, optional): Threshold for significance. Default is 0.05.

    Returns:
    - DataFrame: Subset of the input DataFrame containing features with adjusted p-values <= alpha.
    """
    # Perform Benjamini-Hochberg correction
    adjusted_pvalues = sm.stats.multipletests(stats['p_value'], method='fdr_bh')[1]

    # Update the DataFrame with adjusted p-values
    stats['adjusted_pvalue'] = adjusted_pvalues
    stats['effect'] = np.sign(stats['obs_stat'])

    # Display the result
    da_species = stats[stats['adjusted_pvalue'] <= alpha]

    return da_species

In [122]:
def plot_histogram(data, bins, xlabel, ylabel, title):
    """
    Plot a histogram of the given data and return the matplotlib figure object.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The matplotlib figure object.
    """
    fig = plt.figure(figsize=(8, 5))
    plt.hist(data, bins=bins, color='skyblue', edgecolor='black')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.tight_layout()

    return fig

def plot_volcano(df, level, method):
    """
    Plot a volcano plot for differential abundance results.

    Returns
    -------
    fig : plotly.graph_objs._figure.Figure
        The plotly figure object.
    """
    cat_order = ["FDR<0.05", "0.05≤FDR<0.10", "0.10≤FDR<0.20", "NS"]
    color_map = {
        "FDR<0.05": "red",
        "0.05≤FDR<0.10": "orange",
        "0.10≤FDR<0.20": "gold",
        "NS": "lightgrey",
    }
    fig = px.scatter(
        df,
        x="T_obs",
        y="-log10_p",
        color="Significance",
        category_orders={"Significance": cat_order},
        color_discrete_map=color_map,
        hover_data=[level, "p_perm", method],
        title=f"{method}: Volcano plot of differential abundance results ({level})",
        labels={"T_obs": "Test statistic (T_obs)", "-log10_p": "-log10(p_perm)"},
    )
    for p_thr, clr in [(0.05, "blue"), (0.10, "purple"), (0.20, "green")]:
        fig.add_hline(
            y=-np.log10(p_thr),
            line_dash="dash",
            line_color=clr,
            annotation_text=f"p = {p_thr}",
            annotation_position="top left",
        )
    fig.update_traces(marker=dict(size=8, opacity=0.75))
    fig.update_layout(legend_title="Significance (by q-value)", width=900, height=640)

    return fig


In [6]:
utils = importr('utils')
devtools = importr('devtools')
linda = importr("LinDA")

### KORA Dataset

In [7]:
# Load your  matched sample dataframe and ASV table
kora_matched_df = pd.read_csv("data/smoking_KORA_experiment.csv", index_col=0)
asv = pd.read_csv("data/filtered_count_table.csv", index_col=0)
simulated_outcomes = pd.read_csv("data/simulated_KORA_outcomes.csv", index_col=0)
taxa = pd.read_csv('data/taxonomy_clean.csv', index_col=0)
print(f"ASV table shape (features, samples): {asv.shape}")

# Sort ASV columns to match sample order in kora_matched_df
sample_order = list(kora_matched_df.index.astype(str))
ASV_table = asv.reindex(sample_order, axis=1)

taxa_dict = {}
for level in ['domain', 'phylum', 'class', 'order', 'family', 'genus', 'species']:
    df_level = ASV_table.join(taxa[level])
    df_level = df_level.groupby(level).sum()
    taxa_dict[level] = df_level
taxa_dict["ASVs"] = ASV_table

for level in taxa_dict.keys():
    print(f"{level} count table shape: {taxa_dict[level].shape}")

with open("data/taxa_dict.pkl", "wb") as f:
    pickle.dump(taxa_dict, f)

ASV table shape (features, samples): (1469, 436)
domain count table shape: (2, 436)
phylum count table shape: (9, 436)
class count table shape: (15, 436)
order count table shape: (41, 436)
family count table shape: (80, 436)
genus count table shape: (401, 436)
species count table shape: (1354, 436)
ASVs count table shape: (1469, 436)


In [8]:
# Overlap 16S and samples
sample_ids = ASV_table.columns.astype(str)
matched_samples = kora_matched_df[kora_matched_df.index.astype(str).isin(sample_ids)]

#  true outcome vector
w = pd.DataFrame(matched_samples["W"].values, index=matched_samples.index, columns=["w"])

# Now you can run your downstream analysis, e.g. LinDA
linda_stats = calculate_obs_stat_and_pvalues(taxa_dict, w)
print("------------------------- LinDA is DONE ------------------------- \n")

domain
2 436
(436, 1)
phylum
9 436
(436, 1)
class
15 436
(436, 1)
order
41 436
(436, 1)
Pseudo-count approach is used.
family
80 436
(436, 1)
Pseudo-count approach is used.
genus
401 436
(436, 1)
Pseudo-count approach is used.
species
1354 436
(436, 1)
Pseudo-count approach is used.
ASVs
1469 436
(436, 1)
Pseudo-count approach is used.
------------------------- LinDA is DONE ------------------------- 



In [None]:
# --- Simulated outcomes ---
n_iter = 1000
alpha = 0.05
target_variable = "smoking_bin" 

for level, data in taxa_dict.items():
    p, N = data.shape
    if p < 20:
        continue

    print(f"Linda test for {level} level")

    test_stats = []
    pvalue_list = []
    W_frame = simulated_outcomes.iloc[:, -n_iter:]

    for i in tqdm(range(n_iter)):
        w_tmp = W_frame.iloc[:, i].to_frame(name="w")
        w_tmp.index = w_tmp.index.astype(str)
        w_tmp = w_tmp[w_tmp.index.isin(data.columns)]

        # apply linda
        lo_tmp = linda.linda(data, w_tmp, formula="~w", alpha=alpha, prev_cut=0, lib_cut=1)
        linda_out_tmp = r_to_pandas(lo_tmp.rx2("output"))
        out = linda_out_tmp['w']

        test_stats.append(out['stat'].values)
        pvalue_list.append(out['pvalue'].values)

    # Save results
    test_stats_df = pd.DataFrame(test_stats).T
    test_stats_df.to_csv(f'data/linda_{level}_{n_iter}_{target_variable}.csv')

    pvalue_df = pd.DataFrame(pvalue_list).T
    pvalue_df.to_csv(f'data/linda_pvalues_{level}_{n_iter}_{target_variable}.csv')

Linda test for order level


  0%|          | 0/1000 [00:00<?, ?it/s]

Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approac

  0%|          | 0/1000 [00:00<?, ?it/s]

Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approac

  0%|          | 0/1000 [00:00<?, ?it/s]

Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approac

  0%|          | 0/1000 [00:00<?, ?it/s]

Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approac

  0%|          | 0/1000 [00:00<?, ?it/s]

Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approach is used.
Pseudo-count approach is used.
Imputation approach is used.
Imputation approac

In [125]:
def plot_volcano(df, level, method):
    """
    Plot a volcano plot for differential abundance results.

    Returns
    -------
    fig : plotly.graph_objs._figure.Figure
        The plotly figure object.
    """
    cat_order = ["FDR<0.05", "0.05≤FDR<0.10", "0.10≤FDR<0.20", "NS"]
    color_map = {
        "FDR<0.05": "red",
        "0.05≤FDR<0.10": "orange",
        "0.10≤FDR<0.20": "gold",
        "NS": "lightgrey",
    }
    # Set marker symbol based on method
    symbol = "circle" if method == "q_perm_BH" else "square"

    fig = px.scatter(
        df,
        x="T_obs",
        y="-log10_p",
        color="Significance",
        category_orders={"Significance": cat_order},
        color_discrete_map=color_map,
        hover_data=[level, "p_perm", method],
        title=f"{method}: Volcano plot of differential abundance results ({level})",
        labels={"T_obs": "Test statistic (T_obs)", "-log10_p": "-log10(p_perm)"},
        symbol_sequence=[symbol]
    )
    for p_thr, clr in [(0.05, "blue"), (0.10, "purple"), (0.20, "green")]:
        fig.add_hline(
            y=-np.log10(p_thr),
            line_dash="dash",
            line_color=clr,
            annotation_text=f"p = {p_thr}",
            annotation_position="top left",
        )
    fig.update_traces(marker=dict(size=8, opacity=0.75))
    fig.update_layout(legend_title="Significance (by q-value)", width=900, height=640)

    return fig

levels = ["order", "family", "genus", "species", "ASVs"]
n_iter = 1000
target_variable = "smoking_bin"

for level in levels:
    true_pvalue = linda_stats[level]['p_value']
    hist_fig_pvalue = plot_histogram(true_pvalue.values, bins=20, xlabel='p-value', ylabel='Frequency', title=f'Histogram of true p-values {level}')
    hist_fig_pvalue.savefig(f"results/histogram_true_{level}_{n_iter}_{target_variable}.png")
    plt.close(hist_fig_pvalue)

    stats_df = pd.read_csv(f"data/linda_{level}_{n_iter}_{target_variable}.csv", index_col=0)
    T_obs = linda_stats[level]['obs_stat'].values
    T_perm = stats_df.to_numpy()
    B = T_perm.shape[1]

    abs_obs = np.abs(T_obs)[:, None]
    abs_perm = np.abs(T_perm)
    perm_p = (1 + (abs_perm >= abs_obs).sum(axis=1)) / (1 + B)

    hist_fig_permuted = plot_histogram(perm_p, bins=20, xlabel='p-value', ylabel='Frequency', title=f'Histogram of permuted p-values {level}')
    hist_fig_permuted.savefig(f"results/histogram_permuted_{level}_{n_iter}_{target_variable}.png")
    plt.close(hist_fig_permuted)

    rej_bh, qvals_perm, _, _ = multipletests(perm_p, alpha=0.05, method='fdr_bh')
    ### Lee et al. method
    min_p_values = min_unadjusted_p_values(stats_df)
    adj_p_values = adjusted_p_values(min_p_values, true_pvalue)

    out = pd.DataFrame({
        "T_obs": T_obs,
        "p_true": true_pvalue.values,
        "p_perm": perm_p,
        "q_perm_BH": qvals_perm,
        "q_perm_Lee": adj_p_values
    }, index=true_pvalue.index)
    out.to_csv(f"results/permutation_null_{level}_{n_iter}_{target_variable}.csv")

    out_sorted = out.sort_values("q_perm_Lee")
    df = out_sorted.copy().reset_index().rename(columns={"index": level})
    df["-log10_p"] = -np.log10(df["p_perm"])

    # Precompute significance categories for both methods to avoid repeated code
    significance_methods = {
        "q_perm_BH": df["q_perm_BH"].values,
        "q_perm_Lee": df["q_perm_Lee"].values
    }
    for method, q in significance_methods.items():
        sig_count = (q < 0.05).sum()
        print(f"{level} level: {sig_count} significant taxa at FDR<0.05 by {method} method")

        conds = [
            q < 0.05,
            (q >= 0.05) & (q < 0.10),
            (q >= 0.10) & (q < 0.20),
        ]
        choices = ["FDR<0.05", "0.05≤FDR<0.10", "0.10≤FDR<0.20"]
        df["Significance"] = np.select(conds, choices, default="NS").astype(str)
        df["Significance"] = df["Significance"].fillna("NS").astype(str)

        fig = plot_volcano(df, level, method)
        fig.show()
        fig.write_image(f"plots/png/DA_{level}_{method}_KORA_smoking.png")
        fig.write_image(f"plots/svg/DA_{level}_{method}_KORA_smoking.svg")


order level: 3 significant taxa at FDR<0.05 by q_perm_BH method


order level: 1 significant taxa at FDR<0.05 by q_perm_Lee method


family level: 0 significant taxa at FDR<0.05 by q_perm_BH method


family level: 0 significant taxa at FDR<0.05 by q_perm_Lee method


genus level: 0 significant taxa at FDR<0.05 by q_perm_BH method


genus level: 0 significant taxa at FDR<0.05 by q_perm_Lee method


species level: 0 significant taxa at FDR<0.05 by q_perm_BH method


species level: 0 significant taxa at FDR<0.05 by q_perm_Lee method


ASVs level: 0 significant taxa at FDR<0.05 by q_perm_BH method


ASVs level: 0 significant taxa at FDR<0.05 by q_perm_Lee method


In [184]:
all_taxa = []

for level in levels:
    # Load results for this level
    out = pd.read_csv(f"results/permutation_null_{level}_{n_iter}_{target_variable}.csv", index_col=0)
    # Prepare a dataframe with taxa name and adjusted p-values
    df_level = pd.DataFrame({
        "taxa": out.index,
        f"{level}_Lee": out["q_perm_Lee"].values,
        f"{level}_BH": out["q_perm_BH"].values
    })

    all_taxa.append(df_level)

# Concatenate all levels
taxa_df = pd.concat(all_taxa, ignore_index=True)

# # If you want a wide format with taxa as rows and columns for each method/level:
taxa_df_wide = taxa_df.melt(id_vars="taxa", var_name="method_level", value_name="adj_p_value")
taxa_df_wide = taxa_df_wide.pivot_table(index="taxa", columns="method_level", values="adj_p_value")

filtered = taxa_df_wide[(taxa_df_wide < 0.2).any(axis=1)]

filtered.to_csv("data/filtered_taxa_smoking.csv")

matched_names = [
    next((name for name in taxa["name"].values if value in name), None)
    for value in filtered.index
]

filtered.loc[:, "matched_taxa_name"] = matched_names

# Join with suffix to avoid column overlap
filtered = filtered.join(taxa['name'].rename('taxa_name'), how="left")

# Combine columns for full name
filtered.loc[:, 'full_name'] = filtered['matched_taxa_name'].combine_first(filtered['taxa_name'])

filtered = filtered.reset_index(drop=True)
filtered = filtered.set_index('full_name')
filtered = filtered.iloc[:, :-3]

filtered.to_csv("data/filtered_with_matched_taxa_names_smoking.csv")

filtered



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,ASVs_BH,ASVs_Lee,family_BH,family_Lee,genus_BH,genus_Lee,order_BH,order_Lee,species_BH
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
d__Bacteria; p__Bacteroidota; c__Bacteroidia; o__Bacteroidales; f__Tannerellaceae; g__Parabacteroides;s__unknown_905;,0.183442,0.981,,,,,,,
d__Bacteria; p__Firmicutes; c__Clostridia; o__Lachnospirales; f__Lachnospiraceae; g__Lachnospiraceae_NK4A136_group;s__unknown_45;,0.183442,0.891,,,,,,,
d__Bacteria; p__Firmicutes; c__Negativicutes; o__Veillonellales-Selenomonadales; f__Veillonellaceae; g__Megasphaera;s__unknown_549;,0.183442,1.000,,,,,,,
d__Bacteria; p__Bacteroidota; c__Bacteroidia; o__Bacteroidales; f__Tannerellaceae; g__Parabacteroides;s__unknown_2473;,0.183442,1.000,,,,,,,
d__Bacteria; p__Firmicutes; c__Clostridia; o__Oscillospirales; f__Oscillospiraceae; g__UCG-002;s__unknown_75;,0.183442,0.891,,,,,,,
...,...,...,...,...,...,...,...,...,...
d__Bacteria; p__Cyanobacteria; c__Vampirivibrionia; o__Gastranaerophilales; f__Gastranaerophilales; g__Gastranaerophilales;s__unknown_564;,,,,,,,,,0.169081
d__Bacteria; p__Firmicutes; c__Clostridia; o__Oscillospirales; f__Oscillospiraceae; g__UCG-002;s__unknown_75;,,,,,,,,,0.169081
d__Bacteria; p__Firmicutes; c__Clostridia; o__Oscillospirales; f__Oscillospiraceae; g__UCG-002;s__unknown_76;,,,,,,,,,0.169081
d__Bacteria; p__Firmicutes; c__Clostridia; o__Oscillospirales; f__Oscillospiraceae; g__Oscillospira;s__unknown_853;,,,,,,,,,0.169081
