# Visualize taxonomy and alpha/beta diversities

## Platform dependent part
- Resolve platform setup
- The differences from local imports should be resolved by configuring the Blue Cloud VRE properly; Colab will still be an issue.

In [None]:
import sys
import os
import logging
from IPython import get_ipython
# Logger shared by the cells of this notebook.
logger = logging.getLogger(name="Diversity analysis app")

if 'google.colab' in str(get_ipython()):
    print('Setting Google colab, you will need a ngrok account to make the dashboard display over the tunnel. \
    https://ngrok.com/')
    # Clone the momics-demos repository to use it to load data.
    # os.system() reports a failed command through its return value rather
    # than by raising, so the exit status has to be checked explicitly
    # (git also exits non-zero when the target directory already exists).
    try:
        clone_status = os.system('git clone https://github.com/palec87/momics-demos.git')
    except OSError as e:
        logger.info(f"An error occurred while cloning the repository: {e}")
    else:
        if clone_status == 0:
            logger.info("Repository cloned")
        else:
            logger.warning(
                f"An error occurred while cloning the repository: exit status {clone_status}"
            )

    sys.path.insert(0,'/content/momics-demos')

    # this step takes time because of many dependencies
    pip_status = os.system('pip install marine-omics')
    if pip_status != 0:
        logger.warning(f"'pip install marine-omics' failed with exit status {pip_status}")


from momics.utils import (
    memory_load, reconfig_logger,
    init_setup, get_notebook_environment,
)

# Set up logging (reconfig_logger is imported from momics.utils)
reconfig_logger()

# Determine the notebook environment; `env` is later handed to serve_app()
env = get_notebook_environment()

# NOTE(review): what init_setup() initialises is not visible from this
# notebook — confirm before reordering these calls.
init_setup()
logger.info(f"Environment: {env}")

## Imports

In [None]:
# This needs to be repeated here for the Panel dashboard to work, WEIRD
# TODO: report as possible bug
import warnings
import holoviews as hv
from skbio.stats.ordination import pcoa

from functools import partial
warnings.filterwarnings('ignore')

import pandas as pd
import panel as pn
from dotenv import load_dotenv
load_dotenv()

import momics.plotting as pl
from momics.panel_utils import (
    diversity_select_widgets, create_indicators_diversity,
    serve_app, close_server,
)
from momics.diversity import (
    beta_diversity_parametrized,
)
from momics.utils import load_and_clean, taxonomy_common_preprocess01


## Settings

In [None]:
# When True, the names of the loaded tables and the detected metadata
# columns are written to the log after loading.
DEBUG = True  # enable stdout logging

## Loading

In [None]:
# Repository root: the cloned repository on Colab, the parent directory otherwise.
_running_in_colab = 'google.colab' in str(get_ipython())
root_folder = os.path.abspath('/content/momics-demos' if _running_in_colab else '../')

# Folder with static assets, located directly under the repository root.
assets_folder = os.path.join(root_folder, 'assets')

In [None]:
def get_valid_samples():
    """Read ``data/shipment_b1b2_181.csv`` into a DataFrame.

    The file is looked up relative to the module-level ``root_folder``.

    Returns:
        pandas.DataFrame: the CSV content as parsed by ``pd.read_csv``.
    """
    csv_path = os.path.join(root_folder, 'data/shipment_b1b2_181.csv')
    return pd.read_csv(csv_path)

valid_samples = get_valid_samples()

In [None]:
# High level function from the momics.utils module.
# It yields the metadata table and a mapping of named data tables; the
# 'lsu' and 'ssu' entries of that mapping are used by the dashboard.
full_metadata, mgf_parquet_dfs = load_and_clean(valid_samples=valid_samples)

In [None]:
# Split the metadata columns by dtype; each list is sorted by column name.
_dtype_groups = {
    "categorical": ['object', "boolean"],
    "numerical": ['int64', 'float64'],
}
categorical_columns, numerical_columns = (
    sorted(full_metadata.select_dtypes(include=dtypes).columns)
    for dtypes in _dtype_groups.values()
)

# Optional summary of what has been loaded.
if DEBUG:
    for _message in (
        f"Data table names are:\n{mgf_parquet_dfs.keys()}",
        f"Categorical metadata columns are:\n{categorical_columns}",
        f"Numerical metadata columns are:\n{numerical_columns}",
    ):
        logger.info(_message)

In [None]:
# Independent copies of the 'lsu' and 'ssu' tables used by the beta-diversity views.
tables = {name: mgf_parquet_dfs[name].copy() for name in ("lsu", "ssu")}

# Processed taxonomy table; it starts out empty.
TAXONOMY = pd.DataFrame()
TAXONOMY_RANKS = [
    'superkingdom',
    'kingdom',
    'phylum',
    'class',
    'order',
    'family',
    'genus',
    'species',
]

## APP setup

In [None]:
# Load the Panel and HoloViews extensions the dashboard relies on.
pn.extension("tabulator")
hv.extension("bokeh", "plotly")
if 'google.colab' in str(get_ipython()):
    # On Colab, Panel is additionally configured with the 'colab' comms.
    pn.extension(comms='colab')
# Accent colour passed to the template.
ACCENT = "teal"

# CSS styles applied to the tabs container.
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

# Logo shown at the top of the sidebar.
# TODO: there is a bug in the panel library that does not allow to open png files, renaming does not help
image = pn.pane.JPG(os.path.join(assets_folder, "figs/metaGOflow_logo_italics.jpg"),
                    width=200,
                    height=100,
                    )

# Selection widgets built by momics from the metadata column names.
select_table, select_cat_factor, select_table_beta, select_taxon, select_beta_factor, beta_norm = (
    diversity_select_widgets(categorical_columns, numerical_columns)
)
select_beta_factor.value = 'season'

# Replace the table selector for beta diversity: only the tables kept in
# `tables` are offered.
select_table_beta = pn.widgets.Select(
    name='Select table for beta diversity', options=list(tables.keys()), value='ssu',
)

# Ordering of the alpha diversity plot.
sort_alpha = pn.widgets.RadioBoxGroup(
    name='Sort by', options=['factor', 'values'], value='factor', inline=True,
)

# Plotting backend used by the diversity plots.
backend = pn.widgets.RadioBoxGroup(
    name='Backend', options=['matplotlib', 'hvplot'], value='hvplot', inline=True,
)

# Memory usage indicators, refreshed by update_used_gb().
progress_bar, indicator_usage = create_indicators_diversity()

def update_used_gb(event):
    """Refresh the memory indicators from ``memory_load()``.

    Sets ``progress_bar`` to the used share of memory as an integer
    percentage and ``indicator_usage`` to the used amount.

    Args:
        event: Trigger object; a falsy value turns the call into a no-op.
    """
    if event:
        used, total = memory_load()
        progress_bar.value = int(used / total * 100)
        indicator_usage.value = used

## Alpha diversity tab

In [None]:
# Arguments shared by both alpha-diversity plots; the widget arguments are
# resolved to their current values by pn.bind.
_alpha_bindings = dict(
    tables_dict=mgf_parquet_dfs,
    table_name=select_table,
    factor=select_cat_factor,
    metadata=full_metadata,
    order=sort_alpha,
    backend=backend,
)

bplot_alpha = pn.bind(pl.alpha_plot, **_alpha_bindings)
bplot_av_alpha = pn.bind(pl.av_alpha_plot, **_alpha_bindings)

# Beta-diversity heatmap, bound to the beta-diversity selectors.
bplot_beta_heatmap = pn.bind(
    pl.beta_plot,
    tables_dict=tables, table_name=select_table_beta,
    norm=beta_norm, taxon=select_taxon, backend=backend,
)

## PCoA tab

In [None]:
def update_beta_pc_plot(tables_dict, metadata, table_name, taxon, factor):
    """Redraw the beta-diversity PCoA pane and the explained-variance indicator.

    While the module-level ``TAXONOMY`` frame is empty, plotting is delegated
    to ``pl.beta_plot_pc``. Once a taxonomy has been processed, a Bray-Curtis
    beta diversity is computed from ``TAXONOMY``, ordinated with PCoA, joined
    with the metadata on the index and plotted with
    ``pl.hvplot_plot_pcoa_black``.

    Args:
        tables_dict: Mapping of data tables; only used while ``TAXONOMY`` is empty.
        metadata: Metadata frame, joined with the PCoA sample coordinates on its index.
        table_name: Name of the table to plot; only used while ``TAXONOMY`` is empty.
        taxon: Taxonomic level passed to the diversity computation.
        factor: Metadata column used to colour the points.

    Raises:
        ValueError: If the PCoA sample index and the metadata index contain
            different sets of labels.
    """
    if TAXONOMY.empty:
        plot, explained_var = pl.beta_plot_pc(
            tables_dict=tables_dict,
            metadata=metadata,
            table_name=table_name,
            factor=factor,
            taxon=taxon,
        )
    else:
        beta = beta_diversity_parametrized(
            TAXONOMY, taxon=taxon, metric="braycurtis"
        )
        pcoa_result = pcoa(beta, method="eigh")  # , number_of_dimensions=3)

        # proportion_explained is labelled by axis name, so the first two axes
        # are taken by position with .iloc; plain [0] / [1] on a label-indexed
        # Series is deprecated positional fallback in pandas.
        proportions = pcoa_result.proportion_explained
        explained_var = (proportions.iloc[0], proportions.iloc[1])

        if set(pcoa_result.samples.index) != set(metadata.index):
            raise ValueError("Metadata index name does not match PCoA result.")

        pcoa_df = pd.merge(
            pcoa_result.samples,
            metadata,
            left_index=True,
            right_index=True,
            how="inner",
        )
        plot = pl.hvplot_plot_pcoa_black(
            pcoa_df, color_by=factor, explained_variance=explained_var
        )

    beta_pc_plot.object = plot
    explained_var_indicator.value = sum(explained_var) * 100  # convert to percentage


# Pane that update_beta_pc_plot() fills with the PCoA plot.
beta_pc_plot = pn.pane.HoloViews(
    name="Beta PCoA", sizing_mode="stretch_both", width=800, height=600,
)

# Re-run update_beta_pc_plot whenever one of the bound widgets changes.
_pcoa_inputs = dict(
    tables_dict=tables,
    metadata=full_metadata,
    table_name=select_table_beta,
    taxon=select_taxon,
    factor=select_beta_factor,
)
pn.bind(update_beta_pc_plot, watch=True, **_pcoa_inputs)

# Indicator of the variance explained by the first two principal coordinates.
explained_var_indicator = pn.indicators.Number(
    name='Explained variance by PC1 + PC2',
    value=0,
    format='{value:.1f}%',
    font_size='20pt',
    title_size='12pt',
    colors=[(33, 'red'), (50, 'gold'), (66, 'green')],
)

# Inputs of the taxonomy pre-processing step.
mapping = pn.widgets.Checkbox(
    name="strict mapping to selected taxonomic level (takes time)", value=True,
)

low_prevalence_cutoff = pn.widgets.FloatInput(
    name='Low prevalence cutoff [%]',
    value=10,
    step=1,
    start=0,
    end=100,
    description="Percentage of samples in which the taxon must be present not to be removed.",
)

button_process_taxonomy = pn.widgets.Button(
    name="Process taxonomy",
    button_type="primary",
    description="This will process the taxonomy and update the plots.",
    width=200,
)

# Status text, overwritten once a taxonomy has been processed.
taxonomy_process_status = pn.pane.Markdown("""No processed taxonomy yet.""", hard_line_break=True)

## Pre-process taxonomy

In [None]:
def process_taxonomy(table, high_taxon, mapping, prevalence_cutoff_value):
    """Pre-process the taxonomy of one table and refresh the PCoA plot.

    The module-level ``TAXONOMY`` is cleared, then replaced by the result of
    ``taxonomy_common_preprocess01``. Afterwards the status text is updated
    and the PCoA plot is redrawn from the current widget values.

    Args:
        table: Key of the table in ``tables`` to process.
        high_taxon: Taxon argument forwarded to the pre-processing.
        mapping: Strict-mapping flag forwarded to the pre-processing.
        prevalence_cutoff_value: Low prevalence cutoff forwarded to the pre-processing.
    """
    global TAXONOMY
    # Clear the previous result first, so a failing run leaves no stale taxonomy.
    TAXONOMY = pd.DataFrame()
    selected_table = tables[table]

    TAXONOMY = taxonomy_common_preprocess01(
        selected_table, high_taxon, mapping, prevalence_cutoff_value, TAXONOMY_RANKS
    )

    status_text = f"""
        Processed taxonomy for table: {table} with high taxon: {high_taxon} (strict mapping: {mapping})
        and low prevalence cutoff: {prevalence_cutoff_value}% of abundance.
        Number of taxa after processing: {TAXONOMY.shape[0]}.
        """
    taxonomy_process_status.object = status_text

    # Redraw with whatever is currently selected in the widgets.
    current_selection = dict(
        table_name=select_table_beta.value,
        taxon=select_taxon.value,
        factor=select_beta_factor.value,
    )
    update_beta_pc_plot(tables_dict=tables, metadata=full_metadata, **current_selection)

In [None]:
def _on_process_taxonomy_click(event):
    """Run process_taxonomy() with the current widget values."""
    return process_taxonomy(
        select_table_beta.value,
        select_taxon.value,
        mapping.value,
        low_prevalence_cutoff.value,
    )


button_process_taxonomy.on_click(_on_process_taxonomy_click)

## Table view

In [None]:
# Tabular view of the 'ssu' table; its name is used as the tab title.
atable = pn.widgets.Tabulator(
    tables['ssu'], name="Data View", sizing_mode="stretch_width", height=600,
)

## Tabs

In [None]:
def _scrollable_column(*contents):
    """Stack *contents* in a scrollable column with height 600."""
    return pn.Column(*contents, height=600, scroll=True)


average_alpha_tab = _scrollable_column(bplot_av_alpha)

# Heatmap together with its normalisation control.
heatmap_tab = _scrollable_column(beta_norm, bplot_beta_heatmap)

pcoa_tab = _scrollable_column(explained_var_indicator, beta_pc_plot)

_tab_items = [
    ('Alpha div.', bplot_alpha),
    ('Av Alpha div.', average_alpha_tab),
    ('Beta div.', heatmap_tab),
    ('PCoA', pcoa_tab),
    atable,
]
tabs = pn.Tabs(*_tab_items, styles=styles, margin=10)

## APP

In [None]:
def app():
    """Assemble the dashboard template.

    Registers a periodic callback that refreshes the memory indicators, adds
    a toggle bound to the callback's ``running`` state, and arranges the
    sidebar widgets and the main tabs in a ``FastListTemplate``.

    Returns:
        pn.template.FastListTemplate: The assembled template (not yet served).
    """
    # update_used_gb() refreshes both the progress bar and the usage number on
    # every call, so a single periodic callback is enough; registering one per
    # indicator only repeated the same work.
    memory_cb = pn.state.add_periodic_callback(
        partial(update_used_gb, indicator_usage),
        period=1000,
        timeout=None,
    )
    toggle = pn.widgets.Toggle(name='Toggle callback', value=True)
    toggle.link(memory_cb, bidirectional=True, value='running')

    indicators = pn.FlexBox(
        progress_bar, indicator_usage, toggle)

    template = pn.template.FastListTemplate(
        title="Diversity Analysis",
        sidebar=[
            image,
            "# Alpha diversity", select_table, select_cat_factor, sort_alpha, backend,
            pn.layout.Divider(),
            "# Beta diversity", select_table_beta, select_taxon, select_beta_factor,
            pn.layout.Divider(),
            mapping,
            low_prevalence_cutoff,
            button_process_taxonomy,
            taxonomy_process_status,
        ],
        main=[pn.Column(
                indicators,
                tabs,
                ),
        ],
        main_layout=None,
        accent=ACCENT,
    )
    return template

template = app()

# Draw the initial PCoA plot. The watcher bound to the selectors only fires
# when a widget value changes, so the update is called once directly with the
# current selections. Flipping `select_beta_factor` between its first two
# options instead replaced the 'season' default with the first option, could
# run the update twice, and failed when fewer than two options were available.
update_beta_pc_plot(
    tables_dict=tables,
    metadata=full_metadata,
    table_name=select_table_beta.value,
    taxon=select_taxon.value,
    factor=select_beta_factor.value,
)

if 'google.colab' in str(get_ipython()):
    # On Colab the app is served through momics' serve_app helper; keep the
    # handle so the server can be closed later.
    s = serve_app(template, env=env, name="diversity_analysis")
else:
    template.servable()

### Uncomment this if you are running an ngrok tunnel that you want to close

In [None]:
# only use for the ngrok tunnel in GColab
# close_server(s, env=env)