# Visualize taxonomy and alpha/beta diversities

## Platform dependent part
- Resolve platform setup
- The differences from local imports should be resolved by setting up the Blue Cloud VRE properly; Colab will still be an issue.

In [None]:
import sys
import os
import io

# Put the parent of the current working directory on the import path
# (presumably where the local `utils` module lives — confirm against repo layout).
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
# Must stay after the sys.path change above so that `utils` can be resolved.
from utils import init_setup
# Run the platform setup routine provided by `utils`.
init_setup()

## Imports

In [None]:
# These imports need to be repeated here for the Panel dashboard to work (weird)
# TODO: report as possible bug
import sys
import os
import io

# import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
import panel as pn

from skbio.diversity import beta_diversity
from skbio.stats.ordination import pcoa

# All low level functions are imported from the momics package
from momics.loader import load_parquets
import momics.plotting as pl
from momics.panel_utils import diversity_select_widgets, create_indicators
from momics.utils import memory_load

# Note: This is breaking the panel preview functionality
# %load_ext autoreload
# %autoreload 2

## Loading

In [6]:
@pn.cache()
def get_data(folder):
    """Return the result of ``load_parquets(folder)``.

    The function is wrapped in ``pn.cache()`` so the loader is not re-run
    for a folder that has already been requested.
    """
    parquet_tables = load_parquets(folder)
    return parquet_tables

@pn.cache()
def get_metadata(
    folder,
    sample_file="Batch1and2_combined_logsheets_2024-09-11.csv",
    observatory_file="Observatory_combined_logsheets_validated.csv",
):
    """Load and combine the sample and observatory logsheets.

    Parameters
    ----------
    folder : str
        Directory containing both CSV files.
    sample_file : str, optional
        File name of the sample logsheet inside ``folder``. Defaults to the
        combined batch 1+2 logsheet that was previously hard-coded.
    observatory_file : str, optional
        File name of the observatory logsheet inside ``folder``. Defaults to
        the validated observatory logsheet that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Inner join of the two tables on ``obs_id`` and ``env_package``,
        sorted by ``ref_code`` in ascending order.
    """
    sample_metadata = pd.read_csv(os.path.join(folder, sample_file))
    observatory_metadata = pd.read_csv(os.path.join(folder, observatory_file))

    # Inner join: only samples whose observatory/environment pair is present
    # in both logsheets survive the merge.
    merged = sample_metadata.merge(
        observatory_metadata,
        on=["obs_id", "env_package"],
        how="inner",
    )

    return merged.sort_values(by="ref_code", ascending=True)

In [7]:
# Locate the repository root.
# Colab is detected through `sys.modules` rather than `get_ipython()`: the
# latter only exists inside an IPython session and raises NameError when this
# code is executed as a plain Python script.
if 'google.colab' in sys.modules:
    root_folder = os.path.abspath('/content/momics-demos')
else:
    root_folder = os.path.abspath('../')


# Parquet input files and static assets, both resolved from the root folder
data_folder = os.path.join(root_folder, 'data/parquet_files')
assets_folder = os.path.join(root_folder, 'assets')


# Load the parquet tables (cached by `get_data`)
mgf_parquet_dfs = get_data(data_folder)

In [None]:
# Interactive check: show the type of the object returned by `get_data`
type(mgf_parquet_dfs)

In [9]:
# Merged sample + observatory metadata (see `get_metadata`)
full_metadata = get_metadata(data_folder)

# Columns stored with int64/float64 dtype are treated as numerical
numerical_columns = sorted(
    full_metadata.select_dtypes(include=['int64', 'float64']).columns
)

# Columns stored with object dtype are treated as categorical
categorical_columns = sorted(
    full_metadata.select_dtypes(include=['object']).columns
)

# Sanity check: every metadata column must land in exactly one of the two groups
assert len(numerical_columns) + len(categorical_columns) == len(full_metadata.columns)

# Debugging helpers, uncomment to inspect:
# print(f"Data table names are:\n{mgf_parquet_dfs.keys()}")
# print(f"Categorical metadata columns are:\n{categorical_columns}")
# print(f"Numerical metadata columns are:\n{numerical_columns}")
# mgf_parquet_dfs['SSU'].sort_values(by='abundance', ascending=False)

In [None]:
# Work on a copy of the SSU table so the cached original is not modified
df = mgf_parquet_dfs['SSU'].copy()
# Number of distinct values in the two candidate sample-identifier columns
df['ref_code'].nunique(), df['reads_name'].nunique()

`ref_code` and `reads_name` have the same number of unique values, so I will use `ref_code`.

## Development of the beta diversity part

In [11]:
# TODO: link these functions to the indicator
# TODO: put them in the momics package
def get_missing_taxa(df):
    """Print, for each taxonomic rank, how many rows of ``df`` have no value.

    The ranks are reported from ``superkingdom`` down to ``species``; the
    per-rank count comes from ``get_missing_taxa_single``. Nothing is returned.
    """
    ranks = (
        "superkingdom",
        "kingdom",
        "phylum",
        "class",
        "order",
        "family",
        "genus",
        "species",
    )
    for taxon in ranks:
        unclassified = get_missing_taxa_single(df, taxon)
        print(f'Not classified on {taxon}: {unclassified}')

def get_missing_taxa_single(df, taxon):
    """Return the number of rows in ``df`` whose ``taxon`` column is null."""
    null_mask = df[taxon].isnull()
    # Summing a boolean mask counts its True entries; cast to a plain int.
    return int(null_mask.sum())

## App setup

In [None]:
# Initialise Panel with the Tabulator extension (needed for the data table widget)
pn.extension("tabulator")
# Accent colour passed to the template
ACCENT = "teal"

# CSS styles handed to the selector widgets and to the tab container
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

# TODO: there is a bug in the panel library that does not allow opening png files; renaming does not help
# Logo shown at the top of the sidebar
image = pn.pane.JPG(os.path.join(assets_folder, "figs/metaGOflow_logo_italics.jpg"),
                    width=200, height=100)

# Selector widgets built from the metadata column names; the first two drive
# the alpha-diversity plots, the remaining three the beta-diversity plots.
(select_table, select_cat_factor, 
 select_table_beta, select_taxon,
 select_beta_factor,
 ) = diversity_select_widgets(categorical_columns, numerical_columns, styles)

# Indicator row displayed above the tabs
indicators = create_indicators()
# Memory usage snapshot taken once, when this cell runs
used_gb, total_gb = memory_load()
# Experimental RAM indicator; only referenced by the commented-out code below
indicator_test = pn.FlexBox(
    pn.indicators.Number(
        value=used_gb, name="RAM usage [GB]",
        format="{value:,.1f}",
    ),
)

# TODO: learn the periodic callbacks
# indicators.append(indicator_test)
# cb = pn.state.add_periodic_callback(memory_load, 200, timeout=5000)
# toggle = pn.widgets.Toggle(name='Toggle callback', value=True)
# toggle.link(cb, bidirectional=True, value='running')

# Alpha diversity: both plots depend on the alpha table and factor selectors
bplot_alpha = pn.bind(
    pl.alpha_plot,
    table_list=mgf_parquet_dfs,
    table_name=select_table,
    factor=select_cat_factor,
    metadata=full_metadata,
)

bplot_av_alpha = pn.bind(
    pl.av_alpha_plot,
    table_list=mgf_parquet_dfs,
    table_name=select_table,
    factor=select_cat_factor,
    metadata=full_metadata,
)


# Beta diversity heatmap: depends on the beta table and taxon selectors
bplot_beta_heatmap = pn.bind(
    pl.beta_plot,
    table_list=mgf_parquet_dfs,
    table_name=select_table_beta,
    taxon=select_taxon,
)

# PCoA plot: same selectors as the heatmap plus the beta factor selector
bplot_beta_pc = pn.bind(
    pl.beta_plot_pc,
    table_list=mgf_parquet_dfs,
    metadata=full_metadata,
    table_name=select_table_beta,
    taxon=select_taxon,
    factor=select_beta_factor,
)

# Interactive table of `df` (the SSU table copy created earlier in the notebook)
atable = pn.widgets.Tabulator(df, sizing_mode="stretch_both", name="Data View")

# One tab per bound plot plus the raw data table.
# The second title previously read 'Av Aplpha div.' (typo in the visible label).
tabs = pn.Tabs(
    ('Alpha div.', bplot_alpha),
    ('Av Alpha div.', bplot_av_alpha),
    ('Beta div.', bplot_beta_heatmap),
    ('PCoA', bplot_beta_pc),
    atable,
    styles=styles, sizing_mode="stretch_width", height=500, margin=10
)

# Assemble the dashboard: selectors in the sidebar, indicators and tabs in the main area.
# NOTE(review): meta_refresh="2" presumably makes the browser reload the page
# every 2 seconds — confirm this is intended and not a debugging leftover.
template = pn.template.FastListTemplate(
    title="Diversity Analysis",
    sidebar=[image,
            #  toggle,
             "# Alpha diversity", select_table, select_cat_factor,
             pn.layout.Divider(),
             "# Beta diversity", select_table_beta, select_taxon, select_beta_factor,
             ],
    main=[pn.Column(indicators,
                    tabs,
                    sizing_mode="stretch_both",
                   )],
    main_layout=None,
    accent=ACCENT,
    meta_refresh="2",
)

# Mark the template as the servable root of the app
template.servable()