# Panel dashboard to query and display MGnify data

## Platform dependent part
- Resolve platform setup
- The differences from a local setup (imports, paths) should be resolved by configuring the Blue Cloud VRE properly; Colab will still be an issue.

In [None]:
import sys
import os
import logging
from IPython import get_ipython
logger = logging.getLogger(name="MGnify API")

if 'google.colab' in str(get_ipython()):
    print('Setting Google colab, you will need a ngrok account to make the dashboard display over the tunnel. \
    https://ngrok.com/')
    # Clone the momics-demos repository to use it to load data.
    # os.system() does not raise when the command fails; it returns the exit
    # status, so the status has to be inspected to know whether the clone worked.
    clone_status = os.system('git clone https://github.com/palec87/momics-demos.git')
    if clone_status == 0:
        logger.info("Repository cloned")
    else:
        # A non-zero status is also what git reports when the target folder
        # already exists (e.g. the cell is re-run), hence a warning, not a crash.
        logger.warning(
            f"An error occurred while cloning the repository: exit status {clone_status}"
        )

    # Make the cloned repository importable.
    sys.path.insert(0,'/content/momics-demos')

    # this step takes time because of many dependencies
    os.system('pip install marine-omics')

from momics.utils import (
    memory_load, reconfig_logger,
    init_setup, get_notebook_environment,
    load_and_clean, taxonomy_common_preprocess01,
)

# Set up logging
# NOTE(review): reconfig_logger() comes from momics.utils; what exactly it
# reconfigures is not visible in this notebook.
reconfig_logger()

# Determine the notebook environment
# `env` is compared against "vscode" further down when deciding how the
# dashboard is served.
env = get_notebook_environment()

# NOTE(review): init_setup() is an opaque momics.utils helper — confirm what it
# prepares before reordering these calls.
init_setup()
logger.info(f"Environment: {env}")

## Imports

In [2]:
# These imports need to be repeated here for the Panel dashboard to work (unexpected)
# TODO: report as possible bug
import sys
import os
import io
import warnings
warnings.filterwarnings('ignore')

from jsonapi_client import Session as APISession
from jsonapi_client import Modifier
import requests

# Dataframes and display
import panel as pn
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Data transformation
from functools import reduce

# Plots
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
# %matplotlib inline 

import momics.plotting as pl

In [3]:
# Repository root: the fixed Colab checkout location when running on Colab,
# otherwise the parent of the current working directory.
root_folder = os.path.abspath(
    '/content/momics-demos' if 'google.colab' in str(get_ipython()) else '../'
)

# Folders derived from the repository root (parquet files and static assets).
assets_folder = os.path.join(root_folder, 'assets')
data_folder = os.path.join(root_folder, 'wf5_MGnify/data/parquet_files')

## Methods

In [4]:
def mgn_split_taxonomy(df):
    """Split the ';'-separated taxon lineage into one column per taxonomic rank.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an 'attributes.taxon-lineage' column of ';'-separated strings.

    Returns
    -------
    tuple
        ``(features, all_genomes_tax_df)`` where ``features`` is the list of the
        seven rank column names and ``all_genomes_tax_df`` is ``df`` with those
        columns appended.

    Raises
    ------
    KeyError
        If ``df`` has no 'attributes.taxon-lineage' column.
    """
    features = ['domain', 'phylum', 'class', 'order', 'family', 'genus', 'species']

    # Split the 'attributes.taxon-lineage' column and create new columns
    lineage_split = df['attributes.taxon-lineage'].str.split(';', expand=True)

    # The split yields as many columns as the longest lineage in the data, which
    # is not necessarily len(features). Force exactly one column per rank:
    # missing ranks become NaN and anything beyond the last rank is dropped
    # (the untouched lineage string stays available in the original column).
    lineage_split = lineage_split.reindex(columns=range(len(features)))
    lineage_split.columns = features

    # Concatenate the original DataFrame with the new columns
    all_genomes_tax_df = pd.concat([df, lineage_split], axis=1)
    return features, all_genomes_tax_df

# sankey_df.head()

## App setup

In [8]:
import numpy as np

# Demo surface: evaluate an analytic function on a 100 x 100 grid spanning
# [-3.5, 3.5] along both axes.
xx = np.linspace(-3.5, 3.5, 100)
yy = np.linspace(-3.5, 3.5, 100)
x, y = np.meshgrid(xx, yy)
z = (
    np.exp(-((x - 1) ** 2) - y**2)
    - (x**3 + y**4 - x / 5) * np.exp(-(x**2 + y**2))
)

# Build the 3D figure from the sampled heights.
surface = go.Surface(z=z)
fig = go.Figure(data=[surface])
fig.update_layout(
    title="Plotly 3D Plot",
    width=500,
    height=500,
    margin={"t": 50, "b": 50, "r": 50, "l": 50},
)

# Wrap the figure in a Panel pane.
plotly_pane = pn.pane.Plotly(fig)

In [None]:
# Load the Panel extensions used by the dashboard (Tabulator table, Plotly pane).
pn.extension("tabulator")
pn.extension("plotly")
if 'google.colab' in str(get_ipython()):
    pn.extension(comms='colab')
ACCENT = "teal"

# CSS applied to the tabs container.
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

# TODO: there is a bug in the panel library that does not allow to open png files, renaming does not help
image = pn.pane.PNG(os.path.join(assets_folder, "figs/mgnify_banner.png"),
                    width=300,
                    # height=100,
                    )

##############################
## Query and show endpoints ##
##############################
# TODO: this goes to the utils
select_endpoint = pn.widgets.Select(
    name="MGnify endpoints",
    value="",
    options=[],
    description="Select endpoint to query",
)

# Ask the MGnify API root for its endpoints. The timeout keeps an unreachable
# API from hanging the cell, and raise_for_status() surfaces HTTP errors
# directly instead of failing later while decoding the response.
r = requests.get("https://www.ebi.ac.uk/metagenomics/api/v1/", timeout=30)
r.raise_for_status()
endpoint_dict = r.json()['data']
endpoints = list(endpoint_dict)
select_endpoint.options = endpoints
select_endpoint.value = endpoints[0]

###################
## Query genomes ##
###################
button1 = pn.widgets.Button(name='Query', button_type='primary')
text_input = pn.widgets.TextInput(name='Text Input', placeholder='Enter a string here...')
atable = pn.widgets.Tabulator(sizing_mode="stretch_both", name="Data View")

def query_genomes(event):
    """Query the selected MGnify endpoint by taxon lineage, show and save the result.

    Reads the endpoint from ``select_endpoint`` and the lineage filter from
    ``text_input``, loads the matching resources into ``atable`` and writes
    them to ``data/<endpoint>_<lineage>.parquet``.

    Parameters
    ----------
    event
        Value supplied by the bound button; a falsy value makes this a no-op.
    """
    if not event:
        return

    endpoint = select_endpoint.value
    lineage = text_input.value

    with APISession("https://www.ebi.ac.uk/metagenomics/api/v1") as mgnify:
        search_filter = Modifier(f"taxon_lineage={lineage}")
        # Materialise the results while the API session is still open.
        resources = [r.json for r in mgnify.iterate(endpoint, filter=search_filter)]
    resources_df = pd.json_normalize(resources)
    logger.info(f"Queried {len(resources_df)} resources from {endpoint} with filter {lineage}")

    print('Queried', lineage, "from", endpoint)

    # update table view
    atable.value = resources_df

    # create data folder if it doesn't exist (portable, no shell involved)
    os.makedirs("data", exist_ok=True)
    # The lineage is free text typed into the dashboard; strip path separators
    # so the value cannot point the output file outside the data folder.
    safe_lineage = lineage.replace("/", "_").replace("\\", "_")
    # save to parquet
    resources_df.to_parquet(os.path.join("data", f"{endpoint}_{safe_lineage}.parquet"))

    # this is alert for the dashboard, TODO: needs to be tested
    pn.pane.Alert('## Data saved to data folder ##', alert_type='success', width=500).servable()

# Call query_genomes with the 'Query' button's value; watch=True registers the
# function to be run on changes instead of only returning a bound function.
pn.bind(query_genomes, button1, watch=True)

#################
## Plot sankey ##
#################
# Button that triggers the Sankey plot of the currently loaded table.
button2 = pn.widgets.Button(name='Display Sankey', button_type='primary')

def plot_sankey(event):
    """Build a Sankey diagram of the taxon lineages in ``atable`` and show it.

    The lineage column of the current table is split into rank columns, the
    rows are counted per rank combination, and the resulting plot replaces the
    content of the second tab of ``tabs``.

    Parameters
    ----------
    event
        Value supplied by the bound widget; a falsy value makes this a no-op.
    """
    if not event:
        print('no event')
        return
    print('updating sankey plot')

    table_df = atable.value
    # Nothing has been queried yet (or the query returned no rows): there is
    # nothing to plot, so bail out instead of failing inside the callback.
    if table_df is None or table_df.empty:
        logger.warning("No query results available, run a query before plotting the Sankey diagram")
        return

    features, taxonomy_df = mgn_split_taxonomy(table_df)
    sankey_df = taxonomy_df.groupby(features).size().reset_index(name='count')
    # panel plot widget with plotly sankey
    sankey = pl.get_sankey(sankey_df, cat_cols=features[0:6],
                           value_cols='count', title='Taxon Lineage')

    # `display` is the IPython built-in, available when run inside a notebook kernel.
    display(sankey_df)
    # panel pane for plotly sankey
    sankey_pane = pn.pane.Plotly(sankey, config={'displayModeBar': False})
    # Assign through the layout itself rather than mutating `tabs.objects` in
    # place: in-place list mutation is not seen by Panel as a change, so the
    # rendered tab would not be refreshed. The tuple keeps the tab title.
    tabs[1] = ('Sankey plot', sankey_pane)


# Call plot_sankey with the 'Display Sankey' button's value; watch=True
# registers the function to be run on changes.
pn.bind(plot_sankey, button2, watch=True)

# Empty placeholder pane for the 'Sankey plot' tab; plot_sankey later puts the
# real plot into that tab slot.
sankey_pane = pn.pane.Plotly()
tabs = pn.Tabs(
    ('Table view', atable),
    ('Sankey plot', sankey_pane),
    styles=styles, sizing_mode="stretch_width", height=500, margin=10,
    dynamic=True,
)
# NOTE(review): `tabs` is a layout, not a widget — confirm this binding ever
# fires; presumably `tabs.param.active` was intended so the plot refreshes
# when the user switches tabs.
pn.bind(plot_sankey, tabs, watch=True)

# Dashboard template: controls live in the sidebar, the tabbed results fill
# the main area.
template = pn.template.FastListTemplate(
    title="Query MGnify",
    sidebar=[
        image,
        # endpoint selection
        "# Endpoint",
        select_endpoint,
        "works only for 'genomes' endpoint and lineage queries",
        pn.layout.Divider(),
        # lineage query
        "# Query",
        pn.Column(text_input, button1),
        # plotting
        "# Plot",
        pn.Row(button2),
    ],
    main=[pn.Column(tabs, sizing_mode="stretch_both")],
    main_layout=None,
    accent=ACCENT,
)

if "google.colab" in str(get_ipython()) or env == "vscode":
    server=pn.serve({"": template}, port=4040, address="127.0.0.1", threaded=True, websocket_origin="*")
    os.system("curl http://localhost:4040")
    from pyngrok import ngrok

    # Terminate open tunnels if exist
    ngrok.kill()

    # Setting the authtoken, get yours from https://dashboard.ngrok.com/auth
    NGROK_AUTH_TOKEN = os.getenv("NGROK_TOKEN")
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

    # Open an HTTPs tunnel on port 4040 for http://localhost:4040
    if env == "vscode":
        public_url = ngrok.connect(addr='4040')
    else:
        public_url = ngrok.connect(port='4040')
    
    print("Tracking URL:", public_url)
else:
    print('serving locally')
    template.servable()

In [6]:
# server.stop()
# ngrok.disconnect(server)
# ngrok.kill()