# App for analyzing GECCO runs from the Galaxy

1. Upload local data or query GECCO results from Galaxy.
2. Identify Biosynthetic Gene Clusters (BGCs).
3. Visualize BGCs.
4. Compare two samples with respect to each other.

Note: Sending GECCO jobs to Galaxy is handled by a separate application.

## Platform-dependent part
- Resolve the platform setup.
- Differences from local imports should be resolved by setting up the VRE packages correctly.

In [None]:
import os
import sys
import logging
import psutil
from IPython import get_ipython

# Notebook-wide logger; no handler is configured in this cell.
logger = logging.getLogger(name="GECCO analyzer")

if 'google.colab' in str(get_ipython()):
    print('Setting Google colab, you will need a ngrok account to make the dashboard display over the tunnel. \
    https://ngrok.com/')
    # clone the momics-demos repository to use it to load data
    # os.system() does not raise when the command itself fails; it returns the
    # exit status, so the status must be checked explicitly.
    try:
        clone_status = os.system('git clone https://github.com/palec87/momics-demos.git')
    except OSError as e:
        logger.info(f"An error occurred while cloning the repository: {e}")
    else:
        if clone_status == 0:
            logger.info("Repository cloned")
        else:
            # Non-fatal on purpose: the setup continues as it did before.
            logger.warning(
                f"Cloning the repository failed with exit status {clone_status}"
            )

    # make the cloned repository importable
    sys.path.insert(0,'/content/momics-demos')

    # this step takes time because of many dependencies
    install_status = os.system('pip install momics@git+https://github.com/emo-bon/marine-omics-methods.git@main')
    if install_status != 0:
        logger.warning(
            f"Installing momics failed with exit status {install_status}"
        )

elif not psutil.users():
    # No logged-in users reported by psutil is treated as running on Binder.
    logger.info("Binder")

    logger.info('Binder will not allow you to upload the ".env" file')
    os.environ["GALAXY_EARTH_URL"] = "https://earth-system.usegalaxy.eu/"
    ###########################################################################################
    ### INPUT TOKEN HERE, If not using Galaxy, put any string below, but cannot stay empty ####
    ###########################################################################################
    os.environ["GALAXY_EARTH_KEY"] = ""
    # Stops the cell until the user has replaced the empty string above.
    assert os.environ["GALAXY_EARTH_KEY"] != "", "token cannot be an empty string, SET your API key."

else:
    logger.info("Local server")

# Imported only after the platform branch above, which installs momics on Colab.
from momics.utils import init_setup, get_notebook_environment, memory_load
init_setup()

# Determine the notebook environment
env = get_notebook_environment()
logger.info(f"Environment: {env}")

## Imports

In [None]:
# This needs to be repeated here for the Panel dashboard to work, WEIRD
# TODO: report as possible bug
import sys
import os
import io

# import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
import panel as pn

# Import
import bioblend.galaxy as g  # BioBlend is a Python library, wrapping the functionality of Galaxy and CloudMan APIs
from bioblend.galaxy import GalaxyInstance

# All low level functions are imported from the momics package
import momics.diversity as div
import momics.plotting as pl
from momics.panel_utils import serve_app, close_server
from momics.loader import bytes_to_df

### User settings

In [None]:
# User-facing switch for verbose output.
# NOTE(review): DEBUG is not read anywhere in the visible part of this
# notebook - confirm where (or whether) it is consumed.
DEBUG = True  # enable stdout logging

## Loading and setup

In [None]:
# Repository root: the cloned checkout on Google Colab, otherwise the parent
# of the current working directory.
root_folder = os.path.abspath(
    '/content/momics-demos' if 'google.colab' in str(get_ipython()) else '../'
)

# Static files (e.g. the logo) live in <root>/assets.
assets_folder = os.path.join(root_folder, 'assets')

## Content setup

### DF display

In [None]:
# Load the Panel extensions requested for this dashboard.
pn.extension("tabulator", "mathjax", "filedropper")

# Accent colour handed to the dashboard template.
ACCENT = "teal"

# Uploaded tables keyed by file name; filled by process_uploaded_tables().
DATASETS = dict()

# CSS rules passed to the tab container.
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

# --- Static panes -----------------------------------------------------------

# GECCO logo shown at the top of the sidebar.
image = pn.pane.JPG(
    os.path.join(assets_folder, "figs/logo_gecco.jpeg"),
    width=100,
    height=100,
)

# Introductory text displayed above the tabs.
markdown_intro = pn.pane.Markdown("""
**If you want to use the Galaxy API:**
- you need to provide your credentials.
- and `job_id` of the job that created the files to get those files.
- For comparison study, you need to provide two job_ids.
""")

# Instructions for the local-upload widget.
md_upload = pn.pane.Markdown("""
**Upload your files:**
You need to select exactly 3 files where filenames contain:
- `BGCs` or `clusters`
- `features`
- `genes`
""")

# Instructions for the Galaxy inputs defined below.
md_get_galaxy = pn.pane.Markdown("""
**Get files from Galaxy:**
You need to provide your credentials and `job_id`.
""")

# --- Input widgets ----------------------------------------------------------

# File picker accepting several files at once.
upload_local = pn.widgets.FileInput(multiple=True)

# Galaxy connection details.
literal_galaxy_url = pn.widgets.TextInput(
    placeholder='Enter a https server address here...',
    name='Galaxy server URL',
)
literal_galaxy_key = pn.widgets.PasswordInput(
    placeholder='Enter your password here...',
    name='Password',
)

# Button that re-processes whatever is currently loaded in `upload_local`.
button_display_loaded = pn.widgets.Button(
    name="Display loaded files", button_type="primary", width=200
)

### Analysis page

In [None]:
# Pane that receives the BGC types figure once data is loaded.
bgc_types = pn.pane.Matplotlib(name="BGC types", height=600)

# The analysis page currently holds only the BGC types figure.
analysis_tab = pn.Column(bgc_types)

# The first three tabs start as empty placeholders and are replaced by tables
# after an upload; the last tab shows the analysis page.
tabs = pn.Tabs(
    ('BGCs', ""),
    ('features', ""),
    ('genes', ""),
    ("Analysis", analysis_tab),
    margin=10,
    sizing_mode="stretch_height",
    styles=styles,
    dynamic=True,
)


### update methods

In [None]:
def display_tables_after_upload(datasets):
    """
    Show the uploaded tables in the first three tabs.

    For each of the 'BGCs', 'features' and 'genes' tabs, the first key of
    ``datasets`` containing one of the tab's markers is selected and its
    table is placed in the tab as a paginated Tabulator widget.

    Args:
        datasets: Mapping of file name to the table loaded from that file.

    A tab without a matching file is skipped with a warning and keeps its
    current content, instead of aborting with a bare ``StopIteration``.
    """
    logger.info("Displaying tables after upload...")
    # (tab index, tab title, substrings that identify the matching file name)
    tab_specs = (
        (0, 'BGCs', ('BGCs', 'clusters')),
        (1, 'features', ('features',)),
        (2, 'genes', ('genes',)),
    )
    for index, title, markers in tab_specs:
        match = next(
            (key for key in datasets if any(marker in key for marker in markers)),
            None,
        )
        if match is None:
            logger.warning(
                f"No uploaded file name contains any of {markers}; "
                f"tab '{title}' was not updated."
            )
            continue
        tabs[index] = pn.widgets.Tabulator(
            datasets[match],
            name=title,
            page_size=50,
        )


def process_uploaded_tables(file_names, file_data):
    """
    Load the uploaded files into DATASETS and refresh the tables and plot.

    Args:
        file_names: Names of the uploaded files, or None/empty when nothing
            has been uploaded.
        file_data: Raw content of the uploaded files, in the same order as
            ``file_names``.

    DATASETS is always cleared first. When there is nothing to load the
    function returns right after clearing, without touching the display.
    """
    logger.info("Processing uploaded tables...")
    logger.info(f"files: {file_names}")
    DATASETS.clear()
    # Truthiness covers both None and empty sequences; the previous identity
    # test against a fresh list literal (`is []`) could never be True.
    if not file_names or not file_data:
        return
    for name, payload in zip(file_names, file_data):
        logger.info(f"Processing {name}...")
        DATASETS[name] = bytes_to_df(payload)
    # Refresh the table tabs and the BGC types plot with the new data.
    display_tables_after_upload(DATASETS)
    update_bgs_types_plot()


def update_bgs_types_plot():
    """
    Update the BGC types plot.

    Uses the first DATASETS entry whose key contains 'BGCs' or 'clusters'.
    When no such entry exists, a warning is logged and the plot is left
    unchanged, instead of failing with a bare ``StopIteration``.
    """
    logger.info("Updating BGC types plot...")
    bgc_key = next(
        (key for key in DATASETS if 'BGCs' in key or 'clusters' in key),
        None,
    )
    if bgc_key is None:
        logger.warning(
            "No uploaded file name contains 'BGCs' or 'clusters'; "
            "BGC types plot was not updated."
        )
        return
    bgc_types.object = pl.mpl_bgcs_violin(
        DATASETS[bgc_key],
        normalize=False,
    )

### Bindings

In [None]:

# Re-run the processing whenever the upload widget changes.
# The Parameter objects (`.param.<name>`) have to be passed here: plain
# `upload_local.filename` / `upload_local.value` are just the current values,
# which pn.bind treats as constants, so nothing would be watched.
pn.bind(
    process_uploaded_tables,
    file_names=upload_local.param.filename,
    file_data=upload_local.param.value,
    watch=True,
)

# Manual trigger: process whatever is currently loaded in the upload widget.
button_display_loaded.on_click(
    lambda event: process_uploaded_tables(upload_local.filename, upload_local.value)
)

## APP setup

In [None]:
pn.extension("tabulator", "mathjax")


def app():
    """
    Assemble the dashboard template.

    The sidebar holds the logo, the upload instructions, the file picker and
    the "display loaded files" button; the main area holds the intro text
    followed by the tabs.

    Returns:
        The configured ``pn.template.FastListTemplate``.
    """
    # Both dividers use the same margin.
    divider_margin = (-20, 0, 0, 0)

    sidebar_items = [
        image,
        md_upload,
        pn.Row(upload_local),
        pn.layout.Divider(margin=divider_margin),
        # this is prepared for galaxy, but not implemented yet
        # md_get_galaxy, literal_galaxy_url, literal_galaxy_key,
        button_display_loaded,
    ]

    main_column = pn.Column(
        markdown_intro,
        pn.layout.Divider(margin=divider_margin),
        tabs,
        scroll=True,
    )

    return pn.template.FastListTemplate(
        title="Biosynthetic Gene Cluster Analysis",
        sidebar=sidebar_items,
        main=[main_column],
        main_layout=None,
        accent=ACCENT,
    )

# Build the dashboard once; it is either marked servable or served directly.
template = app()


if 'google.colab' not in str(get_ipython()):
    # Outside Colab: mark the template as servable.
    template.servable()
else:
    # On Colab: serve through the momics helper and keep the handle in `s`
    # so the server can be closed later.
    s = serve_app(template, env=env, name="GECCO_analyser")

### Uncomment this if you are running an ngrok tunnel that you want to close

In [None]:
# only use for the ngrok tunnel in GColab
# close_server(s, env=env)