# App for analyzing GECCO runs from the Galaxy

1. Upload local data or query results of the GECCO from the Galaxy.
2. Identifying Biosynthetic Gene Clusters (BGCs).
3. Visualize BGCs.
4. Compare two samples in respect to each other.

Note: Sending GECCO jobs to Galaxy is part of another separate application.

## Platform dependent part
- Resolve platform setup
- the difference to local imports should be resolved by setting the VRE packages well

In [1]:
import os
import sys
import logging
import psutil
from IPython import get_ipython

logger = logging.getLogger(name="GECCO galaxy runner")

if 'google.colab' in str(get_ipython()):
    # clone the momics-demos repository to use the utils module from there
    # TODO: eventually utils from momics will be used for that
    try:
        os.system('git clone https://github.com/palec87/momics-demos.git')
        logger.info(f"Repository cloned")
    except OSError as e:
        logger.info(f"An error occurred while cloning the repository: {e}")

    sys.path.insert(0,'/content/momics-demos')

# I do not know how to distinguish between binder and local jupyter server
elif psutil.users()[0].name not in ['davidp', 'david']:  # this is binder 
# elif "zmqshell" in str(get_ipython()):
    logger.info("Binder")
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

    logger.info('Binder will not allow you to upload the ".env" file')
    os.environ["GALAXY_EARTH_URL"] = "https://earth-system.usegalaxy.eu/"
    #########################
    ### INPUT TOKEN HERE ####
    #########################
    os.environ["GALAXY_EARTH_KEY"] = 0
    assert isinstance(os.environ["GALAXY_EARTH_KEY"], str) is True, "token must be a string"

else:
    logger.info("Local server")
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  # local utils, to be removed in the future

    # downside of this is that all the deps need to be installed in the current (momics-demos) environment
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../../marine-omics')))  # local momics package, to be removed too

from utils import init_setup, get_notebook_environment
init_setup()

# Determine the notebook environment
env = get_notebook_environment()
logger.info(f"Environment: {env}")

Platform: local Linux


In [2]:
# This needs to be repeated here for the Pannel dashboard to work, WEIRD
# TODO: report as possible bug
import sys
import os
import io

# import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
import panel as pn

# Import
import bioblend.galaxy as g  # BioBlend is a Python library, wrapping the functionality of Galaxy and CloudMan APIs
from bioblend.galaxy import GalaxyInstance

# All low level functions are imported from the momics package
import momics.diversity as div
import momics.plotting as pl
from momics.panel_utils import diversity_select_widgets, create_indicators_diversity
from momics.utils import memory_load

# Note: This is breaking the panel preview functionality
# %load_ext autoreload
# %autoreload 2

## Loading and setup

In [3]:
if 'google.colab' in str(get_ipython()):
    root_folder = os.path.abspath(os.path.join('/content/momics-demos'))
else:
    root_folder = os.path.abspath(os.path.join('../'))

assets_folder = os.path.join(root_folder, 'assets')

## App Setup

In [25]:
def display_tables_after_upload(d1, d2=None, d3=None):
    print(d1)
    df1 = pd.DataFrame.from_dict([d1])
    t1 = pn.widgets.Tabulator(df1, sizing_mode="stretch_both", name="Data View")
    return t1

In [19]:
pn.extension("tabulator", "mathjax", "filedropper")
ACCENT = "teal"

styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

image = pn.pane.JPG(os.path.join(assets_folder, "figs/logo_gecco.jpeg"),
                    width=100, height=100)

markdown_intro = pn.pane.Markdown(
"""
**If you want to use the Galaxy API:**
- you need to provide your credentials.
- and `job_id` of the job that created the files to get those files.
- For comparison study, you need to provide two job_ids.
"""
)

md_upload = pn.pane.Markdown(
"""
**Upload your files:**
You need to select exactly 3 files with endings:
- `_BGC.tsv`
- `_features.tsv`
- `_genes.tsv`
"""
)

md_get_galaxy = pn.pane.Markdown(
"""
**Get files from Galaxy:**
You need to provide your credentials and `job_id`.
"""
)

upload_local = pn.widgets.FileDropper(multiple=True,
                                    #   accepted_filetypes=[".tsv"],
                                      layout="compact")
literal_galaxy_url = pn.widgets.TextInput(
    name='Galaxy server URL',
    placeholder='Enter a https server address here...')


literal_galaxy_key = pn.widgets.PasswordInput(
    name='Password',
    placeholder='Enter your password here...')

table1 = pn.bind(display_tables_after_upload,
                 d1=upload_local.value,
                 watch=True,
                 )

tabs = pn.Tabs(
    ("Table 1", table1),
    dynamic=True,
    styles=styles, sizing_mode="stretch_width", height=500, margin=10,
)




{}


In [20]:
upload_local

BokehModel(combine_events=True, render_bundle={'docs_json': {'d9919da8-34e5-4ccf-9da6-341726c5a0c4': {'version…

In [21]:
upload_local.value

{'summary_detected_BGC.tsv': 'sequence_id\tcluster_id\tstart\tend\taverage_p\tmax_p\ttype\talkaloid_probability\tnrp_probability\tpolyketide_probability\tripp_probability\tsaccharide_probability\tterpene_probability\tproteins\tdomains\r\nk141_102496\tk141_102496_cluster_1\t3\t2678\t0.08912765604164256\t0.08970901574014162\t\t0.03871139869090168\t0.19032612658786585\t0.47201669995970075\t0.15146123951904\t0.010000000000000009\t0.07541916976077911\tk141_102496_1;k141_102496_2;k141_102496_3;k141_102496_4\tPF01261;PF13577;PF14534\r\nk141_107500\tk141_107500_cluster_1\t2\t2071\t0.0558158105243363\t0.057461825763979575\t\t0.02904431699744392\t0.15638347057367996\t0.16369841015161002\t0.21435808823593872\t0.010000000000000009\t0.1605964966222001\tk141_107500_1;k141_107500_2;k141_107500_3\tPF00571;PF00850;PF05175;PF13649\r\nk141_111068\tk141_111068_cluster_1\t1492\t4777\t0.28885009804968387\t0.292865851464946\t\t0.028721291521760173\t0.4583574630033104\t0.43215846084862297\t0.11724239367818767

In [22]:
upload_local.value.keys()

dict_keys(['summary_detected_BGC.tsv'])

In [23]:
table1



{'summary_detected_BGC.tsv': 'sequence_id\tcluster_id\tstart\tend\taverage_p\tmax_p\ttype\talkaloid_probability\tnrp_probability\tpolyketide_probability\tripp_probability\tsaccharide_probability\tterpene_probability\tproteins\tdomains\r\nk141_102496\tk141_102496_cluster_1\t3\t2678\t0.08912765604164256\t0.08970901574014162\t\t0.03871139869090168\t0.19032612658786585\t0.47201669995970075\t0.15146123951904\t0.010000000000000009\t0.07541916976077911\tk141_102496_1;k141_102496_2;k141_102496_3;k141_102496_4\tPF01261;PF13577;PF14534\r\nk141_107500\tk141_107500_cluster_1\t2\t2071\t0.0558158105243363\t0.057461825763979575\t\t0.02904431699744392\t0.15638347057367996\t0.16369841015161002\t0.21435808823593872\t0.010000000000000009\t0.1605964966222001\tk141_107500_1;k141_107500_2;k141_107500_3\tPF00571;PF00850;PF05175;PF13649\r\nk141_111068\tk141_111068_cluster_1\t1492\t4777\t0.28885009804968387\t0.292865851464946\t\t0.028721291521760173\t0.4583574630033104\t0.43215846084862297\t0.11724239367818767

ValueError: If using all scalar values, you must pass an index

<function param.reactive.bind.<locals>.wrapped(*wargs, **wkwargs)>

In [26]:
display_tables_after_upload(upload_local.value, d2=None, d3=None)

{'summary_detected_BGC.tsv': 'sequence_id\tcluster_id\tstart\tend\taverage_p\tmax_p\ttype\talkaloid_probability\tnrp_probability\tpolyketide_probability\tripp_probability\tsaccharide_probability\tterpene_probability\tproteins\tdomains\r\nk141_102496\tk141_102496_cluster_1\t3\t2678\t0.08912765604164256\t0.08970901574014162\t\t0.03871139869090168\t0.19032612658786585\t0.47201669995970075\t0.15146123951904\t0.010000000000000009\t0.07541916976077911\tk141_102496_1;k141_102496_2;k141_102496_3;k141_102496_4\tPF01261;PF13577;PF14534\r\nk141_107500\tk141_107500_cluster_1\t2\t2071\t0.0558158105243363\t0.057461825763979575\t\t0.02904431699744392\t0.15638347057367996\t0.16369841015161002\t0.21435808823593872\t0.010000000000000009\t0.1605964966222001\tk141_107500_1;k141_107500_2;k141_107500_3\tPF00571;PF00850;PF05175;PF13649\r\nk141_111068\tk141_111068_cluster_1\t1492\t4777\t0.28885009804968387\t0.292865851464946\t\t0.028721291521760173\t0.4583574630033104\t0.43215846084862297\t0.11724239367818767

BokehModel(combine_events=True, render_bundle={'docs_json': {'809a9713-c2cd-4435-b54c-b2808d72b2a4': {'version…

In [None]:

template = pn.template.FastListTemplate(
    title="DBiosynthetic Gene Cluster Analysis",
    sidebar=[image,
             md_upload, upload_local,
             pn.layout.Divider(margin=(-20, 0, 0, 0)),
             md_get_galaxy, literal_galaxy_url, literal_galaxy_key,
            #  get_galaxy_files,
            #  pn.layout.Divider(margin=(-20, 0, 0, 0)),
            #  "# Beta diversity", select_table_beta, select_taxon, select_beta_factor,
             ],
    main=[pn.Column(markdown_intro,
                    pn.layout.Divider(margin=(-20, 0, 0, 0)),
                    tabs,
                    sizing_mode="stretch_both",
                   )],
    main_layout=None,
    accent=ACCENT,
    meta_refresh="2",
)

template.servable()