# App for running GECCO jobs in Galaxy

1. Upload and run workflow.
2. Monitor the job.
3. Receive completion notification with some basic summary provided by Galaxy.

Note: "Receiving" the results (tentatively, downloading them) is part of the analysis pipeline.

## TODOs
- clean up the dict with the histories display, which currently shows full dict in the dropdown
- Add selector (checkbox) which dataset input should be used.
- add bindings for all the params to construct the input file
- actually submit, and receive email
- save json, from the analysis dashboard, update the json (this will be heavily environment-dependent)
  - note for which parts it works.
- Can GECCO receive multiple `fasta` files?

In [1]:
import os
import sys
import json
import logging
logger = logging.getLogger(name="GECCO galaxy runner")

# Make the `utils` module (and, locally, the momics package) importable,
# depending on where the notebook is running.
if 'google.colab' in str(get_ipython()):
    # clone the momics-demos repository to use the utils module from there
    # TODO: eventually utils from momics will be used for that
    if os.path.isdir('momics-demos'):
        # Re-running the cell: `git clone` would fail on the existing directory.
        logger.info("Repository already present, skipping clone")
    else:
        # os.system() reports failure through its return value instead of
        # raising OSError, so the exit status has to be checked explicitly.
        exit_status = os.system('git clone https://github.com/palec87/momics-demos.git')
        if exit_status == 0:
            logger.info("Repository cloned")
        else:
            logger.error(
                f"An error occurred while cloning the repository: exit status {exit_status}"
            )

    sys.path.insert(0,'/content/momics-demos')

else:
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  # local utils, to be removed in the future

    # downside of this is that all the deps need to be installed in the current (momics-demos) environment
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../../marine-omics')))  # local momics package, to be removed too

# `utils` is resolved through the sys.path entries added above.
from utils import init_setup, get_notebook_environment
init_setup()

# Initialize the environment variable
# NOTE(review): `notebook_environment` is not read again in the visible cells — confirm it is still needed.
notebook_environment = 'unknown'
# Determine the notebook environment; `env` is passed to serve_app() when the dashboard is served.
env = get_notebook_environment()
logger.info(f"Environment: {env}")

Platform: local Linux


In [None]:
# This needs to be repeated here for the Panel dashboard to work, WEIRD
# (imports and path setup from the first cell are deliberately duplicated).
# TODO: report as possible bug
import sys
import os
import io
import warnings
import psutil

import bioblend.galaxy as g  # BioBlend is a Python library, wrapping the functionality of Galaxy and CloudMan APIs
# import boto3
import pandas as pd
import panel as pn
# from bioblend.galaxy import GalaxyInstance
# from bioblend.galaxy.config import ConfigClient

# The parent directory must be on sys.path before `utils` can be imported,
# which is why this statement sits between the import groups.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from utils import init_setup
init_setup()

from bioblend.galaxy.datasets import DatasetClient

from momics.galaxy import BCGalaxy
from momics.panel_utils import (
    serve_app, close_server,
)
from momics.utils import memory_load, reconfig_logger

# instead of the jupyter magic, you can also use
# NOTE(review): presumably this provides the Galaxy URL/key settings used when
# creating BCGalaxy below — confirm.
from dotenv import load_dotenv
load_dotenv()

## User settings

In [None]:
# Gates the extra debug calls in the cells below (history dump, `exp id` logging).
DEBUG = True  # enable stdout logging

# Set up logging
reconfig_logger()

In [4]:
# Galaxy client read as a module-level global by the refresh callbacks defined further down.
# NOTE(review): the two arguments look like environment-variable names rather
# than a literal URL/API key — confirm against BCGalaxy.
exp = BCGalaxy("GALAXY_EARTH_URL", "GALAXY_EARTH_KEY")

In [None]:
# Debug aid: fetch the histories and log them before the dashboard is built.
if DEBUG:
    exp.get_histories()
    logger.info(exp.histories)

    # exp.set_history(create=False, hid="8d8d4bf21253beda")

## Environment setup

In [None]:
# Repository root: the fixed clone location on Colab, the parent directory otherwise.
root_folder = os.path.abspath(
    '/content/momics-demos' if 'google.colab' in str(get_ipython()) else '../'
)

# Static assets (e.g. the logo) live directly under the repository root.
assets_folder = os.path.join(root_folder, 'assets')

## App setup

In [None]:
# buttons
but_login = pn.widgets.Button(name="🔐 Galaxy Login")
but_get_histories = pn.widgets.Button(name="📚 Refresh Histories")
but_get_datasets = pn.widgets.Button(name="📖 Refresh Datasets")

# input file handling
# but_upload_dataset = pn.widgets.Button(name="📤 Upload Dataset")
file_input = pn.widgets.FileInput(accept='.fasta', name="Upload a fasta file")

but_submit = pn.widgets.Button(name="🚀 Submit GECCO task")

# selectors (options are filled in by the refresh callbacks)
select_history = pn.widgets.Select(
    name="Select history",
    options=[],
    description="Your Galaxy histories, create a new one if needed",
)
select_dataset = pn.widgets.Select(
    name="Select dataset",
    options=[],
    description="Your Galaxy fasta datasets",
)

# gecco params
mask = pn.widgets.Checkbox(name='Enable masking of regions with unknown nucleotides', value=True)
# The label used to read 'IntInMinimum ... clusterput': the text had been
# pasted into the middle of the word "IntInput".
cds = pn.widgets.IntInput(name='Minimum number of genes required for a cluster',
                          value=3, step=1, start=2, end=1000,
                          )
threshold = pn.widgets.FloatInput(name='Probability threshold for cluster detection',
                                  value=0.05, step=0.01, start=0.0, end=1.0,
                                  )
postproc = pn.widgets.Select(
    name="Post-processing method for gene cluster validation",
    options=["gecco"],
)
antimash_sideload = pn.widgets.Checkbox(name='Generate an antiSMASH v6 sideload JSON file', value=False)
email_input = pn.widgets.TextInput(name='Email notification', placeholder='Enter a string here...')

In [None]:
# callbacks and methods
def handle_login(clicks):
    """Log in to Galaxy and refresh the history and dataset selectors.

    Args:
        clicks: Click count of the login button, supplied through ``pn.bind``.
    """
    # Rebind the module-level client: the refresh callbacks read the global
    # ``exp``, so a function-local instance would never be used by them.
    global exp
    exp = BCGalaxy("GALAXY_EARTH_URL", "GALAXY_EARTH_KEY")
    if DEBUG:
        # logging formats lazily with %-style placeholders; passing an extra
        # argument without a placeholder produces a formatting error.
        logger.debug("exp id: %s", id(exp))
    pn.state.notifications.info(f"User logged in: {exp.cfg.whoami()}")
    logger.info(f'You have clicked me {clicks} times')

    # get the histories right upon login
    handle_get_histories(clicks)

    # get the datasets right upon login
    handle_get_datasets(clicks)

def handle_get_histories(clicks):
    """Fetch the Galaxy histories and populate the history selector.

    Args:
        clicks: Click count of the triggering button. The value itself is not
            used in the body.
    """
    if DEBUG:
        # logging formats lazily with %-style placeholders; passing an extra
        # argument without a placeholder produces a formatting error.
        logger.debug("exp id: %s", id(exp))
    exp.get_histories()
    histories = exp.histories
    select_history.options = histories
    # Preselect the first entry only when there is one; indexing an empty
    # result would raise IndexError.
    if histories:
        select_history.value = histories[0]
    logger.info(f"{len(histories)} histories found.")

def handle_get_datasets(clicks):
    """Fetch the fasta datasets from Galaxy and populate the dataset selector.

    Args:
        clicks: Click count of the triggering button. The value itself is not
            used in the body.
    """
    if DEBUG:
        # logging formats lazily with %-style placeholders; passing an extra
        # argument without a placeholder produces a formatting error.
        logger.debug("exp id: %s", id(exp))

    # this already filters the datasets by the extension
    datasets = exp.get_datasets_by_key("extension", "fasta")

    # fill the select_dataset widget
    select_dataset.options = datasets
    # Preselect the first entry only when there is one; indexing an empty
    # result would raise IndexError.
    if datasets:
        select_dataset.value = datasets[0]
    logger.info(f"{len(datasets)} datasets found.")

In [None]:
# Panel configuration for the dashboard.
pn.extension("tabulator")
if 'google.colab' in str(get_ipython()):
    # Colab-specific comms setting, applied only when running there.
    pn.extension(comms='colab')
# Notifications are used by handle_login() via pn.state.notifications.
pn.extension(notifications=True)
# Accent colour passed to the template in app().
ACCENT = "teal"

# CSS-style properties for boxed elements.
# NOTE(review): `styles` is not referenced in the visible cells — confirm it is still needed.
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

# TODO: there is a bug in the panel library that does not allow opening png files; renaming does not help
# Logo shown at the top of the sidebar.
image = pn.pane.JPG(os.path.join(assets_folder, "figs/logo_gecco.jpeg"),
                    width=200,
                    # height=100,
                    )


def app():
    """Assemble the dashboard template from the module-level widgets.

    The sidebar holds the logo, the login button and the history/dataset
    controls; the main area holds the GECCO parameters and the submit button.

    Returns:
        The configured ``pn.template.FastListTemplate``.
    """
    stretch = "stretch_width"

    # History controls: refresh button, selector and the bound refresh callback.
    refresh_histories = pn.bind(handle_get_histories, clicks=but_get_histories.param.clicks)
    history_panel = pn.Column(
        but_get_histories,
        select_history,
        refresh_histories,
        sizing_mode=stretch,
    )

    # Dataset controls: the bound callback has to stay inside this column.
    refresh_datasets = pn.bind(handle_get_datasets, clicks=but_get_datasets.param.clicks)
    dataset_panel = pn.Column(
        but_get_datasets,
        select_dataset,
        refresh_datasets,
        file_input,
        sizing_mode=stretch,
    )

    # GECCO parameter widgets stacked in a single column.
    gecco_params = pn.Column(mask, cds, threshold, postproc, antimash_sideload, email_input)
    gecco_panel = pn.FlexBox(gecco_params, sizing_mode=stretch)

    login_action = pn.bind(handle_login, clicks=but_login.param.clicks)
    sidebar_items = [
        image,
        but_login,
        login_action,
        pn.layout.Divider(),
        "## Histories",
        history_panel,
        "## Datasets",
        dataset_panel,
    ]
    main_items = [
        pn.Column(
            "## GECCO parameters",
            gecco_panel,
            "## Submit",
            but_submit,
            sizing_mode=stretch,
        )
    ]

    return pn.template.FastListTemplate(
        title="Run GECCO on Galaxy",
        sidebar=sidebar_items,
        main=main_items,
        main_layout=None,
        accent=ACCENT,
    )

# Build the dashboard; the resulting template object is what gets served.
template = app()
logger.info("Template created")

# serve the app
# `s` is kept so the server can be shut down later with close_server(s, env=env).
s = serve_app(template, env=env, name="GECCO_galaxy_runner")

### Uncomment this if you are running an ngrok tunnel that you want to quit

In [None]:
# old way
# server.stop()
# ngrok.disconnect(server)
# ngrok.kill()

# new way
# close_server(s, env=env)