diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..8e5d4beb
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,7 @@
+data
+test
+Automappa.egg-info
+build
+dist
+.vscode
+.pytest_cache
\ No newline at end of file
diff --git a/.env b/.env
new file mode 100644
index 00000000..c7bccb0a
--- /dev/null
+++ b/.env
@@ -0,0 +1,31 @@
+# Required for docker-compose.yml
+SERVER_ROOT_UPLOAD_FOLDER="/usr/src/app/uploads"
+SERVER_HOST="0.0.0.0"
+SERVER_PORT=8050
+# Debugging tools are disabled by default --> for development, switch this to `True`
+SERVER_DEBUG=False
+POSTGRES_USER="admin"
+POSTGRES_PASSWORD="mypass"
+POSTGRES_DB="automappa"
+POSTGRES_URL="postgresql+psycopg2://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB}"
+POSTGRES_POOL_SIZE=1
+POSTGRES_POOL_PRE_PING=False
+RABBITMQ_DEFAULT_USER="user"
+RABBITMQ_DEFAULT_PASS="pass"
+RABBITMQ_URL="amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq:5672/"
+REDIS_BACKEND_HOST='redis'
+REDIS_BACKEND_PORT='6379'
+REDIS_BACKEND_DB='0'
+REDIS_BACKEND_PASSWORD='RedisPassword'
+CELERY_BACKEND_URL='redis://redis:6379/0'
+CELERY_BROKER_URL="amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq:5672//"
+FLOWER_BROKER_API_URL="http://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq:15672/api"
+# https://github.com/mher/flower/issues/1036
+
+# Grafana configuration
+# Disable the login form or not
+GF_AUTH_DISABLE_LOGIN_FORM="false"
+# Allow anonymous authentication or not
+GF_AUTH_ANONYMOUS_ENABLED="false"
+# Role of the anonymous user
+GF_AUTH_ANONYMOUS_ORG_ROLE="Admin"
diff --git a/.gitignore b/.gitignore
index bb853153..10ae6932 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,11 @@ app/__pycache__
 .DS_Store
 dist
 build
-Automappa.egg-info
\ No newline at end of file
+Automappa.egg-info
+.env
+data
+!automappa/data
+db-data
+file_system_backend
+*.db
+uploads
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 3464d373..3ad7bab3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,15 +1,17 @@
-FROM condaforge/miniforge3:latest
+FROM condaforge/mambaforge:latest
 
-COPY environment.yml ./environment.yml
+COPY environment.yml /tmp/environment.yml
+RUN mamba env update -n base -f /tmp/environment.yml && \
+    mamba clean --all --force-pkgs-dirs --yes
 
-RUN conda env update -n base -f=environment.yml \
-    && conda clean --all --force-pkgs-dirs --yes
+COPY . /usr/src/app
+WORKDIR /usr/src/app
 
-# Test command is functional
-COPY . /Automappa/
-WORKDIR /Automappa/
 RUN python -m pip install . --ignore-installed --no-deps -vvv
-RUN automappa -h
-CMD [ "-h" ]
-ENTRYPOINT [ "automappa" ]
\ No newline at end of file
+
+# Create an unprivileged user for the automappa celery worker
+RUN adduser --disabled-password --gecos '' automappa
+RUN mkdir -p /usr/src/app/uploads && \
+    chown -R automappa:automappa /usr/src/app
+
+# CMD ["automappa", "-h"]
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 894f13e8..1271cf4c 100644
--- a/Makefile
+++ b/Makefile
@@ -45,10 +45,32 @@ endif
 image: Dockerfile
 	docker build . -f $< -t evanrees/automappa:`git branch --show-current`
 
+## Remove automappa-{web,flower,queue} docker images
+rm-images: Dockerfile
+	docker rmi -f `docker images -q automappa-web`
+	docker rmi -f `docker images -q automappa-queue`
+	docker rmi -f `docker images -q automappa-flower`
+
 ## Install automappa entrypoint into current environment
 install:
 	$(PYTHON_INTERPRETER) -m pip install . --ignore-installed --no-deps -vvv
 
+## docker compose build from docker-compose.yml
+build: docker-compose.yml
+	docker compose build
+
+## alias for docker-compose up --always-recreate-deps --remove-orphans --force-recreate
+up: docker-compose.yml
+	docker compose up --always-recreate-deps --remove-orphans --force-recreate
+
+## alias for docker-compose down --remove-orphans
+down: docker-compose.yml
+	docker compose down --remove-orphans
+
+## alias for docker-compose down --remove-orphans --volumes
+down-v: docker-compose.yml
+	docker compose down --remove-orphans -v
+
 # Run Automappa on test data
 # test: test_data
 # 	$(PYTHON_INTERPRETER) index.py -i test/bins.tsv
@@ -64,13 +86,13 @@ test_environment: scripts/test_environment.py
 	$(PYTHON_INTERPRETER) $<
 
 ## Set up python interpreter environment
-create_environment: requirements.txt
+create_environment: environment.yml
 ifeq (True,$(HAS_CONDA))
 	@echo ">>> Detected conda, creating conda environment."
 ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
-	conda create -c conda-forge --name $(PROJECT_NAME) python=3.7 --file=$<
+	mamba env create --name $(PROJECT_NAME) --file=$<
 else
-	conda create -c conda-forge --name $(PROJECT_NAME) python=3.7 --file=$<
+	mamba env create --name $(PROJECT_NAME) --file=$<
 endif
 	@echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
 else
diff --git a/README.md b/README.md
index c7b182fb..ad5d0ebf 100644
--- a/README.md
+++ b/README.md
@@ -1,297 +1,63 @@
 # Automappa: An interactive interface for exploration of metagenomes
 
 ![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/WiscEvan/Automappa?label=latest)
-[![Anaconda-Server Install Badge](https://anaconda.org/bioconda/automappa/badges/installer/conda.svg)](https://conda.anaconda.org/bioconda)
-[![Anaconda-Server Platforms Badge](https://anaconda.org/bioconda/automappa/badges/platforms.svg)](https://anaconda.org/bioconda/automappa)
-[![Anaconda-Server Downloads Badge](https://anaconda.org/bioconda/automappa/badges/downloads.svg)](https://anaconda.org/bioconda/automappa)
 
 | Image Name           | Image Tag       | Status |
 |----------------------|-----------------|--------|
 | `evanrees/automappa` | `main`,`latest` | [![docker CI/CD](https://github.com/WiscEvan/Automappa/actions/workflows/docker.yml/badge.svg?branch=main "evanrees/automappa:main")](https://github.com/WiscEvan/Automappa/actions/workflows/docker.yml) |
 | `evanrees/automappa` | `develop`       | [![develop docker CI/CD](https://github.com/WiscEvan/Automappa/actions/workflows/docker.yml/badge.svg?branch=develop "evanrees/automappa:develop")](https://github.com/WiscEvan/Automappa/actions/workflows/docker.yml) |
 
-![automappa_demo_920](https://user-images.githubusercontent.com/25933122/158899748-bf21c1fc-6f67-4fd8-af89-4e732fa2edcd.gif)
+
-## Automappa :deciduous_tree:
+> You may also [see each page as a static view](docs/page-overview.md)
 
-***Follow this link to get started using the fully-featured Automappa application:*** https://github.com/WiscEvan/Automappa/tree/develop#getting-started
+## :deciduous_tree: Automappa testing setup/run commands
 
-## Test Data
+- [Clone the Automappa Repo](#clone-the-automappa-repository)
+- [Run `make build` using the Makefile](#build-images-for-services-used-by-automappa)
+- [Run `make up` using the Makefile](#build-and-run-automappa-services)
+- [Open the Automappa URL](#navigate-to-automappa-page)
+- [Download test data](#download-test-data)
 
-Test data to try out Automappa may be downloaded from here: https://drive.google.com/drive/folders/1nBk0AZC3EJV4t-9KdJBShGCfWbdP2kOp?usp=sharing
-
-This data is not yet binned, so you can easily try out different settings and perform your own refinements on some example data.
-
-Happy binning!
-
------
-
-## Automappa-lite :seedling:
-
-> NOTE: The following section is a lightweight version of Automappa that may be slow and is not constructed for larger datasets.
-> If you would like to use all of Automappa's features see the [Automappa section](#automappa) above or navigate to the [getting started page](https://github.com/WiscEvan/Automappa/tree/develop#getting-started) for the full-featured app.
-
-- [Install with conda](#install-with-conda)
-- [Run `automappa` using docker](#quickstart-using-docker-no-installation-required)
-- [Install from source](#install-from-source)
-- [Advanced Usage](#advanced-usage)
-  - [A breakdown of the docker run wrapper script](#full-docker-run-command-example)
-  - [Using a remote Automappa server](#using-a-remote-automappa-server)
-  - [Using a remote docker container Automappa server](#using-a-remote-docker-container-automappa-server)
-
-## Install Automappa-lite with conda
-
-If you are using `conda` (or `mamba`) as a package manager, you can simply install `automappa` using one of the following one-liners.
-
-### with `conda`
-
-```bash
-conda install -c bioconda automappa
-```
-
-### with `mamba`
-
-```bash
-mamba install -c bioconda automappa
-```
-
-After you have installed `automappa`, you can simply run `automappa -h` to see a list of available arguments.
-
-To start the `automappa` app, you must specify your main binning results and respective kingdom's single-copy marker annotations
-generated from an [Autometa analysis](https://www.github.com/KwanLab/Autometa). If you do not yet have these annotations and are
-not sure where to start, I would recommend checking out [Autometa's documentation](https://autometa.readthedocs.io/en/latest/)
-
-### Example `automappa` command
-
-```bash
-automappa --binning-main <binning.main.tsv> --markers <binning.markers.tsv>
-```
-
-## Automappa-lite Quickstart using Docker (No installation required)
-
- To quickly start exploring your data, run the app using a wrapper script that will run the docker image, `evanrees/automappa:latest`, ([available from Dockerhub](https://cloud.docker.com/repository/docker/evanrees/automappa/tags "Automappa Dockerhub Tags")). Now you can skip installation and start binning, examining and describing! Let the microbial exegesis begin!
-
-### Running with a docker container using `run_automappa.sh`
-
-A docker wrapper is available to run a docker container of `Automappa`.
-The only required inputs for this script are the autometa main binning output table and the respective markers table.
-
-```bash
-# First retrieve the script:
-curl -o run_automappa.sh https://raw.githubusercontent.com/WiscEvan/Automappa/main/docker/run_automappa.sh
-# (make it executable)
-chmod a+x run_automappa.sh
-```
-
-Now run automappa on autometa binning results using the downloaded script: `run_automappa.sh`.
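+> NOTE: The steps below assume a working `docker` engine (with the `compose`
+> plugin) and `make` are installed on your machine; a quick, optional sanity check:
+
+```bash
+docker --version
+docker compose version
+make --version
+```
+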
-
-### Start automappa docker container
-
-***NOTE: This will pull the automappa docker image if it is not already available***
-
-```bash
-./run_automappa.sh --binning binning.main.tsv --markers binning.markers.tsv
-```
-
-----------------------------------------------------------------------------------------------------
-
-## Install Automappa-lite from source
-
-### Installation from source (using `make`)
-
-You can install all of Automappa's dependencies using the Makefile found within the repository.
-
-#### Clone the Automappa repository
-
-```bash
-cd $HOME
-git clone https://github.com/WiscEvan/Automappa.git
-cd $HOME/Automappa
-```
-
-#### First create environment
-
-```bash
-make create_environment
-```
-
-#### Activate environment
-
-```bash
-source activate automappa
-```
-
-#### The following will install the automappa entrypoint
+### Clone the Automappa repository
 
 ```bash
-make install
+git clone https://github.com/WiscEvan/Automappa
 ```
 
-Now that all of the dependencies are installed, you may run the Automappa-lite app on your local machine or on a server.
-
-
-### Listing available `make` commands
-
-You may also list other available make commands by simply typing `make` with no other arguments.
-
-```bash
-make
-```
-
-A few examples:
-
-#### pull docker image
+### Build images for services used by Automappa
 
 ```bash
-make docker
+make build
 ```
 
-#### build docker image
-
-```bash
-make image
-```
+### Build and run Automappa services
 
-## Usage
-
-Simply provide the `automappa` entrypoint with the main binning file output by Autometa as well as the respective markers file.
+> NOTE: You can skip `make build` if you'd like, as `make up` will build or pull any images that are not already available.
 
 ```bash
-automappa \
-    --binning-main <binning.main.tsv> \
-    --markers <binning.markers.tsv>
+make up
 ```
 
-----------------------------------------------------------------------------------------------------
-
-## Advanced Usage
+> NOTE: If your computer is already using most of its resources, you may need to close
+> some applications so Docker can construct all of the necessary Automappa services.
 
-### Full `docker run` command example
+### Navigate to Automappa page
 
-```bash
-# Set automappa parameters (required)
-binning="$HOME/test/binning.main.tsv"
-markers="$HOME/test/binning.markers.tsv"
+Once you see `automappa_web_1` running in the terminal logs, you should be able to navigate to <http://localhost:8050> (the `SERVER_PORT` set in `.env`) 🥳
 
-# Set docker image/container parameters (optional)
-localport=8050
-containerport=8886
-imagetag="latest"
+### Download Test Data
 
-#NOTE: Some necessary path handling here for binding docker volumes
-binning_dirname="$( cd -- "$(dirname "$binning")" >/dev/null 2>&1 ; pwd -P )"
-binning_filename=$(basename $binning)
-markers_dirname="$( cd -- "$(dirname "$markers")" >/dev/null 2>&1 ; pwd -P )"
-markers_filename=$(basename $markers)
-
-# Run with provided parameters
-docker run \
-    --publish $localport:$containerport \
-    --detach=false \
-    -v $binning_dirname:/binning:rw \
-    -v $markers_dirname:/markers:ro \
-    --rm \
-    evanrees/automappa:$imagetag \
-        --binning-main /binning/$binning_filename \
-        --markers /markers/$markers_filename \
-        --port $containerport \
-        --host 0.0.0.0
-```
-
-## Using a remote Automappa server
-
-If you'd like to run Automappa on a *remote* server but view the output on your *local* machine,
-
-### Example remote server login with ssh tunnel
-
-you first need to login to the remote server with a tunnel, e.g. `ssh -L localport:localhost:serverport user@hostaddress`.
-```bash
-#ssh -L localport:127.0.0.1:serverport user@kwan-bioinformatics.pharmacy.wisc.edu
-#example
-ssh -L 8888:127.0.0.1:8050 sam@kwan-bioinformatics.pharmacy.wisc.edu
-```
-
-Once you are on the server, simply start the Automappa server (with the appropriate port from the ssh tunnel).
-
-```bash
-automappa \
-    --binning-main <binning.main.tsv> \
-    --markers <binning.markers.tsv> \
-    --port 8050
-```
+Test data to try out Automappa may be downloaded from Google Drive in the [Automappa test data folder]()
 
-Navigate to the app view in your browser.
+Try out different settings and perform your own refinements on some of this sponge data!
 
-This will correspond to the localport that was passed in upon login to the remote server.
-In the previous example above we would navigate to `localhost:8888`.
+> NOTE: This dataset was retrieved from:
+>
+> Uppal, Siddharth, Jackie L. Metz, René K. M. Xavier, Keshav Kumar Nepal, Dongbo Xu, Guojun Wang, and Jason C. Kwan. 2022. “Uncovering Lasonolide A Biosynthesis Using Genome-Resolved Metagenomics.” mBio 13 (5): e0152422.
 
-I've numbered the ports here to help illustrate the network communication.
-
-| Bridge | Port Bridge | Communication Context |
-| :------------- | :------------- | :------------- |
-| `localport:remoteport` | `8888:8050` | `local:remote` |
-
-### Using a remote docker container Automappa server
-
-To access Automappa through a docker container that is on a remote machine, one additional bridge
-must be constructed.
-
-First we need to forward a port from the server back to our local machine.
-
-```bash
-#ssh -L localport:localhost:serverport user@kwan-bioinformatics.pharmacy.wisc.edu
-ssh -L 8888:localhost:8887 sam@kwan-bioinformatics.pharmacy.wisc.edu
-```
-
-Now run automappa using the docker wrapper script: `run_automappa.sh`
-
-> NOTE: A wrapper is available for download to run docker with port-forwarding.
-
-```bash
-curl -o $HOME/run_automappa.sh https://raw.githubusercontent.com/WiscEvan/Automappa/main/docker/run_automappa.sh
-chmod a+x $HOME/run_automappa.sh
-```
-
-Now start automappa while setting `--localport` to match the `serverport` (`8887` from above).
-
-```bash
-# NOTE: This will pull the automappa docker image if it is not already available.
-$HOME/run_automappa.sh \
-    --imagetag main \
-    # NOTE: The 'localport' here is referring to the port on the remote
-    --localport 8887 \
-    --containerport 8050 \
-    --binning binning.main.tsv \
-    --markers binning.markers.tsv
-```
-
-Now navigate to `http://localhost:8888` and you will see the loaded data.
-
-I've numbered the ports here to help illustrate the network communication.
-
-#### Example port forwarding breakdown
-
-| Server | Port |
-| :------------- | :------------- |
-| Docker container | 8050 |
-| Remote Server | 8887 |
-| Local Computer | 8888 |
-
-#### Note
-
-- You may change **any** of these values as long as you change the respective value.
-- This will be most useful if **multiple users** will need to use the app.
-
-| Bridge | Port Bridge | Communication Context |
-| :------------- | :------------- | :------------- |
-| `remoteport:containerport` | `8887:8050` | `remote:docker` |
-| `localport:remoteport` | `8888:8887` | `local:remote` |
-
-e.g.
-
-- `localhost:8888` <-> `8888:8887` <-> `8887:8050`
+Happy binning!
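+
+When you are finished, the Makefile (see the Makefile changes above) also provides
+teardown aliases for the compose services:
+
+```bash
+make down    # alias for: docker compose down --remove-orphans
+make down-v  # as above, but also removes the volumes (-v)
+```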
-or +## Contributors -- `localhost:localport` <-> `localport:serverport` <-> `serverport:containerport` +![Automappa's Contributors](https://contrib.rocks/image?repo=WiscEvan/Automappa) diff --git a/VERSION b/VERSION index 50aea0e7..e3a4f193 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.0 \ No newline at end of file +2.2.0 \ No newline at end of file diff --git a/automappa/Procfile b/automappa/Procfile index 41135e6a..986d06fe 100644 --- a/automappa/Procfile +++ b/automappa/Procfile @@ -1 +1,2 @@ -web: gunicorn index:server \ No newline at end of file +web: gunicorn app:server --workers 4 +queue: celery --concurrency=2 --app=automappa.tasks.queue worker --loglevel=INFO --uid automappa -E \ No newline at end of file diff --git a/automappa/__main__.py b/automappa/__main__.py index b8fcab02..c5fdc9ec 100644 --- a/automappa/__main__.py +++ b/automappa/__main__.py @@ -1,11 +1,64 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from automappa import index +import argparse +import logging +from automappa import settings +from automappa.components import layout +from automappa.data.database import create_db_and_tables +from automappa.app import app -def main(): - index.main() +logging.basicConfig( + format="[%(levelname)s] %(name)s: %(message)s", + level=logging.DEBUG, +) + +logger = logging.getLogger(__name__) +numba_logger = logging.getLogger("numba") +numba_logger.setLevel(logging.WARNING) +numba_logger.propagate = False +h5py_logger = logging.getLogger("h5py") +h5py_logger.setLevel(logging.WARNING) +h5py_logger.propagate = False +root_logger = logging.getLogger() +root_logger.setLevel(logging.WARNING) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Automappa: An interactive interface for exploration of metagenomes", + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "--storage-type", + help=( + "The type of the web storage. 
(default: %(default)s)\n" + "- memory: only kept in memory, reset on page refresh.\n" + "- session: data is cleared once the browser quit.\n" + "- local: data is kept after the browser quit.\n" + ), + choices=["memory", "session", "local"], + default="session", + ) + parser.add_argument( + "--clear-store-data", + help=( + "Clear storage data (default: %(default)s)\n" + "(only required if using 'session' or 'local' for `--storage-type`)" + ), + action="store_true", + default=False, + ) + args = parser.parse_args() + + create_db_and_tables() + app.layout = layout.render(app, args.storage_type, args.clear_store_data) + app.run( + host=settings.server.host, + port=settings.server.port, + debug=settings.server.debug, + ) if __name__ == "__main__": diff --git a/automappa/app.py b/automappa/app.py index 7c718371..ee2fa92c 100755 --- a/automappa/app.py +++ b/automappa/app.py @@ -1,10 +1,29 @@ -import dash import dash_bootstrap_components as dbc -app = dash.Dash( +from dash_extensions.enrich import ( + DashProxy, + ServersideOutputTransform, +) +import dash_uploader as du +from automappa.data.database import redis_backend, file_system_backend +from automappa import settings + + +app = DashProxy( name=__name__, title="Automappa", external_stylesheets=[dbc.themes.LUX, dbc.icons.BOOTSTRAP], update_title="Automapping...", + suppress_callback_exceptions=True, + prevent_initial_callbacks=False, + use_pages=True, + pages_folder="", + transforms=[ + ServersideOutputTransform( + default_backend=[file_system_backend], + backends=[redis_backend, file_system_backend], + ), + ], ) -app.config.suppress_callback_exceptions = True + +du.configure_upload(app=app, folder=settings.server.root_upload_folder) diff --git a/automappa/apps/mag_refinement.py b/automappa/apps/mag_refinement.py deleted file mode 100644 index eea91316..00000000 --- a/automappa/apps/mag_refinement.py +++ /dev/null @@ -1,794 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import Dict, List - -import pandas as pd -import dash_daq as daq - -from dash import dcc, html -from dash.dash_table import DataTable -from dash.dependencies import Input, Output, State -from dash.exceptions import PreventUpdate -from dash_extensions import Download -from dash_extensions.snippets import send_data_frame -from plotly import graph_objects as go - -import dash_bootstrap_components as dbc -import plotly.io as pio - -from automappa.app import app - -from automappa.utils.figures import ( - get_scatterplot_2d, - taxonomy_sankey, - get_scatterplot_3d, - metric_boxplot, -) - -pio.templates.default = "plotly_white" - - -######################################################################## -# COMPONENTS: OFFCANVAS SETTINGS -# ###################################################################### - - -color_by_col_dropdown = [ - html.Label("Contigs colored by:"), - dcc.Dropdown( - id="color-by-column", - options=[], - value="cluster", - clearable=False, - ), -] - -scatterplot_2d_xaxis_dropdown = [ - html.Label("X-axis:"), - dcc.Dropdown( - id="x-axis-2d", - options=[ - {"label": "X_1", "value": "x_1"}, - {"label": "Coverage", "value": "coverage"}, - {"label": "GC%", "value": "gc_content"}, - {"label": "Length", "value": "length"}, - ], - value="x_1", - clearable=False, - ), -] - -scatterplot_2d_yaxis_dropdown = [ - html.Label("Y-axis:"), - dcc.Dropdown( - id="y-axis-2d", - options=[ - {"label": "X_2", "value": "x_2"}, - {"label": "Coverage", "value": "coverage"}, - {"label": "GC%", "value": "gc_content"}, - {"label": "Length", "value": "length"}, - ], - value="x_2", - 
clearable=False, - ), -] - -scatterplot_3d_zaxis_dropdown = [ - html.Label("Z-axis:"), - dcc.Dropdown( - id="scatterplot-3d-zaxis-dropdown", - options=[ - {"label": "Coverage", "value": "coverage"}, - {"label": "GC%", "value": "gc_content"}, - {"label": "Length", "value": "length"}, - ], - value="coverage", - clearable=False, - ), -] - -taxa_rank_dropdown = [ - html.Label("Distribute taxa by rank:"), - dcc.Dropdown( - id="taxonomy-distribution-dropdown", - options=[ - {"label": "Class", "value": "class"}, - {"label": "Order", "value": "order"}, - {"label": "Family", "value": "family"}, - {"label": "Genus", "value": "genus"}, - {"label": "Species", "value": "species"}, - ], - value="species", - clearable=False, - ), -] - -# Scatterplot 2D Legend Toggle -scatterplot_2d_legend_toggle = daq.ToggleSwitch( - id="show-legend-toggle", - size=40, - color="#c5040d", - label="Legend", - labelPosition="top", - vertical=False, - value=True, -) - -# Scatterplot 3D Legend Toggle -scatterplot_3d_legend_toggle = daq.ToggleSwitch( - id="scatterplot-3d-legend-toggle", - size=40, - color="#c5040d", - label="Legend", - labelPosition="top", - vertical=False, - value=True, -) - -# Download Refinements Button -binning_refinements_download_button = [ - dbc.Button( - "Download Refinements", - id="refinements-download-button", - n_clicks=0, - color="primary", - ), - Download(id="refinements-download"), -] - -# Summarize Refinements Button -binning_refinements_summary_button = [ - dbc.Button( - "Summarize Refinements", - id="refinements-summary-button", - n_clicks=0, - color="primary", - ), -] - - -refinement_settings_offcanvas = dbc.Offcanvas( - [ - dbc.Accordion( - [ - dbc.AccordionItem( - [ - dbc.Row( - [ - dbc.Col(color_by_col_dropdown), - dbc.Col(scatterplot_2d_legend_toggle), - ] - ), - dbc.Row( - [ - dbc.Col(scatterplot_2d_xaxis_dropdown), - dbc.Col(scatterplot_2d_yaxis_dropdown), - ] - ), - ], - title="Figure 1: 2D Metagenome Overview", - ), - dbc.AccordionItem( - [ - dbc.Row( - [ - dbc.Col(scatterplot_3d_zaxis_dropdown), - dbc.Col(scatterplot_3d_legend_toggle), - ] - ), - ], - title="Figure 2: 3D Metagenome Overview", - ), - dbc.AccordionItem( - [ - dbc.Col(taxa_rank_dropdown), - ], - title="Figure 3: Taxonomic Distribution", - ), - ], - start_collapsed=True, - flush=True, - ), - dbc.Row( - [ - dbc.Col(binning_refinements_download_button), - dbc.Col(binning_refinements_summary_button), - ] - ), - ], - id="settings-offcanvas", - title="Settings", - is_open=False, - placement="end", - scrollable=True, -) - -######################################################################## -# COMPONENTS: Buttons and Toggle -# ###################################################################### - -refinement_settings_button = dbc.Button("Settings", id="settings-button", n_clicks=0) - -mag_refinement_save_button = dbc.Button( - "Save selection to MAG refinement", - id="mag-refinement-save-button", - n_clicks=0, - disabled=True, -) - -# Tooltip for info on store selections behavior -hide_selections_tooltip = dbc.Tooltip( - 'Toggling this to the "on" state will hide your manually-curated MAG refinement groups', - target="hide-selections-toggle", - placement="auto", -) - -# add hide selection toggle -hide_selections_toggle = daq.ToggleSwitch( - id="hide-selections-toggle", - size=40, - color="#c5040d", - label="Hide MAG Refinements", - labelPosition="top", - vertical=False, - value=False, -) - -# TODO: Refactor to update scatterplot legend with update marker symbol traces... 
-marker_symbols_label = html.Pre( - """ -Marker Symbol Circle: 0 Diamond: 2 X: 4 Hexagon: 6 - Count Legend Square: 1 Triangle: 3 Pentagon: 5 Hexagram: 7+ -""" -) - -mag_refinement_buttons = html.Div( - [ - refinement_settings_button, - refinement_settings_offcanvas, - mag_refinement_save_button, - hide_selections_toggle, - hide_selections_tooltip, - marker_symbols_label, - ], - className="d-grid gap-2 d-md-flex justify-content-md-start", -) - -######################################################################## -# COMPONENTS: FIGURES AND TABLES -# ###################################################################### - -# Add metrics as alerts using MIMAG standards -# TODO: Add progress bar to emit MAG curation progress -# See: https://dash-bootstrap-components.opensource.faculty.ai/docs/components/progress -# Color using MIMAG thresholds listed below: -# For current standards see the following links: -# contamination: https://genomicsstandardsconsortium.github.io/mixs/contam_score/ -# completeness: https://genomicsstandardsconsortium.github.io/mixs/compl_score/ -# (success) alert --> passing thresholds (completeness >= 90%, contamination <= 5%) -# (warning) alert --> within 10% thresholds, e.g. (completeness >=80%, contam. <= 15%) -# (danger) alert --> failing thresholds (completeness less than 80%, contam. >15%) -# TODO: Add callbacks for updating `color`, `value` and `label` with computed completeness and purity values -completeness_progress = dbc.Progress(id="mag-refinement-completeness-progress") -purity_progress = dbc.Progress(id="mag-refinement-purity-progress") - - -mag_metrics_table = [ - html.Label("Table 1. MAG Marker Metrics"), - dcc.Loading( - id="loading-mag-metrics-datatable", - children=[html.Div(id="mag-metrics-datatable")], - type="dot", - color="#646569", - ), -] - -scatterplot_2d = [ - html.Label("Figure 1: 2D Metagenome Overview"), - dcc.Loading( - id="loading-scatterplot-2d", - children=[ - dcc.Graph( - id="scatterplot-2d", - clear_on_unhover=True, - config={"displayModeBar": True, "displaylogo": False}, - ) - ], - type="graph", - ), -] - -scatterplot_3d = [ - html.Label("Figure 2: 3D Metagenome Overview"), - dcc.Loading( - id="loading-scatterplot-3d", - children=[ - dcc.Graph( - id="scatterplot-3d", - clear_on_unhover=True, - config={ - "toImageButtonOptions": dict( - format="svg", - filename="figure_2_3D_metagenome_overview", - ), - "displayModeBar": True, - "displaylogo": False, - }, - ) - ], - type="graph", - ), -] - - -taxonomy_figure = [ - html.Label("Figure 3: Taxonomic Distribution"), - dcc.Loading( - id="loading-taxonomy-distribution", - children=[ - dcc.Graph( - id="taxonomy-distribution", - config={ - "displayModeBar": False, - "displaylogo": False, - "staticPlot": True, - }, - ) - ], - type="graph", - ), -] - -mag_refinement_coverage_boxplot = [ - html.Label("Figure 4: MAG Refinement Coverage Boxplot"), - dcc.Loading( - id="loading-mag-refinement-coverage-boxplot", - children=[ - dcc.Graph( - id="mag-refinement-coverage-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="dot", - color="#646569", - ), -] - -mag_refinement_gc_content_boxplot = [ - html.Label("Figure 5: MAG Refinement GC Content Boxplot"), - dcc.Loading( - id="loading-mag-refinement-gc-content-boxplot", - children=[ - dcc.Graph( - id="mag-refinement-gc-content-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="dot", - color="#0479a8", - ), -] - -mag_refinement_length_boxplot = [ - html.Label("Figure 6: MAG Refinement 
Length Boxplot"), - dcc.Loading( - id="loading-mag-refinement-length-boxplot", - children=[ - dcc.Graph( - id="mag-refinement-length-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="dot", - color="#0479a8", - ), -] - - -refinements_table = dcc.Loading( - id="loading-refinements-table", - children=[html.Div(id="refinements-table")], - type="circle", - color="#646569", -) - - -######################################################################## -# LAYOUT -# ###################################################################### - -# https://dash-bootstrap-components.opensource.faculty.ai/docs/components/layout/ -# For best results, make sure you adhere to the following two rules when constructing your layouts: -# -# 1. Only use Row and Col inside a Container. -# 2. The immediate children of any Row component should always be Col components. -# 3. Your content should go inside the Col components. - -layout = dbc.Container( - children=[ - dbc.Row([dbc.Col(mag_refinement_buttons)]), - dbc.Row( - [dbc.Col(scatterplot_2d, width=9), dbc.Col(mag_metrics_table, width=3)] - ), - # TODO: Add MAG assembly metrics table - dbc.Row([dbc.Col(taxonomy_figure, width=9), dbc.Col(scatterplot_3d, width=3)]), - dbc.Row( - [ - dbc.Col(mag_refinement_coverage_boxplot, width=4), - dbc.Col(mag_refinement_gc_content_boxplot, width=4), - dbc.Col(mag_refinement_length_boxplot, width=4), - ] - ), - dbc.Row([dbc.Col(refinements_table, width=12)]), - ], - fluid=True, -) - - -######################################################################## -# CALLBACKS -# ###################################################################### - - -@app.callback( - Output("settings-offcanvas", "is_open"), - Input("settings-button", "n_clicks"), - [State("settings-offcanvas", "is_open")], -) -def toggle_offcanvas(n1: int, is_open: bool) -> bool: - if n1: - return not is_open - return is_open - - -@app.callback( - Output("color-by-column", "options"), [Input("metagenome-annotations", "data")] -) -def color_by_column_options_callback(annotations_json: "str | None"): - df = pd.read_json(annotations_json, orient="split") - return [ - {"label": col.title().replace("_", " "), "value": col} - for col in df.columns - if df[col].dtype.name not in {"float64", "int64"} and col != "contig" - ] - - -@app.callback( - Output("mag-metrics-datatable", "children"), - [ - Input("markers-store", "data"), - Input("scatterplot-2d", "selectedData"), - ], -) -def update_mag_metrics_datatable_callback( - markers_json: "str | None", selected_contigs: Dict[str, List[Dict[str, str]]] -) -> DataTable: - markers_df = pd.read_json(markers_json, orient="split").set_index("contig") - if selected_contigs: - contigs = {point["text"] for point in selected_contigs["points"]} - selected_contigs_count = len(contigs) - markers_df = markers_df.loc[markers_df.index.isin(contigs)] - - expected_markers_count = markers_df.shape[1] - - pfam_counts = markers_df.sum() - if pfam_counts[pfam_counts.ge(1)].empty: - total_markers = 0 - single_copy_marker_count = 0 - markers_present_count = 0 - redundant_markers_count = 0 - marker_set_count = 0 - completeness = "NA" - purity = "NA" - else: - total_markers = pfam_counts.sum() - single_copy_marker_count = pfam_counts.eq(1).sum() - markers_present_count = pfam_counts.ge(1).sum() - redundant_markers_count = pfam_counts.gt(1).sum() - completeness = markers_present_count / expected_markers_count * 100 - purity = single_copy_marker_count / markers_present_count * 100 - marker_set_count = total_markers / 
expected_markers_count - - marker_contig_count = markers_df.sum(axis=1).ge(1).sum() - single_marker_contig_count = markers_df.sum(axis=1).eq(1).sum() - multi_marker_contig_count = markers_df.sum(axis=1).gt(1).sum() - metrics_data = { - "Expected Markers": expected_markers_count, - "Total Markers": total_markers, - "Redundant-Markers": redundant_markers_count, - "Markers Count": markers_present_count, - "Marker Sets (Total / Expected)": marker_set_count, - "Marker-Containing Contigs": marker_contig_count, - "Multi-Marker Contigs": multi_marker_contig_count, - "Single-Marker Contigs": single_marker_contig_count, - } - if selected_contigs: - selection_metrics = { - "Contigs": selected_contigs_count, - "Completeness (%)": completeness, - "Purity (%)": purity, - } - selection_metrics.update(metrics_data) - # Adding this extra step b/c to keep selection metrics at top of the table... - metrics_data = selection_metrics - - metrics_df = pd.DataFrame([metrics_data]).T - metrics_df.rename(columns={0: "Value"}, inplace=True) - metrics_df.index.name = "MAG Metric" - metrics_df.reset_index(inplace=True) - metrics_df = metrics_df.round(2) - return DataTable( - data=metrics_df.to_dict("records"), - columns=[{"name": col, "id": col} for col in metrics_df.columns], - style_cell={ - "height": "auto", - # all three widths are needed - "minWidth": "20px", - "width": "20px", - "maxWidth": "20px", - "whiteSpace": "normal", - "textAlign": "center", - }, - # TODO: style completeness and purity cells to MIMAG standards as mentioned above - ) - - -@app.callback( - Output("scatterplot-2d", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("refinement-data", "data"), - Input("contig-marker-symbols-store", "data"), - Input("x-axis-2d", "value"), - Input("y-axis-2d", "value"), - Input("show-legend-toggle", "value"), - Input("color-by-column", "value"), - Input("hide-selections-toggle", "value"), - ], -) -def scatterplot_2d_figure_callback( - annotations: "str | None", - refinement: "str | None", - contig_marker_symbols_json: "str | None", - xaxis_column: str, - yaxis_column: str, - show_legend: bool, - color_by_col: str, - hide_selection_toggle: bool, -) -> go.Figure: - # TODO: #23 refactor scatterplot callbacks - bin_df = pd.read_json(annotations, orient="split").set_index("contig") - markers = pd.read_json(contig_marker_symbols_json, orient="split").set_index( - "contig" - ) - color_by_col = "phylum" if color_by_col not in bin_df.columns else color_by_col - # Subset metagenome-annotations by selections iff selections have been made - bin_df[color_by_col] = bin_df[color_by_col].fillna("unclustered") - if hide_selection_toggle: - refine_df = pd.read_json(refinement, orient="split").set_index("contig") - refine_cols = [col for col in refine_df.columns if "refinement" in col] - if refine_cols: - latest_refine_col = refine_cols.pop() - # Retrieve only contigs that have already been refined... 
- refined_contigs_index = refine_df[ - refine_df[latest_refine_col].str.contains("refinement") - ].index - bin_df.drop( - refined_contigs_index, axis="index", inplace=True, errors="ignore" - ) - - fig = get_scatterplot_2d( - bin_df, - x_axis=xaxis_column, - y_axis=yaxis_column, - color_by_col=color_by_col, - ) - - # Update markers with symbol and size corresponding to marker count - fig.for_each_trace( - lambda trace: trace.update( - marker_symbol=markers.symbol.loc[trace.text], - marker_size=markers.marker_size.loc[trace.text], - ) - ) - fig.update_layout(showlegend=show_legend) - return fig - - -@app.callback( - Output("taxonomy-distribution", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("scatterplot-2d", "selectedData"), - Input("taxonomy-distribution-dropdown", "value"), - ], -) -def taxonomy_distribution_figure_callback( - annotations: "str | None", - selected_contigs: Dict[str, List[Dict[str, str]]], - selected_rank: str, -) -> go.Figure: - df = pd.read_json(annotations, orient="split") - if selected_contigs: - ctg_list = {point["text"] for point in selected_contigs["points"]} - df = df[df.contig.isin(ctg_list)] - fig = taxonomy_sankey(df, selected_rank=selected_rank) - return fig - - -@app.callback( - Output("scatterplot-3d", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("scatterplot-3d-zaxis-dropdown", "value"), - Input("scatterplot-3d-legend-toggle", "value"), - Input("color-by-column", "value"), - Input("scatterplot-2d", "selectedData"), - ], -) -def scatterplot_3d_figure_callback( - annotations: "str | None", - z_axis: str, - show_legend: bool, - color_by_col: str, - selected_contigs: Dict[str, List[Dict[str, str]]], -) -> go.Figure: - df = pd.read_json(annotations, orient="split") - color_by_col = "phylum" if color_by_col not in df.columns else color_by_col - if not selected_contigs: - contigs = df.contig.tolist() - else: - contigs = {point["text"] for point in selected_contigs["points"]} - # Subset DataFrame by selected contigs - df = df[df.contig.isin(contigs)] - if color_by_col == "cluster": - # Categoricals for binning - df[color_by_col] = df[color_by_col].fillna("unclustered") - else: - # Other possible categorical columns all relate to taxonomy - df[color_by_col] = df[color_by_col].fillna("unclassified") - fig = get_scatterplot_3d( - df=df, - x_axis="x_1", - y_axis="x_2", - z_axis=z_axis, - color_by_col=color_by_col, - ) - fig.update_layout(showlegend=show_legend) - return fig - - -@app.callback( - Output("mag-refinement-coverage-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("scatterplot-2d", "selectedData"), - ], -) -def mag_summary_coverage_boxplot_callback( - df_json: "str | None", selected_data: Dict[str, List[Dict[str, str]]] -) -> go.Figure: - df = pd.read_json(df_json, orient="split") - if not selected_data: - raise PreventUpdate - contigs = {point["text"] for point in selected_data["points"]} - df = df.loc[df.contig.isin(contigs)] - fig = metric_boxplot(df, metrics=["coverage"], boxmean="sd") - return fig - - -@app.callback( - Output("mag-refinement-gc-content-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("scatterplot-2d", "selectedData"), - ], -) -def mag_summary_gc_content_boxplot_callback( - df_json: "str | None", selected_data: Dict[str, List[Dict[str, str]]] -) -> go.Figure: - df = pd.read_json(df_json, orient="split") - if not selected_data: - raise PreventUpdate - contigs = {point["text"] for point in selected_data["points"]} - df = 
df.loc[df.contig.isin(contigs)] - fig = metric_boxplot(df, metrics=["gc_content"], boxmean="sd") - fig.update_traces(name="GC Content") - return fig - - -@app.callback( - Output("mag-refinement-length-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("scatterplot-2d", "selectedData"), - ], -) -def mag_summary_length_boxplot_callback( - df_json: "str | None", selected_data: Dict[str, List[Dict[str, str]]] -) -> go.Figure: - df = pd.read_json(df_json, orient="split") - if not selected_data: - raise PreventUpdate - contigs = {point["text"] for point in selected_data["points"]} - df = df.loc[df.contig.isin(contigs)] - fig = metric_boxplot(df, metrics=["length"]) - return fig - - -@app.callback( - Output("refinements-table", "children"), - [Input("refinement-data", "data")], -) -def refinements_table_callback(df: "str | None") -> DataTable: - df = pd.read_json(df, orient="split") - return DataTable( - data=df.to_dict("records"), - columns=[{"name": col, "id": col} for col in df.columns], - style_cell={"textAlign": "center"}, - style_cell_conditional=[{"if": {"column_id": "contig"}, "textAlign": "right"}], - virtualization=True, - ) - - -@app.callback( - Output("refinements-download", "data"), - [ - Input("refinements-download-button", "n_clicks"), - Input("refinement-data", "data"), - ], -) -def download_refinements( - n_clicks: int, curated_mags: "str | None" -) -> Dict[str, "str | bool"]: - if not n_clicks: - raise PreventUpdate - df = pd.read_json(curated_mags, orient="split") - return send_data_frame(df.to_csv, "refinements.csv", index=False) - - -@app.callback( - Output("mag-refinement-save-button", "disabled"), - [Input("scatterplot-2d", "selectedData")], -) -def mag_refinement_save_button_disabled_callback( - selected_data: Dict[str, List[Dict[str, str]]] -) -> bool: - return not selected_data - - -@app.callback( - [ - Output("refinement-data", "data"), - Output("mag-refinement-save-button", "n_clicks"), - ], - [ - Input("scatterplot-2d", "selectedData"), - Input("refinement-data", "data"), - Input("mag-refinement-save-button", "n_clicks"), - ], - [ - State("refinement-data", "data"), - ], -) -def store_binning_refinement_selections( - selected_data: Dict[str, List[Dict[str, str]]], - refinement_data: "str | None", - n_clicks: int, - intermediate_selections: "str | None", -) -> "str | None": - # Initial load... 
- if not selected_data: - bin_df = pd.read_json(refinement_data, orient="split") - return bin_df.to_json(orient="split"), 0 - if not n_clicks or (n_clicks and not selected_data): - raise PreventUpdate - pdf = pd.read_json(intermediate_selections, orient="split").set_index("contig") - refinement_cols = [col for col in pdf.columns if "refinement" in col] - refinement_num = len(refinement_cols) + 1 - group_name = f"refinement_{refinement_num}" - contigs = list({point["text"] for point in selected_data["points"]}) - pdf.loc[contigs, group_name] = group_name - pdf = pdf.fillna(axis="columns", method="ffill") - pdf.reset_index(inplace=True) - return pdf.to_json(orient="split"), 0 diff --git a/automappa/apps/mag_summary.py b/automappa/apps/mag_summary.py deleted file mode 100644 index 65b325c2..00000000 --- a/automappa/apps/mag_summary.py +++ /dev/null @@ -1,509 +0,0 @@ -# -*- coding: utf-8 -*- -from typing import Any, Dict, List -from dash.exceptions import PreventUpdate - -import numpy as np -import pandas as pd - -from autometa.binning.summary import fragmentation_metric, get_metabin_stats - -from dash import dcc, html -from dash.dash_table import DataTable -from dash.dependencies import Input, Output -import dash_bootstrap_components as dbc -from plotly import graph_objects as go -import plotly.io as pio - -from automappa.app import app -from automappa.utils.figures import taxonomy_sankey, metric_boxplot - -pio.templates.default = "plotly_white" - - -######################################################################## -# COMPONENTS: Figures & Tables -######################################################################## - -## Overview figures and table - -mag_overview_metrics_boxplot = dcc.Loading( - id="loading-mag-overview-metrics-boxplot", - children=[ - dcc.Graph( - id="mag-overview-metrics-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="default", - color="#0479a8", -) - -mag_overview_gc_content_boxplot = dcc.Loading( - id="loading-mag-overview-gc-content-boxplot", - children=[ - dcc.Graph( - id="mag-overview-gc-content-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="dot", - color="#646569", -) - -mag_overview_length_boxplot = dcc.Loading( - id="loading-mag-overview-length-boxplot", - children=[ - dcc.Graph( - id="mag-overview-length-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="default", - color="#0479a8", -) - -mag_overview_coverage_boxplot = dcc.Loading( - id="loading-mag-overview-coverage-boxplot", - children=[ - dcc.Graph( - id="mag-overview-coverage-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="dot", - color="#646569", -) - -mag_summary_stats_datatable = [ - html.Label("Table 1. 
MAGs Summary"), - dcc.Loading( - id="loading-mag-summary-stats-datatable", - children=[html.Div(id="mag-summary-stats-datatable")], - type="circle", - color="#646569", - ), -] - -### Selected MAG figures - -mag_taxonomy_sankey = dcc.Loading( - id="loading-mag-taxonomy-sankey", - children=[dcc.Graph(id="mag-taxonomy-sankey")], - type="graph", -) - -mag_metrics_boxplot = dcc.Loading( - id="loading-mag-metrics-boxplot", - children=[ - dcc.Graph( - id="mag-metrics-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="dot", - color="#646569", -) - -mag_gc_content_boxplot = dcc.Loading( - id="loading-mag-gc-content-boxplot", - children=[ - dcc.Graph( - id="mag-gc-content-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="default", - color="#0479a8", -) - -mag_length_boxplot = dcc.Loading( - id="loading-mag-length-boxplot", - children=[ - dcc.Graph( - id="mag-length-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="dot", - color="#646569", -) - -mag_coverage_boxplot = dcc.Loading( - id="loading-mag-coverage-boxplot", - children=[ - dcc.Graph( - id="mag-coverage-boxplot", - config={"displayModeBar": False, "displaylogo": False}, - ) - ], - type="default", - color="#0479a8", -) - -######################################################################## -# AESTHETHIC COMPONENTS: Dropdowns -######################################################################## - - -mag_summary_cluster_col_dropdown = [ - html.Label("MAG Summary Cluster Column Dropdown"), - dcc.Dropdown( - id="mag-summary-cluster-col-dropdown", - value="cluster", - clearable=False, - ), -] - -mag_selection_dropdown = [ - html.Label("MAG Selection Dropdown"), - dcc.Dropdown(id="mag-selection-dropdown", clearable=True), -] - - -######################################################################## -# LAYOUT -# ###################################################################### - -# https://dash-bootstrap-components.opensource.faculty.ai/docs/components/layout/ -# For best results, make sure you adhere to the following two rules when constructing your layouts: -# -# 1. Only use Row and Col inside a Container. -# 2. The immediate children of any Row component should always be Col components. -# 3. Your content should go inside the Col components. 
- -# TODO: Markdown Summary Report - -layout = dbc.Container( - [ - dbc.Row( - [ - dbc.Col(mag_overview_metrics_boxplot, width=3), - dbc.Col(mag_overview_gc_content_boxplot, width=3), - dbc.Col(mag_overview_length_boxplot, width=3), - dbc.Col(mag_overview_coverage_boxplot, width=3), - ] - ), - dbc.Row([dbc.Col(mag_summary_cluster_col_dropdown)]), - dbc.Col(mag_summary_stats_datatable), - dbc.Col(mag_selection_dropdown), - dbc.Col(mag_taxonomy_sankey), - dbc.Row( - [ - dbc.Col(mag_metrics_boxplot, width=3), - dbc.Col(mag_gc_content_boxplot, width=3), - dbc.Col(mag_length_boxplot, width=3), - dbc.Col(mag_coverage_boxplot, width=3), - ] - ), - ], - fluid=True, -) - - -######################################################################## -# CALLBACKS -# ###################################################################### - - -@app.callback( - Output("mag-overview-metrics-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - ], -) -def mag_overview_metrics_boxplot_callback( - df_json: "str | None", cluster_col: str -) -> go.Figure: - """ - Writes - Given dataframe as json and cluster column: - Input: - - binning dataframe - - binning column - Returns: - n_unique_bins - number of unique bins - """ - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_summary_df = mag_summary_df.loc[mag_summary_df[cluster_col].ne("unclustered")] - fig = metric_boxplot(df=mag_summary_df, metrics=["completeness", "purity"]) - return fig - - -@app.callback( - Output("mag-overview-gc-content-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - ], -) -def mag_overview_gc_content_boxplot_callback( - df_json: "str | None", cluster_col: str -) -> go.Figure: - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_summary_df = mag_summary_df.loc[mag_summary_df[cluster_col].ne("unclustered")] - fig = metric_boxplot(df=mag_summary_df, metrics=["gc_content"]) - fig.update_traces(name="GC Content") - return fig - - -@app.callback( - Output("mag-overview-length-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - ], -) -def mag_overview_length_boxplot_callback( - df_json: "str | None", cluster_col: str -) -> go.Figure: - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_summary_df = mag_summary_df.loc[mag_summary_df[cluster_col].ne("unclustered")] - fig = metric_boxplot(mag_summary_df, metrics=["length"]) - return fig - - -@app.callback( - Output("mag-overview-coverage-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - ], -) -def mag_overview_coverage_boxplot_callback( - df_json: "str | None", cluster_col: str -) -> go.Figure: - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_summary_df = mag_summary_df.loc[mag_summary_df[cluster_col].ne("unclustered")] - fig = 
metric_boxplot(mag_summary_df, metrics=["coverage"]) - return fig - - -@app.callback( - Output("mag-summary-cluster-col-dropdown", "options"), - [Input("metagenome-annotations", "data")], -) -def mag_summary_cluster_col_dropdown_options_callback(df_json): - bin_df = pd.read_json(df_json, orient="split") - return [ - {"label": col.title(), "value": col} - for col in bin_df.columns - if "cluster" in col or "refinement" in col - ] - - -@app.callback( - Output("mag-summary-stats-datatable", "children"), - [ - Input("metagenome-annotations", "data"), - Input("markers-store", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - ], -) -def mag_summary_stats_datatable_callback( - mag_annotations_json, markers_json, cluster_col -): - bin_df = pd.read_json(mag_annotations_json, orient="split") - markers = pd.read_json(markers_json, orient="split").set_index("contig") - if cluster_col not in bin_df.columns: - num_expected_markers = markers.shape[1] - length_weighted_coverage = np.average( - a=bin_df.coverage, weights=bin_df.length / bin_df.length.sum() - ) - length_weighted_gc = np.average( - a=bin_df.gc_content, weights=bin_df.length / bin_df.length.sum() - ) - cluster_pfams = markers[markers.index.isin(bin_df.index)] - pfam_counts = cluster_pfams.sum() - total_markers = pfam_counts.sum() - num_single_copy_markers = pfam_counts[pfam_counts == 1].count() - num_markers_present = pfam_counts[pfam_counts >= 1].count() - stats_df = pd.DataFrame( - [ - { - cluster_col: "metagenome", - "nseqs": bin_df.shape[0], - "size (bp)": bin_df.length.sum(), - "N90": fragmentation_metric(bin_df, quality_measure=0.9), - "N50": fragmentation_metric(bin_df, quality_measure=0.5), - "N10": fragmentation_metric(bin_df, quality_measure=0.1), - "length_weighted_gc_content": length_weighted_gc, - "min_gc_content": bin_df.gc_content.min(), - "max_gc_content": bin_df.gc_content.max(), - "std_gc_content": bin_df.gc_content.std(), - "length_weighted_coverage": length_weighted_coverage, - "min_coverage": bin_df.coverage.min(), - "max_coverage": bin_df.coverage.max(), - "std_coverage": bin_df.coverage.std(), - "num_total_markers": total_markers, - f"num_unique_markers (expected {num_expected_markers})": num_markers_present, - "num_single_copy_markers": num_single_copy_markers, - } - ] - ).convert_dtypes() - else: - stats_df = ( - get_metabin_stats( - bin_df=bin_df.set_index("contig"), - markers=markers, - cluster_col=cluster_col, - ) - .reset_index() - .fillna(0) - ) - return DataTable( - data=stats_df.to_dict("records"), - columns=[ - {"name": col.replace("_", " "), "id": col} for col in stats_df.columns - ], - style_table={"overflowX": "auto"}, - style_cell={ - "height": "auto", - # all three widths are needed - "minWidth": "120px", - "width": "120px", - "maxWidth": "120px", - "whiteSpace": "normal", - }, - fixed_rows={"headers": True}, - ) - - -## Selected MAG callbacks - - -@app.callback( - Output("mag-taxonomy-sankey", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - Input("mag-selection-dropdown", "value"), - ], -) -def mag_taxonomy_sankey_callback( - mag_summary_json: "str | None", cluster_col: str, selected_mag: str -) -> go.Figure: - mag_summary_df = pd.read_json(mag_summary_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_df = mag_summary_df.loc[mag_summary_df[cluster_col].eq(selected_mag)] - fig = taxonomy_sankey(mag_df) - return fig - - -@app.callback( - Output("mag-gc-content-boxplot", "figure"), 
- [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - Input("mag-selection-dropdown", "value"), - ], -) -def mag_summary_gc_content_boxplot_callback( - df_json: "str | None", cluster_col: str, selected_mag: str -) -> go.Figure: - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_df = mag_summary_df.loc[mag_summary_df[cluster_col].eq(selected_mag)] - mag_df = mag_df.round(2) - fig = metric_boxplot(df=mag_df, metrics=["gc_content"]) - fig.update_traces(name="GC Content") - return fig - - -@app.callback( - Output("mag-metrics-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - Input("mag-selection-dropdown", "value"), - ], -) -def mag_summary_gc_content_boxplot_callback( - df_json: "str | None", cluster_col: str, selected_mag: str -) -> go.Figure: - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_df = mag_summary_df.loc[mag_summary_df[cluster_col].eq(selected_mag)] - mag_df = mag_df.round(2) - fig = metric_boxplot(df=mag_df, metrics=["completeness", "purity"]) - return fig - - -@app.callback( - Output("mag-coverage-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - Input("mag-selection-dropdown", "value"), - ], -) -def mag_summary_gc_content_boxplot_callback( - df_json: "str | None", cluster_col: str, selected_mag: str -) -> go.Figure: - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_df = mag_summary_df.loc[mag_summary_df[cluster_col].eq(selected_mag)] - mag_df = mag_df.round(2) - fig = metric_boxplot(df=mag_df, metrics=["coverage"]) - return fig - - -@app.callback( - Output("mag-length-boxplot", "figure"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - Input("mag-selection-dropdown", "value"), - ], -) -def mag_summary_gc_content_boxplot_callback( - df_json: "str | None", cluster_col: str, selected_mag: str -) -> go.Figure: - mag_summary_df = pd.read_json(df_json, orient="split") - if cluster_col not in mag_summary_df.columns: - raise PreventUpdate - mag_summary_df = mag_summary_df.dropna(subset=[cluster_col]) - mag_df = mag_summary_df.loc[mag_summary_df[cluster_col].eq(selected_mag)] - mag_df = mag_df.round(2) - fig = metric_boxplot(df=mag_df, metrics=["length"]) - return fig - - -@app.callback( - Output("mag-selection-dropdown", "options"), - [ - Input("metagenome-annotations", "data"), - Input("mag-summary-cluster-col-dropdown", "value"), - ], -) -def mag_selection_dropdown_options_callback( - mag_annotations_json: "str | None", cluster_col: str -) -> List[Dict[str, str]]: - df = pd.read_json(mag_annotations_json, orient="split") - if cluster_col not in df.columns: - options = [] - else: - options = [ - {"label": cluster, "value": cluster} - for cluster in df[cluster_col].dropna().unique() - ] - return options diff --git a/automappa/app/assets/android-chrome-192x192.png b/automappa/assets/android-chrome-192x192.png similarity index 100% rename from automappa/app/assets/android-chrome-192x192.png rename to 
automappa/assets/android-chrome-192x192.png diff --git a/automappa/app/assets/android-chrome-512x512.png b/automappa/assets/android-chrome-512x512.png similarity index 100% rename from automappa/app/assets/android-chrome-512x512.png rename to automappa/assets/android-chrome-512x512.png diff --git a/automappa/app/assets/apple-touch-icon.png b/automappa/assets/apple-touch-icon.png similarity index 100% rename from automappa/app/assets/apple-touch-icon.png rename to automappa/assets/apple-touch-icon.png diff --git a/automappa/app/assets/favicon-16x16.png b/automappa/assets/favicon-16x16.png similarity index 100% rename from automappa/app/assets/favicon-16x16.png rename to automappa/assets/favicon-16x16.png diff --git a/automappa/app/assets/favicon-32x32.png b/automappa/assets/favicon-32x32.png similarity index 100% rename from automappa/app/assets/favicon-32x32.png rename to automappa/assets/favicon-32x32.png diff --git a/automappa/app/assets/favicon.ico b/automappa/assets/favicon.ico similarity index 100% rename from automappa/app/assets/favicon.ico rename to automappa/assets/favicon.ico diff --git a/automappa/app/assets/site.webmanifest b/automappa/assets/site.webmanifest similarity index 100% rename from automappa/app/assets/site.webmanifest rename to automappa/assets/site.webmanifest diff --git a/automappa/apps/__init__.py b/automappa/components/__init__.py similarity index 100% rename from automappa/apps/__init__.py rename to automappa/components/__init__.py diff --git a/automappa/components/ids.py b/automappa/components/ids.py new file mode 100644 index 00000000..3bdd1794 --- /dev/null +++ b/automappa/components/ids.py @@ -0,0 +1,130 @@ +# ROOT: stores and pages navigation +PAGES_NAVBAR = "automappa-pages-navbar" +BINNING_MAIN_UPLOAD_STORE = "binning-main-upload-store" +TASK_ID_STORE = "task-id-store" +MARKERS_UPLOAD_STORE = "markers-upload-store" +METAGENOME_UPLOAD_STORE = "metagenome-upload-store" +SAMPLES_STORE = "samples-store" +METAGENOME_ID_STORE = "metagenome-id-store" +SELECTED_TABLES_STORE = "selected-tables-store" +CYTOSCAPE_STORE = "cytoscape-upload-store" +BINNING_STORE = "binning-store" +# HOME +HOME_TAB_ID = "Home" +SELECTED_TABLES_DATATABLE = "selected-tables-datatable" +SAMPLES_DATATABLE = "samples-datatable" +LOADING_SAMPLES_DATATABLE = "loading-samples-datatable" +BINNING_SELECT = "binning-select" +MARKERS_SELECT = "markers-select" +METAGENOME_SELECT = "metagenome-select" +CYTOSCAPE_SELECT = "cytoscape-select" +BINNING_UPLOAD = "binning-upload" +MARKERS_UPLOAD = "markers-upload" +METAGENOME_UPLOAD = "metagenome-upload" +CYTOSCAPE_UPLOAD = "cytoscape-upload" +VALIDATED_UPLOADS_STORE = "validated-uploads-store" +UPLOAD_MODAL = "modal-dismiss" +OPEN_MODAL_BUTTON = "open-dismiss" +CLOSE_MODAL_BUTTON = "close-dismiss" +UPLOAD_STEPPER = "upload-stepper" +UPLOAD_STEPPER_BACK_BUTTON = "upload-stepper-back-button" +UPLOAD_STEPPER_NEXT_BUTTON = "upload-stepper-next-button" +UPLOAD_STEPPER_SUBMIT_BUTTON = "upload-stepper-submit-button" +UPLOADED_DATA_TYPE = "uploaded-data-type" +REFINE_MAGS_BUTTON = "refine-mags-button" +LOADING_SELECTED_TABLES_DATATABLE = "loading-selected-tables-datatable" +EMBEDDING_TASKS = "embedding-tasks" +SAMPLE_NAME_TEXT_INPUT = "sample-name-text-input" +SAMPLE_CARDS_CONTAINER = "sample-cards-container" +SAMPLE_CARD_INDEX = "sample-card-index" +SAMPLE_CARD_TYPE = "sample-card-type" +SAMPLE_CARD_REMOVE_BTN = "sample-card-remove-btn" +SAMPLE_CARD_TASK_ID = "sample-card-task-id" +SAMPLE_CARD_CHIP_TYPE = "sample-card-chip-type" 
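+# NOTE: SAMPLE_CARD_TYPE and SAMPLE_CARD_INDEX above are composed into dash
+# pattern-matching ids, e.g. a card rendered with
+#   id={"type": SAMPLE_CARD_TYPE, SAMPLE_CARD_INDEX: metagenome_id}
+# can be targeted across all cards via
+#   {"type": SAMPLE_CARD_TYPE, SAMPLE_CARD_INDEX: ALL}
+# (see automappa/components/metagenome_id_store.py for this usage)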
+SAMPLE_CARD_METAGENOME_BADGE_TYPE = "sample-card-metagenome-badge-type" +SAMPLE_CARD_METAGENOME_BADGE_LABEL = "metagenome" +SAMPLE_CARD_BINNING_BADGE_TYPE = "sample-card-binning-badge-type" +SAMPLE_CARD_BINNING_BADGE_LABEL = "contigs" +SAMPLE_CARD_MARKERS_BADGE_TYPE = "sample-card-markers-badge-type" +SAMPLE_CARD_MARKERS_BADGE_LABEL = "markers" +SAMPLE_CARD_CONNECTIONS_BADGE_TYPE = "sample-card-connections-badge-type" +SAMPLE_CARD_CONNECTIONS_BADGE_LABEL = "connections" +BACKGROUND_TASK_BADGE = "background-task-badge" +BACKGROUND_TASK_DIV = "background-task-div" +BACKGROUND_TASK_INTERVAL = "background-task-interval" +NOTIFICATION_TASK_ID = "notification-task-id" +BACKGROUND_TASK_BUTTON = "background-task-button" +BADGE_TASK_STORE = "badge-task-store" +BADGE_STATUS_INTERVAL = "badge-status-interval" +# MAG_REFINEMENT +MAG_REFINEMENT_TAB_ID = "MAG-refinement" +COLOR_BY_COLUMN_DROPDOWN = "color-by-column" +COLOR_BY_COLUMN_DROPDOWN_VALUE_DEFAULT = "cluster" +SCATTERPLOT_2D_LEGEND_TOGGLE = "scatterplot-2d-legend-toggle" +SCATTERPLOT_2D_LEGEND_TOGGLE_VALUE_DEFAULT = True +KMER_SIZE_DROPDOWN = "kmer-size-dropdown" +KMER_SIZE_DROPDOWN_VALUE_DEFAULT = 5 +NORM_METHOD_DROPDOWN = "norm-method-dropdown" +NORM_METHOD_DROPDOWN_VALUE_DEFAULT = "am_clr" +AXES_2D_DROPDOWN = "axes-2d-dropdown" +AXES_2D_DROPDOWN_VALUE_DEFAULT = "coverage|gc_content" +COVERAGE_RANGE_SLIDER = "coverage-range-slider" +SCATTERPLOT_3D_LEGEND_TOGGLE = "scatterplot-3d-legend-toggle" +SCATTERPLOT_3D_LEGEND_TOGGLE_VALUE_DEFAULT = True +SCATTERPLOT_3D_ZAXIS_DROPDOWN = "scatterplot-3d-zaxis-dropdown" +SCATTERPLOT_3D_ZAXIS_DROPDOWN_VALUE_DEFAULT = "length" +TAXONOMY_DISTRIBUTION_DROPDOWN = "taxonomy-distribution-dropdown" +TAXONOMY_DISTRIBUTION_DROPDOWN_VALUE_DEFAULT = "species" +REFINEMENTS_DOWNLOAD_BUTTON = "refinements-download-button" +REFINEMENTS_CLEAR_BUTTON = "refinements-clear-button" +REFINEMENTS_CLEARED_NOTIFICATION = "refinements-cleared-notification" +REFINEMENTS_NOTIFICATION = "refinements-notification" +REFINEMENTS_DOWNLOAD = "refinements-download" +REFINEMENTS_SUMMARY_BUTTON = "refinements-summary-button" +SETTINGS_OFFCANVAS = "settings-offcanvas" +MAG_REFINEMENTS_SAVE_BUTTON = "mag-refinement-save-button" +HIDE_SELECTIONS_TOGGLE = "hide-selections-toggle" +HIDE_SELECTIONS_TOGGLE_VALUE_DEFAULT = False +LOADING_MAG_METRICS_DATATABLE = "loading-mag-metrics-datatable" +MAG_METRICS_DATATABLE = "mag-metrics-datatable" +LOADING_SCATTERPLOT_2D = "loading-scatterplot-2d" +SCATTERPLOT_2D_FIGURE = "scatterplot-2d-figure" +LOADING_SCATTERPLOT_3D = "loading-scatterplot-3d" +SCATTERPLOT_3D = "scatterplot-3d" +LOADING_TAXONOMY_DISTRIBUTION = "loading-taxonomy-distribution" +TAXONOMY_DISTRIBUTION = "taxonomy-distribution" +LOADING_MAG_REFINEMENT_COVERAGE_BOXPLOT = "loading-mag-refinement-coverage-boxplot" +MAG_REFINEMENT_COVERAGE_BOXPLOT = "mag-refinement-coverage-boxplot" +LOADING_MAG_REFINEMENT_GC_CONTENT_BOXPLOT = "loading-mag-refinement-gc-content-boxplot" +MAG_REFINEMENT_GC_CONTENT_BOXPLOT = "mag-refinement-gc-content-boxplot" +LOADING_MAG_REFINEMENT_LENGTH_BOXPLOT = "loading-mag-refinement-length-boxplot" +MAG_REFINEMENT_LENGTH_BOXPLOT = "mag-refinement-length-boxplot" +LOADING_REFINEMENTS_TABLE = "loading-refinements-table" +REFINEMENTS_TABLE = "refinements-table" +SETTINGS_BUTTON = "settings-button" +LOADING_CONTIG_CYTOSCAPE = "loading-contig-cytoscape" +CONTIG_CYTOSCAPE = "contig-cytoscape" +# MAG SUMMARY +MAG_SUMMARY_TAB_ID = "MAG-summary" +MAG_OVERVIEW_METRICS_BOXPLOT = "mag-overview-metrics-boxplot" 
+LOADING_MAG_OVERVIEW_METRICS_BOXPLOT = "loading-mag-overview-metrics-boxplot" +LOADING_MAG_OVERVIEW_GC_CONTENT_BOXPLOT = "loading-mag-overview-gc-content-boxplot" +MAG_OVERVIEW_GC_CONTENT_BOXPLOT = "mag-overview-gc-content-boxplot" +LOADING_MAG_OVERVIEW_LENGTH_BOXPLOT = "loading-mag-overview-length-boxplot" +MAG_OVERVIEW_LENGTH_BOXPLOT = "mag-overview-length-boxplot" +LOADING_MAG_OVERVIEW_COVERAGE_BOXPLOT = "loading-mag-overview-coverage-boxplot" +MAG_OVERVIEW_COVERAGE_BOXPLOT = "mag-overview-coverage-boxplot" +LOADING_MAG_SUMMARY_STATS_DATATABLE = "loading-mag-summary-stats-datatable" +MAG_SUMMARY_STATS_DATATABLE = "mag-summary-stats-datatable" +LOADING_MAG_TAXONOMY_SANKEY = "loading-mag-taxonomy-sankey" +MAG_TAXONOMY_SANKEY = "mag-taxonomy-sankey" +LOADING_MAG_METRICS_BARPLOT = "loading-mag-metrics-barplot" +MAG_METRICS_BARPLOT = "mag-metrics-barplot" +LOADING_MAG_GC_CONTENT_BOXPLOT = "loading-mag-gc-content-boxplot" +MAG_GC_CONTENT_BOXPLOT = "mag-gc-content-boxplot" +LOADING_MAG_LENGTH_BOXPLOT = "loading-mag-length-boxplot" +MAG_LENGTH_BOXPLOT = "mag-length-boxplot" +LOADING_MAG_COVERAGE_BOXPLOT = "loading-mag-coverage-boxplot" +MAG_COVERAGE_BOXPLOT = "mag-coverage-boxplot" +MAG_SUMMARY_CLUSTER_COL_DROPDOWN = "mag-summary-cluster-col-dropdown" +MAG_SELECTION_DROPDOWN = "mag-selection-dropdown" diff --git a/automappa/components/layout.py b/automappa/components/layout.py new file mode 100644 index 00000000..98bc9e6b --- /dev/null +++ b/automappa/components/layout.py @@ -0,0 +1,94 @@ +import logging +import dash + +from typing import Literal +from dash_extensions.enrich import DashProxy, html +import dash_mantine_components as dmc + +from automappa.components import ( + metagenome_id_store, + tasks_store, + pages_navbar, +) + +from automappa.pages.home.source import HomeDataSource +from automappa.pages.mag_refinement.source import RefinementDataSource +from automappa.pages.mag_summary.source import SummaryDataSource + +from automappa.pages.home.layout import render as render_home_layout +from automappa.pages.mag_refinement.layout import render as render_mag_refinement_layout +from automappa.pages.mag_summary.layout import render as render_mag_summary_layout +from automappa.pages.not_found_404 import render as render_not_found_404 + +logger = logging.getLogger(__name__) + + +def render( + app: DashProxy, + storage_type: Literal["memory", "session", "local"] = "session", + clear_data: bool = False, +) -> html.Div: + home_data_source = HomeDataSource() + home_page = render_home_layout(source=home_data_source) + home_page.register( + app=app, + module=home_page.name, + **dict( + name=home_page.name, + description=home_page.description, + title=home_page.title, + icon=home_page.icon, + top_nav=True, + order=0, + path="/", + redirect_from=["/home"], + ) + ) + refinement_source = RefinementDataSource() + mag_refinement_page = render_mag_refinement_layout(source=refinement_source) + mag_refinement_page.register( + app=app, + module=mag_refinement_page.name, + **dict( + name=mag_refinement_page.name, + description=mag_refinement_page.description, + title=mag_refinement_page.title, + icon=mag_refinement_page.icon, + top_nav=False, + order=1, + ) + ) + summary_source = SummaryDataSource() + mag_summary_page = render_mag_summary_layout(source=summary_source) + mag_summary_page.register( + app=app, + module=mag_summary_page.name, + **dict( + name=mag_summary_page.name, + description=mag_summary_page.description, + title=mag_summary_page.title, + icon=mag_summary_page.icon, + top_nav=False, + order=2, 
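+                # NOTE: these keyword arguments are forwarded to
+                # dash.register_page; with no explicit `path`, dash derives
+                # one from the module name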
+ ) + ) + not_found_404_page = render_not_found_404() + not_found_404_page.register( + app=app, + module=not_found_404_page.name, + ) + + # Setup main app layout. + return dmc.MantineProvider( + dmc.NotificationsProvider( + dmc.Container( + [ + metagenome_id_store.render(app, storage_type, clear_data), + tasks_store.render(app, storage_type, clear_data), + pages_navbar.render(), + dash.page_container, + ], + fluid=True, + ) + ) + ) diff --git a/automappa/components/metagenome_id_store.py b/automappa/components/metagenome_id_store.py new file mode 100644 index 00000000..12277b79 --- /dev/null +++ b/automappa/components/metagenome_id_store.py @@ -0,0 +1,45 @@ +from typing import Dict, List, Literal +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import ( + dcc, + DashProxy, + State, + Input, + Serverside, + Output, + ALL, +) +from automappa.components import ids +from automappa.data.database import redis_backend + + +def render( + app: DashProxy, + storage_type: Literal["memory", "session", "local"] = "session", + clear_data: bool = False, +) -> dcc.Store: + @app.callback( + Output(ids.METAGENOME_ID_STORE, "data"), + Input( + {"type": ids.SAMPLE_CARD_TYPE, ids.SAMPLE_CARD_INDEX: ALL}, + "withBorder", + ), + State({"type": ids.SAMPLE_CARD_TYPE, ids.SAMPLE_CARD_INDEX: ALL}, "id"), + prevent_initial_call=True, + ) + def update_metagenome_id( + sample_cards_borders: List[str], sample_cards_ids: List[Dict[str, str]] + ) -> int: + if not any(sample_cards_borders): + raise PreventUpdate + sample_card_index = [ + i for i, border in enumerate(sample_cards_borders) if border + ][0] + metagenome_id = sample_cards_ids[sample_card_index].get(ids.SAMPLE_CARD_INDEX) + return Serverside(metagenome_id, backend=redis_backend) + + return dcc.Store( + id=ids.METAGENOME_ID_STORE, + storage_type=storage_type, + clear_data=clear_data, + ) diff --git a/automappa/components/pages_navbar.py b/automappa/components/pages_navbar.py new file mode 100644 index 00000000..4c000345 --- /dev/null +++ b/automappa/components/pages_navbar.py @@ -0,0 +1,57 @@ +import dash_mantine_components as dmc +from dash_iconify import DashIconify +import dash +from dash_extensions.enrich import html + + +def render() -> html.Div: + # @app.callback( + # Output(ids.PAGES_NAVBAR, "children"), + # ) + # def update_disabled_navlinks(): + # ... 
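+    # One possible shape for the stub above (illustrative sketch; render()
+    # would need the DashProxy app passed in before this could be registered):
+    # @app.callback(
+    #     Output(ids.PAGES_NAVBAR, "children"),
+    #     Input(ids.METAGENOME_ID_STORE, "data"),
+    # )
+    # def update_disabled_navlinks(metagenome_id: str):
+    #     # e.g. enable the MAG-refinement/MAG-summary links only once a
+    #     # metagenome has been selected from the home page
+    #     ...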
+ + logo = html.Img(src=dash.get_asset_url("favicon.ico"), height="30px") + brand = dmc.Anchor( + [logo, " Automappa"], + href="https://github.com/WiscEvan/Automappa", + target="_blank", + underline=False, + color="dark", + size="md", + transform="capitalize", + weight=550, + ) + link_group = dmc.Group( + [ + dmc.NavLink( + label=dmc.Text(page["name"], align="center", weight=500), + href=page["path"], + icon=DashIconify(icon=page["icon"], height=25), + variant="subtle", + color="gray", + id=page["name"], + ) + for page in dash.page_registry.values() + if page["module"] != "not_found_404" + ], + position="apart", + grow=True, + spacing="xs", + ) + + header = dmc.Header( + dmc.Grid( + children=[ + dmc.Col(brand, span=2, style={"textAlign": "center"}), + dmc.Col(link_group, span=10), + ], + justify="space-around", + align="center", + gutter="xl", + ), + height=55, + style={"backgroundColor": "#FFFFFF"}, + zIndex=99999999, + ) + return html.Div(header) diff --git a/automappa/components/samples_store.py b/automappa/components/samples_store.py new file mode 100644 index 00000000..ae571d9f --- /dev/null +++ b/automappa/components/samples_store.py @@ -0,0 +1,84 @@ +# -*- coding: utf-8 -*- + +import logging +from typing import Literal +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, State, DashProxy, dcc, Serverside +import pandas as pd + +from automappa.data.loader import ( + get_uploaded_files_table, +) +from automappa.components import ids +from automappa.data.database import redis_backend + +logger = logging.getLogger(__name__) + + +def render( + app: DashProxy, + storage_type: Literal["memory", "session", "local"] = "session", + clear_data: bool = False, +) -> dcc.Store: + @app.callback( + Output(ids.SAMPLES_STORE, "data"), + [ + Input(ids.BINNING_MAIN_UPLOAD_STORE, "modified_timestamp"), + Input(ids.MARKERS_UPLOAD_STORE, "modified_timestamp"), + Input(ids.METAGENOME_UPLOAD_STORE, "modified_timestamp"), + Input(ids.CYTOSCAPE_STORE, "modified_timestamp"), + ], + [ + State(ids.BINNING_MAIN_UPLOAD_STORE, "data"), + State(ids.MARKERS_UPLOAD_STORE, "data"), + State(ids.METAGENOME_UPLOAD_STORE, "data"), + State(ids.CYTOSCAPE_STORE, "data"), + State(ids.SAMPLES_STORE, "data"), + ], + ) + def on_upload_stores_data( + binning_uploads_timestamp: str, + markers_uploads_timestamp: str, + metagenome_uploads_timestamp: str, + cytoscape_uploads_timestamp: str, + binning_uploads_df: pd.DataFrame, + markers_uploads_df: pd.DataFrame, + metagenome_uploads_df: pd.DataFrame, + cytoscape_uploads_df: pd.DataFrame, + samples_store_data_df: pd.DataFrame, + ): + if ( + binning_uploads_df is None + and markers_uploads_df is None + and metagenome_uploads_df is None + and cytoscape_uploads_df is None + ) or ( + binning_uploads_timestamp is None + and markers_uploads_timestamp is None + and metagenome_uploads_timestamp is None + and cytoscape_uploads_timestamp is None + ): + # Check if db has any samples in table + uploaded_files_df = get_uploaded_files_table() + if not uploaded_files_df.empty: + return Serverside(uploaded_files_df, backend=redis_backend) + raise PreventUpdate + samples_df = pd.concat( + [ + binning_uploads_df, + markers_uploads_df, + metagenome_uploads_df, + cytoscape_uploads_df, + samples_store_data_df, + ] + ).drop_duplicates(subset=["table_id"]) + logger.debug( + f"{samples_df.shape[0]:,} samples retrieved from data upload stores" + ) + return Serverside(samples_df, backend=redis_backend) + + return dcc.Store( + id=ids.SAMPLES_STORE, + 
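+        # `data` is set via Serverside(...) above, so the browser session only
+        # holds a reference key; the dataframe itself lives in redis_backend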
storage_type=storage_type, + clear_data=clear_data, + ) diff --git a/automappa/components/selected_tables_store.py b/automappa/components/selected_tables_store.py new file mode 100644 index 00000000..2e83e52f --- /dev/null +++ b/automappa/components/selected_tables_store.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- + +import itertools +import logging +from typing import Literal +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, DashProxy, Serverside, dcc +from automappa.data.source import SampleTables +from automappa.components import ids +from automappa.tasks import ( + preprocess_clusters_geom_medians, + preprocess_embeddings, + preprocess_marker_symbols, +) +from automappa.data.database import redis_backend + +logger = logging.getLogger(__name__) + + +def render( + app: DashProxy, + storage_type: Literal["memory", "session", "local"] = "session", + clear_data: bool = False, +) -> dcc.Store: + @app.callback( + Output(ids.SELECTED_TABLES_STORE, "data"), + [ + Input(ids.REFINE_MAGS_BUTTON, "n_clicks"), + Input(ids.BINNING_SELECT, "value"), + Input(ids.MARKERS_SELECT, "value"), + Input(ids.METAGENOME_SELECT, "value"), + Input(ids.CYTOSCAPE_SELECT, "value"), + ], + ) + def on_refine_mags_button_click( + n: int, + binning_select_value: str, + markers_select_value: str, + metagenome_select_value: str, + cytoscape_select_value: str, + ): + if n is None: + raise PreventUpdate + tables_dict = {} + if metagenome_select_value is not None: + tables_dict["metagenome"] = {"id": metagenome_select_value} + if binning_select_value is not None: + tables_dict.update( + { + "binning": {"id": binning_select_value}, + "refinements": { + "id": binning_select_value.replace("-binning", "-refinement") + }, + } + ) + if markers_select_value is not None: + tables_dict["markers"] = {"id": markers_select_value} + if cytoscape_select_value is not None: + tables_dict["cytoscape"] = {"id": cytoscape_select_value} + + sample = SampleTables(**tables_dict) + + # BEGIN task-queue submissions + # TODO Refactor to separate bg-task submission (Tasks should be methods for specific components datasources) + # TODO Show table of running tasks for user to monitor... + # TODO Monitor tasks progress with dcc.Interval in another callback... + + # TASK: compute marker symbols + # if sample.binning and sample.markers: + # marker_symbols_task = preprocess_marker_symbols.delay( + # sample.binning.id, sample.markers.id + # ) + + # TASK: compute k-mer freq. 
embeddings + # NOTE: Possibly use transfer list component to allow user to select which embeddings they want to compute + # https://www.dash-mantine-components.com/components/transferlist + # if sample.metagenome: + # embedding_tasks = [] + # # kmer_sizes = set([kmer_table.size for kmer_table in sample.kmers]) + # kmer_sizes = set([5]) + # # norm_methods = set([kmer_table.norm_method for kmer_table in sample.kmers]) + # norm_methods = set(["am_clr"]) + # embed_methods = set( + # [kmer_table.embed_method for kmer_table in sample.kmers] + # ) + # embed_methods = ["umap", "densmap", "bhsne"] + # for kmer_size, norm_method in itertools.product(kmer_sizes, norm_methods): + # embeddings_task = preprocess_embeddings( + # metagenome_table=sample.metagenome.id, + # kmer_size=kmer_size, + # norm_method=norm_method, + # embed_methods=embed_methods, + # ) + # embedding_tasks.append(embeddings_task) + # TASK: compute geometric medians from cluster assignments + # if sample.binning: + # clusters_geom_medians_task = preprocess_clusters_geom_medians.delay( + # sample.binning.id, "cluster" + # ) + # END task-queue submissions + return Serverside(sample, backend=redis_backend) + + return dcc.Store( + id=ids.SELECTED_TABLES_STORE, + storage_type=storage_type, + clear_data=clear_data, + ) diff --git a/automappa/components/tasks_store.py b/automappa/components/tasks_store.py new file mode 100644 index 00000000..d478aa49 --- /dev/null +++ b/automappa/components/tasks_store.py @@ -0,0 +1,15 @@ +from typing import Literal +from dash_extensions.enrich import dcc, DashProxy +from automappa.components import ids + + +def render( + app: DashProxy, + storage_type: Literal["memory", "session", "local"] = "session", + clear_data: bool = False, +) -> dcc.Store: + return dcc.Store( + id=ids.TASK_ID_STORE, + storage_type=storage_type, + clear_data=clear_data, + ) diff --git a/automappa/utils/tasks.py b/automappa/conf/__init__.py similarity index 100% rename from automappa/utils/tasks.py rename to automappa/conf/__init__.py diff --git a/automappa/conf/celeryconfig.py b/automappa/conf/celeryconfig.py new file mode 100644 index 00000000..a6779639 --- /dev/null +++ b/automappa/conf/celeryconfig.py @@ -0,0 +1,7 @@ +broker_connection_retry_on_startup = True +worker_send_task_events = True +task_send_sent_event = True +worker_prefetch_multiplier = 1 +worker_concurrency = 2 +task_track_started = True +imports = ("automappa.pages.home.tasks", "automappa.tasks") diff --git a/automappa/conf/rabbitmq.conf b/automappa/conf/rabbitmq.conf new file mode 100644 index 00000000..3faeaae0 --- /dev/null +++ b/automappa/conf/rabbitmq.conf @@ -0,0 +1,5 @@ +# Default is 30 mins... 
+# 1 hour in millisecs +# consumer_timeout = 3600000 +# 4 hours in millisecs +consumer_timeout = 14400000 diff --git a/automappa/data/__init__.py b/automappa/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/data/database.py b/automappa/data/database.py new file mode 100644 index 00000000..afd66f86 --- /dev/null +++ b/automappa/data/database.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +from typing import List +from sqlmodel import SQLModel, create_engine + +from dash_extensions.enrich import RedisBackend, FileSystemBackend + +from automappa import settings + +# SQL DATABASE file +# sqlite_url = f"sqlite:///database.db" +# engine = create_engine(sqlite_url, echo=False) + +# SQL in-memory +# sqlite_url = f"sqlite:///:memory:" +# engine = create_engine(sqlite_url, echo=False) + +# POSTGRES DATABASE +engine = create_engine( + url=settings.db.url, + pool_size=settings.db.pool_size, + pool_pre_ping=settings.db.pool_pre_ping, +) + +redis_backend = RedisBackend( + **dict(host=settings.redis.host, port=settings.redis.port, db=settings.redis.db), + # password=settings.redis.password, +) +file_system_backend = FileSystemBackend(cache_dir=settings.server.root_upload_folder) + + +def create_db_and_tables() -> None: + SQLModel.metadata.create_all(engine) + + +def get_table_names() -> List[str]: + return SQLModel.metadata.tables.keys() + + +def main(): + create_db_and_tables() + + +if __name__ == "__main__": + main() diff --git a/automappa/data/loader.py b/automappa/data/loader.py new file mode 100644 index 00000000..824956f4 --- /dev/null +++ b/automappa/data/loader.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python +# DataLoader for Autometa results ingestion +import logging +from pathlib import Path +import uuid +import pandas as pd + +from functools import partial, reduce +from typing import Callable, List, Optional, Union + +from Bio import SeqIO +from sqlmodel import Session, select, SQLModel + +from automappa.data.schemas import ContigSchema, CytoscapeConnectionSchema, MarkerSchema + +from automappa.settings import server +from automappa.data.database import ( + create_db_and_tables, + engine, + get_table_names, +) +from automappa.data.models import ( + Contig, + Marker, + Metagenome, + CytoscapeConnection, + Refinement, +) + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) +numba_logger = logging.getLogger("numba") +numba_logger.setLevel(logging.WARNING) +numba_logger.propagate = False +h5py_logger = logging.getLogger("h5py") +h5py_logger.setLevel(logging.WARNING) +h5py_logger.propagate = False + + +Preprocessor = Callable[[pd.DataFrame], pd.DataFrame] + + +def compose(*functions: Preprocessor) -> Preprocessor: + return reduce(lambda f, g: lambda x: g(f(x)), functions) + + +def sqlmodel_to_df(objects: List[SQLModel], set_index: bool = True) -> pd.DataFrame: + """Converts SQLModel objects into a Pandas DataFrame. + + From https://github.com/tiangolo/sqlmodel/issues/215#issuecomment-1092348993 + + Usage + ---------- + df = sqlmodel_to_df(list_of_sqlmodels) + Parameters + ---------- + :param objects: List[SQLModel]: List of SQLModel objects to be converted. + :param set_index: bool: Sets the first column, usually the primary key, to dataframe index. 
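+    :return: pd.DataFrame with one row per object; when set_index is True the
+        first schema field (usually the primary key) becomes the index.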
+ """ + + records = [obj.dict() for obj in objects] + columns = list(objects[0].schema()["properties"].keys()) + df = pd.DataFrame.from_records(records, columns=columns) + return df.set_index(columns[0]) if set_index else df + + +def validate_uploader( + is_completed: bool, filenames: List[str], upload_id: uuid.UUID +) -> Union[Path, None]: + """Ensure only one file was uploaded and create Path to uploaded file + + Parameters + ---------- + is_completed : bool + Whether or not the upload has finished + filenames : list + list of filenames + upload_id : uuid + unique user id associated with upload + + Returns + ------- + Path + Server-side path to uploaded file + + Raises + ------ + ValueError + You may only upload one file at a time! + """ + if not is_completed: + return + if filenames is None: + return + if upload_id: + root_folder = server.root_upload_folder / upload_id + else: + root_folder = server.root_upload_folder + + uploaded_files = [] + for filename in filenames: + file = root_folder / filename + uploaded_files.append(file) + if len(uploaded_files) > 1: + raise ValueError("You may only upload one file at a time!") + return uploaded_files[0] + + +def create_metagenome( + name: str, fpath: Optional[str], contigs: Optional[List[Contig]] +) -> Metagenome: + logger.info(f"Adding metagenome from {fpath} to db") + if not contigs: + contigs = [ + Contig(header=record.id, seq=str(record.seq)) + for record in SeqIO.parse(fpath, "fasta") + ] + else: + pass + # Need to ensure Seq column is in contigs otherwise add them + # contig_seq_df = pd.DataFrame( + # [ + # dict(header=record.id, seq=str(record.seq)) + # for record in SeqIO.parse(metagenome_fpath, "fasta") + # ] + # ) + # merge_seq_column = partial(add_seq_column, seqrecord_df=contig_seq_df) + metagenome = Metagenome(name=name, contigs=contigs) + with Session(engine) as session: + session.add(metagenome) + session.commit() + session.refresh(metagenome) + return metagenome + + +def read_metagenome(metagenome_id: int) -> Metagenome: + with Session(engine) as session: + metagenomes = session.exec( + select(Metagenome).where(Metagenome.id == metagenome_id) + ).first() + return metagenomes + + +def read_metagenomes() -> List[Metagenome]: + with Session(engine) as session: + metagenomes = session.exec(select(Metagenome)).all() + return metagenomes + + +def update_metagenomes() -> None: + raise NotImplemented + + +def delete_metagenomes() -> None: + raise NotImplemented + + +def rename_class_column_to_klass(df: pd.DataFrame) -> pd.DataFrame: + return df.rename(columns={ContigSchema.CLASS: ContigSchema.KLASS}) + + +def rename_contig_column_to_header(df: pd.DataFrame) -> pd.DataFrame: + return df.rename(columns={ContigSchema.CONTIG: ContigSchema.HEADER}) + + +def replace_cluster_na_values_with_unclustered(df: pd.DataFrame) -> pd.DataFrame: + return df.fillna(value={ContigSchema.CLUSTER: "unclustered"}) + + +def add_seq_column(df: pd.DataFrame, seqrecord_df: pd.DataFrame) -> pd.DataFrame: + return pd.merge(df, seqrecord_df, on=ContigSchema.HEADER, how="left") + + +def add_markers_column(df: pd.DataFrame, markers_list_df: pd.DataFrame) -> pd.DataFrame: + return pd.merge( + df, + markers_list_df, + left_on=ContigSchema.HEADER, + right_on=MarkerSchema.CONTIG, + how="left", + ) + + +def load_contigs(fpath: str) -> pd.DataFrame: + logger.info(f"Loading contigs: {fpath}") + return pd.read_table( + fpath, + dtype={ + ContigSchema.CONTIG: str, + ContigSchema.CLUSTER: str, + ContigSchema.COMPLETENESS: float, + ContigSchema.PURITY: float, + 
ContigSchema.COVERAGE_STDDEV: float, + ContigSchema.GC_CONTENT_STDDEV: float, + ContigSchema.COVERAGE: float, + ContigSchema.GC_CONTENT: float, + ContigSchema.LENGTH: int, + ContigSchema.SUPERKINGDOM: str, + ContigSchema.PHYLUM: str, + ContigSchema.CLASS: str, + ContigSchema.ORDER: str, + ContigSchema.FAMILY: str, + ContigSchema.GENUS: str, + ContigSchema.SPECIES: str, + ContigSchema.TAXID: int, + ContigSchema.X_1: float, + ContigSchema.X_2: float, + }, + ) + + +async def create_contigs(fpath: str, markers: Optional[List[Marker]]) -> None: + logger.info(f"Adding binned contigs from {fpath} to db") + raw_data = load_contigs(fpath) + if markers: + merge_markers_column = partial(add_markers_column, markers_list_df=markers) + preprocessor = compose( + rename_class_column_to_klass, + rename_contig_column_to_header, + replace_cluster_na_values_with_unclustered, + merge_markers_column, + ) + data = preprocessor(raw_data) + contigs = df_to_sqlmodel(data, Contig) + contig_df = preprocessor(raw_data) + nonmarker_contigs_mask = contig_df.markers.isna() + nonmarker_contigs = df_to_sqlmodel( + contig_df.loc[nonmarker_contigs_mask].drop(columns=["markers"]), Contig + ) + marker_contigs = df_to_sqlmodel(contig_df.loc[~nonmarker_contigs_mask], Contig) + contigs = nonmarker_contigs + marker_contigs + + with Session(engine) as session: + session.add_all(contigs) + session.commit(contigs) + session.refresh(contigs) + await contigs + + +def read_contigs(headers: List[str] = []) -> List[Contig]: + statement = select(Contig) + if headers: + statement = statement.where(Contig.header.in_(headers)) + with Session(engine) as session: + contigs = session.exec(statement).all() + return contigs + + +def update_contigs(): + raise NotImplemented + + +def delete_contig(contig: Contig) -> None: + with Session(engine) as session: + session.delete(contig) + session.commit() + + +def rename_qname_column_to_orf(df: pd.DataFrame) -> pd.DataFrame: + return df.rename(columns={MarkerSchema.QNAME: MarkerSchema.ORF}) + + +def drop_contig_column(df: pd.DataFrame) -> pd.DataFrame: + return df.drop(columns=[MarkerSchema.CONTIG]) + + +def agg_to_markers_list_column(df: pd.DataFrame) -> pd.DataFrame: + return ( + df.set_index(MarkerSchema.CONTIG) + .apply(func=lambda row: Marker(**row), axis=1, result_type="reduce", raw=False) + .to_frame(name="markers") + .reset_index() + .groupby(MarkerSchema.CONTIG) + .agg({"markers": lambda x: x.tolist()}) + ) + + +def load_markers(fpath: str) -> pd.DataFrame: + logger.info(f"Loading markers: {fpath}") + return pd.read_table( + fpath, + usecols=[ + MarkerSchema.CONTIG, + MarkerSchema.QNAME, + MarkerSchema.SNAME, + MarkerSchema.SACC, + MarkerSchema.FULL_SEQ_SCORE, + MarkerSchema.CUTOFF, + ], + dtype={ + MarkerSchema.CONTIG: str, + MarkerSchema.QNAME: str, + MarkerSchema.SNAME: str, + MarkerSchema.SACC: str, + MarkerSchema.FULL_SEQ_SCORE: float, + MarkerSchema.CUTOFF: float, + }, + ) + + +async def create_markers(fpath: str) -> List[Marker]: + logger.info(f"Adding markers from {fpath} to db") + raw_data = load_markers(fpath) + preprocessor = compose(rename_qname_column_to_orf, drop_contig_column) + data = preprocessor(raw_data) + markers = [Marker(**row) for row in data.to_dict("records")] + with Session(engine) as session: + session.add_all(markers) + session.commit() + session.refresh(markers) + return markers + + +def read_markers(contig_headers: List[str] = None) -> List[Marker]: + statement = select(Marker) + if contig_headers: + statement = 
statement.where(Marker.contig.header.in_(contig_headers)) + with Session(engine) as session: + markers = session.exec(statement).all() + return markers + + +def update_markers() -> None: + raise NotImplemented + + +def delete_markers() -> None: + raise NotImplemented + + +def load_cytoscape_connections(fpath: str) -> pd.DataFrame: + logger.info(f"Loading cytoscape connections: {fpath}") + return pd.read_table( + fpath, + low_memory=False, + usecols=[ + CytoscapeConnectionSchema.NODE1, + CytoscapeConnectionSchema.INTERACTION, + CytoscapeConnectionSchema.NODE2, + CytoscapeConnectionSchema.CONNECTIONS, + CytoscapeConnectionSchema.MAPPINGTYPE, + CytoscapeConnectionSchema.NAME, + CytoscapeConnectionSchema.CONTIGLENGTH, + ], + dtype={ + CytoscapeConnectionSchema.NODE1: str, + CytoscapeConnectionSchema.INTERACTION: int, + CytoscapeConnectionSchema.NODE2: str, + CytoscapeConnectionSchema.CONNECTIONS: int, + CytoscapeConnectionSchema.MAPPINGTYPE: str, # Literal['intra', 'ss', 'se', 'ee'] + # Below are commented as these are missing when mapping type != intra + # causing pd.read_table(...) to fail... + # CytoscapeConnectionSchema.NAME: str, + # CytoscapeConnectionSchema.CONTIGLENGTH: int, + }, + ) + + +def create_cytoscape_connections(fpath: str) -> None: + logger.info(f"Adding cytoscape connections from {fpath} to db") + cyto_df = load_cytoscape_connections(fpath) + cytoscape_connections = [ + CytoscapeConnection(**record) for record in cyto_df.to_dict("records") + ] + with Session(engine) as session: + session.add_all(cytoscape_connections) + session.commit() + + +def read_cytoscape_connections() -> List[CytoscapeConnection]: + logger.info("Reading cytoscape connections...") + with Session(engine) as session: + results = session.exec(select(CytoscapeConnection)).all() + return results + + +def update_cytoscape_connection() -> None: + raise NotImplemented + + +def delete_cytoscape_connection(connection: CytoscapeConnection) -> None: + with Session(engine) as session: + session.delete(connection) + session.commit() + + +def create_sample_metagenome( + name: str, + metagenome_fpath: str, + binning_fpath: str, + markers_fpath: str, + connections_fpath: Optional[str] = None, +) -> Metagenome: + logger.info(f"Creating Metagenome {name=}") + raw_markers = load_markers(markers_fpath) + marker_preprocessor = compose( + rename_qname_column_to_orf, agg_to_markers_list_column + ) + contig_markers_df = marker_preprocessor(raw_markers) + + contig_seq_df = pd.DataFrame( + [ + dict(header=record.id, seq=str(record.seq)) + for record in SeqIO.parse(metagenome_fpath, "fasta") + ] + ) + merge_seq_column = partial(add_seq_column, seqrecord_df=contig_seq_df) + merge_markers_column = partial( + add_markers_column, markers_list_df=contig_markers_df + ) + contig_preprocessor = compose( + rename_class_column_to_klass, + rename_contig_column_to_header, + replace_cluster_na_values_with_unclustered, + merge_seq_column, + merge_markers_column, + ) + raw_binning = load_contigs(binning_fpath) + contig_df = contig_preprocessor(raw_binning) + nonmarker_contigs_mask = contig_df.markers.isna() + nonmarker_contigs = [ + Contig(**record) + for record in contig_df.loc[nonmarker_contigs_mask] + .drop(columns=["markers"]) + .to_dict("records") + ] + marker_contigs = [ + Contig(**record) + for record in contig_df.loc[~nonmarker_contigs_mask].to_dict("records") + ] + contigs = nonmarker_contigs + marker_contigs + # Add cytoscape connection mapping if available + if connections_fpath: + connections_df = 
load_cytoscape_connections(connections_fpath) + connections = [ + CytoscapeConnection(**record) + for record in connections_df.to_dict("records") + ] + else: + connections = [] + + metagenome = Metagenome( + name=name, contigs=contigs, connections=connections, refinements=[] + ) + with Session(engine) as session: + session.add(metagenome) + session.commit() + session.refresh(metagenome) + return metagenome + + +def create_initial_refinements(metagenome_id: int) -> None: + """Initialize Contig.refinements for contigs with Contig.cluster values + + Parameters + ---------- + metagenome_id : int + Metagenome.id value corresponding to Contigs + """ + clusters_stmt = ( + select([Contig.cluster]) + .where( + Contig.metagenome_id == metagenome_id, + Contig.cluster != None, + Contig.cluster != "nan", + Contig.cluster != "unclustered", + ) + .distinct() + ) + with Session(engine) as session: + clusters = session.exec(clusters_stmt).all() + + for cluster in clusters: + contigs_stmt = select(Contig).where(Contig.cluster == cluster) + contigs = session.exec(contigs_stmt).all() + + refinement = Refinement( + contigs=contigs, + outdated=False, + initial_refinement=True, + metagenome_id=metagenome_id, + ) + session.add(refinement) + session.commit() + + +def main(): + # init database and tables + create_db_and_tables() + table_names = get_table_names() + print(f"db table names: {', '.join(table_names)}") + + # CRUD sample (this will take some time with the connection mapping...) + # NOTE: Create two samples for testing... + sponge_mg = create_sample_metagenome( + name="lasonolide", + metagenome_fpath="data/lasonolide/metagenome.filtered.fna", + binning_fpath="data/lasonolide/binning.tsv", + markers_fpath="data/lasonolide/bacteria.markers.tsv", + # connections_fpath="data/lasonolide/cytoscape.connections.tab", + ) + create_initial_refinements(sponge_mg.id) + nubbins_mg = create_sample_metagenome( + name="nubbins", + metagenome_fpath="data/nubbins/scaffolds.fasta", + binning_fpath="data/nubbins/nubbins.tsv", + markers_fpath="data/nubbins/bacteria.markers.tsv", + ) + create_initial_refinements(nubbins_mg.id) + + +if __name__ == "__main__": + main() diff --git a/automappa/data/models.py b/automappa/data/models.py new file mode 100644 index 00000000..75df5cd6 --- /dev/null +++ b/automappa/data/models.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +from typing import List, Optional +from sqlmodel import Field, Relationship, SQLModel +from datetime import datetime, timezone + + +def utc_now() -> datetime: + return datetime.now(tz=timezone.utc) + + +class Metagenome(SQLModel, table=True): + id: Optional[int] = Field(default=None, primary_key=True) + name: str + contigs: List["Contig"] = Relationship(back_populates="metagenome") + refinements: List["Refinement"] = Relationship(back_populates="metagenome") + connections: List["CytoscapeConnection"] = Relationship(back_populates="metagenome") + + +class ContigRefinementLink(SQLModel, table=True): + refinement_id: Optional[int] = Field( + default=None, foreign_key="refinement.id", primary_key=True + ) + contig_id: Optional[int] = Field( + default=None, foreign_key="contig.id", primary_key=True + ) + + +class Refinement(SQLModel, table=True): + id: Optional[int] = Field(default=None, primary_key=True) + timestamp: datetime = Field(default=utc_now(), index=True) + outdated: bool = False + initial_refinement: bool = False + contigs: List["Contig"] = Relationship( + back_populates="refinements", link_model=ContigRefinementLink + ) + metagenome_id: Optional[int] = 
Field(default=None, foreign_key="metagenome.id") + metagenome: Optional[Metagenome] = Relationship(back_populates="refinements") + + +class Contig(SQLModel, table=True): + id: Optional[int] = Field(default=None, primary_key=True) + header: str = Field(index=True) + seq: Optional[str] + cluster: Optional[str] = Field(index=True) + completeness: Optional[float] + purity: Optional[float] + coverage_stddev: Optional[float] + gc_content_stddev: Optional[float] + coverage: Optional[float] = Field(index=True) + gc_content: Optional[float] + length: Optional[int] + superkingdom: Optional[str] + phylum: Optional[str] + klass: Optional[str] + order: Optional[str] + family: Optional[str] + genus: Optional[str] + species: Optional[str] + taxid: Optional[int] + x_1: Optional[float] + x_2: Optional[float] + marker_symbol: Optional[str] + marker_size: Optional[int] + refinements: Optional[List[Refinement]] = Relationship( + back_populates="contigs", link_model=ContigRefinementLink + ) + metagenome_id: Optional[int] = Field(default=None, foreign_key="metagenome.id") + metagenome: Optional[Metagenome] = Relationship(back_populates="contigs") + markers: Optional[List["Marker"]] = Relationship(back_populates="contig") + + +class Marker(SQLModel, table=True): + id: Optional[int] = Field(default=None, primary_key=True) + orf: str # qname + sacc: str + sname: str + full_seq_score: float = Field(index=True) + cutoff: float = Field(index=True) + contig_id: Optional[int] = Field(default=None, foreign_key="contig.id") + contig: Contig = Relationship(back_populates="markers") + + +class CytoscapeConnection(SQLModel, table=True): + __tablename__ = "cytoscape_connection" + id: Optional[int] = Field(default=None, primary_key=True) + node1: str + interaction: int + node2: str + connections: int + mappingtype: str # Literal["intra", "ss", "se", "ee"] + name: Optional[str] + contiglength: Optional[int] + metagenome_id: Optional[int] = Field(default=None, foreign_key="metagenome.id") + metagenome: Optional[Metagenome] = Relationship(back_populates="connections") diff --git a/automappa/data/schemas.py b/automappa/data/schemas.py new file mode 100644 index 00000000..670ecb41 --- /dev/null +++ b/automappa/data/schemas.py @@ -0,0 +1,47 @@ +class MetagenomeSchema: + pass + + +class ContigSchema: + CONTIG = "contig" + HEADER = "header" + CLUSTER = "cluster" + COMPLETENESS = "completeness" + PURITY = "purity" + COVERAGE_STDDEV = "coverage_stddev" + GC_CONTENT_STDDEV = "gc_content_stddev" + COVERAGE = "coverage" + GC_CONTENT = "gc_content" + LENGTH = "length" + SUPERKINGDOM = "superkingdom" + DOMAIN = "domain" + PHYLUM = "phylum" + CLASS = "class" + KLASS = "klass" + ORDER = "order" + FAMILY = "family" + GENUS = "genus" + SPECIES = "species" + TAXID = "taxid" + X_1 = "x_1" + X_2 = "x_2" + + +class MarkerSchema: + QNAME = "qname" + ORF = "orf" + SACC = "sacc" + SNAME = "sname" + FULL_SEQ_SCORE = "full_seq_score" + CUTOFF = "cutoff" + CONTIG = "contig" + + +class CytoscapeConnectionSchema: + NODE1 = "node1" + INTERACTION = "interaction" + NODE2 = "node2" + CONNECTIONS = "connections" + MAPPINGTYPE = "mappingtype" + NAME = "name" + CONTIGLENGTH = "contiglength" diff --git a/automappa/index.py b/automappa/index.py deleted file mode 100755 index 33fda173..00000000 --- a/automappa/index.py +++ /dev/null @@ -1,211 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import argparse -import os -import logging - -from dash.dependencies import Input, Output -from dash import dcc, html -import dash_bootstrap_components as dbc -import 
pandas as pd - -from autometa.common.markers import load as load_markers - -from automappa.utils.markers import ( - convert_marker_counts_to_marker_symbols, - get_contig_marker_counts, -) - -from automappa.apps import mag_refinement, mag_summary -from automappa.app import app - -logging.basicConfig( - format="[%(levelname)s] %(name)s: %(message)s", - level=logging.DEBUG, -) - -logger = logging.getLogger(__name__) - - - -@app.callback(Output("tab-content", "children"), [Input("tabs", "active_tab")]) -def render_content(active_tab): - if active_tab == "mag_refinement": - return mag_refinement.layout - elif active_tab == "mag_summary": - return mag_summary.layout - else: - return active_tab - - -def main(): - parser = argparse.ArgumentParser( - description="Automappa: An interactive interface for exploration of metagenomes", - formatter_class=argparse.RawTextHelpFormatter, - ) - parser.add_argument( - "--binning-main", - help="Path to --binning-main output of Autometa binning/recruitment results", - type=str, - metavar="filepath", - required=True, - ) - parser.add_argument( - "--markers", - help="Path to Autometa-formatted markers table (may be taxon-specific)", - type=str, - metavar="filepath", - required=True, - ) - parser.add_argument( - "--fasta", - help="Path to metagenome.fasta", - type=str, - metavar="filepath", - required=False, - ) - parser.add_argument( - "--port", - help="port to expose. (default: %(default)s)", - default=8050, - type=int, - metavar="number", - ) - parser.add_argument( - "--host", - help="host ip address to expose. (default: %(default)s)", - type=str, - default="0.0.0.0", - metavar="ip address", - ) - parser.add_argument( - "--storage-type", - help=( - "The type of the web storage. (default: %(default)s)\n" - "- memory: only kept in memory, reset on page refresh.\n" - "- session: data is cleared once the browser quit.\n" - "- local: data is kept after the browser quit. (Currently not supported)\n" - ), - choices=["memory", "session"], - default="session", - ) - parser.add_argument( - "--clear-store-data", - help=( - "Clear storage data (default: %(default)s)\n" - "(only required if using 'session' or 'local' for `--storage-type`)" - ), - action="store_true", - default=False, - ) - parser.add_argument( - "--debug", - help="Turn on debug mode", - action="store_true", - default=False, - ) - args = parser.parse_args() - - logger.info("Please wait a moment while all of the data is loaded.") - # Needed separately for binning refinement selections. - binning = pd.read_csv(args.binning_main, sep="\t", low_memory=False) - # Needed for completeness/purity calculations - markers = load_markers(args.markers).reset_index().copy() - - # Check dataset size for dcc.Store(...) with browser limits... 
- # For details see: https://stackoverflow.com/a/61018107 and https://arty.name/localstorage.html - chrome_browser_quota = 5200000 - dataset_chars = len(binning.to_json(orient="split")) - if dataset_chars >= chrome_browser_quota: - logger.warning(f"{args.binning_main} exceeds browser storage limits ({dataset_chars} > {chrome_browser_quota}).") - logger.warning("Persisting refinements is DISABLED!") - - # Metagenome Annotations Store - metagenome_annotations_store = dcc.Store( - id="metagenome-annotations", - storage_type=args.storage_type, - data=binning.to_json(orient="split"), - clear_data=args.clear_store_data, - ) - - # Kingdom Markers Store - markers_store = dcc.Store( - id="markers-store", - storage_type=args.storage_type, - data=markers.to_json(orient="split"), - clear_data=args.clear_store_data, - ) - # MAG Refinement Data Store - # NOTE: MAG refinement columns are enumerated (1-indexed) and prepended with 'refinement_' - if "cluster" not in binning.columns: - binning["cluster"] = "unclustered" - else: - binning["cluster"].fillna("unclustered", inplace=True) - - binning_cols = [ - col - for col in binning.columns - if "refinement_" in col or "cluster" in col or "contig" in col - ] - - refinement_data_store = dcc.Store( - id="refinement-data", - storage_type=args.storage_type, - data=binning[binning_cols].to_json(orient="split"), - clear_data=args.clear_store_data, - ) - - # Contig Marker Symbols Store - contig_marker_counts = get_contig_marker_counts( - binning.set_index("contig"), markers.set_index("contig") - ) - contig_marker_symbols = convert_marker_counts_to_marker_symbols( - contig_marker_counts - ).reset_index() - - contig_marker_symbols_store = dcc.Store( - id="contig-marker-symbols-store", - storage_type=args.storage_type, - data=contig_marker_symbols.to_json(orient="split"), - clear_data=args.clear_store_data, - ) - - if args.clear_store_data: - logger.info(f"Store data cleared. Now re-run automappa *without* --clear-store-data") - exit() - - logger.info(f"binning shape:\t\t{binning.shape}") - logger.info(f"markers shape:\t\t{markers.shape}") - logger.info( - "Data loaded. It may take a minute or two to construct all interactive graphs..." - ) - - refinement_tab = dbc.Tab(label="MAG Refinement", tab_id="mag_refinement") - summary_tab = dbc.Tab(label="MAG Summary", tab_id="mag_summary") - - app.layout = dbc.Container( - [ - dbc.Col(markers_store), - dbc.Col(metagenome_annotations_store), - dbc.Col(refinement_data_store), - dbc.Col(contig_marker_symbols_store), - # Navbar - dbc.Tabs( - id="tabs", children=[refinement_tab, summary_tab], className="nav-fill" - ), - html.Div(id="tab-content"), - ], - fluid=True, - ) - - # TODO: Replace cli inputs (as well as updating title once file is uploaded...) 
- # dcc.Upload(id='metagenome-annotations-upload', children=dbc.Button("Upload annotations")) - # dcc.Upload(id='markers-upload', children=dbc.Button("Upload annotations")) - sample_name = os.path.basename(args.binning_main).replace(" ", "_").split(".")[0] - app.title = f"Automappa: {sample_name}" - app.run_server(host=args.host, port=args.port, debug=args.debug) - - -if __name__ == "__main__": - main() diff --git a/automappa/pages/__init__.py b/automappa/pages/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/pages/home/__init__.py b/automappa/pages/home/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/pages/home/components/__init__.py b/automappa/pages/home/components/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/pages/home/components/binning_select.py b/automappa/pages/home/components/binning_select.py new file mode 100644 index 00000000..1a761906 --- /dev/null +++ b/automappa/pages/home/components/binning_select.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +import logging +from typing import Dict, List +import pandas as pd + +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, State, DashProxy, html +import dash_mantine_components as dmc +from dash_iconify import DashIconify + +from automappa.components import ids + + +logger = logging.getLogger(__name__) + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.BINNING_SELECT, "data"), + [Input(ids.SAMPLES_STORE, "data")], + State(ids.SAMPLES_STORE, "data"), + ) + def binning_select_options( + samples_df: pd.DataFrame, new_samples_df: pd.DataFrame + ) -> List[Dict[str, str]]: + if samples_df is None or samples_df.empty: + raise PreventUpdate + if new_samples_df is not None: + samples_df = pd.concat([samples_df, new_samples_df]).drop_duplicates( + subset=["table_id"] + ) + df = samples_df.loc[samples_df.filetype.eq("binning")] + logger.debug(f"{df.shape[0]:,} binning available for mag_refinement") + return [ + { + "label": filename, + "value": table_id, + } + for filename, table_id in zip(df.filename.tolist(), df.table_id.tolist()) + ] + + return html.Div( + dmc.Select( + id=ids.BINNING_SELECT, + label="Binning", + placeholder="Select binning annotations", + icon=[DashIconify(icon="ph:chart-scatter-bold")], + rightSection=[DashIconify(icon="radix-icons:chevron-down")], + persistence=True, + persistence_type="session", + ) + ) diff --git a/automappa/pages/home/components/binning_upload.py b/automappa/pages/home/components/binning_upload.py new file mode 100644 index 00000000..4ea86365 --- /dev/null +++ b/automappa/pages/home/components/binning_upload.py @@ -0,0 +1,61 @@ +import logging +from typing import List, Protocol +from uuid import UUID +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import DashProxy, Output, Input, State, dcc, html +import dash_uploader as du +from automappa.components import ids + + +logger = logging.getLogger(__name__) + + +class UploadDataSource(Protocol): + def validate_uploader_path( + self, is_completed: bool, filenames: List[str], upload_id: UUID + ) -> str: + ... 
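+# NOTE: render() only needs an object exposing validate_uploader_path, so any
+# structural match satisfies this Protocol. A hypothetical stub for testing:
+#   class StubSource:
+#       def validate_uploader_path(self, is_completed, filenames, upload_id):
+#           return "/tmp/binning.tsv"  # pretend-validated upload path
+#   layout = render(app, StubSource())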
+ + +def render(app: DashProxy, source: UploadDataSource) -> html.Div: + @app.callback( + Output(ids.BINNING_MAIN_UPLOAD_STORE, "data"), + Input(ids.BINNING_UPLOAD, "isCompleted"), + State(ids.BINNING_UPLOAD, "fileNames"), + State(ids.BINNING_UPLOAD, "upload_id"), + prevent_initial_call=True, + ) + def on_binning_main_upload( + is_completed: bool, filenames: List[str], upload_id: UUID + ): + try: + filepath = source.validate_uploader_path(is_completed, filenames, upload_id) + except ValueError as err: + logger.warn(err) + raise PreventUpdate + if not filepath: + raise PreventUpdate + return filepath + + return html.Div( + [ + dcc.Store( + id=ids.BINNING_MAIN_UPLOAD_STORE, + storage_type="session", + clear_data=False, + ), + du.Upload( + id=ids.BINNING_UPLOAD, + text="Drag and Drop or Select binning-main file", + default_style={ + "width": "100%", + "borderWidth": "1px", + "borderStyle": "dashed", + "borderRadius": "5px", + "margin": "10px", + }, + max_files=1, + max_file_size=10240, + ), + ] + ) diff --git a/automappa/pages/home/components/cytoscape_connections_select.py b/automappa/pages/home/components/cytoscape_connections_select.py new file mode 100644 index 00000000..6f8fcaef --- /dev/null +++ b/automappa/pages/home/components/cytoscape_connections_select.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +import logging +from typing import Dict, List +import pandas as pd + +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, State, DashProxy, html +import dash_mantine_components as dmc +from dash_iconify import DashIconify +from automappa.components import ids + + +logger = logging.getLogger(__name__) + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.CYTOSCAPE_SELECT, "data"), + [Input(ids.SAMPLES_STORE, "data")], + State(ids.SAMPLES_STORE, "data"), + ) + def cytoscape_select_options( + samples_df: pd.DataFrame, new_samples_df: pd.DataFrame + ) -> List[Dict[str, str]]: + if samples_df is None or samples_df.empty: + raise PreventUpdate + if new_samples_df is not None: + samples_df = pd.concat([samples_df, new_samples_df]).drop_duplicates( + subset=["table_id"] + ) + + if samples_df.empty: + raise PreventUpdate + + df = samples_df.loc[samples_df.filetype.eq("cytoscape")] + logger.debug(f"{df.shape[0]:,} cytoscapes available for mag_refinement") + return [ + { + "label": filename, + "value": table_id, + } + for filename, table_id in zip(df.filename.tolist(), df.table_id.tolist()) + ] + + return html.Div( + dmc.Select( + id=ids.CYTOSCAPE_SELECT, + label="Cytoscape connections", + placeholder="Select cytoscape connections annotations", + icon=[DashIconify(icon="bx:network-chart")], + rightSection=[DashIconify(icon="radix-icons:chevron-down")], + persistence=True, + persistence_type="session", + ) + ) diff --git a/automappa/pages/home/components/cytoscape_connections_upload.py b/automappa/pages/home/components/cytoscape_connections_upload.py new file mode 100644 index 00000000..bee1232b --- /dev/null +++ b/automappa/pages/home/components/cytoscape_connections_upload.py @@ -0,0 +1,60 @@ +import logging +from typing import List, Protocol +from uuid import UUID +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import DashProxy, Input, Output, State, dcc, html +import dash_uploader as du +from automappa.components import ids + +logger = logging.getLogger(__name__) + + +class UploadDataSource(Protocol): + def validate_uploader_path( + self, is_completed: bool, filenames: List[str], upload_id: UUID + ) -> str: + 
... + + +def render(app: DashProxy, source: UploadDataSource) -> html.Div: + @app.callback( + Output(ids.CYTOSCAPE_STORE, "data"), + Input(ids.CYTOSCAPE_UPLOAD, "isCompleted"), + State(ids.CYTOSCAPE_UPLOAD, "fileNames"), + State(ids.CYTOSCAPE_UPLOAD, "upload_id"), + prevent_initial_call=True, + ) + def cytoscape_uploader_callback( + is_completed: bool, filenames: List[str], upload_id: UUID + ): + try: + filepath = source.validate_uploader_path(is_completed, filenames, upload_id) + except ValueError as err: + logger.warn(err) + raise PreventUpdate + if not filepath: + raise PreventUpdate + return filepath + + return html.Div( + [ + dcc.Store( + id=ids.CYTOSCAPE_STORE, + storage_type="session", + clear_data=False, + ), + du.Upload( + id=ids.CYTOSCAPE_UPLOAD, + text="Drag and Drop or Select cytoscape contig connections file", + default_style={ + "width": "100%", + "borderWidth": "1px", + "borderStyle": "dashed", + "borderRadius": "5px", + "margin": "10px", + }, + max_files=1, + max_file_size=10240, + ), + ] + ) diff --git a/automappa/pages/home/components/markers_select.py b/automappa/pages/home/components/markers_select.py new file mode 100644 index 00000000..7fd0e31f --- /dev/null +++ b/automappa/pages/home/components/markers_select.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +import logging +from typing import Dict, List +import pandas as pd + +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, State, DashProxy, html +import dash_mantine_components as dmc +from dash_iconify import DashIconify + +from automappa.components import ids + + +logger = logging.getLogger(__name__) + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.MARKERS_SELECT, "data"), + [Input(ids.SAMPLES_STORE, "data")], + State(ids.SAMPLES_STORE, "data"), + ) + def markers_select_options( + samples_df: pd.DataFrame, new_samples_df: pd.DataFrame + ) -> List[Dict[str, str]]: + if samples_df is None or samples_df.empty: + raise PreventUpdate + if new_samples_df is not None: + samples_df = pd.concat([samples_df, new_samples_df]).drop_duplicates( + subset=["table_id"] + ) + + markers_samples = samples_df.loc[samples_df.filetype.eq("markers")] + logger.debug( + f"{markers_samples.shape[0]:,} markers available for mag_refinement" + ) + return [ + { + "label": filename, + "value": table_id, + } + for filename, table_id in zip( + markers_samples.filename.tolist(), markers_samples.table_id.tolist() + ) + ] + + return html.Div( + dmc.Select( + id=ids.MARKERS_SELECT, + label="Markers", + placeholder="Select marker annotations", + icon=[DashIconify(icon="line-md:document-report")], + rightSection=[DashIconify(icon="radix-icons:chevron-down")], + persistence=True, + persistence_type="session", + ) + ) diff --git a/automappa/pages/home/components/markers_upload.py b/automappa/pages/home/components/markers_upload.py new file mode 100644 index 00000000..c6fdd0d3 --- /dev/null +++ b/automappa/pages/home/components/markers_upload.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +import logging +from typing import List, Protocol +from uuid import UUID +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, State, DashProxy, dcc, html +from automappa.components import ids + +import dash_uploader as du + +logger = logging.getLogger(__name__) + + +class UploadDataSource(Protocol): + def validate_uploader_path( + self, is_completed: bool, filenames: List[str], upload_id: UUID + ) -> str: + ... 
+ + +def render(app: DashProxy, source: UploadDataSource) -> html.Div: + @app.callback( + Output(ids.MARKERS_UPLOAD_STORE, "data"), + Input(ids.MARKERS_UPLOAD, "isCompleted"), + State(ids.MARKERS_UPLOAD, "fileNames"), + State(ids.MARKERS_UPLOAD, "upload_id"), + prevent_initial_call=True, + ) + def on_markers_upload(is_completed: bool, filenames: List[str], upload_id: UUID): + try: + filepath = source.validate_uploader_path(is_completed, filenames, upload_id) + except ValueError as err: + logger.warn(err) + raise PreventUpdate + if not filepath: + raise PreventUpdate + return filepath + + return html.Div( + [ + dcc.Store( + id=ids.MARKERS_UPLOAD_STORE, + storage_type="session", + clear_data=False, + ), + du.Upload( + id=ids.MARKERS_UPLOAD, + text="Drag and Drop or Select marker annotations file", + default_style={ + "width": "100%", + "borderWidth": "1px", + "borderStyle": "dashed", + "borderRadius": "5px", + "margin": "10px", + }, + max_files=1, + # 10240 MB = 10GB + max_file_size=10240, + ), + ] + ) diff --git a/automappa/pages/home/components/metagenome_select.py b/automappa/pages/home/components/metagenome_select.py new file mode 100644 index 00000000..c65590c4 --- /dev/null +++ b/automappa/pages/home/components/metagenome_select.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +import logging +from typing import Dict, List +import pandas as pd + +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, State, DashProxy, html +import dash_mantine_components as dmc +from dash_iconify import DashIconify +from automappa.components import ids + + +logger = logging.getLogger(__name__) + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.METAGENOME_SELECT, "data"), + [Input(ids.SAMPLES_STORE, "data")], + State(ids.SAMPLES_STORE, "data"), + ) + def metagenome_select_options( + samples_df: pd.DataFrame, new_samples_df: pd.DataFrame + ) -> List[Dict[str, str]]: + if samples_df is None or samples_df.empty: + raise PreventUpdate + if new_samples_df is not None: + samples_df = pd.concat([samples_df, new_samples_df]).drop_duplicates( + subset=["table_id"] + ) + + df = samples_df.loc[samples_df.filetype.eq("metagenome")] + logger.debug(f"{df.shape[0]:,} metagenomes available for mag_refinement") + return [ + { + "label": filename, + "value": table_id, + } + for filename, table_id in zip(df.filename.tolist(), df.table_id.tolist()) + ] + + return html.Div( + dmc.Select( + id=ids.METAGENOME_SELECT, + label="Metagenome", + placeholder="Select metagenome annotations", + icon=[DashIconify(icon="ph:dna-bold")], + rightSection=[DashIconify(icon="radix-icons:chevron-down")], + persistence=True, + persistence_type="session", + ) + ) diff --git a/automappa/pages/home/components/metagenome_upload.py b/automappa/pages/home/components/metagenome_upload.py new file mode 100644 index 00000000..e26e5623 --- /dev/null +++ b/automappa/pages/home/components/metagenome_upload.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +import logging +from typing import List, Protocol +from uuid import UUID +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import Input, Output, State, DashProxy, dcc, html +import dash_uploader as du + +from automappa.components import ids + +logger = logging.getLogger(__name__) + + +class UploadDataSource(Protocol): + def validate_uploader_path( + self, is_completed: bool, filenames: List[str], upload_id: UUID + ) -> str: + ... 
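+
+# NOTE: the du.Upload components in these modules assume dash-uploader was
+# configured once at app startup, e.g. (a sketch; the folder variable is an
+# assumption, not shown in this module):
+#
+#     import dash_uploader as du
+#     du.configure_upload(app, upload_folder_root, use_upload_id=True)
+#
+# use_upload_id=True is what provides the per-session upload_id consumed by
+# the uploader callbacks.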
+ + +def render(app: DashProxy, source: UploadDataSource) -> html.Div: + @app.callback( + Output(ids.METAGENOME_UPLOAD_STORE, "data"), + Input(ids.METAGENOME_UPLOAD, "isCompleted"), + State(ids.METAGENOME_UPLOAD, "fileNames"), + State(ids.METAGENOME_UPLOAD, "upload_id"), + prevent_initial_call=True, + ) + def on_metagenome_upload(is_completed: bool, filenames: List[str], upload_id: UUID): + if not is_completed: + raise PreventUpdate + try: + filepath = source.validate_uploader_path(is_completed, filenames, upload_id) + except ValueError as err: + logger.warn(err) + raise PreventUpdate + if not filepath: + raise PreventUpdate + return filepath + + return html.Div( + [ + dcc.Store( + id=ids.METAGENOME_UPLOAD_STORE, + storage_type="session", + clear_data=False, + ), + du.Upload( + id=ids.METAGENOME_UPLOAD, + text="Drag and Drop or Select metagenome assembly", + default_style={ + "width": "100%", + "borderWidth": "1px", + "borderStyle": "dashed", + "borderRadius": "5px", + "margin": "10px", + }, + max_files=1, + max_file_size=10240, + ), + ] + ) diff --git a/automappa/pages/home/components/refine_mags_button.py b/automappa/pages/home/components/refine_mags_button.py new file mode 100644 index 00000000..e00f52ea --- /dev/null +++ b/automappa/pages/home/components/refine_mags_button.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +import logging +from dash_extensions.enrich import DashProxy, Input, Output, html +import dash_mantine_components as dmc +from dash_iconify import DashIconify + +from automappa.components import ids + + +logger = logging.getLogger(__name__) + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.REFINE_MAGS_BUTTON, "disabled"), + [ + Input(ids.BINNING_SELECT, "value"), + Input(ids.MARKERS_SELECT, "value"), + Input(ids.METAGENOME_SELECT, "value"), + ], + ) + def disable_button_callback( + binning_value: str, markers_value: str, metagenome_value: str + ) -> bool: + return ( + binning_value is None or markers_value is None or metagenome_value is None + ) + + return html.Div( + dmc.Button( + "Refine MAGs", + id=ids.REFINE_MAGS_BUTTON, + leftIcon=[DashIconify(icon="mingcute:broom-line", width=25)], + variant="gradient", + gradient={"from": "#642E8D", "to": "#1f58a6", "deg": 150}, + fullWidth=True, + ) + ) diff --git a/automappa/pages/home/components/sample_card.py b/automappa/pages/home/components/sample_card.py new file mode 100644 index 00000000..0ae43956 --- /dev/null +++ b/automappa/pages/home/components/sample_card.py @@ -0,0 +1,260 @@ +from dash_iconify import DashIconify +import dash_mantine_components as dmc + +from automappa.components import ids +from typing import Optional, Protocol, Tuple, Union + + +class SampleCardDataSource(Protocol): + def get_metagenome_name(self, metagenome_id: int) -> str: + """Get Metagenome.name where Metagenome.id == metagenome_id""" + ... + + def contig_count(self, metagenome_id: int) -> int: + ... + + def marker_count(self, metagenome_id: int) -> int: + ... + + def connections_count(self, metagenome_id: int) -> int: + ... + + def get_approximate_marker_sets(self, metagenome_id: int) -> int: + ... + + def get_mimag_counts(self, metagenome_id: int) -> Tuple[int, int, int]: + """Retrieve counts of clusters following MIMAG standards. + + standards: + + - High-quality >90% complete > 95% pure + - Medium-quality >=50% complete > 90% pure + - Low-quality <50% complete < 90% pure + + """ + ... 
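+
+    # The MIMAG tiers described above reduce to two threshold checks; a
+    # plain-Python sketch of the classification (illustrative only, the real
+    # counts come from the data source):
+    #
+    #     def mimag_tier(completeness: float, purity: float) -> str:
+    #         if completeness > 90 and purity > 95:
+    #             return "high-quality"
+    #         if completeness >= 50 and purity > 90:
+    #             return "medium-quality"
+    #         return "low-quality"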
+
+    def get_refinements_count(
+        self,
+        metagenome_id: int,
+        initial: Optional[bool] = None,
+        outdated: Optional[bool] = None,
+    ) -> int:
+        """Get Refinement count where Refinement.metagenome_id == metagenome_id
+
+        Providing `initial` will add where(Refinement.initial_refinement == initial)
+        otherwise this filter is omitted
+
+        Providing `outdated` will add where(Refinement.outdated == outdated)
+        otherwise this filter is omitted
+        """
+        ...
+
+    def get_refined_contig_count(self, metagenome_id: int) -> int:
+        ...
+
+
+def get_badge(label: str, id: dict[str, Union[str, int]], color: str) -> dmc.Badge:
+    return dmc.Badge(label, id=id, color=color, variant="dot", size="xs")
+
+
+def render(source: SampleCardDataSource, metagenome_id: int) -> dmc.Card:
+    metagenome_badge = get_badge(
+        label=ids.SAMPLE_CARD_METAGENOME_BADGE_LABEL,
+        id={
+            ids.SAMPLE_CARD_INDEX: metagenome_id,
+            "type": ids.SAMPLE_CARD_METAGENOME_BADGE_TYPE,
+        },
+        color="lime",
+    )
+    contig_count = source.contig_count(metagenome_id)
+    binning_badge = get_badge(
+        label=f"{ids.SAMPLE_CARD_BINNING_BADGE_LABEL}: {contig_count:,}",
+        id={
+            ids.SAMPLE_CARD_INDEX: metagenome_id,
+            "type": ids.SAMPLE_CARD_BINNING_BADGE_TYPE,
+        },
+        color="lime",
+    )
+    marker_count = source.marker_count(metagenome_id)
+    marker_badge = get_badge(
+        label=f"{ids.SAMPLE_CARD_MARKERS_BADGE_LABEL}: {marker_count:,}",
+        id={
+            ids.SAMPLE_CARD_INDEX: metagenome_id,
+            "type": ids.SAMPLE_CARD_MARKERS_BADGE_TYPE,
+        },
+        color="lime",
+    )
+    connections_count = source.connections_count(metagenome_id)
+    connections_badge = get_badge(
+        label=ids.SAMPLE_CARD_CONNECTIONS_BADGE_LABEL,
+        id={
+            ids.SAMPLE_CARD_INDEX: metagenome_id,
+            "type": ids.SAMPLE_CARD_CONNECTIONS_BADGE_TYPE,
+        },
+        color="lime" if connections_count > 0 else "red",
+    )
+    badges = [
+        metagenome_badge,
+        binning_badge,
+        marker_badge,
+        connections_badge,
+    ]
+    chip = dmc.Chip(
+        "Select",
+        id={
+            ids.SAMPLE_CARD_INDEX: metagenome_id,
+            "type": ids.SAMPLE_CARD_CHIP_TYPE,
+        },
+        size="sm",
+        variant="outline",
+        radius="xl",
+        checked=False,
+    )
+    high_quality, medium_quality, low_quality = source.get_mimag_counts(metagenome_id)
+    high_quality_badge = dmc.Tooltip(
+        label=">90% complete & >95% pure",
+        position="top-end",
+        offset=3,
+        children=dmc.Badge(high_quality, color="lime", variant="filled"),
+    )
+    medium_quality_badge = dmc.Tooltip(
+        label=">=50% complete & >90% pure",
+        position="top",
+        offset=3,
+        children=dmc.Badge(medium_quality, color="yellow", variant="filled"),
+    )
+    low_quality_badge = dmc.Tooltip(
+        label="<50% complete & <90% pure",
+        position="top-start",
+        offset=3,
+        children=dmc.Badge(low_quality, color="orange", variant="filled"),
+    )
+    mimag_section = dmc.Center(
+        dmc.Group(
+            [
+                high_quality_badge,
+                dmc.Divider(orientation="vertical", style={"height": 20}),
+                medium_quality_badge,
+                dmc.Divider(orientation="vertical", style={"height": 20}),
+                low_quality_badge,
+            ],
+        ),
+    )
+    approx_markers = dmc.Group(
+        [
+            dmc.Text("Approx.
Markers Sets:", size="xs"), + dmc.Badge( + source.get_approximate_marker_sets(metagenome_id), + size="xs", + variant="outline", + color="gray", + ), + ], + position="apart", + ) + uploaded_clusters = dmc.Group( + [ + dmc.Text(f"Uploaded Clusters:", size="xs"), + dmc.Badge( + source.get_refinements_count(metagenome_id, initial=True), + size="xs", + variant="outline", + color="gray", + ), + ], + position="apart", + ) + user_refinements = dmc.Group( + [ + dmc.Text(f"User Refinements:", size="xs"), + dmc.Badge( + source.get_refinements_count( + metagenome_id, initial=False, outdated=False + ), + size="xs", + variant="outline", + color="gray", + ), + ], + position="apart", + ) + current_refinements = dmc.Group( + [ + dmc.Text(f"Current Refinements:", size="xs"), + dmc.Badge( + source.get_refinements_count(metagenome_id, outdated=False), + size="xs", + variant="outline", + ), + ], + position="apart", + ) + refined_contig_count = source.get_refined_contig_count(metagenome_id) + percent_clustered = round(refined_contig_count / contig_count * 100, 2) + percent_clustered_text = dmc.Group( + [ + dmc.Text(f"Contigs Clustered (%):", size="xs"), + dmc.Badge(percent_clustered, size="xs", variant="outline"), + ], + position="apart", + ) + return dmc.Card( + id={ids.SAMPLE_CARD_INDEX: metagenome_id, "type": ids.SAMPLE_CARD_TYPE}, + children=[ + dmc.CardSection( + dmc.Group( + [ + dmc.Text( + source.get_metagenome_name(metagenome_id) + .replace("_", " ") + .title(), + weight=500, + ), + dmc.Tooltip( + dmc.ActionIcon( + DashIconify(icon="tabler:trash-x", width=18), + id={ + ids.SAMPLE_CARD_INDEX: metagenome_id, + "type": ids.SAMPLE_CARD_REMOVE_BTN, + }, + variant="filled", + n_clicks=0, + size="md", + radius="md", + color="red", + ), + label="Delete Sample", + position="top", + color="red", + radius="xl", + offset=3, + ), + ], + position="apart", + ), + withBorder=True, + inheritPadding=True, + py="xs", + ), + dmc.Space(h=10), + dmc.SimpleGrid(cols=2, children=badges), + dmc.Space(h=10), + dmc.Text("MIMAG cluster counts", size="sm"), + dmc.Space(h=5), + mimag_section, + dmc.Space(h=10), + dmc.Divider(variant="dotted"), + dmc.Space(h=10), + approx_markers, + uploaded_clusters, + user_refinements, + current_refinements, + percent_clustered_text, + dmc.Space(h=10), + dmc.CardSection(dmc.Divider(variant="dashed"), withBorder=False), + dmc.Space(h=10), + dmc.Group(chip), + ], + withBorder=False, + shadow="sm", + radius="md", + styles=dict(color="lime"), + ) diff --git a/automappa/pages/home/components/sample_cards.py b/automappa/pages/home/components/sample_cards.py new file mode 100644 index 00000000..2f60f740 --- /dev/null +++ b/automappa/pages/home/components/sample_cards.py @@ -0,0 +1,239 @@ +from typing import Dict, List, Literal, Optional, Protocol, Tuple, Union +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import ( + DashProxy, + html, + Input, + Output, + State, + dcc, + ctx, + MATCH, + ALL, +) +from dash_iconify import DashIconify +import dash_mantine_components as dmc + +from celery.result import GroupResult, AsyncResult + +from automappa.components import ids +from automappa.pages.home.components import sample_card + + +class SampleCardsDataSource(Protocol): + def preprocess_metagenome( + self, + name: str, + metagenome_fpath: str, + binning_fpath: str, + markers_fpath: str, + connections_fpath: Optional[str] = None, + ) -> GroupResult: + """Create Metagenome object in db + + Returns + ------- + AsyncResult + Celery AsyncResult from task canvas preprocess metagenome pipeline + 
""" + ... + + def get_preprocess_metagenome_tasks( + self, task_ids: List[str] + ) -> List[Tuple[str, AsyncResult]]: + ... + + def remove_metagenome(self, metagenome_id: int) -> None: + ... + + def get_metagenome_ids(self) -> List[int]: + ... + + +def render(app: DashProxy, source: SampleCardsDataSource) -> html.Div: + @app.callback( + Output( + {ids.SAMPLE_CARD_INDEX: MATCH, "type": ids.SAMPLE_CARD_TYPE}, + "withBorder", + ), + Input( + {ids.SAMPLE_CARD_INDEX: MATCH, "type": ids.SAMPLE_CARD_CHIP_TYPE}, "checked" + ), + prevent_initial_call=True, + ) + def sample_card_selected(sample_chip_checked: bool) -> bool: + return sample_chip_checked + + @app.callback( + Output(ids.TASK_ID_STORE, "data", allow_duplicate=True), + Input(ids.METAGENOME_UPLOAD_STORE, "data"), + Input(ids.BINNING_MAIN_UPLOAD_STORE, "data"), + Input(ids.MARKERS_UPLOAD_STORE, "data"), + Input(ids.CYTOSCAPE_STORE, "data"), + Input(ids.UPLOAD_STEPPER_SUBMIT_BUTTON, "n_clicks"), + State(ids.SAMPLE_NAME_TEXT_INPUT, "value"), + prevent_initial_call="initial_duplicate", + ) + def submit_sample_ingestion_task( + metagenome_fpath: str, + binning_fpath: str, + marker_fpath: str, + connection_fpath: Union[str, None], + submit_btn: int, + metagenome_name: str, + ) -> List[str]: + if not ctx.triggered_id == ids.UPLOAD_STEPPER_SUBMIT_BUTTON: + raise PreventUpdate + group_result = source.preprocess_metagenome( + metagenome_name, + metagenome_fpath, + binning_fpath, + marker_fpath, + connection_fpath, + ) + task_ids = [group_result.parent.id] + [task.id for task in group_result.results] + return task_ids + + @app.callback( + Output(ids.BACKGROUND_TASK_DIV, "children"), + Output(ids.TASK_ID_STORE, "data", allow_duplicate=True), + Input(ids.BACKGROUND_TASK_INTERVAL, "n_intervals"), + State(ids.TASK_ID_STORE, "data"), + prevent_initial_call=True, + ) + def notify_task_progress( + n_intervals: int, task_ids: List[str] + ) -> Tuple[List[dmc.Notification], List[str]]: + notifications = [] + if not task_ids: + raise PreventUpdate + tasks = source.get_preprocess_metagenome_tasks(task_ids) + n_tasks = len(tasks) + tasks_completed = 0 + for task_name, task in tasks: + if task.status in { + "PENDING", + "RETRY", + }: + loading = True + color = "orange" + autoclose = False + action = "show" + icon = DashIconify(icon="la:running") + elif task.status == "RECEIVED": + loading = True + color = "blue" + autoclose = False + action = "update" + icon = DashIconify(icon="la:running") + elif task.status == "STARTED": + loading = True + color = "green" + autoclose = False + action = "update" + icon = DashIconify(icon="ooui:error", color="red") + elif task.status == "FAILURE" or task.status == "REVOKED": + loading = False + color = "red" + autoclose = 15000 + action = "update" + icon = DashIconify(icon="ooui:error", color="red") + else: + # task.status == "SUCCESS" + loading = False + color = "green" + autoclose = 15000 + action = "update" + icon = DashIconify(icon="akar-icons:circle-check") + # Forget task upon success... 
+ # otherwise keep in tasks list + tasks_completed += 1 + + notification = dmc.Notification( + id={ids.NOTIFICATION_TASK_ID: task.id}, + title=f"Task: {task_name}", + message=f"pre-processing status: {task.status}", + loading=loading, + color=color, + action=action, + autoClose=autoclose, + disallowClose=False, + icon=icon, + ) + notifications.append(notification) + + if tasks_completed == n_tasks: + for name, task in tasks: + task.forget() + task_ids.pop(task_ids.index(task.id)) + for notification in notifications: + notification.action = "update" + return notifications, task_ids + + @app.callback( + Output(ids.SAMPLE_CARDS_CONTAINER, "children", allow_duplicate=True), + Input(ids.TASK_ID_STORE, "data"), + prevent_initial_call=True, + ) + def get_sample_cards(task_ids: List[str]) -> List[dmc.Card]: + if task_ids: + raise PreventUpdate + return [ + sample_card.render(source, metagenome_id=mg_id) + for mg_id in source.get_metagenome_ids() + ] + + @app.callback( + Output(ids.SAMPLE_CARDS_CONTAINER, "children", allow_duplicate=True), + Input( + {ids.SAMPLE_CARD_INDEX: ALL, "type": ids.SAMPLE_CARD_REMOVE_BTN}, + "n_clicks", + ), + State({ids.SAMPLE_CARD_INDEX: ALL, "type": ids.SAMPLE_CARD_REMOVE_BTN}, "id"), + prevent_initial_call=True, + ) + def remove_button_clicked( + remove_btns_clicks: List[int], remove_btn_ids: Dict[str, str] + ) -> List[dmc.Card]: + if not any(remove_btns_clicks): + raise PreventUpdate + sample_card_index = [ + i for i, n_clicks in enumerate(remove_btns_clicks) if n_clicks > 0 + ][0] + metagenome_id = remove_btn_ids[sample_card_index].get(ids.SAMPLE_CARD_INDEX) + source.remove_metagenome(metagenome_id) + return [ + sample_card.render(source, metagenome_id=mg_id) + for mg_id in source.get_metagenome_ids() + ] + + @app.callback( + Output(ids.SAMPLE_CARDS_CONTAINER, "children", allow_duplicate=True), + Input(ids.UPLOAD_STEPPER_SUBMIT_BUTTON, "n_clicks"), + prevent_initial_call="initial_duplicate", + ) + def get_sample_cards(submit_btn: int) -> List[dmc.Card]: + return [ + sample_card.render(source, metagenome_id=mg_id) + for mg_id in source.get_metagenome_ids() + ] + + # TODO Callback to delete sample card + return html.Div( + [ + dcc.Interval(id=ids.BACKGROUND_TASK_INTERVAL, interval=3000), + html.Div(id=ids.BACKGROUND_TASK_DIV), + dmc.SimpleGrid( + id=ids.SAMPLE_CARDS_CONTAINER, + spacing="xs", + cols=6, + breakpoints=[ + dict(maxWidth=1500, cols=5), + dict(maxWidth=1200, cols=4), + dict(maxWidth=980, cols=3), + dict(maxWidth=755, cols=2), + dict(maxWidth=600, cols=1), + ], + ), + ] + ) diff --git a/automappa/pages/home/components/sample_name_text_input.py b/automappa/pages/home/components/sample_name_text_input.py new file mode 100644 index 00000000..e679d406 --- /dev/null +++ b/automappa/pages/home/components/sample_name_text_input.py @@ -0,0 +1,61 @@ +import dash_mantine_components as dmc +from dash.exceptions import PreventUpdate +import string +from typing import Protocol +from dash_extensions.enrich import html, DashProxy, Output, Input + +from automappa.components import ids + +MAX_CHARS = 24 + + +class SampleNameTextInputDataSource(Protocol): + def name_is_unique(self, name: str) -> bool: + ... 
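+
+# The helpers below implement the naming rules enforced by the callback:
+# only letters, digits, and underscores, and fewer than MAX_CHARS characters.
+# An equivalent single-regex check (a sketch, not used by this module;
+# 23 = MAX_CHARS - 1):
+#
+#     import re
+#     def is_valid_sample_name(text: str) -> bool:
+#         return re.fullmatch(r"[A-Za-z0-9_]{1,23}", text) is not None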
+ + +def has_symbols_or_whitespace(text: str) -> bool: + # Define the set of allowed characters (letters, digits, and underscores) + allowed_chars = string.ascii_letters + string.digits + "_" + # Check if any character in the string is not in the set of allowed characters + return any(char not in allowed_chars for char in text) + + +def exceeds_max_char_length(text: str) -> bool: + return len(text) >= MAX_CHARS + + +def render(app: DashProxy, source: SampleNameTextInputDataSource) -> html.Div: + @app.callback( + Output(ids.SAMPLE_NAME_TEXT_INPUT, "error"), + Input(ids.SAMPLE_NAME_TEXT_INPUT, "value"), + prevent_initial_call=True, + ) + def update_is_valid_sample_name(input_text: str) -> str: + if input_text is None: + raise PreventUpdate + if not source.name_is_unique(input_text): + return "Sample name must be unique!" + if has_symbols_or_whitespace(input_text): + return "Sample may not contain any symbols or whitespace!" + if exceeds_max_char_length(input_text): + return f"Name must be less than {MAX_CHARS}" + return "" + + @app.callback( + Output(ids.SAMPLE_NAME_TEXT_INPUT, "value"), + Input(ids.UPLOAD_STEPPER_SUBMIT_BUTTON, "n_clicks"), + prevent_initial_call=True, + ) + def reset_text_input_value_on_submit(submit_btn: int) -> str: + return "" + + return html.Div( + dmc.TextInput( + id=ids.SAMPLE_NAME_TEXT_INPUT, + label="Sample Name", + placeholder="i.e. forcepia_sponge", + description="Provide a name to identify this sample", + required=True, + ), + ) diff --git a/automappa/pages/home/components/samples_datatable.py b/automappa/pages/home/components/samples_datatable.py new file mode 100644 index 00000000..87de9e74 --- /dev/null +++ b/automappa/pages/home/components/samples_datatable.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +import logging +from dash.dash_table import DataTable +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import DashProxy, Input, Output, State, html, dcc +import pandas as pd + +from automappa.components import ids + + +logger = logging.getLogger(__name__) + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.SAMPLES_DATATABLE, "children"), + [Input(ids.SAMPLES_STORE, "data")], + State(ids.SAMPLES_STORE, "data"), + ) + def on_samples_store_data(samples_df: pd.DataFrame, new_samples_df: pd.DataFrame): + if samples_df is None or samples_df.empty: + raise PreventUpdate + if new_samples_df is not None: + samples_df = pd.concat([samples_df, new_samples_df]).drop_duplicates( + subset=["table_id"] + ) + + logger.debug(f"retrieved {samples_df.shape[0]:,} samples from samples store") + + if samples_df.empty: + raise PreventUpdate + + return DataTable( + data=samples_df.to_dict("records"), + columns=[ + {"id": col, "name": col, "editable": False} + for col in samples_df.columns + ], + persistence=True, + persistence_type="session", + ) + + return html.Div( + children=[ + dcc.Loading( + id=ids.LOADING_SAMPLES_DATATABLE, + children=[ + html.Label("Uploaded Datasets"), + html.Div(id=ids.SAMPLES_DATATABLE), + ], + type="dot", + color="#646569", + ), + ] + ) diff --git a/automappa/pages/home/components/selected_tables_datatable.py b/automappa/pages/home/components/selected_tables_datatable.py new file mode 100644 index 00000000..6c489acb --- /dev/null +++ b/automappa/pages/home/components/selected_tables_datatable.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +from dash.dash_table import DataTable +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import DashProxy, Input, Output, State, html, dcc +from 
automappa.data.source import SampleTables + +from automappa.components import ids + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.SELECTED_TABLES_DATATABLE, "children"), + [ + Input(ids.SELECTED_TABLES_STORE, "data"), + ], + State(ids.SELECTED_TABLES_STORE, "data"), + ) + def selected_tables_datatable_children( + samples: SampleTables, + new_tables: SampleTables, + ): + if samples is None: + raise PreventUpdate + if new_tables is not None: + if new_tables != samples: + tables_dict = samples.dict() + tables_dict.update(new_tables.dict()) + samples = SampleTables.parse_obj(tables_dict) + + has_table = False + for __, table_id in samples: + if table_id: + has_table = True + break + + if not has_table: + raise PreventUpdate + + return DataTable( + data=[ + {"filetype": sample, "table_id": table.id} + for sample, table in samples + if sample not in {"kmers"} + ], + columns=[ + {"id": "filetype", "name": "filetype", "editable": False}, + {"id": "table_id", "name": "table_id", "editable": False}, + ], + persistence=True, + persistence_type="session", + ) + + return html.Div( + children=[ + dcc.Loading( + id=ids.LOADING_SELECTED_TABLES_DATATABLE, + children=[ + html.Label("Selected Datasets for Refinement & Summary:"), + html.Div(id=ids.SELECTED_TABLES_DATATABLE), + ], + type="dot", + color="#646569", + ), + ] + ) diff --git a/automappa/pages/home/components/task_status_badge.py b/automappa/pages/home/components/task_status_badge.py new file mode 100644 index 00000000..c77e419a --- /dev/null +++ b/automappa/pages/home/components/task_status_badge.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +import random +from dash.exceptions import PreventUpdate +from dash import Patch +from dash_extensions.enrich import DashProxy, Output, Input, State, html, dcc, ctx +import dash_mantine_components as dmc +from typing import List + +from automappa.components import ids +from automappa.pages.home.tasks.task_status_badge import set_badge_color + + +PENDING = "PENDING" +STARTED = "STARTED" +RETRY = "RETRY" +FAILURE = "FAILURE" +SUCCESS = "SUCCESS" + + +def render(app: DashProxy) -> html.Div: + @app.callback( + Output(ids.BADGE_TASK_STORE, "data", allow_duplicate=True), + Input(ids.BACKGROUND_TASK_BUTTON, "n_clicks"), + prevent_initial_call="initial_duplicate", + ) + def create_task(btn: int): + if not ctx.triggered_id == ids.BACKGROUND_TASK_BUTTON: + raise PreventUpdate + task_ids = Patch() + color = random.choice(["green", "lime"]) + task = set_badge_color.delay((color,)) + task_ids.append(task.id) + return task_ids + + @app.callback( + Output(ids.BACKGROUND_TASK_BADGE, "color"), + Output(ids.BADGE_TASK_STORE, "data", allow_duplicate=True), + Input(ids.BADGE_STATUS_INTERVAL, "n_intervals"), + State(ids.BADGE_TASK_STORE, "data"), + prevent_initial_call=True, + ) + def read_tasks(n_intervals: int, task_ids: List[str]) -> str: + if not task_ids: + raise PreventUpdate + for task_id in task_ids: + task = set_badge_color.AsyncResult(task_id) + if task.status == PENDING: + color = "orange" + elif task.status == STARTED: + color = "blue" + elif task.status == RETRY: + color = "yellow" + elif task.status == FAILURE: + color = "red" + else: + # i.e. 
task.status == SUCCESS + color = task.get() + task_ids.pop(task_ids.index(task_id)) + return color, task_ids + + @app.callback( + Output(ids.BACKGROUND_TASK_BADGE, "children"), + Input(ids.BADGE_STATUS_INTERVAL, "n_intervals"), + Input(ids.BADGE_TASK_STORE, "data"), + ) + def read_tasks_count(n_intervals: int, task_ids: List[str]) -> str: + if not task_ids: + text = f"0 tasks!" + else: + n_tasks = len(task_ids) + text = f"{n_tasks} task" if n_tasks == 1 else f"{n_tasks} tasks" + return text + + # NOTE: You can uncomment this callback + # for disabling submit button when task is submitted + @app.callback( + Output(ids.BACKGROUND_TASK_BUTTON, "disabled"), + Input(ids.BADGE_TASK_STORE, "data"), + ) + def disable_task_button(task_ids: List[str]) -> bool: + return True if task_ids else False + + return html.Div( + [ + dcc.Store(ids.BADGE_TASK_STORE, data=[]), + dcc.Interval(ids.BADGE_STATUS_INTERVAL, interval=500), + dmc.Button( + children=[dmc.Text("task button")], id=ids.BACKGROUND_TASK_BUTTON + ), + dmc.Badge(id=ids.BACKGROUND_TASK_BADGE), + ] + ) diff --git a/automappa/pages/home/components/tasks_table.py b/automappa/pages/home/components/tasks_table.py new file mode 100644 index 00000000..69b4d733 --- /dev/null +++ b/automappa/pages/home/components/tasks_table.py @@ -0,0 +1,10 @@ +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import DashProxy, Input, Output, html +import dash_bootstrap_components as dbc + +from automappa.components import ids + + +def render(app: DashProxy) -> html.Div: + return html.Div(id=ids.EMBEDDING_TASKS) diff --git a/automappa/pages/home/components/upload_modal.py b/automappa/pages/home/components/upload_modal.py new file mode 100644 index 00000000..f2a44349 --- /dev/null +++ b/automappa/pages/home/components/upload_modal.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +from typing import Protocol +from dash_extensions.enrich import DashProxy, Input, Output, State, html + +import dash_bootstrap_components as dbc +import dash_mantine_components as dmc +from dash_iconify import DashIconify + +from automappa.components import ids + +from automappa.pages.home.components import upload_stepper + + +class UploadModalDataSource(Protocol): + def name_is_unique(self, name: str) -> bool: + ... + + +def render(app: DashProxy, source: UploadModalDataSource) -> html.Div: + @app.callback( + Output(ids.UPLOAD_MODAL, "is_open"), + [ + Input(ids.OPEN_MODAL_BUTTON, "n_clicks"), + Input(ids.CLOSE_MODAL_BUTTON, "n_clicks"), + Input(ids.UPLOAD_STEPPER_SUBMIT_BUTTON, "n_clicks"), + ], + [State(ids.UPLOAD_MODAL, "is_open")], + ) + def toggle_modal( + open_btn: int, + close_btn: int, + submit_btn: int, + is_open: bool, + ) -> bool: + if open_btn or close_btn or submit_btn: + return not is_open + return is_open + + # TODO Add text to modal with max_file_size info... 
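+    # For the TODO above: dash-uploader's max_file_size is given in MB, so the
+    # 10240 used by the upload components corresponds to 10 GB; a hint string
+    # could be derived as (hypothetical, not yet wired into the modal):
+    #
+    #     size_hint = f"Max upload size: {10240 // 1024} GB"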
+ return html.Div( + dbc.Modal( + [ + dbc.ModalHeader( + dbc.ModalTitle("Upload sample annotations"), + close_button=False, + ), + dbc.ModalBody(upload_stepper.render(app, source)), + dbc.ModalFooter( + dmc.Button( + "Close", + id=ids.CLOSE_MODAL_BUTTON, + leftIcon=[DashIconify(icon="line-md:close-small")], + style={"textAlign": "center"}, + color="dark", + ) + ), + ], + id=ids.UPLOAD_MODAL, + keyboard=False, + backdrop="static", + size="lg", + fullscreen=False, + centered=True, + ), + ) diff --git a/automappa/pages/home/components/upload_modal_button.py b/automappa/pages/home/components/upload_modal_button.py new file mode 100644 index 00000000..92c83b63 --- /dev/null +++ b/automappa/pages/home/components/upload_modal_button.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +from typing import List, Protocol +from dash_iconify import DashIconify +from dash_extensions.enrich import DashProxy, html, Output, Input + +import dash_mantine_components as dmc + +from automappa.components import ids +from automappa.pages.home.components import upload_modal + + +class UploadModalButtonDataSource(Protocol): + def name_is_unique(self, name: str) -> bool: + ... + + +def render(app: DashProxy, source: UploadModalButtonDataSource) -> html.Div: + @app.callback( + Output(ids.OPEN_MODAL_BUTTON, "disabled"), + Input(ids.TASK_ID_STORE, "data"), + ) + def disable_task_button(task_ids: List[str]) -> bool: + return True if task_ids and task_ids is not None else False + + return html.Div( + [ + dmc.Button( + "New Sample", + id=ids.OPEN_MODAL_BUTTON, + leftIcon=[DashIconify(icon="line-md:upload-outline", width=25)], + variant="gradient", + gradient={"from": "#CA2270", "to": "#F36E2D"}, + fullWidth=False, + ), + upload_modal.render(app, source), + ] + ) diff --git a/automappa/pages/home/components/upload_stepper.py b/automappa/pages/home/components/upload_stepper.py new file mode 100644 index 00000000..049cb365 --- /dev/null +++ b/automappa/pages/home/components/upload_stepper.py @@ -0,0 +1,383 @@ +from typing import Protocol +from dash_extensions.enrich import ( + DashProxy, + Input, + Output, + html, + State, + ctx, +) +from dash.exceptions import PreventUpdate +import dash_mantine_components as dmc +from dash_iconify import DashIconify + +from automappa.components import ids +from automappa.pages.home.components import ( + binning_upload, + cytoscape_connections_upload, + markers_upload, + metagenome_upload, + sample_name_text_input, +) + +# LOGIC: +# Steps: +# 1. Upload metagenome +# 2. Upload binning (will eventually link to metagenome) +# 3. Upload markers (will eventually link to binning contigs) +# 4. Upload cytoscape connections (optional will eventually link to metagenome) +# 5. Finish upload (unique sample name required as text input to act as link b/w uploaded data) +# On completion (adds metagenome card to home page) + + +class UploadStepperDataSource(Protocol): + def name_is_unique(self, name: str) -> bool: + ... 
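+
+# The back/next logic in update_active_step below is a clamped decrement or
+# increment over the step indices defined inside render()
+# (MIN_STEP..MAX_STEP); an equivalent one-line sketch:
+#
+#     step = max(MIN_STEP, step - 1) if went_back else min(MAX_STEP, step + 1)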
+ + +def render(app: DashProxy, source: UploadStepperDataSource) -> html.Div: + @app.callback( + Output(ids.UPLOAD_STEPPER, "active", allow_duplicate=True), + Input(ids.UPLOAD_STEPPER_BACK_BUTTON, "n_clicks"), + Input(ids.UPLOAD_STEPPER_NEXT_BUTTON, "n_clicks"), + State(ids.UPLOAD_STEPPER, "active"), + prevent_initial_call=True, + ) + def update_active_step(back_btn: int, next_btn: int, current_step: int) -> int: + button_id = ctx.triggered_id + step = current_step if current_step is not None else ACTIVE + if button_id == ids.UPLOAD_STEPPER_BACK_BUTTON: + step = step - 1 if step > MIN_STEP else step + else: + step = step + 1 if step < MAX_STEP else step + return step + + @app.callback( + Output(ids.UPLOAD_STEPPER_BACK_BUTTON, "disabled"), + Input(ids.UPLOAD_STEPPER, "active"), + prevent_initial_call=True, + ) + def disable_back_button_on_upload_metagenome_step( + current_step: int, + ) -> bool: + is_disabled = True + if current_step == MIN_STEP: + return is_disabled + return not is_disabled + + @app.callback( + Output(ids.UPLOAD_STEPPER_NEXT_BUTTON, "disabled", allow_duplicate=True), + Input(ids.METAGENOME_UPLOAD, "isCompleted"), + Input(ids.UPLOAD_STEPPER, "active"), + prevent_initial_call=True, + ) + def toggle_next_button_on_metagenome_upload( + is_completed: bool, + current_step: int, + ) -> bool: + if current_step != METAGENOME_UPLOAD_STEP: + raise PreventUpdate + return not is_completed + + @app.callback( + Output(ids.UPLOAD_STEPPER_NEXT_BUTTON, "disabled", allow_duplicate=True), + Input(ids.BINNING_UPLOAD, "isCompleted"), + Input(ids.UPLOAD_STEPPER, "active"), + prevent_initial_call=True, + ) + def toggle_next_button_on_binning_upload( + is_completed: bool, current_step: int + ) -> bool: + if current_step != BINNING_UPLOAD_STEP: + raise PreventUpdate + return not is_completed + + @app.callback( + Output(ids.UPLOAD_STEPPER_NEXT_BUTTON, "disabled", allow_duplicate=True), + Input(ids.MARKERS_UPLOAD, "isCompleted"), + Input(ids.UPLOAD_STEPPER, "active"), + prevent_initial_call=True, + ) + def toggle_next_button_on_marker_upload( + is_completed: bool, current_step: int + ) -> bool: + if current_step != MARKERS_UPLOAD_STEP: + raise PreventUpdate + return not is_completed + + @app.callback( + Output(ids.UPLOAD_STEPPER_NEXT_BUTTON, "disabled", allow_duplicate=True), + Input(ids.SAMPLE_NAME_TEXT_INPUT, "error"), + Input(ids.SAMPLE_NAME_TEXT_INPUT, "value"), + Input(ids.UPLOAD_STEPPER, "active"), + prevent_initial_call=True, + ) + def toggle_next_button_on_sample_name_input( + text_input_error: str, text_input_value: str, current_step: int + ) -> bool: + if current_step != SAMPLE_NAME_STEP: + raise PreventUpdate + return not text_input_value or text_input_error != "" + + @app.callback( + Output(ids.UPLOAD_STEPPER_NEXT_BUTTON, "disabled", allow_duplicate=True), + Input(ids.UPLOAD_STEPPER, "active"), + prevent_initial_call=True, + ) + def disable_next_button_on_completed_step(current_step: int) -> bool: + if current_step != MAX_STEP: + raise PreventUpdate + return True + + @app.callback( + Output(ids.UPLOAD_STEPPER_SUBMIT_BUTTON, "disabled"), + Input(ids.SAMPLE_NAME_TEXT_INPUT, "value"), + ) + def toggle_submit_button(text_input: str) -> bool: + return not text_input + + @app.callback( + Output(ids.UPLOAD_STEPPER, "active", allow_duplicate=True), + Input(ids.UPLOAD_STEPPER_SUBMIT_BUTTON, "n_clicks"), + prevent_initial_call=True, + ) + def on_submit_button(submit_btn: int) -> int: + return ACTIVE + + @app.callback( + Output(ids.UPLOAD_STEPPER, "active", allow_duplicate=True), + 
Input(ids.CLOSE_MODAL_BUTTON, "n_clicks"), + prevent_initial_call=True, + ) + def on_close_modal_button(close_modal_btn: int) -> int: + return ACTIVE + + def get_icon(icon: str, height: int = 20) -> DashIconify: + return DashIconify(icon=icon, height=height) + + MIN_STEP = 0 + METAGENOME_UPLOAD_STEP = 0 + BINNING_UPLOAD_STEP = 1 + MARKERS_UPLOAD_STEP = 2 + CONNECTIONS_UPLOAD_STEP = 3 + SAMPLE_NAME_STEP = 4 + MAX_STEP = 5 + ACTIVE = 0 + + upload_metagenome_step = dmc.StepperStep( + label="First step", + description="Upload metagenome", + icon=get_icon("tabler:dna-2-off"), + iconSize=30, + progressIcon=get_icon("tabler:dna-2-off"), + completedIcon=get_icon("tabler:dna-2"), + children=[ + dmc.Text( + "Upload your sample's metagenome assembly fasta file", + align="center", + ), + metagenome_upload.render(app, source), + ], + ) + upload_binning_step = dmc.StepperStep( + label="Second step", + description="Upload binning", + icon=get_icon("ph:chart-scatter"), + iconSize=30, + progressIcon=get_icon("ph:chart-scatter"), + completedIcon=get_icon("ph:chart-scatter-bold"), + children=[ + dmc.Group( + children=[ + dmc.Text( + "Upload the corresponding main binning results", + align="center", + ), + dmc.Anchor( + dmc.Tooltip( + get_icon( + "material-symbols:info-outline", + height=15, + ), + label=dmc.Code( + "autometa-binning --output-main " + ), + color="gray", + withArrow=True, + position="right", + radius="md", + ), + href="https://autometa.readthedocs.io/en/latest/step-by-step-tutorial.html#binning", + target="_blank", + underline=False, + color="dark", + ), + ], + spacing="xs", + ), + binning_upload.render(app, source), + ], + ) + + upload_markers_step = dmc.StepperStep( + label="Third step", + description="Upload markers", + icon=get_icon("fluent:document-dismiss-24-regular"), + iconSize=30, + progressIcon=get_icon("fluent:document-dismiss-24-regular"), + completedIcon=get_icon("fluent:document-checkmark-24-regular"), + children=[ + dmc.Group( + children=[ + dmc.Text( + "Upload the corresponding marker annotation results", + align="center", + ), + dmc.Anchor( + dmc.Tooltip( + get_icon( + "material-symbols:info-outline", + height=15, + ), + label=dmc.Code( + "autometa-markers --out " + ), + color="gray", + withArrow=True, + position="right", + radius="md", + ), + href="https://autometa.readthedocs.io/en/latest/step-by-step-tutorial.html#single-copy-markers", + target="_blank", + color="dark", + underline=False, + ), + ], + ), + markers_upload.render(app, source), + ], + ) + + connections_upload_step = dmc.StepperStep( + label="Fourth step", + description="Upload connections", + icon=get_icon("bx:network-chart"), + iconSize=30, + progressIcon=get_icon("bx:network-chart"), + completedIcon=get_icon("bx:network-chart"), + children=[ + dmc.Group( + children=[ + dmc.Text("Upload corresponding cytoscape connections"), + dmc.Text("(optional)", weight=500, color="orange"), + dmc.Anchor( + dmc.Tooltip( + get_icon( + "material-symbols:info-outline", + height=15, + ), + label="Mads Albertsen tutorial on generating paired-end connections", + color="gray", + withArrow=True, + position="right", + radius="md", + ), + href="https://madsalbertsen.github.io/multi-metagenome/docs/step10.html", + target="_blank", + underline=False, + color="dark", + ), + dmc.Anchor( + dmc.Tooltip( + get_icon("openmoji:github", height=15), + label="https://github.com/MadsAlbertsen/multi-metagenome", + color="gray", + withArrow=True, + position="right", + radius="md", + ), + href="https://github.com/MadsAlbertsen/multi-metagenome", + 
target="_blank", + underline=False, + color="dark", + ), + ], + spacing="xs", + ), + cytoscape_connections_upload.render(app, source), + ], + ) + + name_sample_step = dmc.StepperStep( + label="Fifth step", + description="Name Sample", + icon=get_icon("mdi:rename-outline"), + iconSize=30, + progressIcon=get_icon("mdi:rename-outline"), + completedIcon=get_icon("mdi:rename"), + children=[ + dmc.Text( + "Supply a unique sample name to group your metagenome annotations" + ), + sample_name_text_input.render(app, source), + ], + ) + + completed_step = dmc.StepperCompleted( + children=[ + dmc.Text( + "That's it! Click the back button to go to a previous step or submit to save this dataset", + align="center", + ), + dmc.Center( + dmc.Button( + "Submit", + id=ids.UPLOAD_STEPPER_SUBMIT_BUTTON, + leftIcon=[get_icon("line-md:upload-outline")], + variant="gradient", + gradient={"from": "#CA2270", "to": "#F36E2D"}, + ) + ), + ] + ) + + return html.Div( + dmc.Container( + [ + dmc.Stepper( + id=ids.UPLOAD_STEPPER, + active=ACTIVE, + breakpoint="md", + color="dark", + children=[ + upload_metagenome_step, + upload_binning_step, + upload_markers_step, + connections_upload_step, + name_sample_step, + completed_step, + ], + ), + dmc.Group( + position="center", + mt="xl", + children=[ + dmc.Button( + "Back", + id=ids.UPLOAD_STEPPER_BACK_BUTTON, + variant="outline", + color="dark", + disabled=True, + ), + dmc.Button( + "Next step", + id=ids.UPLOAD_STEPPER_NEXT_BUTTON, + disabled=True, + variant="outline", + color="dark", + ), + ], + ), + ] + ) + ) diff --git a/automappa/pages/home/layout.py b/automappa/pages/home/layout.py new file mode 100644 index 00000000..701d525d --- /dev/null +++ b/automappa/pages/home/layout.py @@ -0,0 +1,35 @@ +from dash_extensions.enrich import DashBlueprint, LogTransform +import dash_mantine_components as dmc +from automappa.components import ids +from automappa.pages.home.components import ( + sample_cards, + upload_modal_button, +) +from automappa.pages.home.source import HomeDataSource + + +HEIGHT_MARGIN = 10 +WIDTH_MARGIN = 10 + + +def render(source: HomeDataSource) -> DashBlueprint: + app = DashBlueprint(transforms=[LogTransform()]) + app.name = ids.HOME_TAB_ID + app.icon = "line-md:home" + app.description = "Automappa home page to upload genome binning results." 
+    app.title = "Automappa home"
+    app.layout = dmc.NotificationsProvider(
+        dmc.Container(
+            [
+                dmc.Space(h=HEIGHT_MARGIN, w=WIDTH_MARGIN),
+                sample_cards.render(app, source),
+                dmc.Space(h=HEIGHT_MARGIN, w=WIDTH_MARGIN),
+                dmc.Affix(
+                    upload_modal_button.render(app, source),
+                    position={"bottom": HEIGHT_MARGIN, "left": WIDTH_MARGIN},
+                ),
+            ],
+            fluid=True,
+        )
+    )
+    return app
diff --git a/automappa/pages/home/source.py b/automappa/pages/home/source.py
new file mode 100644
index 00000000..349a6a00
--- /dev/null
+++ b/automappa/pages/home/source.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python
+
+from functools import partial
+import uuid
+import logging
+from pydantic import BaseModel
+from typing import List, Optional, Tuple, Union
+
+from sqlmodel import Session, and_, select, func
+
+from sqlalchemy.exc import NoResultFound, MultipleResultsFound
+
+from celery import group
+from celery.result import GroupResult, AsyncResult
+from automappa.data import loader
+
+
+from automappa.data.database import engine
+from automappa.data.models import (
+    Metagenome,
+    Contig,
+    Marker,
+    CytoscapeConnection,
+    Refinement,
+)
+from automappa.pages.home.tasks.sample_cards import (
+    assign_contigs_marker_size,
+    assign_contigs_marker_symbol,
+    create_metagenome_model,
+    initialize_refinement,
+)
+
+logger = logging.getLogger(__name__)
+
+MARKER_SET_SIZE = 139
+HIGH_QUALITY_COMPLETENESS = 90  # gt
+HIGH_QUALITY_PURITY = 95  # gt
+MEDIUM_QUALITY_COMPLETENESS = 50  # gte
+MEDIUM_QUALITY_PURITY = 90  # gt
+# lt
+LOW_QUALITY_COMPLETENESS = 50
+LOW_QUALITY_PURITY = 90
+
+
+class HomeDataSource(BaseModel):
+    def name_is_unique(self, name: str) -> bool:
+        """Determine whether the metagenome name is available in the database
+
+        Parameters
+        ----------
+        name : str
+            Name of the metagenome
+
+        Returns
+        -------
+        bool
+            True when no Metagenome with this name exists in the database
+        """
+        with Session(engine) as session:
+            try:
+                session.exec(select(Metagenome).where(Metagenome.name == name)).one()
+                # Exactly one metagenome already uses this name
+                is_unique = False
+            except NoResultFound:
+                is_unique = True
+            except MultipleResultsFound:
+                is_unique = False
+        return is_unique
+
+    def get_metagenome_name(self, metagenome_id: int) -> str:
+        with Session(engine) as session:
+            name = session.exec(
+                select(Metagenome.name).where(Metagenome.id == metagenome_id)
+            ).one()
+            return name
+
+    def get_metagenome_ids(self) -> List[int]:
+        """Get all Metagenome ids in the database
+
+        Returns
+        -------
+        List[int]
+            Metagenome ids available in the database
+        """
+        with Session(engine) as session:
+            metagenome_ids = session.exec(select(Metagenome.id)).all()
+            return metagenome_ids
+
+    def validate_uploader_path(
+        self,
+        is_completed: bool,
+        filenames: List[str],
+        upload_id: uuid.UUID,
+    ) -> str:
+        """Validate the dash-uploader inputs and return the uploaded file's
+        path as a string (falsy when the upload could not be validated)
+        """
+        fpath = loader.validate_uploader(is_completed, filenames, upload_id)
+        if fpath:
+            fpath = str(fpath)
+        return fpath
+
+    def preprocess_metagenome(
+        self,
+        name: str,
+        metagenome_fpath: str,
+        binning_fpath: str,
+        markers_fpath: str,
+        connections_fpath: Union[str, None] = None,
+    ) -> GroupResult:
+        task_chain = create_metagenome_model.s() | group(
+            [
+                assign_contigs_marker_size.s(),
+                assign_contigs_marker_symbol.s(),
+                initialize_refinement.s(),
+            ]
+        )
+        # NOTE: task ids may be retrieved using .results method
+        # (will correspond to order in group)
+        # group_result.results: List[AsyncResult]
+        result: GroupResult =
task_chain.set(countdown=2).delay( + name=name, + metagenome_fpath=metagenome_fpath, + binning_fpath=binning_fpath, + markers_fpath=markers_fpath, + connections_fpath=connections_fpath, + ) + return result + + def get_preprocess_metagenome_tasks( + self, task_ids: Tuple[str, str, str, str] + ) -> List[Tuple[str, AsyncResult]]: + ( + mg_model_task_id, + marker_size_task_id, + marker_symbol_task_id, + refinement_task_id, + ) = task_ids + mg_model_task = create_metagenome_model.AsyncResult(mg_model_task_id) + marker_size_task = assign_contigs_marker_size.AsyncResult(marker_size_task_id) + marker_symbol_task = assign_contigs_marker_symbol.AsyncResult( + marker_symbol_task_id + ) + refinement_task = initialize_refinement.AsyncResult(refinement_task_id) + return ( + ("ingesting metagenome data", mg_model_task), + ("pre-computing marker sizes", marker_size_task), + ("pre-computing marker symbols", marker_symbol_task), + ("initializing user refinements", refinement_task), + ) + + def remove_metagenome(self, metagenome_id: int) -> None: + with Session(engine) as session: + metagenome = session.exec( + select(Metagenome).where(Metagenome.id == metagenome_id) + ).one() + session.delete(metagenome) + session.commit() + + def marker_count(self, metagenome_id: int) -> int: + with Session(engine) as session: + statement = ( + select([func.count(Marker.id)]) + .join(Contig) + .join(Metagenome) + .where(Metagenome.id == metagenome_id) + ) + marker_count = session.exec(statement).one() + return marker_count + + def get_approximate_marker_sets(self, metagenome_id: int) -> int: + marker_count_stmt = ( + select(func.count(Marker.id)) + .join(Contig) + .where(Contig.metagenome_id == metagenome_id) + ) + with Session(engine) as session: + total_markers = session.exec(marker_count_stmt).first() + + return total_markers // MARKER_SET_SIZE + + def contig_count(self, metagenome_id: int) -> int: + with Session(engine) as session: + statement = ( + select([func.count(Metagenome.contigs)]) + .join(Contig) + .where(Metagenome.id == metagenome_id) + ) + contig_count = session.exec(statement).one() + return contig_count + + def connections_count(self, metagenome_id: int) -> int: + with Session(engine) as session: + statement = ( + select([func.count(Metagenome.connections)]) + .join(CytoscapeConnection) + .where(Metagenome.id == metagenome_id) + ) + connection_count = session.exec(statement).first() + return connection_count + + def get_refined_contig_count(self, metagenome_id: int) -> int: + stmt = ( + select(func.count(Contig.id)) + .where(Contig.metagenome_id == metagenome_id) + .where(Contig.refinements.any(Refinement.outdated == False)) + ) + with Session(engine) as session: + count = session.exec(stmt).first() or 0 + return count + + def get_refinements_count( + self, + metagenome_id: int, + initial: Optional[bool] = None, + outdated: Optional[bool] = None, + ) -> int: + """Get Refinement count where Refinement.metagenome_id == metagenome_id + + Providing `initial` will add where(Refinement.initial_refinement == True) + otherwise will omit this filter and retrieve all. 
+ """ + stmt = select(func.count(Refinement.id)).where( + Refinement.metagenome_id == metagenome_id + ) + if isinstance(initial, bool): + stmt = stmt.where(Refinement.initial_refinement == initial) + if isinstance(outdated, bool): + stmt = stmt.where(Refinement.outdated == outdated) + with Session(engine) as session: + count = session.exec(stmt).first() or 0 + return count + + def compute_completeness_purity_metrics( + self, metagenome_id: int, refinement_id: int + ) -> Tuple[float, float]: + marker_count_stmt = ( + select(func.count(Marker.id)) + .join(Contig) + .where( + Contig.metagenome_id == metagenome_id, + Contig.refinements.any( + and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ), + ) + ) + unique_marker_stmt = ( + select(Marker.sacc) + .join(Contig) + .distinct() + .where( + Contig.metagenome_id == metagenome_id, + Contig.refinements.any( + and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ), + ) + ) + with Session(engine) as session: + markers_count = session.exec(marker_count_stmt).first() or 0 + unique_marker_count = session.exec( + select(func.count()).select_from(unique_marker_stmt) + ).first() + + completeness = round(unique_marker_count / MARKER_SET_SIZE * 100, 2) + purity = ( + round(unique_marker_count / markers_count * 100, 2) if markers_count else 0 + ) + return completeness, purity + + def get_mimag_counts(self, metagenome_id: int) -> Tuple[int, int, int]: + """Retrieve counts of clusters following MIMAG standards. + + standards: + + - High-quality >90% complete > 95% pure + - Medium-quality >=50% complete > 90% pure + - Low-quality <50% complete < 90% pure + + """ + stmt = select(Refinement.id).where( + Refinement.metagenome_id == metagenome_id, + Refinement.outdated == False, + ) + high_quality_count = 0 + medium_quality_count = 0 + low_quality_count = 0 + with Session(engine) as session: + refinement_ids = session.exec(stmt).all() + for refinement_id in refinement_ids: + completeness, purity = self.compute_completeness_purity_metrics( + metagenome_id, refinement_id + ) + if ( + completeness > HIGH_QUALITY_COMPLETENESS + and purity > HIGH_QUALITY_PURITY + ): + high_quality_count += 1 + elif ( + completeness >= MEDIUM_QUALITY_COMPLETENESS + and purity > MEDIUM_QUALITY_PURITY + ): + medium_quality_count += 1 + else: + # completeness < LOW_QUALITY_COMPLETENESS and purity < LOW_QUALITY_PURITY: + low_quality_count += 1 + return high_quality_count, medium_quality_count, low_quality_count diff --git a/automappa/pages/home/tasks/__init__.py b/automappa/pages/home/tasks/__init__.py new file mode 100644 index 00000000..0aefe56d --- /dev/null +++ b/automappa/pages/home/tasks/__init__.py @@ -0,0 +1,17 @@ +from .task_status_badge import set_badge_color +from .sample_cards import ( + create_metagenome_model, + initialize_refinement, + assign_contigs_marker_size, + assign_contigs_marker_symbol, + create_metagenome, +) + +__all__ = [ + "set_badge_color", + "create_metagenome", + "create_metagenome_model", + "initialize_refinement", + "assign_contigs_marker_symbol", + "assign_contigs_marker_size", +] diff --git a/automappa/pages/home/tasks/sample_cards.py b/automappa/pages/home/tasks/sample_cards.py new file mode 100644 index 00000000..53521dd6 --- /dev/null +++ b/automappa/pages/home/tasks/sample_cards.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python + +from typing import List, Optional, Tuple, Union +from sqlmodel import Session, case, func, select +from automappa.data import loader +from automappa.data.database import engine 
+from automappa.data.models import Contig, Marker +from automappa.tasks import queue + + +@queue.task(bind=True) +def create_metagenome( + self, + name: str, + metagenome_fpath: str, + binning_fpath: str, + markers_fpath: str, + connections_fpath: Union[List[str], None] = None, +) -> Tuple[str, int]: + # TODO Create sample should be async so sample_card with loader + # is displayed and user can continue with navigation + # TODO Disable "new sample" button while create sample is in progress + metagenome = loader.create_sample_metagenome( + name, metagenome_fpath, binning_fpath, markers_fpath, connections_fpath + ) + loader.create_initial_refinements(metagenome.id) + return metagenome.name, metagenome.id + + +@queue.task(bind=True) +def create_metagenome_model( + self, + name: str, + metagenome_fpath: str, + binning_fpath: str, + markers_fpath: str, + connections_fpath: Optional[str] = None, +) -> int: + metagenome = loader.create_sample_metagenome( + name, metagenome_fpath, binning_fpath, markers_fpath, connections_fpath + ) + return metagenome.id + + +@queue.task(bind=True) +def initialize_refinement(self, metagenome_id: int) -> None: + loader.create_initial_refinements(metagenome_id) + + +@queue.task(bind=True) +def assign_contigs_marker_symbol(self, metagenome_id: int) -> None: + subquery = ( + select( + [ + Contig.id, + case( + [ + (func.count(Marker.id) == 0, "circle"), + (func.count(Marker.id) == 1, "square"), + (func.count(Marker.id) == 2, "diamond"), + (func.count(Marker.id) == 3, "triangle-up"), + (func.count(Marker.id) == 4, "x"), + (func.count(Marker.id) == 5, "pentagon"), + (func.count(Marker.id) == 6, "hexagon2"), + (func.count(Marker.id) >= 7, "hexagram"), + ], + else_="circle", + ).label("symbol"), + ] + ) + .select_from(Contig) + .join(Marker, isouter=True) + .group_by(Contig.id) + .subquery() + ) + stmt = ( + select(Contig, subquery.c.symbol) + .select_from(Contig) + .join(subquery, subquery.c.id == Contig.id) + ) + stmt = stmt.where(Contig.metagenome_id == metagenome_id) + with Session(engine) as session: + results = session.exec(stmt).all() + for contig, symbol in results: + contig.marker_symbol = symbol + session.add(contig) + + session.commit() + + +@queue.task(bind=True) +def assign_contigs_marker_size(self, metagenome_id: int) -> None: + subquery = ( + select( + [ + Contig.id, + case( + [ + (func.count(Marker.id) == 0, 7), + (func.count(Marker.id) == 1, 8), + (func.count(Marker.id) == 2, 9), + (func.count(Marker.id) == 3, 10), + (func.count(Marker.id) == 4, 11), + (func.count(Marker.id) == 5, 12), + (func.count(Marker.id) == 6, 13), + (func.count(Marker.id) >= 7, 14), + ], + else_=7, + ).label("size"), + ] + ) + .select_from(Contig) + .join(Marker, isouter=True) + .group_by(Contig.id) + .subquery() + ) + stmt = ( + select(Contig, subquery.c.size) + .select_from(Contig) + .join(subquery, subquery.c.id == Contig.id) + ) + stmt = stmt.where(Contig.metagenome_id == metagenome_id) + with Session(engine) as session: + results = session.exec(stmt).all() + for contig, size in results: + contig.marker_size = size + session.add(contig) + + session.commit() diff --git a/automappa/pages/home/tasks/task_status_badge.py b/automappa/pages/home/tasks/task_status_badge.py new file mode 100644 index 00000000..37d2d915 --- /dev/null +++ b/automappa/pages/home/tasks/task_status_badge.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python +import time + +from automappa.tasks import queue + + +@queue.task(bind=True) +def set_badge_color(self, color: str) -> str: + time.sleep(15) + return color + + +if 
__name__ == "__main__": + pass diff --git a/automappa/pages/mag_refinement/__init__.py b/automappa/pages/mag_refinement/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/pages/mag_refinement/components/__init__.py b/automappa/pages/mag_refinement/components/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/pages/mag_refinement/components/binning_refinements_clear_button.py b/automappa/pages/mag_refinement/components/binning_refinements_clear_button.py new file mode 100644 index 00000000..39cb5c4d --- /dev/null +++ b/automappa/pages/mag_refinement/components/binning_refinements_clear_button.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +from typing import List, Protocol +from dash_extensions.enrich import DashProxy, Output, Input, html, ctx +import dash_mantine_components as dmc +from dash_iconify import DashIconify + +from automappa.components import ids + + +class RefinementsClearButtonDataSource(Protocol): + def clear_refinements(self, metagenome_id: int) -> int: + ... + + def has_user_refinements(self, metagenome_id: int) -> bool: + ... + + +def render(app: DashProxy, source: RefinementsClearButtonDataSource) -> html.Div: + @app.callback( + Output(ids.REFINEMENTS_CLEARED_NOTIFICATION, "children"), + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.REFINEMENTS_CLEAR_BUTTON, "n_clicks"), + prevent_initial_call=True, + ) + def clear_refinements_callback(metagenome_id: int, btn: int) -> dmc.Notification: + deleted_refinements_count = source.clear_refinements(metagenome_id) + message = f"Successfully cleared {deleted_refinements_count:,}" + title = ( + "Refinement cleared!" + if deleted_refinements_count == 1 + else "Refinements cleared!" + ) + message += " refinement" if deleted_refinements_count == 1 else " refinements" + return dmc.Notification( + id=ids.REFINEMENTS_NOTIFICATION, + action="show", + message=message, + title=title, + icon=DashIconify(icon="icomoon-free:fire", color="#f78f1f"), + color="dark", + autoClose=60000, + ) + + @app.callback( + Output(ids.REFINEMENTS_CLEAR_BUTTON, "disabled"), + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.REFINEMENTS_CLEARED_NOTIFICATION, "children"), + Input(ids.MAG_REFINEMENTS_SAVE_BUTTON, "n_clicks"), + ) + def disable_clear_button( + metagenome_id: int, cleared_notification: List[dmc.Notification], save_btn: int + ) -> bool: + return not source.has_user_refinements(metagenome_id) + + return html.Div( + [ + dmc.Button( + children=[dmc.Text("Clear Refinements")], + id=ids.REFINEMENTS_CLEAR_BUTTON, + variant="filled", + color="red", + leftIcon=DashIconify(icon="icomoon-free:fire", color="white"), + ), + html.Div(id=ids.REFINEMENTS_CLEARED_NOTIFICATION), + ] + ) diff --git a/automappa/pages/mag_refinement/components/binning_refinements_download_button.py b/automappa/pages/mag_refinement/components/binning_refinements_download_button.py new file mode 100644 index 00000000..b5cf4c34 --- /dev/null +++ b/automappa/pages/mag_refinement/components/binning_refinements_download_button.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, Protocol + +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +import dash_mantine_components as dmc +from dash_iconify import DashIconify +import pandas as pd + +from automappa.components import ids + + +class RefinementsDownloadButtonDataSource(Protocol): + def get_refinements_dataframe(self, metagenome_id: int) -> pd.DataFrame: + ... 
+ + def has_user_refinements(self, metagenome_id: int) -> bool: + ... + + +def render(app: DashProxy, source: RefinementsDownloadButtonDataSource) -> html.Div: + @app.callback( + Output(ids.REFINEMENTS_DOWNLOAD, "data"), + [ + Input(ids.REFINEMENTS_DOWNLOAD_BUTTON, "n_clicks"), + Input(ids.METAGENOME_ID_STORE, "data"), + ], + ) + def download_refinements( + n_clicks: int, + metagenome_id: int, + ) -> Dict[str, "str | bool"]: + if not n_clicks: + raise PreventUpdate + df = source.get_refinements_dataframe(metagenome_id) + return dcc.send_data_frame(df.to_csv, "refinements.csv", index=False) + + @app.callback( + Output(ids.REFINEMENTS_DOWNLOAD_BUTTON, "disabled"), + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.MAG_REFINEMENTS_SAVE_BUTTON, "n_clicks"), + Input(ids.REFINEMENTS_CLEARED_NOTIFICATION, "children"), + ) + def disable_download_button( + metagenome_id: int, save_btn: int, clear_btn_notification + ) -> bool: + return not source.has_user_refinements(metagenome_id) + + # Download Refinements Button + return html.Div( + [ + dmc.Button( + "Download Refinements", + id=ids.REFINEMENTS_DOWNLOAD_BUTTON, + leftIcon=[DashIconify(icon="line-md:download-loop", height=25)], + n_clicks=0, + color="dark", + variant="outline", + fullWidth=False, + ), + dcc.Download(id=ids.REFINEMENTS_DOWNLOAD), + ] + ) diff --git a/automappa/pages/mag_refinement/components/binning_refinements_summary_button.py b/automappa/pages/mag_refinement/components/binning_refinements_summary_button.py new file mode 100644 index 00000000..0d65edc5 --- /dev/null +++ b/automappa/pages/mag_refinement/components/binning_refinements_summary_button.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import html +import dash_mantine_components as dmc +from automappa.components import ids + + +# Summarize Refinements Button +def render() -> html.Div: + return html.Div( + dmc.Button( + "Summarize Refinements", + id=ids.REFINEMENTS_SUMMARY_BUTTON, + n_clicks=0, + color="dark", + fullWidth=True, + ), + # TODO: Create background task to compute binning summary metrics + # TODO: Create downloader task to download file of computed summary metrics + ) diff --git a/automappa/pages/mag_refinement/components/color_by_col_dropdown.py b/automappa/pages/mag_refinement/components/color_by_col_dropdown.py new file mode 100644 index 00000000..fd88fadf --- /dev/null +++ b/automappa/pages/mag_refinement/components/color_by_col_dropdown.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +from typing import Dict, List, Literal, Protocol +from dash_extensions.enrich import DashProxy, html +import dash_mantine_components as dmc + +from automappa.components import ids + + +class ColorByColDropdownDataSource(Protocol): + def get_color_by_column_options(self) -> List[Dict[Literal["label", "value"], str]]: + ... 
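These component modules all depend on a `typing.Protocol` data source rather than a concrete database class, so each `render` can be exercised with any structurally compatible object. A minimal, self-contained sketch of that pattern (the `StubSource` class and its return value are illustrative, not part of Automappa):

```python
from typing import Dict, List, Literal, Protocol


class OptionsDataSource(Protocol):
    def get_color_by_column_options(self) -> List[Dict[Literal["label", "value"], str]]:
        ...


class StubSource:
    # satisfies the Protocol structurally; no inheritance required
    def get_color_by_column_options(self) -> List[Dict[Literal["label", "value"], str]]:
        return [{"label": "Cluster", "value": "cluster"}]


def render_with(source: OptionsDataSource) -> None:
    print(source.get_color_by_column_options())


render_with(StubSource())  # type-checks and runs
```

This is what lets tests (or a future in-memory backend) swap the PostgreSQL-backed source for a stub without touching component code.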
+ + +def render(app: DashProxy, source: ColorByColDropdownDataSource) -> html.Div: + options = source.get_color_by_column_options() + radios = [ + dmc.Radio(option["label"], value=option["value"], color="orange") + for option in options + ] + return html.Div( + [ + html.Label("Color contigs by:"), + dmc.RadioGroup( + radios, + id=ids.COLOR_BY_COLUMN_DROPDOWN, + value=ids.COLOR_BY_COLUMN_DROPDOWN_VALUE_DEFAULT, + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/contig_cytoscape.py b/automappa/pages/mag_refinement/components/contig_cytoscape.py new file mode 100644 index 00000000..d2c4b9c6 --- /dev/null +++ b/automappa/pages/mag_refinement/components/contig_cytoscape.py @@ -0,0 +1,116 @@ +from typing import Dict, List, Literal, Optional, Protocol, Union +import dash_cytoscape as cyto +from dash_extensions.enrich import DashProxy, html, Output, Input, dcc + +from automappa.components import ids + + +class ContigCytoscapeDataSource(Protocol): + def get_cytoscape_elements( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[ + Dict[ + Literal["data"], + Dict[ + Literal["id", "label", "source", "target", "connections"], + Union[str, int], + ], + ] + ]: + ... + + def get_cytoscape_stylesheet( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[ + Dict[ + Literal["selector", "style"], + Union[Literal["node", "edge"], Dict[str, Union[str, int, float]]], + ] + ]: + ... + + +def render(app: DashProxy, source: ContigCytoscapeDataSource) -> html.Div: + @app.callback( + Output(ids.CONTIG_CYTOSCAPE, "stylesheet"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ], + prevent_initial_call=True, + ) + def highlight_selected_contigs( + metagenome_id: int, + selected_contigs: Dict[str, List[Dict[str, str]]], + ) -> List[ + Dict[ + Literal["selector", "style"], + Union[Literal["node", "edge"], Dict[str, Union[str, int, float]]], + ] + ]: + headers = {point["text"] for point in selected_contigs["points"]} + stylesheet = source.get_cytoscape_stylesheet(metagenome_id, headers) + + SELECTED_COLOR = "#B10DC9" + stylesheet += [ + { + "selector": "node", + "style": {"label": "data(label)", "opacity": 0.7}, + }, + { + "selector": "edge", + "style": { + "opacity": 0.4, + "curve-style": "bezier", + "label": "data(connections)", + }, + }, + ] + # TODO + # 1. Style connections using mappingtype + # (i.e. differentiate between start and end connections) + # - https://dash.plotly.com/cytoscape/styling#edge-arrows + # TODO + # 2. 
Add selector based on number of contig connections + # - https://dash.plotly.com/cytoscape/styling#comparing-data-items-using-selectors + # It looks like this could be done using the 'weight' key for the edge + # and then selecting using stylesheet = [{'selector': '[weight > 3]'}] + # where the '3' could be dynamically updated by a slider component (or other component) + return stylesheet + + @app.callback( + Output(ids.CONTIG_CYTOSCAPE, "elements"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ], + prevent_initial_call=True, + ) + def update_cytoscape_elements( + metagenome_id: int, + selected_contigs: Dict[str, List[Dict[str, str]]], + ) -> List[ + Dict[ + Literal["data"], + Dict[ + Literal["id", "label", "source", "target", "connections"], + Union[str, int], + ], + ] + ]: + headers = {point["text"] for point in selected_contigs["points"]} + records = source.get_cytoscape_elements(metagenome_id, headers) + return records + + return html.Div( + dcc.Loading( + cyto.Cytoscape( + id=ids.CONTIG_CYTOSCAPE, + layout=dict(name="cose"), + style=dict(width="100%", height="600px"), + responsive=True, + ), + id=ids.LOADING_CONTIG_CYTOSCAPE, + type="graph", + ), + ) diff --git a/automappa/pages/mag_refinement/components/coverage_range_slider.py b/automappa/pages/mag_refinement/components/coverage_range_slider.py new file mode 100644 index 00000000..7f662479 --- /dev/null +++ b/automappa/pages/mag_refinement/components/coverage_range_slider.py @@ -0,0 +1,53 @@ +from typing import Protocol, Tuple +from dash_extensions.enrich import DashProxy, Output, Input, html +import dash_mantine_components as dmc +from dash_iconify import DashIconify + +from automappa.components import ids + + +class CoverageRangeSliderDataSource(Protocol): + def get_coverage_min_max_values(self, metagenome_id: int) -> Tuple[float, float]: + ... + + +def render(app: DashProxy, source: CoverageRangeSliderDataSource) -> html.Div: + @app.callback( + Output(ids.COVERAGE_RANGE_SLIDER, "max"), + Output(ids.COVERAGE_RANGE_SLIDER, "value"), + Input(ids.METAGENOME_ID_STORE, "data"), + ) + def update_slider_range( + metagenome_id: int, + ) -> Tuple[float, Tuple[float, float]]: # one value per Output: (max, (low, high)) + min_cov, max_cov = source.get_coverage_min_max_values(metagenome_id) + min_cov = round(min_cov, 2) + max_cov = round(max_cov, 2) + return max_cov, (min_cov, max_cov) + + return html.Div( + [ + dmc.Text("Coverage range slider"), + dmc.Space(h=30), + dmc.LoadingOverlay( + dmc.RangeSlider( + min=0, + showLabelOnHover=True, + labelTransition="fade", + labelTransitionDuration=1000, # in ms + color="gray", + size="lg", + thumbFromLabel="cov", + thumbSize=35, + thumbChildren=DashIconify(icon="iconamoon:sign-x-light", width=25), + id=ids.COVERAGE_RANGE_SLIDER, + ), + loaderProps=dict( + variant="oval", + color="dark", + size="sm", + ), + ), + dmc.Space(h=30), + ] + ) diff --git a/automappa/pages/mag_refinement/components/hide_selections_switch.py b/automappa/pages/mag_refinement/components/hide_selections_switch.py new file mode 100644 index 00000000..4dcad0e9 --- /dev/null +++ b/automappa/pages/mag_refinement/components/hide_selections_switch.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from typing import List, Protocol +from dash_extensions.enrich import html, Input, Output, DashProxy +import dash_mantine_components as dmc + +from automappa.components import ids + + +class HideRefinementsSwitchDataSource(Protocol): + def has_user_refinements(self, metagenome_id: int) -> bool: + ...
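The hide-selections switch below, like the clear and download buttons earlier in this diff, is enabled or disabled by querying `has_user_refinements`. A stripped-down sketch of that callback pattern in plain Dash (the component ids and the stub query function are illustrative, not Automappa's `ids` module):

```python
from dash import Dash, Input, Output, dcc, html

app = Dash(__name__)
app.layout = html.Div(
    [
        dcc.Store(id="metagenome-id-store", data=1),
        html.Button("Hide MAG Refinements", id="hide-toggle"),
    ]
)


def has_user_refinements(metagenome_id: int) -> bool:
    return False  # stand-in for the database-backed source method


@app.callback(Output("hide-toggle", "disabled"), Input("metagenome-id-store", "data"))
def disable_switch(metagenome_id: int) -> bool:
    # disabled whenever there are no saved refinements to hide
    return not has_user_refinements(metagenome_id)
```

The real callbacks additionally listen to the save button's `n_clicks` and the cleared notification, so the control re-evaluates immediately after either action.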
+ + +def render(app: DashProxy, source: HideRefinementsSwitchDataSource) -> html.Div: + @app.callback( + Output(ids.HIDE_SELECTIONS_TOGGLE, "disabled"), + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.MAG_REFINEMENTS_SAVE_BUTTON, "n_clicks"), + Input(ids.REFINEMENTS_CLEARED_NOTIFICATION, "children"), + ) + def disable_switch( + metagenome_id: int, save_btn: int, cleared_notification: List[dmc.Notification] + ) -> bool: + return not source.has_user_refinements(metagenome_id) + + return html.Div( + dmc.Tooltip( + dmc.Switch( + id=ids.HIDE_SELECTIONS_TOGGLE, + checked=ids.HIDE_SELECTIONS_TOGGLE_VALUE_DEFAULT, + size="lg", + radius="md", + color="indigo", + label="Hide MAG Refinements", + offLabel="Off", + onLabel="On", + ), + label='Toggling this to "On" will hide your manually-curated MAG refinement groups', + position="bottom-start", + openDelay=1000, # milliseconds + transition="pop-bottom-left", + transitionDuration=500, + multiline=True, + width=300, + withArrow=True, + ) + ) diff --git a/automappa/pages/mag_refinement/components/kmer_size_dropdown.py b/automappa/pages/mag_refinement/components/kmer_size_dropdown.py new file mode 100644 index 00000000..bacf0b34 --- /dev/null +++ b/automappa/pages/mag_refinement/components/kmer_size_dropdown.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import dcc, html +from automappa.components import ids + + +def render() -> html.Div: + return html.Div( + [ + html.Label("K-mer size:"), + dcc.Dropdown( + id=ids.KMER_SIZE_DROPDOWN, + options=[3, 4, 5], + value=ids.KMER_SIZE_DROPDOWN_VALUE_DEFAULT, + clearable=False, + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/mag_metrics_table.py b/automappa/pages/mag_refinement/components/mag_metrics_table.py new file mode 100644 index 00000000..ffea7436 --- /dev/null +++ b/automappa/pages/mag_refinement/components/mag_metrics_table.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Literal, Optional, Protocol, Union +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +import dash_ag_grid as dag + +from automappa.components import ids + + +class MagMetricsTableDataSource(Protocol): + def get_marker_overview( + self, metagenome_id: int + ) -> List[Dict[Literal["metric", "metric_value"], Union[str, int, float]]]: + ... + + def get_mag_metrics_row_data( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Dict[Literal["metric", "metric_value"], Union[str, int, float]]]: + ... 
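The row data feeding this table includes `Completeness (%)` and `Purity (%)` metrics that the MIMAG style conditions in the render function below color-code. The computation itself lives in the data source layer, not this diff; the sketch below uses one common single-copy-marker convention (completeness as the fraction of the 139-marker set observed, purity as the fraction of observed markers that are single-copy) and should be read as an assumption, not Automappa's exact formula:

```python
from collections import Counter
from typing import Iterable, Tuple

MARKER_SET_SIZE = 139  # single-copy marker set size used throughout this diff


def completeness_and_purity(marker_saccs: Iterable[str]) -> Tuple[float, float]:
    counts = Counter(marker_saccs)  # marker accession -> copy count
    seen = len(counts)
    single_copy = sum(1 for n in counts.values() if n == 1)
    completeness = seen / MARKER_SET_SIZE * 100
    purity = single_copy / seen * 100 if seen else 0.0
    return completeness, purity


# two single-copy markers plus one duplicated marker -> purity of ~66.7%
print(completeness_and_purity(["PF00001", "PF00001", "PF00002", "PF00003"]))
```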
+ + +def render(app: DashProxy, source: MagMetricsTableDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_METRICS_DATATABLE, "rowData", allow_duplicate=True), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ], + prevent_initial_call=True, + ) + def compute_mag_metrics( + metagenome_id: int, + selected_contigs: Optional[Dict[str, List[Dict[str, str]]]], + ) -> List[Dict[Literal["metric", "metric_value"], Union[str, int, float]]]: + headers = ( + {point["text"] for point in selected_contigs["points"]} + if selected_contigs + else None + ) + row_data = source.get_mag_metrics_row_data( + metagenome_id=metagenome_id, headers=headers + ) + return row_data + + @app.callback( + Output(ids.MAG_METRICS_DATATABLE, "rowData", allow_duplicate=True), + Input(ids.METAGENOME_ID_STORE, "data"), + prevent_initial_call="initial_duplicate", + ) + def compute_markers_overview( + metagenome_id: int, + ) -> List[Dict[Literal["metric", "metric_value"], Union[str, int, float]]]: + row_data = source.get_marker_overview(metagenome_id) + return row_data + + GREEN = "#2FCC90" + YELLOW = "#f2e530" + ORANGE = "#f57600" + MIMAG_STYLE_CONDITIONS = { + "styleConditions": [ + # High-quality >90% complete > 95% pure + { + "condition": "params.data.metric == 'Completeness (%)' && params.value > 90", + "style": {"backgroundColor": GREEN}, + }, + { + "condition": "params.data.metric == 'Purity (%)' && params.value > 95", + "style": {"backgroundColor": GREEN}, + }, + # Medium-quality >=50% complete > 90% pure + { + "condition": "params.data.metric == 'Completeness (%)' && params.value >= 50", + "style": {"backgroundColor": YELLOW}, + }, + { + "condition": "params.data.metric == 'Purity (%)' && params.value > 90", + "style": {"backgroundColor": YELLOW}, + }, + # Low-quality <50% complete < 90% pure + { + "condition": "params.data.metric == 'Completeness (%)' && params.value < 50", + "style": {"backgroundColor": ORANGE, "color": "white"}, + }, + { + "condition": "params.data.metric == 'Purity (%)' && params.value < 90", + "style": {"backgroundColor": ORANGE, "color": "white"}, + }, + ] + } + + column_defs = [ + {"field": "metric", "headerName": "MAG Metric", "resizable": True}, + { + "field": "metric_value", + "headerName": "Value", + "cellStyle": MIMAG_STYLE_CONDITIONS, + }, + ] + return html.Div( + [ + html.Label("Table 1. MAG Marker Metrics"), + dcc.Loading( + dag.AgGrid( + id=ids.MAG_METRICS_DATATABLE, + className="ag-theme-material", + columnSize="responsiveSizeToFit", + style={"height": 600, "width": "100%"}, + columnDefs=column_defs, + ), + id=ids.LOADING_MAG_METRICS_DATATABLE, + type="dot", + color="#646569", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/mag_refinement_coverage_boxplot.py b/automappa/pages/mag_refinement/components/mag_refinement_coverage_boxplot.py new file mode 100644 index 00000000..4676241b --- /dev/null +++ b/automappa/pages/mag_refinement/components/mag_refinement_coverage_boxplot.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Optional, Protocol, Tuple +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +from plotly import graph_objects as go + +from automappa.utils.figures import metric_boxplot + +from automappa.components import ids + + +class RefinementCoverageBoxplotDataSource(Protocol): + def get_coverage_boxplot_records( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Tuple[str, List[float]]]: + ... 
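`metric_boxplot` is imported from `automappa.utils.figures` and its implementation is not part of this diff. A minimal stand-in consistent with the `List[Tuple[str, List[float]]]` record shape declared in the Protocol above (a sketch of the expected contract, not the actual helper):

```python
from typing import List, Tuple
from plotly import graph_objects as go


def metric_boxplot_sketch(
    data: List[Tuple[str, List[float]]], boxmean="sd"
) -> go.Figure:
    fig = go.Figure()
    for name, values in data:
        # boxmean="sd" overlays the mean and standard deviation, as in the callback
        fig.add_trace(go.Box(y=values, name=name, boxmean=boxmean))
    return fig


fig = metric_boxplot_sketch([("coverage", [10.2, 11.5, 9.8, 30.1])])
```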
+ + +def render(app: DashProxy, source: RefinementCoverageBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_REFINEMENT_COVERAGE_BOXPLOT, "figure"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ], + ) + def subset_coverage_boxplot_by_scatterplot_selection( + metagenome_id: int, + selected_data: Dict[str, List[Dict[str, str]]], + ) -> go.Figure: + headers = ( + {point["text"] for point in selected_data["points"]} + if selected_data + else None + ) + data = source.get_coverage_boxplot_records(metagenome_id, headers=headers) + fig = metric_boxplot(data, boxmean="sd") + return fig + + return html.Div( + [ + html.Label("Figure 4: MAG Refinement Coverage Boxplot"), + dcc.Loading( + id=ids.LOADING_MAG_REFINEMENT_COVERAGE_BOXPLOT, + children=[ + dcc.Graph( + id=ids.MAG_REFINEMENT_COVERAGE_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="dot", + color="#646569", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/mag_refinement_gc_content_boxplot.py b/automappa/pages/mag_refinement/components/mag_refinement_gc_content_boxplot.py new file mode 100644 index 00000000..ca573586 --- /dev/null +++ b/automappa/pages/mag_refinement/components/mag_refinement_gc_content_boxplot.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Optional, Protocol, Tuple +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +from plotly import graph_objects as go + +from automappa.utils.figures import metric_boxplot + +from automappa.components import ids + + +class RefinementGcContentBoxplotDataSource(Protocol): + def get_gc_content_boxplot_records( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Tuple[str, List[float]]]: + ... 
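Every selection-aware callback on this page reduces the scatterplot's `selectedData` payload to a set of contig headers the same way. The payload below is simplified (real Plotly points also carry `x`, `y`, `curveNumber`, and so on), but the reduction is exactly the one these callbacks use:

```python
from typing import Dict, List, Optional, Set


def selected_headers(
    selected_data: Optional[Dict[str, List[Dict[str, str]]]]
) -> Optional[Set[str]]:
    if not selected_data or not selected_data.get("points"):
        return None  # no lasso/box selection; sources fall back to all contigs
    return {point["text"] for point in selected_data["points"]}


payload = {"points": [{"text": "contig_1"}, {"text": "contig_2"}]}
assert selected_headers(payload) == {"contig_1", "contig_2"}
assert selected_headers(None) is None
```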
+ + +def render(app: DashProxy, source: RefinementGcContentBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_REFINEMENT_GC_CONTENT_BOXPLOT, "figure"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ], + ) + def subset_gc_content_boxplot_by_scatterplot_selection( + metagenome_id: int, + selected_data: Dict[str, List[Dict[str, str]]], + ) -> go.Figure: + headers = ( + {point["text"] for point in selected_data["points"]} + if selected_data + else None + ) + data = source.get_gc_content_boxplot_records( + metagenome_id=metagenome_id, headers=headers + ) + fig = metric_boxplot(data, boxmean="sd") + return fig + + return html.Div( + [ + html.Label("Figure 5: MAG Refinement GC Content Boxplot"), + dcc.Loading( + id=ids.LOADING_MAG_REFINEMENT_GC_CONTENT_BOXPLOT, + children=[ + dcc.Graph( + id=ids.MAG_REFINEMENT_GC_CONTENT_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="dot", + color="#0479a8", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/mag_refinement_length_boxplot.py b/automappa/pages/mag_refinement/components/mag_refinement_length_boxplot.py new file mode 100644 index 00000000..12a5cec0 --- /dev/null +++ b/automappa/pages/mag_refinement/components/mag_refinement_length_boxplot.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Optional, Protocol, Tuple +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +from plotly import graph_objects as go + +from automappa.utils.figures import ( + metric_boxplot, +) + +from automappa.components import ids + + +class RefinementLengthBoxplotDataSource(Protocol): + def get_length_boxplot_records( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Tuple[str, List[int]]]: + ... + + +def render(app: DashProxy, source: RefinementLengthBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_REFINEMENT_LENGTH_BOXPLOT, "figure"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ], + ) + def subset_length_boxplot_by_scatterplot_selection( + metagenome_id: int, + selected_data: Dict[str, List[Dict[str, str]]], + ) -> go.Figure: + headers = ( + {point["text"] for point in selected_data["points"]} + if selected_data + else None + ) + data = source.get_length_boxplot_records( + metagenome_id=metagenome_id, headers=headers + ) + fig = metric_boxplot(data=data) + return fig + + return html.Div( + [ + html.Label("Figure 6: MAG Refinement Length Boxplot"), + dcc.Loading( + id=ids.LOADING_MAG_REFINEMENT_LENGTH_BOXPLOT, + children=[ + dcc.Graph( + id=ids.MAG_REFINEMENT_LENGTH_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="dot", + color="#0479a8", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/marker_symbols_legend.py b/automappa/pages/mag_refinement/components/marker_symbols_legend.py new file mode 100644 index 00000000..af60606d --- /dev/null +++ b/automappa/pages/mag_refinement/components/marker_symbols_legend.py @@ -0,0 +1,109 @@ +from dash_extensions.enrich import html +import dash_bootstrap_components as dbc +import dash_mantine_components as dmc +from dash_iconify import DashIconify + + +# TODO: Refactor to update scatterplot legend with update marker symbol traces... 
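For reference, the badge symbols in the legend below mirror the marker-count to symbol mapping assigned by the `assign_contigs_marker_symbol` task at the top of this diff; a plain-Python equivalent of that SQL `CASE` expression:

```python
MARKER_SYMBOLS = {
    0: "circle",
    1: "square",
    2: "diamond",
    3: "triangle-up",
    4: "x",
    5: "pentagon",
    6: "hexagon2",
}


def marker_symbol(marker_count: int) -> str:
    # counts of seven or more collapse to "hexagram", matching the >= 7 arm
    return MARKER_SYMBOLS.get(marker_count, "hexagram")


assert marker_symbol(0) == "circle"
assert marker_symbol(9) == "hexagram"
```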
+def render() -> html.Div: + return html.Div( + [ + dbc.Row( + dbc.Col(dmc.Title("Marker Symbol Count Legend", order=6)), + justify="start", + ), + dbc.Row( + [ + dbc.Col( + dmc.Badge( + ": 0", + leftSection=[DashIconify(icon="ph:circle-bold")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + dbc.Col( + dmc.Badge( + ": 1", + leftSection=[DashIconify(icon="uil:square")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + dbc.Col( + dmc.Badge( + ": 2", + leftSection=[DashIconify(icon="ph:diamond-bold")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + dbc.Col( + dmc.Badge( + ": 3", + leftSection=[DashIconify(icon="tabler:triangle")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + dbc.Col( + dmc.Badge( + ": 4", + leftSection=[DashIconify(icon="tabler:x")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + dbc.Col( + dmc.Badge( + ": 5", + leftSection=[DashIconify(icon="tabler:pentagon")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + dbc.Col( + dmc.Badge( + ": 6", + leftSection=[DashIconify(icon="tabler:hexagon")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + dbc.Col( + dmc.Badge( + ": 7+", + leftSection=[DashIconify(icon="mdi:hexagram-outline")], + size="lg", + radius="xl", + color="dark", + variant="outline", + fullWidth=True, + ) + ), + ], + justify="evenly", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/norm_method_dropdown.py b/automappa/pages/mag_refinement/components/norm_method_dropdown.py new file mode 100644 index 00000000..f3ed1b4c --- /dev/null +++ b/automappa/pages/mag_refinement/components/norm_method_dropdown.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import dcc, html +from automappa.components import ids + + +def render() -> html.Div: + return html.Div( + [ + html.Label("norm. method:"), + dcc.Dropdown( + id=ids.NORM_METHOD_DROPDOWN, + options=[ + dict(label="CLR", value="am_clr"), + dict(label="ILR", value="ilr"), + ], + value=ids.NORM_METHOD_DROPDOWN_VALUE_DEFAULT, + clearable=False, + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/refinements_table.py b/automappa/pages/mag_refinement/components/refinements_table.py new file mode 100644 index 00000000..06d1abc5 --- /dev/null +++ b/automappa/pages/mag_refinement/components/refinements_table.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + + +import dash_ag_grid as dag +from typing import Dict, List, Literal, Protocol, Union +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +from datetime import datetime + +from automappa.components import ids + + +class RefinementsTableDataSource(Protocol): + def get_refinements_row_data( + self, metagenome_id: int + ) -> List[ + Dict[ + Literal["refinement_id", "timestamp", "contigs"], + Union[str, int, datetime], + ] + ]: + ... 
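This table uses the same `dash-ag-grid` wiring as the MAG metrics table earlier: each `columnDefs` entry's `field` addresses a key of the `rowData` records. A self-contained sketch with made-up rows in place of the database-backed source:

```python
import dash_ag_grid as dag
from dash import Dash, html

row_data = [
    {"refinement_id": 1, "timestamp": "2023-08-01 12:00:00", "contigs": 42},
    {"refinement_id": 2, "timestamp": "2023-08-01 12:05:00", "contigs": 7},
]
column_defs = [
    {"field": "refinement_id", "headerName": "ID"},
    {"field": "timestamp", "headerName": "Timestamp"},
    {"field": "contigs", "headerName": "Contigs"},
]

app = Dash(__name__)
app.layout = html.Div(
    dag.AgGrid(rowData=row_data, columnDefs=column_defs, className="ag-theme-material")
)
```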
+ + +def render(app: DashProxy, source: RefinementsTableDataSource) -> html.Div: + @app.callback( + Output(ids.REFINEMENTS_TABLE, "rowData"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.MAG_REFINEMENTS_SAVE_BUTTON, "n_clicks"), + Input(ids.REFINEMENTS_CLEARED_NOTIFICATION, "children"), + ], + ) + def refinements_table_callback( + metagenome_id: int, save_btn: int, notification + ) -> List[ + Dict[ + Literal["refinement_id", "timestamp", "contigs"], + Union[str, int, datetime], + ] + ]: + row_data = source.get_refinements_row_data(metagenome_id) + return row_data + + column_defs = [ + {"field": "refinement_id", "headerName": "ID", "resizable": False}, + {"field": "timestamp", "headerName": "Timestamp"}, + {"field": "contigs", "headerName": "Contigs"}, + ] + + return html.Div( + [ + html.Label("Table 2. MAG Refinements"), + dcc.Loading( + dag.AgGrid( + id=ids.REFINEMENTS_TABLE, + className="ag-theme-material", + columnSize="responsiveSizeToFit", + style=dict(height=600, width="100%"), + columnDefs=column_defs, + ), + id=ids.LOADING_REFINEMENTS_TABLE, + type="circle", + color="#646569", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/save_selection_button.py b/automappa/pages/mag_refinement/components/save_selection_button.py new file mode 100644 index 00000000..789db640 --- /dev/null +++ b/automappa/pages/mag_refinement/components/save_selection_button.py @@ -0,0 +1,73 @@ +from typing import Dict, List, Protocol +import dash_mantine_components as dmc +from dash.exceptions import PreventUpdate +from dash_iconify import DashIconify + +from dash_extensions.enrich import DashProxy, html, Output, Input +from automappa.components import ids + + +class SaveSelectionButtonDataSource(Protocol): + def save_selections_to_refinement( + self, metagenome_id: int, headers: List[str] + ) -> None: + ... + + +def render(app: DashProxy, source: SaveSelectionButtonDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_REFINEMENTS_SAVE_BUTTON, "disabled"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ) + def disable_save_button( + selected_data: Dict[str, List[Dict[str, str]]], + ) -> bool: + if ( + selected_data + and len({point["text"] for point in selected_data["points"]}) > 0 + ): + return False + return True + + @app.callback( + Output(ids.MAG_REFINEMENTS_SAVE_BUTTON, "n_clicks"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + Input(ids.MAG_REFINEMENTS_SAVE_BUTTON, "n_clicks"), + ], + prevent_initial_call=True, + ) + def store_binning_refinement_selections( + metagenome_id: int, + selected_data: Dict[str, List[Dict[str, str]]], + n_clicks: int, + ) -> int: + # Initial load... 
+ if not n_clicks or not selected_data: # nothing clicked yet, or nothing selected + raise PreventUpdate + headers = {point["text"] for point in selected_data["points"]} + source.save_selections_to_refinement( + metagenome_id=metagenome_id, headers=headers + ) + return 0 + + return html.Div( + dmc.Tooltip( + dmc.Button( + "Save MAG", + id=ids.MAG_REFINEMENTS_SAVE_BUTTON, + n_clicks=0, + size="md", + leftIcon=[DashIconify(icon="carbon:clean")], + variant="gradient", + gradient={"from": "#642E8D", "to": "#1f58a6", "deg": 150}, + disabled=True, + fullWidth=True, + ), + label="Save selection to MAG refinement", + transitionDuration=500, + openDelay=1500, + transition="fade", + ) + ) diff --git a/automappa/pages/mag_refinement/components/scatterplot_2d.py b/automappa/pages/mag_refinement/components/scatterplot_2d.py new file mode 100644 index 00000000..ca4f184a --- /dev/null +++ b/automappa/pages/mag_refinement/components/scatterplot_2d.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Literal, Optional, Protocol, Set, Tuple, Union +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +from plotly import graph_objects as go + +from automappa.utils.figures import ( + format_axis_title, + get_scatterplot_2d, +) + +from automappa.components import ids + + +class Scatterplot2dDataSource(Protocol): + def get_scatterplot2d_records( + self, + metagenome_id: int, + x_axis: str, + y_axis: str, + color_by_col: str, + headers: Optional[List[str]], + ) -> Dict[ + Literal["x", "y", "marker_symbol", "marker_size", "text", "customdata"], + List[Union[float, str, Tuple[float, float, int]]], + ]: + ... + + def get_contig_headers_from_coverage_range( + self, metagenome_id: int, coverage_range: Tuple[float, float] + ) -> Set[str]: + ... + + def get_user_refinements_contig_headers(self, metagenome_id: int) -> Set[str]: + """Retrieve all contig headers for Refinements that are not outdated and that were not initially uploaded by the user""" + ... + + +def get_hovertemplate(x_axis: str, y_axis: str) -> str: + # Hovertemplate + x_hover_title = format_axis_title(x_axis) + y_hover_title = format_axis_title(y_axis) + text_hover_label = "Contig: %{text}" + coverage_label = "Coverage: %{customdata[0]:.2f}" + gc_content_label = "GC%: %{customdata[1]:.2f}" + length_label = "Length: %{customdata[2]:,} bp" + x_hover_label = f"{x_hover_title}: " + "%{x:.2f}" + y_hover_label = f"{y_hover_title}: " + "%{y:.2f}" + hovertemplate = "<br>
".join( + [ + text_hover_label, + coverage_label, + gc_content_label, + length_label, + x_hover_label, + y_hover_label, + ] + ) + return hovertemplate + + +def get_traces( + data: Dict[ + str, + Dict[ + Literal[ + "x", "y", "z", "marker_size", "marker_symbol", "text", "customdata" + ], + List[Union[float, str, Tuple[float, float, int]]], + ], + ], + hovertemplate: Optional[str] = "Contig: %{text}", +) -> List[go.Scattergl]: + return [ + go.Scattergl( + x=trace["x"], + y=trace["y"], + text=trace["text"], # contig header + name=name, # groupby (color by column) value + mode="markers", + marker=dict( + size=trace["marker_size"], + line=dict(width=0.1, color="black"), + symbol=trace["marker_symbol"], + ), + customdata=trace["customdata"], + opacity=0.45, + hoverinfo="all", + hovertemplate=hovertemplate, + ) + for name, trace in data.items() + ] + + +def render(app: DashProxy, source: Scatterplot2dDataSource) -> html.Div: + @app.callback( + Output(ids.SCATTERPLOT_2D_FIGURE, "figure"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.AXES_2D_DROPDOWN, "value"), + Input(ids.SCATTERPLOT_2D_LEGEND_TOGGLE, "checked"), + Input(ids.COLOR_BY_COLUMN_DROPDOWN, "value"), + Input(ids.HIDE_SELECTIONS_TOGGLE, "checked"), + Input(ids.COVERAGE_RANGE_SLIDER, "value"), + Input(ids.MAG_REFINEMENTS_SAVE_BUTTON, "n_clicks"), + ], + ) + def scatterplot_2d_figure_callback( + metagenome_id: int, + axes_columns: str, + show_legend: bool, + color_by_col: str, + hide_selection_toggle: bool, + coverage_range: Tuple[float, float], + btn_clicks: int, + ) -> go.Figure: + # NOTE: btn_clicks is an input so this figure is updated when new refinements are saved + # data: + # - data.x_axis # continuous values + # - data.y_axis # continuous values + # - data.text # Contig.header + # - data.groupby_value # Categoricals + # - data.marker_size + # - data.marker_symbol + # - data.customdata i.e. List[Tuple(coverage, gc_content, length)] + + x_axis, y_axis = axes_columns.split("|") + hovertemplate = get_hovertemplate(x_axis, y_axis) + + headers = source.get_contig_headers_from_coverage_range( + metagenome_id, coverage_range + ) + + if hide_selection_toggle: + refinements_headers = source.get_user_refinements_contig_headers( + metagenome_id + ) + headers = headers.difference(refinements_headers) + + records = source.get_scatterplot2d_records( + metagenome_id=metagenome_id, + x_axis=x_axis, + y_axis=y_axis, + color_by_col=color_by_col, + headers=headers, + ) + + traces = get_traces(records, hovertemplate=hovertemplate) + RIGHT_MARGIN = 20 + LEFT_MARGIN = 20 + BOTTOM_MARGIN = 20 + TOP_MARGIN = 20 + legend = go.layout.Legend(visible=show_legend, x=1, y=1) + # NOTE: Changing `uirevision` will trigger the graph to change + # graph properties state (like zooming, panning, clicking on legend items). + # i.e. 
if the axes change we want to reset the ui + # See: https://community.plotly.com/t/preserving-ui-state-like-zoom-in-dcc-graph-with-uirevision-with-dash/15793 + # for more details + layout = go.Layout( + legend=legend, + margin=dict(r=RIGHT_MARGIN, b=BOTTOM_MARGIN, l=LEFT_MARGIN, t=TOP_MARGIN), + hovermode="closest", + clickmode="event+select", + uirevision=axes_columns, + xaxis=go.layout.XAxis(title=format_axis_title(x_axis)), + yaxis=go.layout.YAxis(title=format_axis_title(y_axis)), + height=600, + ) + return go.Figure(data=traces, layout=layout) + + return html.Div( + [ + html.Label("Figure 1: 2D Metagenome Overview"), + dcc.Loading( + dcc.Graph( + id=ids.SCATTERPLOT_2D_FIGURE, + clear_on_unhover=True, + config={"displayModeBar": True, "displaylogo": False}, + ), + id=ids.LOADING_SCATTERPLOT_2D, + type="graph", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/scatterplot_2d_axes_dropdown.py b/automappa/pages/mag_refinement/components/scatterplot_2d_axes_dropdown.py new file mode 100644 index 00000000..44d02ed5 --- /dev/null +++ b/automappa/pages/mag_refinement/components/scatterplot_2d_axes_dropdown.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Literal, Protocol +from dash_extensions.enrich import DashProxy, dcc, html +import dash_mantine_components as dmc +from automappa.components import ids + + +class Scatterplot2dAxesDropdownDataSource(Protocol): + def get_scatterplot_2d_axes_options( + self, + ) -> List[Dict[Literal["label", "value", "disabled"], str]]: + ... + + +def render(app: DashProxy, source: Scatterplot2dAxesDropdownDataSource) -> html.Div: + options = [ + dmc.Radio(item["label"], value=item["value"], color="orange") + for item in source.get_scatterplot_2d_axes_options() + ] + return html.Div( + [ + html.Label("Axes:"), + dmc.RadioGroup( + options, + id=ids.AXES_2D_DROPDOWN, + value=ids.AXES_2D_DROPDOWN_VALUE_DEFAULT, + orientation="vertical", + size="sm", + spacing="xs", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/scatterplot_2d_legend_toggle.py b/automappa/pages/mag_refinement/components/scatterplot_2d_legend_toggle.py new file mode 100644 index 00000000..ecdf85bd --- /dev/null +++ b/automappa/pages/mag_refinement/components/scatterplot_2d_legend_toggle.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from dash_extensions.enrich import html +import dash_mantine_components as dmc +from automappa.components import ids + + +# Scatterplot 2D Legend Toggle +def render() -> html.Div: + return html.Div( + [ + html.Label("Legend"), + dmc.Switch( + id=ids.SCATTERPLOT_2D_LEGEND_TOGGLE, + checked=ids.SCATTERPLOT_2D_LEGEND_TOGGLE_VALUE_DEFAULT, + size="md", + color="dark", + offLabel="off", + onLabel="on", + label="Display", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/scatterplot_3d.py b/automappa/pages/mag_refinement/components/scatterplot_3d.py new file mode 100644 index 00000000..4af302ba --- /dev/null +++ b/automappa/pages/mag_refinement/components/scatterplot_3d.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- + +from typing import Dict, List, Literal, Optional, Protocol, Union +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +from plotly import graph_objects as go + +from automappa.components import ids + +from automappa.utils.figures import format_axis_title + + +class Scatterplot3dDataSource(Protocol): + def get_scaterplot3d_records( + self, + metagenome_id: int, + x_axis: 
str, + y_axis: str, + z_axis: str, + color_by_col: str, + headers: Optional[List[str]], + ) -> Dict[ + str, + Dict[Literal["x", "y", "z", "marker_size", "text"], List[Union[float, str]]], + ]: + ... + + +def get_hovertemplate(x_axis_label: str, y_axis_label: str, z_axis_label: str) -> str: + x_hover_label = f"{x_axis_label}: " + "%{x:.2f}" + y_hover_label = f"{y_axis_label}: " + "%{y:.2f}" + z_hover_label = f"{z_axis_label}: " + "%{z:.2f}" + text_hover_label = "Contig: %{text}" + hovertemplate = "<br>
".join( + [text_hover_label, z_hover_label, x_hover_label, y_hover_label] + ) + return hovertemplate + + +def get_traces( + data: Dict[ + str, + Dict[Literal["x", "y", "z", "marker_size", "text"], List[Union[float, str]]], + ], + hovertemplate: Optional[str] = "Contig: %{text}", +) -> List[go.Scatter3d]: + return [ + go.Scatter3d( + x=trace["x"], + y=trace["y"], + z=trace["z"], + text=trace["text"], # contig header + name=name, # groupby (color by column) value + mode="markers", + marker=dict(size=trace["marker_size"], line=dict(width=0.1, color="black")), + opacity=0.45, + hoverinfo="all", + hovertemplate=hovertemplate, + ) + for name, trace in data.items() + ] + + +def render(app: DashProxy, source: Scatterplot3dDataSource) -> html.Div: + @app.callback( + Output(ids.SCATTERPLOT_3D, "figure"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.AXES_2D_DROPDOWN, "value"), + Input(ids.SCATTERPLOT_3D_ZAXIS_DROPDOWN, "value"), + Input(ids.SCATTERPLOT_3D_LEGEND_TOGGLE, "checked"), + Input(ids.COLOR_BY_COLUMN_DROPDOWN, "value"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + ], + ) + def scatterplot_3d_figure_callback( + metagenome_id: int, + axes_columns: str, + z_axis: str, + show_legend: bool, + color_by_col: str, + selected_contigs: Dict[str, List[Dict[str, str]]], + ) -> go.Figure: + headers = ( + {point["text"] for point in selected_contigs["points"]} + if selected_contigs + else None + ) + if headers and len(headers) == 1: + raise PreventUpdate + x_axis, y_axis = axes_columns.split("|") + traces_data = source.get_scaterplot3d_records( + metagenome_id=metagenome_id, + x_axis=x_axis, + y_axis=y_axis, + z_axis=z_axis, + color_by_col=color_by_col, + headers=headers, + ) + x_axis_title, y_axis_title, z_axis_title, color_by_col_title = map( + format_axis_title, [x_axis, y_axis, z_axis, color_by_col] + ) + hovertemplate = get_hovertemplate(x_axis_title, y_axis_title, z_axis_title) + traces = get_traces(traces_data, hovertemplate) + legend = go.layout.Legend( + title=color_by_col_title, x=1, y=1, visible=show_legend + ) + layout = go.Layout( + legend=legend, + scene=dict( + xaxis=dict(title=x_axis_title), + yaxis=dict(title=y_axis_title), + zaxis=dict(title=z_axis_title), + ), + autosize=True, + margin=dict(r=0, b=0, l=0, t=25), + hovermode="closest", + ) + fig = go.Figure(data=traces, layout=layout) + return fig + + graph_config = { + "toImageButtonOptions": dict( + format="svg", + filename="mag-refinement-scatterplot3d-figure", + ), + "displayModeBar": True, + "displaylogo": False, + } + return html.Div( + [ + html.Label("Figure 2: 3D Metagenome Overview"), + dcc.Loading( + dcc.Graph( + id=ids.SCATTERPLOT_3D, + clear_on_unhover=True, + config=graph_config, + ), + id=ids.LOADING_SCATTERPLOT_3D, + type="graph", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/scatterplot_3d_legend_toggle.py b/automappa/pages/mag_refinement/components/scatterplot_3d_legend_toggle.py new file mode 100644 index 00000000..275ccd21 --- /dev/null +++ b/automappa/pages/mag_refinement/components/scatterplot_3d_legend_toggle.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import dash_mantine_components as dmc +from dash_extensions.enrich import html +from automappa.components import ids + + +# Scatterplot 3D Legend Toggle +def render() -> html.Div: + return html.Div( + [ + html.Label("Legend"), + dmc.Switch( + id=ids.SCATTERPLOT_3D_LEGEND_TOGGLE, + checked=ids.SCATTERPLOT_2D_LEGEND_TOGGLE_VALUE_DEFAULT, + size="md", + color="dark", + offLabel="off", + onLabel="on", 
+ label="Display", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/scatterplot_3d_zaxis_dropdown.py b/automappa/pages/mag_refinement/components/scatterplot_3d_zaxis_dropdown.py new file mode 100644 index 00000000..710c92a2 --- /dev/null +++ b/automappa/pages/mag_refinement/components/scatterplot_3d_zaxis_dropdown.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import html, DashProxy +import dash_mantine_components as dmc +from automappa.components import ids +from typing import Protocol, List, Literal, Dict + + +class Scatterplot3dDropdownOptionsDataSource(Protocol): + def get_scatterplot_3d_zaxis_dropdown_options( + self, + ) -> List[Dict[Literal["label", "value"], str]]: + ... + + +def render(app: DashProxy, source: Scatterplot3dDropdownOptionsDataSource) -> html.Div: + options = source.get_scatterplot_3d_zaxis_dropdown_options() + radio_items = [ + dmc.Radio(option["label"], value=option["value"], color="orange") + for option in options + ] + return html.Div( + [ + html.Label("Z-axis:"), + dmc.RadioGroup( + radio_items, + id=ids.SCATTERPLOT_3D_ZAXIS_DROPDOWN, + value=ids.SCATTERPLOT_3D_ZAXIS_DROPDOWN_VALUE_DEFAULT, + spacing="xs", + size="sm", + orientation="vertical", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/settings_button.py b/automappa/pages/mag_refinement/components/settings_button.py new file mode 100644 index 00000000..9b3c0224 --- /dev/null +++ b/automappa/pages/mag_refinement/components/settings_button.py @@ -0,0 +1,25 @@ +import dash_mantine_components as dmc + +from dash_iconify import DashIconify + +from dash_extensions.enrich import DashProxy, html +from automappa.components import ids +from automappa.pages.mag_refinement.components import settings_offcanvas + + +def render(app: DashProxy, source) -> html.Div: + return html.Div( + [ + dmc.Button( + "Settings", + id=ids.SETTINGS_BUTTON, + n_clicks=0, + size="md", + leftIcon=[DashIconify(icon="clarity:settings-line")], + variant="gradient", + gradient={"from": "#CA2270", "to": "#F36E2D"}, + fullWidth=True, + ), + settings_offcanvas.render(app, source), + ] + ) diff --git a/automappa/pages/mag_refinement/components/settings_offcanvas.py b/automappa/pages/mag_refinement/components/settings_offcanvas.py new file mode 100644 index 00000000..3bd61f3c --- /dev/null +++ b/automappa/pages/mag_refinement/components/settings_offcanvas.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +import dash_bootstrap_components as dbc + +from dash_extensions.enrich import DashProxy, Input, Output, State +import dash_mantine_components as dmc + +from automappa.components import ids +from automappa.pages.mag_refinement.components import ( + binning_refinements_download_button, + color_by_col_dropdown, + scatterplot_2d_legend_toggle, + scatterplot_2d_axes_dropdown, + binning_refinements_clear_button, + scatterplot_3d_zaxis_dropdown, + scatterplot_3d_legend_toggle, + taxa_rank_dropdown, +) + + +def render(app: DashProxy, source) -> dbc.Offcanvas: + @app.callback( + Output(ids.SETTINGS_OFFCANVAS, "opened"), + Input(ids.SETTINGS_BUTTON, "n_clicks"), + [State(ids.SETTINGS_OFFCANVAS, "opened")], + ) + def toggle_offcanvas(n1: int, opened: bool) -> bool: + if n1: + return not opened + return opened + + return dmc.Drawer( + [ + dbc.Accordion( + [ + dbc.AccordionItem( + dmc.Stack( + [ + dmc.Group( + [ + scatterplot_2d_axes_dropdown.render( + app, source + ), + scatterplot_2d_legend_toggle.render(), + ], + spacing="xl", + ), + color_by_col_dropdown.render(app, 
source), + ] + ), + title="Figure 1: 2D Metagenome Overview", + ), + dbc.AccordionItem( + dmc.Group( + [ + scatterplot_3d_zaxis_dropdown.render(app, source), + scatterplot_3d_legend_toggle.render(), + ], + position="left", + spacing="xl", + ), + title="Figure 2: 3D Metagenome Overview", + ), + dbc.AccordionItem( + [ + dmc.Stack(taxa_rank_dropdown.render(app, source)), + ], + title="Figure 3: Taxonomic Distribution", + ), + ], + start_collapsed=True, + flush=True, + ), + dmc.Space(h=15), + dmc.Divider( + label="Get MAG refinements data", + labelPosition="center", + ), + dmc.Space(h=10), + dmc.Group( + [ + dmc.Space(w=10), + binning_refinements_download_button.render(app, source), + ] + ), + dmc.Space(h=15), + dmc.Divider( + label=dmc.Text("Danger zone", weight=700), + labelPosition="center", + color="red", + size="md", + ), + dmc.Space(h=10), + dmc.Group( + [dmc.Space(w=10), binning_refinements_clear_button.render(app, source)] + ), + ], + id=ids.SETTINGS_OFFCANVAS, + title="Settings", + opened=False, + position="right", + size=420, + ) diff --git a/automappa/pages/mag_refinement/components/taxa_rank_dropdown.py b/automappa/pages/mag_refinement/components/taxa_rank_dropdown.py new file mode 100644 index 00000000..41b2a0ba --- /dev/null +++ b/automappa/pages/mag_refinement/components/taxa_rank_dropdown.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Literal, Protocol +import dash_mantine_components as dmc +from dash_extensions.enrich import html, DashProxy +from automappa.components import ids + + +class TaxonomyDistributionDropdownDataSource(Protocol): + def get_taxonomy_distribution_dropdown_options( + self, + ) -> List[Dict[Literal["label", "value"], str]]: + ... + + +def render(app: DashProxy, source: TaxonomyDistributionDropdownDataSource) -> html.Div: + options = source.get_taxonomy_distribution_dropdown_options() + radios = [ + dmc.Radio(option["label"], value=option["value"], color="orange") + for option in options + ] + return html.Div( + [ + html.Label("Distribute taxa by rank:"), + dmc.RadioGroup( + radios, + id=ids.TAXONOMY_DISTRIBUTION_DROPDOWN, + value=ids.TAXONOMY_DISTRIBUTION_DROPDOWN_VALUE_DEFAULT, + orientation="vertical", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/components/taxonomy_distribution.py b/automappa/pages/mag_refinement/components/taxonomy_distribution.py new file mode 100644 index 00000000..9b11edb4 --- /dev/null +++ b/automappa/pages/mag_refinement/components/taxonomy_distribution.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from typing import Dict, List, Optional, Protocol +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +import pandas as pd +from plotly import graph_objects as go + +# from automappa.data.source import SampleTables +from automappa.utils.figures import ( + taxonomy_sankey, +) + +from automappa.components import ids + + +class TaxonomyDistributionDataSource(Protocol): + def get_sankey_records( + self, + metagenome_id: int, + headers: Optional[List[str]], + selected_rank: Optional[str], + ) -> pd.DataFrame: + ... 
+ + +def render(app: DashProxy, source: TaxonomyDistributionDataSource) -> html.Div: + @app.callback( + Output(ids.TAXONOMY_DISTRIBUTION, "figure"), + [ + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.SCATTERPLOT_2D_FIGURE, "selectedData"), + Input(ids.TAXONOMY_DISTRIBUTION_DROPDOWN, "value"), + ], + ) + def taxonomy_distribution_figure_callback( + metagenome_id: int, + selected_contigs: Dict[str, List[Dict[str, str]]], + selected_rank: str, + ) -> go.Figure: + if selected_contigs and selected_contigs["points"]: + headers = {point["text"] for point in selected_contigs["points"]} + else: + headers = None + df = source.get_sankey_records( + metagenome_id, headers=headers, selected_rank=selected_rank + ) + fig = taxonomy_sankey(df) + return fig + + return html.Div( + [ + html.Label("Figure 3: Taxonomic Distribution"), + dcc.Loading( + dcc.Graph( + id=ids.TAXONOMY_DISTRIBUTION, + config={ + "displayModeBar": False, + "displaylogo": False, + "staticPlot": False, + }, + ), + id=ids.LOADING_TAXONOMY_DISTRIBUTION, + type="graph", + ), + ] + ) diff --git a/automappa/pages/mag_refinement/layout.py b/automappa/pages/mag_refinement/layout.py new file mode 100644 index 00000000..deaff27e --- /dev/null +++ b/automappa/pages/mag_refinement/layout.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +from dash_extensions.enrich import DashBlueprint +import dash_bootstrap_components as dbc +import dash_mantine_components as dmc +from automappa.pages.mag_refinement.source import RefinementDataSource +from automappa.components import ids +from automappa.pages.mag_refinement.components import ( + marker_symbols_legend, + scatterplot_2d, + settings_button, + save_selection_button, + hide_selections_switch, + mag_metrics_table, + taxonomy_distribution, + scatterplot_3d, + refinements_table, + mag_refinement_coverage_boxplot, + mag_refinement_gc_content_boxplot, + mag_refinement_length_boxplot, + # contig_cytoscape, # TODO + coverage_range_slider, +) + + +def render(source: RefinementDataSource) -> DashBlueprint: + app = DashBlueprint() + app.name = ids.MAG_REFINEMENT_TAB_ID + app.icon = "la:brush" + app.description = ( + "Automappa MAG refinement page to manually inspect genome binning results." 
+ ) + app.title = "Automappa MAG refinement" + + app.layout = dbc.Container( + children=[ + dmc.Space(h=10), + dmc.Affix( + settings_button.render(app, source), position=dict(bottom=10, left=10) + ), + dmc.Affix( + save_selection_button.render(app, source), + position=dict(bottom=10, left=145), + ), + dbc.Row( + [ + dbc.Col(marker_symbols_legend.render(), width=9, align="center"), + dbc.Col(width=3), + ], + justify="center", + ), + dbc.Row( + [ + dbc.Col(scatterplot_2d.render(app, source), width=9), + dbc.Col(mag_metrics_table.render(app, source), width=3), + ] + ), + dbc.Row( + [ + dbc.Col(coverage_range_slider.render(app, source), width=9), + dbc.Col( + hide_selections_switch.render(app, source), + width=3, + align="center", + ), + ] + ), + dbc.Row( + [ + dbc.Col(taxonomy_distribution.render(app, source), width=7), + dbc.Col(scatterplot_3d.render(app, source), width=5), + ] + ), + dbc.Row( + [ + dbc.Col( + mag_refinement_coverage_boxplot.render(app, source), width=4 + ), + dbc.Col( + mag_refinement_gc_content_boxplot.render(app, source), width=4 + ), + dbc.Col(mag_refinement_length_boxplot.render(app, source), width=4), + ] + ), + # TODO Uncomment when cytoscape functionality implemented + # dbc.Row( + # [dbc.Col(contig_cytoscape.render(app, source), width=12)], + # justify="center", + # ), + dbc.Row(dbc.Col(refinements_table.render(app, source), width=12)), + ], + fluid=True, + ) + return app diff --git a/automappa/pages/mag_refinement/source.py b/automappa/pages/mag_refinement/source.py new file mode 100644 index 00000000..cd0071f3 --- /dev/null +++ b/automappa/pages/mag_refinement/source.py @@ -0,0 +1,705 @@ +#!/usr/bin/env python + +import logging +import pandas as pd +from pydantic import BaseModel +from typing import Dict, List, Literal, Optional, Set, Tuple, Union + +from sqlmodel import Session, and_, or_, select, func + +from automappa.data.database import engine +from automappa.data.models import ( + Metagenome, + Contig, + Marker, + CytoscapeConnection, + Refinement, +) +from automappa.data.schemas import ContigSchema +from datetime import datetime + +logger = logging.getLogger(__name__) + +MARKER_SET_SIZE = 139 + + +class RefinementDataSource(BaseModel): + def get_sankey_records( + self, + metagenome_id: int, + headers: Optional[List[str]], + selected_rank: Literal[ + "superkingdom", "phylum", "class", "order", "family", "genus", "species" + ] = ContigSchema.SPECIES, + ) -> pd.DataFrame: + ranks = [ + "superkingdom", + "phylum", + "class", + "order", + "family", + "genus", + "species", + ] + ranks = ranks[: ranks.index(selected_rank) + 1] + model_ranks = { + "superkingdom": Contig.superkingdom, + "phylum": Contig.phylum, + "class": Contig.klass, + "order": Contig.order, + "family": Contig.family, + "genus": Contig.genus, + "species": Contig.species, + } + selections = [model_ranks.get(rank) for rank in ranks] + selections.insert(0, Contig.header) + with Session(engine) as session: + statement = ( + select(*selections) + .join(Metagenome) + .where(Metagenome.id == metagenome_id) + ) + if headers: + statement = statement.where(Contig.header.in_(headers)) + results = session.exec(statement).all() + + schema_ranks = { + "superkingdom": ContigSchema.DOMAIN, + "phylum": ContigSchema.PHYLUM, + "class": ContigSchema.CLASS, + "order": ContigSchema.ORDER, + "family": ContigSchema.FAMILY, + "genus": ContigSchema.GENUS, + "species": ContigSchema.SPECIES, + } + columns = [schema_ranks[rank] for rank in ranks] + columns.insert(0, ContigSchema.HEADER) + + df = pd.DataFrame.from_records( + 
results, + index=ContigSchema.HEADER, + columns=columns, + ).fillna("unclassified") + + for rank in df.columns: + df[rank] = df[rank].map(lambda taxon: f"{rank[0]}_{taxon}") + + return df + + def get_coverage_min_max_values(self, metagenome_id: int) -> Tuple[float, float]: + with Session(engine) as session: + statement = select( + func.min(Contig.coverage), func.max(Contig.coverage) + ).where(Contig.metagenome_id == metagenome_id) + min_cov, max_cov = session.exec(statement).first() + return min_cov, max_cov + + def get_contig_headers_from_coverage_range( + self, metagenome_id: int, coverage_range: Tuple[float, float] + ) -> Set[str]: + min_cov, max_cov = coverage_range + with Session(engine) as session: + headers = session.exec( + select([Contig.header]).where( + Contig.metagenome_id == metagenome_id, + Contig.coverage >= min_cov, + Contig.coverage <= max_cov, + ) + ).all() + return set(headers) + + def get_user_refinements_contig_headers(self, metagenome_id: int) -> Set[str]: + stmt = select(Contig.header).where( + Contig.metagenome_id == metagenome_id, + Contig.refinements.any( + and_( + Refinement.initial_refinement == False, + Refinement.outdated == False, + ) + ), + ) + with Session(engine) as session: + headers = session.exec(stmt).all() + return set(headers) + + def get_scatterplot2d_records( + self, + metagenome_id: int, + x_axis: str, + y_axis: str, + color_by_col: str, + headers: Optional[List[str]] = [], + ) -> Dict[ + Literal["x", "y", "marker_symbol", "marker_size", "text", "customdata"], + List[Union[float, str, Tuple[float, float, int]]], + ]: + axes = { + ContigSchema.LENGTH: Contig.length, + ContigSchema.COVERAGE: Contig.coverage, + ContigSchema.GC_CONTENT: Contig.gc_content, + ContigSchema.X_1: Contig.x_1, + ContigSchema.X_2: Contig.x_2, + } + # Set color by column + categoricals = { + ContigSchema.CLUSTER: Contig.cluster, + ContigSchema.SUPERKINGDOM: Contig.superkingdom, + ContigSchema.PHYLUM: Contig.phylum, + ContigSchema.CLASS: Contig.klass, + ContigSchema.ORDER: Contig.order, + ContigSchema.FAMILY: Contig.family, + ContigSchema.GENUS: Contig.genus, + ContigSchema.SPECIES: Contig.species, + } + + name_select = categoricals[color_by_col] + x_select = axes[x_axis] + y_select = axes[y_axis] + stmt = select( + x_select, + y_select, + Contig.marker_size, + Contig.marker_symbol, + Contig.coverage, + Contig.gc_content, + Contig.length, + Contig.header, + name_select, + ).select_from(Contig) + + if headers: + stmt = stmt.where(Contig.header.in_(headers)) + + stmt = stmt.where(Contig.metagenome_id == metagenome_id) + + # query db + with Session(engine) as session: + results = session.exec(stmt).all() + + # format for traces + data = {} + for ( + x, + y, + marker_size, + marker_symbol, + coverage, + gc_content, + length, + header, + name, + ) in results: + customdata = (coverage, gc_content, length) + if name not in data: + data[name] = dict( + x=[x], + y=[y], + marker_size=[marker_size], + marker_symbol=[marker_symbol], + customdata=[customdata], + text=[header], + ) + else: + data[name]["x"].append(x) + data[name]["y"].append(y) + data[name]["marker_size"].append(marker_size) + data[name]["marker_symbol"].append(marker_symbol) + data[name]["customdata"].append(customdata) + data[name]["text"].append(header) + return data + + def get_scaterplot3d_records( + self, + metagenome_id: int, + x_axis: str, + y_axis: str, + z_axis: str, + color_by_col: str, + headers: Optional[List[str]] = [], + ) -> Dict[ + str, + Dict[Literal["x", "y", "z", "marker_size", "text"], List[Union[float, 
str]]], + ]: + # Set x,y,z axes + axes = { + ContigSchema.LENGTH: Contig.length, + ContigSchema.COVERAGE: Contig.coverage, + ContigSchema.GC_CONTENT: Contig.gc_content, + ContigSchema.X_1: Contig.x_1, + ContigSchema.X_2: Contig.x_2, + } + # Set color by column + categoricals = { + ContigSchema.CLUSTER: Contig.cluster, + ContigSchema.SUPERKINGDOM: Contig.superkingdom, + ContigSchema.PHYLUM: Contig.phylum, + ContigSchema.CLASS: Contig.klass, + ContigSchema.ORDER: Contig.order, + ContigSchema.FAMILY: Contig.family, + ContigSchema.GENUS: Contig.genus, + ContigSchema.SPECIES: Contig.species, + } + name_select = categoricals[color_by_col] + x_select = axes[x_axis] + y_select = axes[y_axis] + z_select = axes[z_axis] + stmt = select( + x_select, + y_select, + z_select, + ( + ( + func.ceil( + (Contig.length - func.min(Contig.length).over()) + / ( + func.max(Contig.length).over() + - func.min(Contig.length).over() + ) + ) + * 2 + + 4 + ).label("marker_size") + ), + Contig.header, + name_select, + ) + + if headers: + stmt = stmt.where(Contig.header.in_(headers)) + + stmt = stmt.where(Contig.metagenome_id == metagenome_id) + with Session(engine) as session: + results = session.exec(stmt).all() + + data = {} + for x, y, z, marker_size, header, name in results: + if name not in data: + data[name] = dict( + x=[x], y=[y], z=[z], marker_size=[marker_size], text=[header] + ) + else: + data[name]["x"].append(x) + data[name]["y"].append(y) + data[name]["z"].append(z) + data[name]["marker_size"].append(marker_size) + data[name]["text"].append(header) + return data + + def get_color_by_column_options(self) -> List[Dict[Literal["label", "value"], str]]: + categoricals = [ + ContigSchema.CLUSTER, + ContigSchema.SUPERKINGDOM, + ContigSchema.PHYLUM, + ContigSchema.CLASS, + ContigSchema.ORDER, + ContigSchema.FAMILY, + ContigSchema.GENUS, + ContigSchema.SPECIES, + ] + return [ + {"label": category.title(), "value": category} for category in categoricals + ] + + def get_scatterplot_2d_axes_options( + self, + ) -> List[Dict[Literal["label", "value", "disabled"], str]]: + options = [] + axes_combinations = [ + (ContigSchema.X_1, ContigSchema.X_2), + (ContigSchema.COVERAGE, ContigSchema.GC_CONTENT), + ] + for x_axis, y_axis in axes_combinations: + x_axis_label = ( + "GC content" if ContigSchema.GC_CONTENT in x_axis else x_axis.title() + ) + y_axis_label = ( + "GC content" if ContigSchema.GC_CONTENT in y_axis else y_axis.title() + ) + label = f"{x_axis_label} vs. 
{y_axis_label}" + value = "|".join([x_axis, y_axis]) + options.append(dict(label=label, value=value)) + return options + + def get_scatterplot_3d_zaxis_dropdown_options( + self, + ) -> List[Dict[Literal["label", "value", "disabled"], str]]: + axes = { + ContigSchema.LENGTH, + ContigSchema.COVERAGE, + ContigSchema.GC_CONTENT, + } + options = [] + for value in axes: + label = "GC content" if ContigSchema.GC_CONTENT in value else value.title() + options.append({"label": label, "value": value}) + + return options + + def get_taxonomy_distribution_dropdown_options( + self, + ) -> List[Dict[Literal["label", "value"], str]]: + ranks = [ + ContigSchema.CLASS, + ContigSchema.ORDER, + ContigSchema.FAMILY, + ContigSchema.GENUS, + ContigSchema.SPECIES, + ] + return [{"label": rank.title(), "value": rank} for rank in ranks] + + def get_marker_overview( + self, metagenome_id: int + ) -> List[Dict[Literal["metric", "metric_value"], Union[str, int, float]]]: + marker_count_stmt = ( + select(func.count(Marker.id)) + .join(Contig) + .where(Contig.metagenome_id == metagenome_id) + ) + marker_contig_count_stmt = ( + select(func.count(func.distinct(Marker.contig_id))) + .join(Contig) + .where(Contig.metagenome_id == metagenome_id) + ) + + with Session(engine) as session: + total_markers = session.exec(marker_count_stmt).first() + marker_contigs_count = session.exec(marker_contig_count_stmt).first() or 0 + + markers_sets = total_markers // MARKER_SET_SIZE + return [ + {"metric": "Total Markers", "metric_value": total_markers}, + {"metric": "Marker Set Size", "metric_value": MARKER_SET_SIZE}, + {"metric": "Approx. Marker Sets", "metric_value": markers_sets}, + {"metric": "Marker Contigs", "metric_value": marker_contigs_count}, + ] + + def get_mag_metrics_row_data( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Dict[Literal["metric", "metric_value"], Union[str, int, float]]]: + contig_count_stmt = select(func.count(Contig.id)).where( + Contig.metagenome_id == metagenome_id + ) + contig_length_stmt = select(func.sum(Contig.length)).where( + Contig.metagenome_id == metagenome_id + ) + marker_contig_count_stmt = ( + select(func.count(func.distinct(Marker.contig_id))) + .join(Contig) + .where(Contig.metagenome_id == metagenome_id) + ) + # - single-copy marker contig count + single_copy_stmt = ( + select(Contig.id) + .where(Contig.metagenome_id == metagenome_id) + .join(Marker) + .group_by(Contig.id) + .having(func.count(Marker.id) == 1) + ) + # - multi-copy marker contig count + multi_copy_stmt = ( + select(Contig.id) + .where(Contig.metagenome_id == metagenome_id) + .join(Marker) + .group_by(Contig.id) + .having(func.count(Marker.id) > 1) + ) + marker_count_stmt = ( + select(func.count(Marker.id)) + .join(Contig) + .where(Contig.metagenome_id == metagenome_id) + ) + unique_marker_stmt = ( + select(Marker.sacc) + .join(Contig) + .distinct() + .where(Contig.metagenome_id == metagenome_id) + ) + redundant_marker_sacc_stmt = ( + select(Marker.sacc) + .join(Contig) + .where(Contig.metagenome_id == metagenome_id) + .group_by(Marker.sacc) + .having(func.count(Marker.id) > 1) + ) + if headers: + contig_count_stmt = contig_count_stmt.where(Contig.header.in_(headers)) + contig_length_stmt = contig_length_stmt.where(Contig.header.in_(headers)) + marker_contig_count_stmt = marker_contig_count_stmt.where( + Contig.header.in_(headers) + ) + multi_copy_stmt = multi_copy_stmt.where(Contig.header.in_(headers)) + single_copy_stmt = single_copy_stmt.where(Contig.header.in_(headers)) + redundant_marker_sacc_stmt = 
redundant_marker_sacc_stmt.where( + Contig.header.in_(headers) + ) + marker_count_stmt = marker_count_stmt.where(Contig.header.in_(headers)) + unique_marker_stmt = unique_marker_stmt.where(Contig.header.in_(headers)) + + with Session(engine) as session: + contig_count = session.exec(contig_count_stmt).first() or 0 + length_sum = session.exec(contig_length_stmt).first() or 0 + marker_contigs_count = session.exec(marker_contig_count_stmt).first() or 0 + single_copy_contig_count = ( + session.exec(select(func.count()).select_from(single_copy_stmt)).first() + or 0 + ) + multi_copy_contig_count = ( + session.exec(select(func.count()).select_from(multi_copy_stmt)).first() + or 0 + ) + markers_count = session.exec(marker_count_stmt).first() or 0 + unique_marker_count = session.exec( + select(func.count()).select_from(unique_marker_stmt) + ).first() + redundant_marker_sacc = session.exec(redundant_marker_sacc_stmt).all() + + completeness = round(unique_marker_count / MARKER_SET_SIZE * 100, 2) + purity = ( + round(unique_marker_count / markers_count * 100, 2) if markers_count else 0 + ) + length_sum_mbp = round(length_sum / 1_000_000, 3) + + row_data = [ + {"metric": "Contigs", "metric_value": contig_count}, + {"metric": "Length Sum (Mbp)", "metric_value": length_sum_mbp}, + { + "metric": "Marker Contigs", + "metric_value": marker_contigs_count, + }, + { + "metric": "Multi-Marker Contigs", + "metric_value": multi_copy_contig_count, + }, + { + "metric": "Single-Marker Contigs", + "metric_value": single_copy_contig_count, + }, + {"metric": "Markers Count", "metric_value": markers_count}, + { + "metric": "Redundant Markers", + "metric_value": len(redundant_marker_sacc), + }, + { + "metric": "Redundant Marker Accessions", + "metric_value": ", ".join(redundant_marker_sacc), + }, + ] + if headers: + row_data.insert(0, {"metric": "Purity (%)", "metric_value": purity}) + row_data.insert( + 0, {"metric": "Completeness (%)", "metric_value": completeness} + ) + return row_data + + def get_coverage_boxplot_records( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Tuple[str, List[float]]]: + with Session(engine) as session: + stmt = select(Contig.coverage).where(Contig.metagenome_id == metagenome_id) + if headers: + stmt = stmt.where(Contig.header.in_(headers)) + coverages = session.exec(stmt).all() + coverages = [round(coverage, 2) for coverage in coverages] + return [(ContigSchema.COVERAGE.title(), coverages)] + + def get_gc_content_boxplot_records( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Tuple[str, List[float]]]: + with Session(engine) as session: + stmt = select(Contig.gc_content).where( + Contig.metagenome_id == metagenome_id + ) + if headers: + stmt = stmt.where(Contig.header.in_(headers)) + gc_contents = session.exec(stmt).all() + + gc_contents = [round(gc_content, 2) for gc_content in gc_contents] + return [("GC Content", gc_contents)] + + def get_length_boxplot_records( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List[Tuple[str, List[int]]]: + with Session(engine) as session: + stmt = select(Contig.length).where(Contig.metagenome_id == metagenome_id) + if headers: + stmt = stmt.where(Contig.header.in_(headers)) + lengths = session.exec(stmt).all() + return [(ContigSchema.LENGTH.title(), lengths)] + + def get_cytoscape_elements( + self, metagenome_id: int, headers: Optional[List[str]] = [] + ) -> List[ + Dict[ + Literal["data"], + Dict[ + Literal["id", "label", "source", "target", "connections"], + Union[str, int], + ], + ] + ]: + stmt 
= ( + select( + CytoscapeConnection.node1, + CytoscapeConnection.node2, + CytoscapeConnection.connections, + ) + .select_from(CytoscapeConnection) + .where(CytoscapeConnection.metagenome_id == metagenome_id) + ) + if headers: + start_nodes = {f"{header}s" for header in headers} + end_nodes = {f"{header}e" for header in headers} + nodes = start_nodes.union(end_nodes) + stmt = stmt.where( + or_( + CytoscapeConnection.node1.in_(nodes), + CytoscapeConnection.node2.in_(nodes), + ) + ) + with Session(engine) as session: + records = session.exec(stmt).all() + + src_nodes = {src_node for src_node, *_ in records} + target_nodes = {target_node for _, target_node, _ in records} + nodes = [ + dict(data=dict(id=node, label=node)) + for node in src_nodes.union(target_nodes) + ] + edges = [ + dict( + data=dict(source=src_node, target=target_node, connections=connections) + ) + for src_node, target_node, connections in records + ] + return nodes + edges + + def get_cytoscape_stylesheet( + self, metagenome_id: int, headers: Optional[List[str]] + ) -> List: + stmt = ( + select( + CytoscapeConnection.node1, + CytoscapeConnection.node2, + CytoscapeConnection.connections, + ) + .select_from(CytoscapeConnection) + .where(CytoscapeConnection.metagenome_id == metagenome_id) + ) + if headers: + start_nodes = {f"{header}s" for header in headers} + end_nodes = {f"{header}e" for header in headers} + nodes = start_nodes.union(end_nodes) + stmt = stmt.where( + or_( + CytoscapeConnection.node1.in_(nodes), + CytoscapeConnection.node2.in_(nodes), + ) + ) + with Session(engine) as session: + records = session.exec(stmt).all() + stylesheet = [ + dict( + selector=f"[label = {node1}]", + style={"line-color": "blue", "opacity": 0.8}, + ) + for node1, *_ in records + ] + stylesheet += [ + dict( + selector=f"[label = {node2}]", + style={"line-color": "blue", "opacity": 0.8}, + ) + for _, node2, _ in records + ] + return stylesheet + + def has_user_refinements(self, metagenome_id: int) -> bool: + with Session(engine) as session: + refinement = session.exec( + select(Refinement).where( + Refinement.metagenome_id == metagenome_id, + Refinement.initial_refinement == False, + Refinement.outdated == False, + ) + ).first() + if refinement: + return True + return False + + def clear_refinements(self, metagenome_id: int) -> int: + with Session(engine) as session: + refinements = session.exec( + select(Refinement).where( + Refinement.metagenome_id == metagenome_id, + Refinement.initial_refinement == False, + ) + ).all() + n_refinements = len(refinements) + for refinement in refinements: + session.delete(refinement) + session.commit() + return n_refinements + + def get_refinements_row_data( + self, metagenome_id: int + ) -> List[ + Dict[ + Literal["refinement_id", "timestamp", "contigs"], + Union[str, int, datetime], + ] + ]: + stmt = select(Refinement).where( + Refinement.metagenome_id == metagenome_id, + Refinement.outdated == False, + Refinement.initial_refinement == False, + ) + data = [] + with Session(engine) as session: + refinements = session.exec(stmt).all() + for refinement in refinements: + row = dict( + refinement_id=refinement.id, + timestamp=refinement.timestamp.strftime("%d-%b-%Y, %H:%M:%S"), + contigs=len(refinement.contigs), + ) + data.append(row) + return data + + def save_selections_to_refinement( + self, metagenome_id: int, headers: List[str] + ) -> None: + with Session(engine) as session: + contigs = session.exec( + select(Contig).where( + Contig.metagenome_id == metagenome_id, Contig.header.in_(headers) + ) + ).all() 
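+            # contigs may already belong to earlier refinements; those groups are
+            # flagged outdated below so only the newest selection stays active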
+ for contig in contigs: + for refinement in contig.refinements: + refinement.outdated = True + refinement = Refinement( + contigs=contigs, + metagenome_id=metagenome_id, + outdated=False, + initial_refinement=False, + ) + session.add(refinement) + session.commit() + + def get_refinements_dataframe(self, metagenome_id: int) -> pd.DataFrame: + stmt = select(Refinement).where( + Refinement.metagenome_id == metagenome_id, + Refinement.outdated == False, + ) + data = [] + with Session(engine) as session: + refinements = session.exec(stmt).all() + for refinement in refinements: + data.append( + dict( + refinement_id=f"refinement_{refinement.id}", + timestamp=refinement.timestamp.strftime("%d-%b-%Y"), + contig=[contig.header for contig in refinement.contigs], + ) + ) + return pd.DataFrame(data).explode("contig") diff --git a/automappa/pages/mag_summary/__init__.py b/automappa/pages/mag_summary/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/pages/mag_summary/components/__init__.py b/automappa/pages/mag_summary/components/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automappa/pages/mag_summary/components/mag_coverage_boxplot.py b/automappa/pages/mag_summary/components/mag_coverage_boxplot.py new file mode 100644 index 00000000..e29e833e --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_coverage_boxplot.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from plotly import graph_objects as go +from typing import Protocol, List, Tuple +from automappa.utils.figures import metric_boxplot +from automappa.components import ids + + +class ClusterCoverageBoxplotDataSource(Protocol): + def get_coverage_boxplot_records( + self, metagenome_id: int, refinement_id: int + ) -> List[Tuple[str, List[float]]]: + ... + + +def render(app: DashProxy, source: ClusterCoverageBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_COVERAGE_BOXPLOT, "figure"), + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.MAG_SELECTION_DROPDOWN, "value"), + prevent_initial_call=True, + ) + def mag_summary_coverage_boxplot_callback( + metagenome_id: int, refinement_id: int + ) -> go.Figure: + data = source.get_coverage_boxplot_records( + metagenome_id, refinement_id=refinement_id + ) + fig = metric_boxplot(data) + return fig + + return html.Div( + dcc.Loading( + id=ids.LOADING_MAG_COVERAGE_BOXPLOT, + children=[ + dcc.Graph( + id=ids.MAG_COVERAGE_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="default", + color="#0479a8", + ) + ) diff --git a/automappa/pages/mag_summary/components/mag_gc_content_boxplot.py b/automappa/pages/mag_summary/components/mag_gc_content_boxplot.py new file mode 100644 index 00000000..0db58036 --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_gc_content_boxplot.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from plotly import graph_objects as go +from typing import Protocol, List, Tuple + +from automappa.utils.figures import metric_boxplot +from automappa.components import ids + + +class GcContentBoxplotDataSource(Protocol): + def get_gc_content_boxplot_records( + self, metagenome_id: int, refinement_id: int + ) -> List[Tuple[str, List[float]]]: + ... 
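Each page component in this patch declares its data dependency as a typing.Protocol like the one above, so render() accepts anything with the right method shapes. A minimal sketch of wiring in a fake source for a test, assuming the hypothetical FakeGcContentSource below (the import path is the module added in this diff):

from typing import List, Tuple

from dash_extensions.enrich import DashProxy

from automappa.pages.mag_summary.components.mag_gc_content_boxplot import render


class FakeGcContentSource:
    # hypothetical stand-in: satisfies GcContentBoxplotDataSource structurally,
    # no Postgres required
    def get_gc_content_boxplot_records(
        self, metagenome_id: int, refinement_id: int
    ) -> List[Tuple[str, List[float]]]:
        return [("GC Content", [37.5, 42.1, 55.0])]


app = DashProxy()
component = render(app, FakeGcContentSource())  # type-checks: the fake matches the Protocol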
+
+
+def render(app: DashProxy, source: GcContentBoxplotDataSource) -> html.Div:
+    @app.callback(
+        Output(ids.MAG_GC_CONTENT_BOXPLOT, "figure"),
+        Input(ids.METAGENOME_ID_STORE, "data"),
+        Input(ids.MAG_SELECTION_DROPDOWN, "value"),
+        prevent_initial_call=True,
+    )
+    def mag_summary_gc_content_boxplot_callback(
+        metagenome_id: int, refinement_id: int
+    ) -> go.Figure:
+        data = source.get_gc_content_boxplot_records(
+            metagenome_id, refinement_id=refinement_id
+        )
+        fig = metric_boxplot(data)
+        return fig
+
+    return html.Div(
+        [
+            dcc.Loading(
+                dcc.Graph(
+                    id=ids.MAG_GC_CONTENT_BOXPLOT,
+                    config={"displayModeBar": False, "displaylogo": False},
+                ),
+                id=ids.LOADING_MAG_GC_CONTENT_BOXPLOT,
+                type="default",
+                color="#0479a8",
+            )
+        ]
+    )
diff --git a/automappa/pages/mag_summary/components/mag_length_boxplot.py b/automappa/pages/mag_summary/components/mag_length_boxplot.py
new file mode 100644
index 00000000..3f346de5
--- /dev/null
+++ b/automappa/pages/mag_summary/components/mag_length_boxplot.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+
+from dash_extensions.enrich import DashProxy, Input, Output, dcc, html
+
+from plotly import graph_objects as go
+
+from typing import Protocol, List, Tuple
+from automappa.utils.figures import metric_boxplot
+from automappa.components import ids
+
+
+class ClusterLengthBoxplotDataSource(Protocol):
+    def get_length_boxplot_records(
+        self, metagenome_id: int, refinement_id: int
+    ) -> List[Tuple[str, List[int]]]:
+        ...
+
+
+def render(app: DashProxy, source: ClusterLengthBoxplotDataSource) -> html.Div:
+    @app.callback(
+        Output(ids.MAG_LENGTH_BOXPLOT, "figure"),
+        Input(ids.METAGENOME_ID_STORE, "data"),
+        Input(ids.MAG_SELECTION_DROPDOWN, "value"),
+        prevent_initial_call=True,
+    )
+    def mag_summary_length_boxplot_callback(
+        metagenome_id: int, refinement_id: int
+    ) -> go.Figure:
+        data = source.get_length_boxplot_records(
+            metagenome_id, refinement_id=refinement_id
+        )
+        fig = metric_boxplot(data)
+        return fig
+
+    return html.Div(
+        children=[
+            dcc.Loading(
+                id=ids.LOADING_MAG_LENGTH_BOXPLOT,
+                children=[
+                    dcc.Graph(
+                        id=ids.MAG_LENGTH_BOXPLOT,
+                        config={"displayModeBar": False, "displaylogo": False},
+                    )
+                ],
+                type="dot",
+                color="#646569",
+            )
+        ]
+    )
diff --git a/automappa/pages/mag_summary/components/mag_metrics_barplot.py b/automappa/pages/mag_summary/components/mag_metrics_barplot.py
new file mode 100644
index 00000000..9965569b
--- /dev/null
+++ b/automappa/pages/mag_summary/components/mag_metrics_barplot.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+from typing import Protocol, List, Tuple
+
+from dash_extensions.enrich import DashProxy, Input, Output, dcc, html
+
+from plotly import graph_objects as go
+
+from automappa.utils.figures import metric_barplot
+from automappa.components import ids
+
+
+class ClusterMetricsBarplotDataSource(Protocol):
+    def get_metrics_barplot_records(
+        self, metagenome_id: int, refinement_id: int
+    ) -> Tuple[str, List[float], List[float]]:
+        ...
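The (name, x, y) tuple this protocol returns is unpacked as-is by the new metric_barplot helper added later in this diff in automappa/utils/figures.py. A minimal sketch of that contract, using made-up completeness/purity values:

from plotly import graph_objects as go

# mirrors figures.metric_barplot: one bar trace named after the refinement,
# x holding the metric labels and y their values
name, x, y = ("bin_1 Metrics", ["Completeness", "Purity"], [87.05, 95.2])
fig = go.Figure([go.Bar(x=x, y=y, name=name)])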
+ + +def render(app: DashProxy, source: ClusterMetricsBarplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_METRICS_BARPLOT, "figure"), + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.MAG_SELECTION_DROPDOWN, "value"), + prevent_initial_call=True, + ) + def mag_metrics_callback(metagenome_id: int, refinement_id: int) -> go.Figure: + data = source.get_metrics_barplot_records( + metagenome_id, refinement_id=refinement_id + ) + fig = metric_barplot(data) + return fig + + return html.Div( + children=[ + dcc.Loading( + id=ids.LOADING_MAG_METRICS_BARPLOT, + children=[ + dcc.Graph( + id=ids.MAG_METRICS_BARPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="dot", + color="#646569", + ) + ] + ) diff --git a/automappa/pages/mag_summary/components/mag_overview_coverage_boxplot.py b/automappa/pages/mag_summary/components/mag_overview_coverage_boxplot.py new file mode 100644 index 00000000..115c1482 --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_overview_coverage_boxplot.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from typing import Protocol, List, Tuple, Optional +from plotly import graph_objects as go + +from automappa.utils.figures import metric_boxplot +from automappa.components import ids + + +class OverviewCoverageBoxplotDataSource(Protocol): + def get_coverage_boxplot_records( + self, metagenome_id: int, cluster: Optional[str] + ) -> List[Tuple[str, List[float]]]: + ... + + +def render(app: DashProxy, source: OverviewCoverageBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_OVERVIEW_COVERAGE_BOXPLOT, "figure"), + Input(ids.METAGENOME_ID_STORE, "data"), + ) + def mag_overview_coverage_boxplot_callback(metagenome_id: int) -> go.Figure: + data = source.get_coverage_boxplot_records(metagenome_id) + fig = metric_boxplot(data) + return fig + + return html.Div( + children=[ + dcc.Loading( + id=ids.LOADING_MAG_COVERAGE_BOXPLOT, + children=[ + dcc.Graph( + id=ids.MAG_OVERVIEW_COVERAGE_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="dot", + color="#646569", + ) + ] + ) diff --git a/automappa/pages/mag_summary/components/mag_overview_gc_content_boxplot.py b/automappa/pages/mag_summary/components/mag_overview_gc_content_boxplot.py new file mode 100644 index 00000000..4fd09606 --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_overview_gc_content_boxplot.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +from typing import List, Optional, Protocol, Tuple +from dash.exceptions import PreventUpdate +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from plotly import graph_objects as go + +from automappa.utils.figures import metric_boxplot +from automappa.components import ids + + +class GcContentBoxplotDataSource(Protocol): + def get_gc_content_boxplot_records( + self, metagenome_id: int, cluster: Optional[str] + ) -> List[Tuple[str, List[float]]]: + ... 
+ + +def render(app: DashProxy, source: GcContentBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_OVERVIEW_GC_CONTENT_BOXPLOT, "figure"), + Input(ids.METAGENOME_ID_STORE, "data"), + ) + def mag_overview_gc_content_boxplot_callback(metagenome_id: int) -> go.Figure: + data = source.get_gc_content_boxplot_records(metagenome_id=metagenome_id) + fig = metric_boxplot(data=data) + return fig + + return html.Div( + children=[ + dcc.Loading( + id=ids.LOADING_MAG_OVERVIEW_GC_CONTENT_BOXPLOT, + children=[ + dcc.Graph( + id=ids.MAG_OVERVIEW_GC_CONTENT_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="dot", + color="#646569", + ) + ] + ) diff --git a/automappa/pages/mag_summary/components/mag_overview_length_boxplot.py b/automappa/pages/mag_summary/components/mag_overview_length_boxplot.py new file mode 100644 index 00000000..b6753bf7 --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_overview_length_boxplot.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html +from typing import Protocol, Optional, List, Tuple +from plotly import graph_objects as go + +from automappa.utils.figures import metric_boxplot +from automappa.components import ids + + +class LengthOverviewBoxplotDataSource(Protocol): + def get_length_boxplot_records( + self, metagenome_id: int, cluster: Optional[str] + ) -> List[Tuple[str, List[int]]]: + ... + + +def render(app: DashProxy, source: LengthOverviewBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_OVERVIEW_LENGTH_BOXPLOT, "figure"), + Input(ids.METAGENOME_ID_STORE, "data"), + ) + def mag_overview_length_boxplot_callback(metagenome_id: int) -> go.Figure: + data = source.get_length_boxplot_records(metagenome_id) + fig = metric_boxplot(data) + return fig + + return html.Div( + children=[ + dcc.Loading( + id=ids.LOADING_MAG_OVERVIEW_LENGTH_BOXPLOT, + children=[ + dcc.Graph( + id=ids.MAG_OVERVIEW_LENGTH_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ) + ], + type="default", + color="#0479a8", + ) + ] + ) diff --git a/automappa/pages/mag_summary/components/mag_overview_metrics_boxplot.py b/automappa/pages/mag_summary/components/mag_overview_metrics_boxplot.py new file mode 100644 index 00000000..288b8d44 --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_overview_metrics_boxplot.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +from typing import List, Protocol, Tuple +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from plotly import graph_objects as go + +from automappa.utils.figures import metric_boxplot +from automappa.components import ids + + +class OverviewMetricsBoxplotDataSource(Protocol): + def get_completeness_purity_boxplot_records( + self, metagenome_id: int + ) -> List[Tuple[str, List[float]]]: + ... 
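All of the boxplot sources on this page share one record convention: a list of (trace name, values) tuples, which the reworked metric_boxplot in automappa/utils/figures.py (later in this diff) maps to one go.Box per tuple. A minimal sketch of that contract with invented values:

from plotly import graph_objects as go

data = [("Completeness", [88.4, 92.1, 73.0]), ("Purity", [97.5, 91.2, 99.3])]
# one vertical box per metric; boxmean=True matches metric_boxplot's default
fig = go.Figure(
    data=[go.Box(y=values, name=name, boxmean=True) for name, values in data]
)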
+ + +def render(app: DashProxy, source: OverviewMetricsBoxplotDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_OVERVIEW_METRICS_BOXPLOT, "figure"), + Input(ids.METAGENOME_ID_STORE, "data"), + ) + def subset_by_selected_mag(metagenome_id: int) -> go.Figure: + data = source.get_completeness_purity_boxplot_records(metagenome_id) + fig = metric_boxplot(data=data) + return fig + + return html.Div( + dcc.Loading( + dcc.Graph( + id=ids.MAG_OVERVIEW_METRICS_BOXPLOT, + config={"displayModeBar": False, "displaylogo": False}, + ), + id=ids.LOADING_MAG_OVERVIEW_METRICS_BOXPLOT, + type="default", + color="#0479a8", + ) + ) diff --git a/automappa/pages/mag_summary/components/mag_selection_dropdown.py b/automappa/pages/mag_summary/components/mag_selection_dropdown.py new file mode 100644 index 00000000..25b908fb --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_selection_dropdown.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +from typing import Dict, List, Protocol, Literal +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from automappa.components import ids + + +class ClusterSelectionDropdownOptionsDataSource(Protocol): + def get_refinement_selection_dropdown_options( + self, metagenome_id: int + ) -> List[Dict[Literal["label", "value"], str]]: + ... + + +def render( + app: DashProxy, source: ClusterSelectionDropdownOptionsDataSource +) -> html.Div: + @app.callback( + Output(ids.MAG_SELECTION_DROPDOWN, "options"), + Input(ids.METAGENOME_ID_STORE, "data"), + ) + def mag_selection_dropdown_options_callback( + metagenome_id: int, + ) -> List[Dict[Literal["label", "value"], str]]: + options = source.get_refinement_selection_dropdown_options(metagenome_id) + return options + + return html.Div( + [ + html.Label("MAG Selection Dropdown"), + dcc.Dropdown( + id=ids.MAG_SELECTION_DROPDOWN, + clearable=True, + placeholder="Select a MAG from this dropdown for a MAG-specific summary", + ), + ] + ) diff --git a/automappa/pages/mag_summary/components/mag_summary_stats_datatable.py b/automappa/pages/mag_summary/components/mag_summary_stats_datatable.py new file mode 100644 index 00000000..3ac22888 --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_summary_stats_datatable.py @@ -0,0 +1,123 @@ +# -*- coding: utf-8 -*- +from typing import Protocol, List, Union, Literal, Dict +import dash_ag_grid as dag +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from automappa.components import ids + + +class SummaryStatsTableDataSource(Protocol): + def get_mag_stats_summary_row_data( + self, metagenome_id: int + ) -> List[ + Dict[ + Literal[ + "refinement_id", + "refinement_label", + "length_sum_mbp", + "completeness", + "purity", + "contig_count", + ], + Union[str, int, float], + ] + ]: + ... 
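The render function below colors grid cells by MIMAG-like quality tiers, with the cutoffs given in its inline comments. As a rough plain-Python restatement of that tiering (a hypothetical helper, not part of the patch; note the grid styles each cell independently, whereas this requires both thresholds together):

def mimag_tier(completeness: float, purity: float) -> str:
    # cutoffs taken from the styleConditions comments below
    if completeness > 90 and purity > 95:
        return "high-quality"
    if completeness >= 50 and purity > 90:
        return "medium-quality"
    return "low-quality"


assert mimag_tier(92.0, 96.5) == "high-quality"
assert mimag_tier(60.0, 92.0) == "medium-quality"
assert mimag_tier(40.0, 85.0) == "low-quality"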
+ + +def render(app: DashProxy, source: SummaryStatsTableDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_SUMMARY_STATS_DATATABLE, "rowData"), + Input(ids.METAGENOME_ID_STORE, "data"), + ) + def mag_summary_stats_datatable_callback( + metagenome_id: int, + ) -> List[ + Dict[ + Literal[ + "refinement_id", + "refinement_label", + "length_sum_mbp", + "completeness", + "purity", + "contig_count", + ], + Union[str, int, float], + ] + ]: + row_data = source.get_mag_stats_summary_row_data(metagenome_id) + return row_data + + GREEN = "#2FCC90" + YELLOW = "#f2e530" + ORANGE = "#f57600" + MIMAG_STYLE_CONDITIONS = { + "styleConditions": [ + # High-quality >90% complete > 95% pure + { + "condition": "params.data.completeness == 'Completeness (%)' && params.value > 90", + "style": {"backgroundColor": GREEN}, + }, + { + "condition": "params.data.purity == 'Purity (%)' && params.value > 95", + "style": {"backgroundColor": GREEN}, + }, + # Medium-quality >=50% complete > 90% pure + { + "condition": "params.data.completeness == 'Completeness (%)' && params.value >= 50", + "style": {"backgroundColor": YELLOW}, + }, + { + "condition": "params.data.purity == 'Purity (%)' && params.value > 90", + "style": {"backgroundColor": YELLOW}, + }, + # Low-quality <50% complete < 90% pure + { + "condition": "params.data.completeness == 'Completeness (%)' && params.value < 50", + "style": {"backgroundColor": ORANGE, "color": "white"}, + }, + { + "condition": "params.data.purity == 'Purity (%)' && params.value < 90", + "style": {"backgroundColor": ORANGE, "color": "white"}, + }, + ] + } + + column_defs = [ + {"field": "refinement_id", "headerName": "Refinement Id", "resizable": True}, + { + "field": "refinement_label", + "headerName": "Refinement Label", + "resizable": True, + }, + { + "field": "length_sum_mbp", + "headerName": "Length Sum (Mbp)", + "resizable": True, + }, + {"field": "completeness", "headerName": "Completeness (%)", "resizable": True}, + {"field": "purity", "headerName": "Purity (%)", "resizable": True}, + { + "field": "contig_count", + "headerName": "Contig Count", + "cellStyle": MIMAG_STYLE_CONDITIONS, + }, + ] + + return html.Div( + [ + html.Label("Table 1. MAGs Summary"), + dcc.Loading( + dag.AgGrid( + id=ids.MAG_SUMMARY_STATS_DATATABLE, + className="ag-theme-material", + columnSize="responsiveSizeToFit", + style={"height": 600, "width": "100%"}, + columnDefs=column_defs, + ), + id=ids.LOADING_MAG_SUMMARY_STATS_DATATABLE, + type="circle", + color="#646569", + ), + ] + ) diff --git a/automappa/pages/mag_summary/components/mag_taxonomy_sankey.py b/automappa/pages/mag_summary/components/mag_taxonomy_sankey.py new file mode 100644 index 00000000..634df3d0 --- /dev/null +++ b/automappa/pages/mag_summary/components/mag_taxonomy_sankey.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +from typing import Protocol +from dash_extensions.enrich import DashProxy, Input, Output, dcc, html + +from plotly import graph_objects as go + +from automappa.utils.figures import taxonomy_sankey +from automappa.components import ids + + +class ClusterTaxonomySankeyDataSource(Protocol): + def get_taxonomy_sankey_records(self, metagenome_id: int, refinement_id: int): + ... 
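For the sankey, get_taxonomy_sankey_records (defined later in this diff in mag_summary/source.py) returns a contig-indexed frame whose rank columns are prefixed with the rank's first letter so labels stay unique across ranks. A small sketch of that expected shape, with fabricated taxa:

import pandas as pd

records = pd.DataFrame(
    {
        "domain": ["d_Bacteria", "d_Bacteria"],
        "phylum": ["p_Proteobacteria", "p_unclassified"],
        "class": ["c_Gammaproteobacteria", "c_unclassified"],
    },
    index=pd.Index(["contig_1", "contig_2"], name="header"),
)
# taxonomy_sankey(records) then links d_Bacteria -> p_... -> c_...,
# one link per (parent, child) pair, weighted by contig counts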
+ + +def render(app: DashProxy, source: ClusterTaxonomySankeyDataSource) -> html.Div: + @app.callback( + Output(ids.MAG_TAXONOMY_SANKEY, "figure"), + Input(ids.METAGENOME_ID_STORE, "data"), + Input(ids.MAG_SELECTION_DROPDOWN, "value"), + prevent_initial_call=True, + ) + def mag_taxonomy_sankey_callback( + metagenome_id: int, refinement_id: int + ) -> go.Figure: + data = source.get_taxonomy_sankey_records( + metagenome_id, refinement_id=refinement_id + ) + fig = taxonomy_sankey(data) + return fig + + return html.Div( + children=[ + dcc.Loading( + id=ids.LOADING_MAG_TAXONOMY_SANKEY, + children=[dcc.Graph(id=ids.MAG_TAXONOMY_SANKEY)], + type="graph", + ) + ] + ) diff --git a/automappa/pages/mag_summary/layout.py b/automappa/pages/mag_summary/layout.py new file mode 100644 index 00000000..f080f79e --- /dev/null +++ b/automappa/pages/mag_summary/layout.py @@ -0,0 +1,56 @@ +import dash_bootstrap_components as dbc +from dash_extensions.enrich import DashBlueprint +from automappa.components import ids +from automappa.pages.mag_summary.source import SummaryDataSource +from automappa.pages.mag_summary.components import ( + mag_coverage_boxplot, + mag_gc_content_boxplot, + mag_length_boxplot, + mag_overview_coverage_boxplot, + mag_overview_length_boxplot, + mag_overview_gc_content_boxplot, + mag_overview_metrics_boxplot, + mag_selection_dropdown, + mag_summary_stats_datatable, + mag_taxonomy_sankey, + mag_metrics_barplot, +) + + +def render(source: SummaryDataSource) -> DashBlueprint: + app = DashBlueprint() + app.name = ids.MAG_SUMMARY_TAB_ID + app.icon = "material-symbols:auto-graph-rounded" + app.description = ( + "Automappa MAG summary page displaying overview of genome binning results." + ) + app.title = "Automappa MAG summary" + + app.layout = dbc.Container( + [ + dbc.Row( + [ + dbc.Col(mag_overview_metrics_boxplot.render(app, source), width=3), + dbc.Col( + mag_overview_gc_content_boxplot.render(app, source), width=3 + ), + dbc.Col(mag_overview_length_boxplot.render(app, source), width=3), + dbc.Col(mag_overview_coverage_boxplot.render(app, source), width=3), + ] + ), + dbc.Row(dbc.Col(mag_summary_stats_datatable.render(app, source))), + dbc.Row(dbc.Col(mag_selection_dropdown.render(app, source))), + dbc.Row(dbc.Col(mag_taxonomy_sankey.render(app, source))), + dbc.Row( + [ + dbc.Col(mag_metrics_barplot.render(app, source), width=3), + dbc.Col(mag_gc_content_boxplot.render(app, source), width=3), + dbc.Col(mag_length_boxplot.render(app, source), width=3), + dbc.Col(mag_coverage_boxplot.render(app, source), width=3), + ] + ), + ], + fluid=True, + ) + + return app diff --git a/automappa/pages/mag_summary/source.py b/automappa/pages/mag_summary/source.py new file mode 100644 index 00000000..651ffd0e --- /dev/null +++ b/automappa/pages/mag_summary/source.py @@ -0,0 +1,258 @@ +#!/usr/bin/env python +import logging +import pandas as pd +from pydantic import BaseModel +from typing import Dict, List, Literal, Optional, Tuple, Union + +from sqlmodel import Session, and_, select, func +from automappa.data.database import engine +from automappa.data.models import Refinement, Contig, Marker +from automappa.data.schemas import ContigSchema + +logger = logging.getLogger(__name__) + +MARKER_SET_SIZE = 139 + + +class SummaryDataSource(BaseModel): + def compute_completeness_purity_metrics( + self, metagenome_id: int, refinement_id: int + ) -> Tuple[float, float]: + marker_count_stmt = ( + select(func.count(Marker.id)) + .join(Contig) + .where( + Contig.metagenome_id == metagenome_id, + Contig.refinements.any( + 
and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ), + ) + ) + unique_marker_stmt = ( + select(Marker.sacc) + .join(Contig) + .distinct() + .where( + Contig.metagenome_id == metagenome_id, + Contig.refinements.any( + and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ), + ) + ) + with Session(engine) as session: + markers_count = session.exec(marker_count_stmt).first() or 0 + unique_marker_count = session.exec( + select(func.count()).select_from(unique_marker_stmt) + ).first() + + completeness = round(unique_marker_count / MARKER_SET_SIZE * 100, 2) + purity = ( + round(unique_marker_count / markers_count * 100, 2) if markers_count else 0 + ) + return completeness, purity + + def compute_length_sum_mbp(self, metagenome_id: int, refinement_id: int) -> float: + contig_length_stmt = select(func.sum(Contig.length)).where( + Contig.metagenome_id == metagenome_id, + Contig.refinements.any( + and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ), + ) + with Session(engine) as session: + length_sum = session.exec(contig_length_stmt).first() or 0 + length_sum_mbp = round(length_sum / 1_000_000, 3) + return length_sum_mbp + + def get_completeness_purity_boxplot_records( + self, metagenome_id: int + ) -> List[Tuple[str, List[float]]]: + completeness_metrics = [] + purities = [] + with Session(engine) as session: + stmt = select(Refinement.id).where( + Refinement.outdated == False, Refinement.metagenome_id == metagenome_id + ) + refinement_ids = session.exec(stmt).all() + for refinement_id in refinement_ids: + ( + completeness, + purity, + ) = self.compute_completeness_purity_metrics(metagenome_id, refinement_id) + completeness_metrics.append(completeness) + purities.append(purity) + return [ + (ContigSchema.COMPLETENESS.title(), completeness_metrics), + (ContigSchema.PURITY.title(), purities), + ] + + def get_gc_content_boxplot_records( + self, metagenome_id: int, refinement_id: Optional[int] = 0 + ) -> List[Tuple[str, List[float]]]: + stmt = select([Contig.gc_content]).where(Contig.metagenome_id == metagenome_id) + if refinement_id: + stmt = stmt.where( + Contig.refinements.any( + and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ) + ) + with Session(engine) as session: + results = session.exec(stmt).all() + return [("GC Content", results)] + + def get_length_boxplot_records( + self, metagenome_id: int, refinement_id: Optional[int] = 0 + ) -> List[Tuple[str, List[int]]]: + stmt = select([Contig.length]).where(Contig.metagenome_id == metagenome_id) + if refinement_id: + stmt = stmt.where( + Contig.refinements.any( + and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ) + ) + with Session(engine) as session: + results = session.exec(stmt).all() + return [(ContigSchema.LENGTH.title(), results)] + + def get_coverage_boxplot_records( + self, metagenome_id: int, refinement_id: Optional[int] = 0 + ) -> List[Tuple[str, List[float]]]: + stmt = select([Contig.coverage]).where(Contig.metagenome_id == metagenome_id) + if refinement_id: + stmt = stmt.where( + Contig.refinements.any( + and_( + Refinement.outdated == False, + Refinement.id == refinement_id, + ) + ) + ) + with Session(engine) as session: + results = session.exec(stmt).all() + return [(ContigSchema.COVERAGE.title(), results)] + + def get_metrics_barplot_records( + self, metagenome_id: int, refinement_id: int + ) -> Tuple[str, List[float], List[float]]: + completeness, purity = self.compute_completeness_purity_metrics( 
+ metagenome_id=metagenome_id, refinement_id=refinement_id + ) + name = f"bin_{refinement_id} Metrics" + x = [ContigSchema.COMPLETENESS.title(), ContigSchema.PURITY.title()] + y = [completeness, purity] + return name, x, y + + def get_mag_stats_summary_row_data( + self, metagenome_id: int + ) -> List[ + Dict[ + Literal[ + "refinement_id", + "refinement_label", + "length_sum_mbp", + "completeness", + "purity", + "contig_count", + ], + Union[str, int, float], + ] + ]: + stmt = select(Refinement).where( + Refinement.metagenome_id == metagenome_id, + Refinement.outdated == False, + ) + row_data = {} + with Session(engine) as session: + refinements = session.exec(stmt).all() + for refinement in refinements: + contig_count = len(refinement.contigs) + row_data[refinement.id] = { + "refinement_id": refinement.id, + "refinement_label": f"bin_{refinement.id}", + "contig_count": contig_count, + } + for refinement_id in row_data: + completeness, purity = self.compute_completeness_purity_metrics( + metagenome_id, refinement_id + ) + length_sum_mbp = self.compute_length_sum_mbp( + metagenome_id, refinement_id + ) + row_data[refinement_id].update( + { + "completeness": completeness, + "purity": purity, + "length_sum_mbp": length_sum_mbp, + } + ) + return list(row_data.values()) + + def get_refinement_selection_dropdown_options( + self, metagenome_id: int + ) -> List[Dict[Literal["label", "value"], str]]: + stmt = ( + select([Refinement.id]) + .where( + Refinement.metagenome_id == metagenome_id, + Refinement.outdated == False, + ) + .distinct() + ) + with Session(engine) as session: + results = session.exec(stmt).all() + return [dict(label=f"bin_{result}", value=result) for result in results] + + def get_taxonomy_sankey_records( + self, metagenome_id: int, refinement_id: int + ) -> pd.DataFrame: + statement = select( + Contig.header, + Contig.superkingdom, + Contig.phylum, + Contig.klass, + Contig.order, + Contig.family, + Contig.genus, + Contig.species, + ).where( + Contig.metagenome_id == metagenome_id, + Contig.refinements.any(Refinement.id == refinement_id), + ) + with Session(engine) as session: + results = session.exec(statement).all() + + columns = [ + ContigSchema.HEADER, + ContigSchema.DOMAIN, + ContigSchema.PHYLUM, + ContigSchema.CLASS, + ContigSchema.ORDER, + ContigSchema.FAMILY, + ContigSchema.GENUS, + ContigSchema.SPECIES, + ] + + df = pd.DataFrame.from_records( + results, + index=ContigSchema.HEADER, + columns=columns, + ).fillna("unclassified") + + for rank in df.columns: + df[rank] = df[rank].map(lambda taxon: f"{rank[0]}_{taxon}") + + return df diff --git a/automappa/pages/not_found_404.py b/automappa/pages/not_found_404.py new file mode 100644 index 00000000..89a1e29b --- /dev/null +++ b/automappa/pages/not_found_404.py @@ -0,0 +1,197 @@ +from typing import Union +import dash_mantine_components as dmc +from dash_extensions.enrich import DashBlueprint, html +from dash_iconify import DashIconify + + +def get_icon( + icon: str, height: int = 30, width: Union[int, None] = None +) -> DashIconify: + return DashIconify(icon=icon, height=height, width=width) + + +alert = dmc.Alert( + dmc.Stack( + [ + dmc.Text( + "Uh oh! Looks like you've hit a broken link. 
Try returning home to continue...", + size="xl", + color="gray", + ), + dmc.Anchor( + dmc.Button( + "Home", + variant="outline", + leftIcon=get_icon("line-md:home"), + color="info", + fullWidth=True, + ), + href="/", + underline=False, + ), + ], + style={"height": 100}, + spacing="xs", + align="stretch", + justify="space-around", + ), + title="Something went wrong", + color="info", +) + +evan_rees_hover_card = dmc.HoverCard( + shadow="md", + children=[ + dmc.HoverCardTarget( + dmc.Avatar( + src="https://avatars.githubusercontent.com/u/25933122?v=4", + radius="xl", + size="xl", + ) + ), + dmc.HoverCardDropdown( + [ + dmc.Text("Evan Rees", align="center"), + dmc.Group( + [ + dmc.Anchor( + get_icon(icon="openmoji:github", width=40), + href="https://www.github.com/WiscEvan/", + target="_blank", + ), + dmc.Anchor( + get_icon(icon="openmoji:linkedin", width=40), + href="https://www.linkedin.com/in/evanroyrees/", + target="_blank", + ), + dmc.Anchor( + get_icon(icon="academicons:google-scholar", width=40), + href="https://scholar.google.com/citations?user=9TL02VUAAAAJ", + target="_blank", + ), + dmc.Anchor( + get_icon(icon="ic:round-self-improvement", width=40), + href="https://wiscevan.github.io", + target="_blank", + ), + ], + p=0, + ), + dmc.Text("Say hi!", color="dimmed", align="center"), + ] + ), + ], +) +kwanlab_hover_card = dmc.HoverCard( + shadow="md", + children=[ + dmc.HoverCardTarget( + dmc.Avatar( + src="https://avatars.githubusercontent.com/u/6548561?v=4", + radius="xl", + size="xl", + ) + ), + dmc.HoverCardDropdown( + [ + dmc.Text("Jason C. Kwan Lab", align="center"), + dmc.Group( + [ + dmc.Anchor( + get_icon(icon="openmoji:github", width=40), + href="https://www.github.com/KwanLab/", + target="_blank", + ), + dmc.Anchor( + get_icon(icon="openmoji:linkedin", width=40), + href="https://www.linkedin.com/in/jason-kwan-79137324/", + target="_blank", + ), + dmc.Anchor( + get_icon(icon="academicons:google-scholar", width=40), + href="https://scholar.google.com/citations?user=zKnYsSsAAAAJ", + target="_blank", + ), + dmc.Anchor( + get_icon(icon="openmoji:twitter", width=40), + href="https://twitter.com/kwan_lab", + target="_blank", + ), + dmc.Anchor( + get_icon(icon="guidance:medical-laboratory", width=40), + href="https://kwanlab.github.io/", + target="_blank", + ), + ], + p=0, + ), + ] + ), + ], +) + + +sample_icons = [ + get_icon("file-icons:influxdata"), + get_icon("healthicons:animal-spider-outline"), + get_icon("ph:plant"), + get_icon("healthicons:animal-chicken-outline"), + get_icon("healthicons:bacteria-outline"), + get_icon("game-icons:mushrooms-cluster"), + get_icon("healthicons:malaria-mixed-microscope-outline"), + get_icon("game-icons:scarab-beetle"), + get_icon("healthicons:animal-cow-outline"), + get_icon("fluent-emoji-high-contrast:fish"), + get_icon("streamline:nature-ecology-potted-cactus-tree-plant-succulent-pot"), +] + +new_issue_avatar = html.A( + dmc.Tooltip( + dmc.Avatar( + get_icon("emojione:waving-hand-medium-light-skin-tone"), + size="md", + radius="xl", + ), + label="Provide feedback", + position="bottom", + ), + href="https://github.com/WiscEvan/Automappa/issues/new", + target="_blank", +) + + +def render() -> DashBlueprint: + app = DashBlueprint() + app.name = "not_found_404" + app.description = "Automappa app link not found 404 page" + app.title = "Automappa 404" + app.layout = dmc.Container( + [ + dmc.Space(h=30), + dmc.Center(alert), + dmc.Center( + [ + new_issue_avatar, + dmc.Space(w=30), + evan_rees_hover_card, + dmc.Space(w=30), + kwanlab_hover_card, + ], + 
style={"width": "100%", "height": 200}, + ), + dmc.Footer( + dmc.Grid( + children=[dmc.Col(icon, span="auto") for icon in sample_icons], + justify="space-between", + align="center", + gutter="xs", + grow=True, + ), + height=40, + fixed=True, + withBorder=False, + ), + ], + fluid=True, + ) + return app diff --git a/automappa/settings.py b/automappa/settings.py new file mode 100644 index 00000000..a756f5d9 --- /dev/null +++ b/automappa/settings.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +from pathlib import Path +from typing import Optional +from pydantic import ( + BaseSettings, + HttpUrl, + RedisDsn, + PostgresDsn, + AmqpDsn, +) + +# For the pydantic docs for defining settings, also See: +# https://pydantic-docs.helpmanual.io/usage/settings/ + + +class DatabaseSettings(BaseSettings): + url: PostgresDsn + pool_size: Optional[int] = 4 + pool_pre_ping: Optional[bool] = False + + class Config: + env_prefix: str = "POSTGRES_" + env_file: str = ".env" + env_file_encoding: str = "utf-8" + + +class RedisSettings(BaseSettings): + host: str + port: int + db: int + password: str + + class Config: + env_prefix: str = "REDIS_BACKEND_" + env_file: str = ".env" + env_file_encoding: str = "utf-8" + + +class RabbitmqSettings(BaseSettings): + url: AmqpDsn + + class Config: + env_prefix: str = "RABBITMQ_" + env_file: str = ".env" + env_file_encoding: str = "utf-8" + + +class CelerySettings(BaseSettings): + backend_url: RedisDsn + broker_url: AmqpDsn + + class Config: + env_prefix: str = "CELERY_" + env_file: str = ".env" + env_file_encoding: str = "utf-8" + + +class FlowerSettings(BaseSettings): + broker_api_url: HttpUrl + + class Config: + env_prefix: str = "FLOWER_" + env_file: str = ".env" + env_file_encoding: str = "utf-8" + + +class ServerSettings(BaseSettings): + root_upload_folder: Path + # Dash/Plotly + host: Optional[str] = "localhost" + port: Optional[int] = 8050 + debug: Optional[bool] = True + + class Config: + env_prefix = "SERVER_" + env_file: str = ".env" + env_file_encoding = "utf-8" + + +server = ServerSettings() +db = DatabaseSettings() +redis = RedisSettings() +database = DatabaseSettings() +rabbitmq = RabbitmqSettings() +celery = CelerySettings() diff --git a/automappa/tasks.py b/automappa/tasks.py new file mode 100644 index 00000000..faa3d09c --- /dev/null +++ b/automappa/tasks.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +import logging + +from celery import Celery +from celery.utils.log import get_task_logger + +from automappa import settings + +logging.basicConfig( + format="[%(levelname)s] %(name)s: %(message)s", + level=logging.DEBUG, +) + +numba_logger = logging.getLogger("numba") +numba_logger.setLevel(logging.WARNING) +numba_logger.propagate = False +h5py_logger = logging.getLogger("h5py") +h5py_logger.setLevel(logging.WARNING) +h5py_logger.propagate = False +root_logger = logging.getLogger() +root_logger.setLevel(logging.WARNING) + +queue = Celery( + __name__, + backend=settings.celery.backend_url, + broker=settings.celery.broker_url, +) + +queue.config_from_object("automappa.conf.celeryconfig") +task_logger = get_task_logger(__name__) + +if settings.server.debug: + task_logger.debug( + f"celery config:\n{queue.conf.humanize(with_defaults=False, censored=True)}" + ) diff --git a/automappa/utils/README.md b/automappa/utils/README.md new file mode 100644 index 00000000..90d62166 --- /dev/null +++ b/automappa/utils/README.md @@ -0,0 +1,9 @@ +# Automappa utils.py + +## Miscellaneous resources related to Automappa Utils + +- [:elephant: Mapping between python and postgresql 
types](https://pynative.com/python-postgresql-tutorial/#h-the-mapping-between-python-and-postgresql-types "Mapping between python and postgresql types") +- [:ninja: Hackernoon/questdb-and-plotly]() +- [:chart_with_upwards_trend: github.com:gabor-boros/questdb-stock-market-dashboard]() +- [:tv: youtube: "Pydantic: Modern Python Data Validation and Settings by Michael Kennedy"](https://youtu.be/lon-dEXfY2I?t=1027 "Pydantic: Modern Python Data Validation and Settings by Michael Kennedy") +- [:sunflower: Celery+Flower+Prometheus+Grafana integration guide](https://flower.readthedocs.io/en/latest/prometheus-integration.html#celery-flower-prometheus-grafana-integration-guide "celery flower prometheus grafana integration guide") diff --git a/automappa/utils/figures.py b/automappa/utils/figures.py index 163405ec..df3cc488 100644 --- a/automappa/utils/figures.py +++ b/automappa/utils/figures.py @@ -1,29 +1,23 @@ #!/usr/bin/env python -from typing import List, Union +from typing import Dict, List, Tuple, Union import numpy as np import pandas as pd from dash.exceptions import PreventUpdate from plotly import graph_objects as go -def taxonomy_sankey(df: pd.DataFrame, selected_rank: str = "species") -> go.Figure: - ranks = ["superkingdom", "phylum", "class", "order", "family", "genus", "species"] - n_ranks = len(ranks[: ranks.index(selected_rank)]) - dff = df[[col for col in df.columns if col in ranks]].fillna("unclassified") - for rank in ranks: - if rank in dff: - dff[rank] = dff[rank].map( - lambda x: f"{rank[0]}_{x}" if rank != "superkingdom" else f"d_{x}" - ) +def taxonomy_sankey(df: pd.DataFrame) -> go.Figure: + ranks = df.columns.tolist() + n_ranks = len(ranks) label = [] - for rank in ranks[:n_ranks]: - label.extend(dff[rank].unique().tolist()) + for rank in ranks: + label.extend(df[rank].unique().tolist()) source = [] target = [] value = [] - for rank in ranks[:n_ranks]: - for rank_name, rank_df in dff.groupby(rank): + for rank in ranks: + for rank_name, rank_df in df.groupby(rank): source_index = label.index(rank_name) next_rank_i = ranks.index(rank) + 1 if next_rank_i >= len(ranks[:n_ranks]): @@ -38,27 +32,24 @@ def taxonomy_sankey(df: pd.DataFrame, selected_rank: str = "species") -> go.Figu target.append(target_index) value.append(value_count) return go.Figure( - data=[ - go.Sankey( - node=dict( - pad=8, - thickness=13, - line=dict(width=0.3), - label=label, - ), - link=dict( - source=source, - target=target, - value=value, - ), - ) - ] + go.Sankey( + node=dict( + pad=8, + thickness=13, + line=dict(width=0.3), + label=label, + ), + link=dict( + source=source, + target=target, + value=value, + ), + ), ) def metric_boxplot( - df: pd.DataFrame, - metrics: List[str] = [], + data: List[Tuple[str, pd.Series]], horizontal: bool = False, boxmean: Union[bool, str] = True, ) -> go.Figure: @@ -66,10 +57,8 @@ def metric_boxplot( Parameters ---------- - df : pd.DataFrame + data : List[Tuple[str,pd.Series]] MAG annotations dataframe - metrics : List[str], optional - MAG metrics to use for generating traces horizontal : bool, optional Whether to generate horizontal or vertical boxplot traces in the figure. boxmean : Union[bool,str], optional @@ -86,57 +75,84 @@ def metric_boxplot( PreventUpdate No metrics were provided to generate traces. 
""" - fig = go.Figure() - if not metrics: + if not data: raise PreventUpdate - for metric in metrics: - name = metric.replace("_", " ").title() + traces = [] + for metric, series in data: if horizontal: - trace = go.Box(x=df[metric], name=name, boxmean=boxmean) + trace = go.Box(x=series, name=metric, boxmean=boxmean) else: - trace = go.Box(y=df[metric], name=name, boxmean=boxmean) - # TODO: round to two decimal places - # Perhaps a hovertemplate formatting issue? - fig.add_trace(trace) - return fig + trace = go.Box(y=series, name=metric, boxmean=boxmean) + traces.append(trace) + return go.Figure(data=traces) -def marker_size_scaler(x: pd.DataFrame, scale_by: str = "length") -> int: - x_min_scaler = x[scale_by] - x[scale_by].min() - x_max_scaler = x[scale_by].max() - x[scale_by].min() - if not x_max_scaler: - # Protect Division by 0 - x_ceil = np.ceil(x_min_scaler / x_max_scaler + 1) +def metric_barplot( + data: Tuple[str, List[float], List[float]], + horizontal: bool = False, +) -> go.Figure: + if not data: + raise PreventUpdate + name, x, y = data + orientation = "h" if horizontal else "v" + trace = go.Bar(x=x, y=y, orientation=orientation, name=name) + return go.Figure([trace]) + + +def format_axis_title(axis_title: str) -> str: + """Format axis title depending on title text. Converts embed methods to uppercase then x_dim. + + Parameters + ---------- + axis_title : str + axis title to format (used from `xaxis_column` and `yaxis_column` in `scatterplot_2d_figure_callback`) + + Returns + ------- + str + formatted axis title + """ + if "_x_" in axis_title: + method_titles = { + "bhsne": "BH-tSNE", + "sksne": "(sklearn) BH-tSNE", + "umap": "UMAP", + "trimap": "TriMap", + "densmap": "DensMap", + } + # {kmer_size}mers-{norm_method}-{embed_method}_x_{1,2} + mers, norm_method, embed_method_embed_dim = axis_title.split("-") + norm_method_titles = {"am_clr": "CLR", "ilr": "ILR"} + norm_method = norm_method_titles.get(norm_method, norm_method.upper()) + embed_method, embed_dim = embed_method_embed_dim.split("_", 1) + embed_method = method_titles.get(embed_method, embed_method.upper()) + kmer_size = mers.replace("mers", "") + # formatted_axis_title = f"(k={kmer_size}, norm={norm_method}) {embed_method} {embed_dim}" + formatted_axis_title = embed_dim + elif "_" in axis_title: + metagenome_metadata_titles = {"gc_content": "GC Content"} + col_list = axis_title.split("_") + metadata_title = " ".join(col.upper() for col in col_list) + formatted_axis_title = metagenome_metadata_titles.get( + axis_title, metadata_title + ) else: - x_ceil = np.ceil(x_min_scaler / x_max_scaler) - x_scaled = x_ceil * 2 + 4 - return x_scaled + formatted_axis_title = axis_title.title() + return formatted_axis_title -def get_scatterplot_2d( - df, - x_axis: str = "x_1", - y_axis: str = "x_2", - color_by_col: str = "cluster", -) -> go.Figure: - fig = go.Figure( - layout=go.Layout( - scene=dict( - xaxis=dict(title=x_axis.title()), - yaxis=dict(title=y_axis.title()), - ), - legend=dict(x=1, y=1), - margin=dict(r=50, b=50, l=50, t=50), - hovermode="closest", - ), - ) +def get_hovertemplate_and_customdata_cols( + x_axis: str, y_axis: str +) -> Tuple[str, List[str]]: # Hovertemplate - x_hover_label = f"{x_axis.title()}: " + "%{x:.2f}" - y_hover_label = f"{y_axis.title()}: " + "%{y:.2f}" + x_hover_title = format_axis_title(x_axis) + y_hover_title = format_axis_title(y_axis) + text_hover_label = "Contig: %{text}" coverage_label = "Coverage: %{customdata[0]:.2f}" gc_content_label = "GC%: %{customdata[1]:.2f}" length_label = "Length: 
%{customdata[2]:,} bp" - text_hover_label = "Contig: %{text}" + x_hover_label = f"{x_hover_title}: " + "%{x:.2f}" + y_hover_label = f"{y_hover_title}: " + "%{y:.2f}" hovertemplate = "
<br>
".join( [ text_hover_label, @@ -147,38 +163,182 @@ def get_scatterplot_2d( y_hover_label, ] ) - metadata_cols = ["coverage", "gc_content", "length"] + return hovertemplate, metadata_cols + + +def get_scattergl_traces( + df: pd.DataFrame, + x_axis: str, + y_axis: str, + color_by_col: str = "cluster", + fillna: str = "unclustered", +) -> pd.DataFrame: + """Generate scattergl 2D traces from `df` with index of `contig`, x and y corresponding to `x_axis` and `y_axis`, respectively with traces + being grouped by the `color_by_col`. If there exists `nan` values in the `color_by_col`, these may be filled with the value used in `fillna`. + + Parameters + ---------- + df : pd.DataFrame + * `index` = `contigs` + + binning table of columns: + + * f`{embed_method}_x_1` + * f`{embed_method}_x_2` + * `gc_content` + * `coverage` + * `length` + * `colory_by_col` (commonly used column is `cluster`) + + x_axis : str + column to use to supply to the `x` argument in `Scattergl(x=...)` + y_axis : str + column to use to supply to the `y` argument in `Scattergl(y=...)` + color_by_col : str, by default "cluster" + Column with which to group the traces + fillna : str, optional + value to replace `nan` in `color_by_col`, by default "unclustered" + + Returns + ------- + pd.DataFrame + index=`color_by_col`, column=`trace` + """ + hovertemplate, metadata_cols = get_hovertemplate_and_customdata_cols( + x_axis=x_axis, y_axis=y_axis + ) + traces = [] + metadata_cols = [col for col in metadata_cols if col in df.columns] + df = df.fillna(value={color_by_col: fillna}) for color_col_name in df[color_by_col].unique(): dff = df.loc[df[color_by_col].eq(color_col_name)] + customdata = dff[metadata_cols] if metadata_cols else [] trace = go.Scattergl( x=dff[x_axis], y=dff[y_axis], - customdata=dff[metadata_cols], + customdata=customdata, text=dff.index, mode="markers", - opacity=0.65, + opacity=0.85, hovertemplate=hovertemplate, name=color_col_name, ) - fig.add_trace(trace) - fig.update_layout(legend_title_text=color_by_col.title()) + traces.append({color_by_col: color_col_name, "trace": trace}) + return pd.DataFrame(traces).set_index(color_by_col) + + +def get_embedding_traces_df(df: pd.DataFrame) -> pd.DataFrame: + embed_traces = [] + for embed_method in ["trimap", "densmap", "bhsne", "umap", "sksne"]: + traces_df = get_scattergl_traces( + df, f"{embed_method}_x_1", f"{embed_method}_x_2", "cluster" + ) + traces_df.rename(columns={"trace": embed_method}, inplace=True) + embed_traces.append(traces_df) + embed_traces_df = pd.concat(embed_traces, axis=1) + return embed_traces_df + + +def get_scatterplot_2d( + df: pd.DataFrame, + x_axis: str, + y_axis: str, + # embed_method: str, + color_by_col: str = "cluster", + fillna: str = "unclustered", +) -> go.Figure: + """Generate `go.Figure` of scattergl 2D traces + + Parameters + ---------- + df : pd.DataFrame + _description_ + x_axis : str + _description_ + y_axis : str + _description_ + embed_method : str + _description_ + color_by_col : str, optional + _description_, by default "cluster" + fillna : str, optional + _description_, by default "unclustered" + + Returns + ------- + go.Figure + _description_ + """ + layout = go.Layout( + legend=dict(x=1, y=1), + margin=dict(r=50, b=50, l=50, t=50), + hovermode="closest", + clickmode="event+select", + height=600, + width="100%", + ) + fig = go.Figure(layout=layout) + traces_df = get_scattergl_traces( + df, + x_axis=x_axis, + y_axis=y_axis, + color_by_col=color_by_col, + fillna=fillna, + ) + # TODO: Update function to use embed_traces_df... 
+    fig.add_traces(traces_df.trace.tolist())
     return fig


 def get_scatterplot_3d(
-    df,
-    x_axis: str = "x_1",
-    y_axis: str = "x_2",
-    z_axis: str = "coverage",
-    color_by_col: str = "cluster",
+    df: pd.DataFrame,
+    x_axis: str,
+    y_axis: str,
+    z_axis: str,
+    color_by_col: str,
 ) -> go.Figure:
+    """Create go.Figure from `df`
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        index_col=[contig], cols=[`x_axis`, `y_axis`, `z_axis`]
+    x_axis : str
+        continuous column for x-axis
+    y_axis : str
+        continuous column for y-axis
+    z_axis : str
+        continuous column for z-axis
+    color_by_col : str
+        categorical column for color-by-col
+
+    Returns
+    -------
+    go.Figure
+        3D scatterplot of contigs with one trace per `color_by_col` group
+    """
+
+    def marker_size_scaler(x: pd.DataFrame, scale_by: str = "length") -> pd.Series:
+        x_min_scaler = x[scale_by] - x[scale_by].min()
+        x_max_scaler = x[scale_by].max() - x[scale_by].min()
+        if not x_max_scaler:
+            # Protect against division by zero when all values in scale_by are equal
+            x_ceil = np.ceil(x_min_scaler / (x_max_scaler + 1))
+        else:
+            x_ceil = np.ceil(x_min_scaler / x_max_scaler)
+        x_scaled = x_ceil * 2 + 4
+        return x_scaled
+
+    x_axis_title = format_axis_title(x_axis)
+    y_axis_title = format_axis_title(y_axis)
+    z_axis_title = format_axis_title(z_axis)
     fig = go.Figure(
         layout=go.Layout(
             scene=dict(
-                xaxis=dict(title=x_axis.title()),
-                yaxis=dict(title=y_axis.title()),
-                zaxis=dict(title=z_axis.replace("_", " ").title()),
+                xaxis=dict(title=x_axis_title),
+                yaxis=dict(title=y_axis_title),
+                zaxis=dict(title=z_axis_title),
             ),
             legend={"x": 1, "y": 1},
             autosize=True,
@@ -186,9 +346,9 @@ def get_scatterplot_3d(
             hovermode="closest",
         )
     )
-    x_hover_label = f"{x_axis.title()}: " + "%{x:.2f}"
-    y_hover_label = f"{y_axis.title()}: " + "%{y:.2f}"
-    z_hover_label = f"{z_axis.title()}: " + "%{z:.2f}"
+    x_hover_label = f"{x_axis_title}: " + "%{x:.2f}"
+    y_hover_label = f"{y_axis_title}: " + "%{y:.2f}"
+    z_hover_label = f"{z_axis_title}: " + "%{z:.2f}"
     text_hover_label = "Contig: %{text}"
     hovertemplate = "<br>".join(
         [text_hover_label, z_hover_label, x_hover_label, y_hover_label]
     )
@@ -198,7 +358,7 @@ def get_scatterplot_3d(
             x=dff[x_axis],
             y=dff[y_axis],
             z=dff[z_axis],
-            text=dff.contig,
+            text=dff.index,
             mode="markers",
             marker={
                 "size": df.assign(normLen=marker_size_scaler)["normLen"],
diff --git a/automappa/utils/markers.py b/automappa/utils/markers.py
index 6ed8c3d4..bbadbab1 100644
--- a/automappa/utils/markers.py
+++ b/automappa/utils/markers.py
@@ -12,17 +12,33 @@ def get_cluster_marker_counts(


 def get_contig_marker_counts(
-    bin_df: pd.DataFrame, markers_df: pd.DataFrame, marker_count_range_end: int = 7
+    df: pd.DataFrame, marker_count_range_end: int = 7
 ) -> pd.DataFrame:
-    df = bin_df.join(markers_df).fillna(0).copy()
-    df = df[markers_df.columns.tolist()]
+    """Retrieve per-contig copy-number marker counts
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        wide-format marker counts table indexed by `contig` (one column per marker)
+    marker_count_range_end : int, optional
+        highest copy number counted individually; contigs at or above this
+        count are grouped into the final bin, by default 7
+
+    Returns
+    -------
+    pd.DataFrame
+        contig index with its corresponding `marker_count`
+    """
     ## Get copy number marker counts
     dfs = []
     marker_counts_range = list(
         range(marker_count_range_end + 1)
     )  # range(start=inclusive, end=exclusive)
     for marker_count in marker_counts_range:
-        # Check if last in the list of marker_counts_range to apply df.ge(...) instead of df.eq(...)
+        # The last count in marker_counts_range is open-ended,
+        # so check whether marker_count is last in the list
+        # and apply df.ge(...) instead of df.eq(...)
         if marker_count + 1 == len(marker_counts_range):
             marker_count_contig_idx = df.loc[
                 df.sum(axis=1).ge(marker_count)
@@ -36,7 +52,7 @@ def get_contig_marker_counts(
         count_df = pd.DataFrame(marker_count_contig_idx)
         count_df["marker_count"] = marker_count
         dfs.append(count_df)
-    return pd.concat(dfs).set_index("contig")
+    return pd.concat(dfs)


 def convert_marker_counts_to_marker_symbols(df: pd.DataFrame) -> pd.DataFrame:
@@ -62,3 +78,11 @@ def convert_marker_counts_to_marker_symbols(df: pd.DataFrame) -> pd.DataFrame:
     df["symbol"] = df.marker_count.map(lambda count: symbols.get(count, "circle"))
     df["marker_size"] = df.marker_count.fillna(0).map(lambda count: count + 7)
     return df
+
+
+def get_marker_symbols(bin_df: pd.DataFrame, markers_df: pd.DataFrame) -> pd.DataFrame:
+    df = bin_df.join(markers_df).fillna(0).copy()
+    df = df[markers_df.columns.tolist()]
+    marker_counts = get_contig_marker_counts(df)
+    marker_symbols = convert_marker_counts_to_marker_symbols(marker_counts)
+    return marker_symbols
diff --git a/automappa/utils/run_celery.sh b/automappa/utils/run_celery.sh
deleted file mode 100644
index 360e7934..00000000
--- a/automappa/utils/run_celery.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-cd automappa
-su -m automappa -c "celery -A tasks worker --loglevel INFO"
diff --git a/bin/run_celery.sh b/bin/run_celery.sh
new file mode 100644
index 00000000..d698a400
--- /dev/null
+++ b/bin/run_celery.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+cd automappa
+su -m automappa -c "celery --app=tasks worker --loglevel INFO"
diff --git a/automappa/utils/run_web.sh b/bin/run_web.sh
similarity index 70%
rename from automappa/utils/run_web.sh
rename to bin/run_web.sh
index 13e48a29..cf5dd276 100644
--- a/automappa/utils/run_web.sh
+++ b/bin/run_web.sh
@@ -1,5 +1,5 @@
-#!/bin/sh
+#!/usr/bin/env bash
+
 # NOTE: see https://github.com/timlardner/Docker-FlaskCeleryRabbitRedis#part-4---using-docker-to-package-our-application
 # for more information on this script.
-cd automappa -su -m automappa -c "automappa" +su -m automappa -c "python -m automappa.index" diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..c899edff --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,112 @@ +version: '3.8' + +services: + + postgres: + image: postgres + restart: always + ports: + - "5432:5432" + env_file: + - .env + volumes: + - postgres-data:/var/lib/postgresql/data + + redis: + image: redis:latest + ports: + - "6379:6379" + + rabbitmq: + image: rabbitmq:3.10.1-management-alpine + restart: always + ports: + # AMQP protocol port + - "5672:5672" + # HTTP management UI + - "15672:15672" + env_file: + - .env + volumes: + - ./automappa/conf/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf + + queue: + build: + context: . + dockerfile: Dockerfile + restart: always + command: celery --app=automappa.tasks.queue worker --loglevel=INFO -E + user: automappa + env_file: + - .env + volumes: + - .:/usr/src/app + depends_on: + - redis + - rabbitmq + + flower: + build: + context: . + dockerfile: Dockerfile + restart: always + command: celery --app=automappa.tasks.queue flower --port=5555 + user: automappa + volumes: + - .:/usr/src/app + ports: + - "5555:5555" + env_file: + - .env + depends_on: + - queue + - rabbitmq + + web: + build: + context: . + dockerfile: Dockerfile + restart: always + mem_limit: 4GB + command: python -m automappa.__main__ + user: automappa + volumes: + # /usr/src/app is location of install in Dockerfile + - .:/usr/src/app + ports: + - "8050:8050" + depends_on: + - postgres + - queue + + # prometheus: + # image: prom/prometheus:latest + # ports: + # - "9090:9090" + # volumes: + # - prometheus-data:/prometheus + # # - ./prometheus.yml:/etc/prometheus/prometheus.yml + # depends_on: + # - flower + + # grafana: + # image: grafana/grafana:latest + # ports: + # - "3000:3000" + # env_file: + # - .env + # volumes: + # - grafana-storage:/var/lib/grafana + # - ./docker/grafana/provisioning:/etc/grafana/provisioning + # - ./docker/grafana/grafana.ini:/etc/grafana/grafana.ini + # - ./docker/grafana/dashboards:/etc/grafana/dashboards + # depends_on: + # - prometheus + +volumes: + postgres-data: + # grafana-storage: + # driver: local + # prometheus-data: + # driver: local + diff --git a/docker/grafana/dashboards/celery-monitoring-grafana-dashboard.json b/docker/grafana/dashboards/celery-monitoring-grafana-dashboard.json new file mode 100644 index 00000000..04b9dfd0 --- /dev/null +++ b/docker/grafana/dashboards/celery-monitoring-grafana-dashboard.json @@ -0,0 +1,759 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "7.5.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Basic celery monitoring", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "This panel 
shows status of celery workers. 1 = online, 0 = offline.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "flower_worker_online", + "interval": "", + "legendFormat": "{{ worker }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Celery Worker Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:150", + "format": "short", + "label": "", + "logBase": 1, + "max": "1", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:151", + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "This panel shows number of tasks currently executing at worker.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "flower_worker_number_of_currently_executing_tasks", + "interval": "", + "legendFormat": "{{worker}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Number of Tasks Currently Executing at Worker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:79", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:80", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "This panel shows average task runtime at worker by worker and task name.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + 
"fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "rate(flower_task_runtime_seconds_sum[5m]) / rate(flower_task_runtime_seconds_count[5m])", + "interval": "", + "legendFormat": "{{task}}, {{worker}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average Task Runtime at Worker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:337", + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:338", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "This panel shows task prefetch time at worker by worker and task name.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "flower_task_prefetch_time_seconds", + "interval": "", + "legendFormat": "{{task}}, {{worker}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Task Prefetch Time at Worker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:337", + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:338", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "This panel shows number of tasks prefetched at worker by task and worker name.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 26 
+ }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "flower_worker_prefetched_tasks", + "interval": "", + "legendFormat": "{{task}}, {{worker}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Number of Tasks Prefetched At Worker", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:337", + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:338", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "This panel presents average task success ratio over time by task name.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "(sum(avg_over_time(flower_events_total{type=\"task-succeeded\"}[15m])) by (task) / sum(avg_over_time(flower_events_total{type=~\"task-failed|task-succeeded\"}[15m])) by (task)) * 100", + "interval": "", + "legendFormat": "{{ task }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Task Success Ratio", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:63", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:64", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "This panel presents average task failure ratio over time by task name.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + 
"y": 35 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.2", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "(sum(avg_over_time(flower_events_total{type=\"task-failed\"}[15m])) by (task) / sum(avg_over_time(flower_events_total{type=~\"task-failed|task-succeeded\"}[15m])) by (task)) * 100", + "interval": "", + "legendFormat": "{{ task }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Task Failure Ratio", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:63", + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:64", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "10s", + "schemaVersion": 27, + "style": "dark", + "tags": [ + "celery", + "monitoring", + "flower" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Automappa Celery Monitoring", + "uid": "3OBI1flGz", + "version": 9 +} \ No newline at end of file diff --git a/docker/grafana/grafana.ini b/docker/grafana/grafana.ini new file mode 100644 index 00000000..9f442223 --- /dev/null +++ b/docker/grafana/grafana.ini @@ -0,0 +1,23 @@ +[paths] +provisioning = /etc/grafana/provisioning + +[server] +enable_gzip = true +# To add HTTPS support: +#protocol = https +#;http_addr = +#http_port = 3000 +#domain = localhost +#enforce_domain = false +#root_url = https://localhost:3000 +#router_logging = false +#static_root_path = public +#cert_file = /etc/certs/cert.pem +#cert_key = /etc/certs/cert-key.pem + +[security] +# If you want to embed grafana into an iframe for example +allow_embedding = true + +[users] +default_theme = dark \ No newline at end of file diff --git a/docker/grafana/provisioning/dashboards/dashboard.yml b/docker/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 00000000..945453a4 --- /dev/null +++ b/docker/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,25 @@ +# config file version +apiVersion: 1 + +providers: + # an unique provider name + - name: Automappa Monitoring + # org id. will default to orgId 1 if not specified + org_id: 1 + # name of the dashboard folder. Required + folder: '' + # provider type. Required + type: 'file' + # disable dashboard deletion + disableDeletion: false + # enable dashboard editing + editable: true + # how often Grafana will scan for changed dashboards + updateIntervalSeconds: 5 + # allow updating provisioned dashboards from the UI + allowUiUpdates: true + options: + # path to dashboard files on disk. 
Required
+    path: /etc/grafana/dashboards
+    # use folder names from filesystem to create folders in Grafana
+    foldersFromFilesStructure: true
\ No newline at end of file
diff --git a/docker/grafana/provisioning/datasources/prometheus.yml b/docker/grafana/provisioning/datasources/prometheus.yml
new file mode 100644
index 00000000..ef132d28
--- /dev/null
+++ b/docker/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,9 @@
+apiVersion: 1
+
+deleteDatasources:
+  - name: Prometheus
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    url: http://prometheus:9090
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
new file mode 100644
index 00000000..66814f97
--- /dev/null
+++ b/docs/CONTRIBUTING.md
@@ -0,0 +1,1289 @@
+# Contributing
+
+- [Getting started](#getting-started-with-development)
+  1. [Clone Automappa](#1-retrieve-repository)
+  2. [Start Automappa services](#2-create-services-using-docker-compose)
+  3. [Navigate to browser url](#3-navigate-to-the-automappa-page-in-your-browser)
+
+- [Creating a component](#adding-a-new-component)
+
+  0. [Before you begin](#0-before-you-begin)
+  1. [Create `component-id` in `ids.py`](#1-create-a-unique-component-id)
+  2. [Create component file](#2-create-your-componentpy-file)
+  3. [Define a `render` function](#3-define-a-render-function)
+  4. [Component interactions](#4-defining-component-interactions)
+     - [The `app` argument](#the-app-argument)
+     - [The `source` argument](#the-source-argument)
+  5. [Add component to the page's layout](#5-import-and-render-your-component-into-the-page-layout)
+  6. [Component `Input` to existing components](#6-using-your_component-as-an-input-to-existing-components)
+
+- [Adding a page](#pages)
+- [Services and dependencies](#automappa-services-and-dependencies)
+  - [Postgres](#postgres)
+  - [RabbitMQ](#rabbitmq)
+  - [Celery](#celery-task-queue)
+  - [Redis](#redis)
+  - [Flower](#flower)
+- [Dev Resources](#development-resources)
+  - [Component libraries](#libraries)
+  - [Monitoring services](#monitoring-and-task-queue-services)
+
+## Getting started with development
+
+### 1. Retrieve repository
+
+```bash
+git clone https://github.com/WiscEvan/Automappa.git
+cd Automappa
+```
+
+### 2. Create services using `docker-compose`
+
+For convenience, the command may be found in the `Makefile`, and the services set up with:
+
+```bash
+make up
+```
+
+> NOTE: You can see a list of make commands by typing just `make` in the `Automappa` directory.
+
+This may take a few minutes if all of the images need to be pulled and built.
+
+### 3. Navigate to the Automappa page in your browser
+
+After all of the images have been created, the services will be started in their
+respective containers and you should eventually see this in the terminal:
+
+```console
+automappa-web-1 | Dash is running on http://0.0.0.0:8050/
+automappa-web-1 |
+automappa-web-1 | [INFO] dash.dash: Dash is running on http://0.0.0.0:8050/
+automappa-web-1 |
+automappa-web-1 |  * Serving Flask app 'automappa.app'
+automappa-web-1 |  * Debug mode: on
+```
+
+## Adding a new component
+
+### 0. Before you begin
+
+Check out a new branch
+
+```bash
+git checkout -b develop
+```
+
+Change `SERVER_DEBUG = False` in `.env` to `SERVER_DEBUG = True`
+
+### 1. Create a unique component id
+
+A unique id is required to specify what data should be set or retrieved based on the component being implemented.
+
+To do this, simply create a unique id for the component in `automappa/components/ids.py`
+
+```python
+# Example contents of automappa/components/ids.py
+COMPONENT_ID = "unique-component-id"
+```
+
+This should ultimately be imported by the respective component's file (`automappa/pages/<page>/components/your_component.py`) like so:
+
+```python
+from automappa.components import ids
+# Now you can access 'unique-component-id' with
+ids.COMPONENT_ID
+```
+
+### 2. Create your `component.py` file
+
+The component should be placed respective to the page where it will be added.
+
+The name of the file should describe your component and be located in the `components` sub-directory,
+i.e. `automappa/pages/<page>/components/your_component.py`
+
+>NOTE: Try to be clear and concise when naming the component file
+
+### 3. Define a `render` function
+
+Define a `render` function in `your_component.py`.
+
+We'll start with:
+
+```python
+# contents of your_component.py
+from dash_extensions.enrich import html
+
+def render() -> html.Div:
+    ...
+```
+
+All components follow a standard syntax of a `render` function.
+
+### 4. Defining component interactions
+
+#### The `app` argument
+
+>Prior to this you should read about [Dash basic callbacks](https://dash.plotly.com/basic-callbacks "Dash callbacks documentation")
+>
+> and if the component behavior is more complex, you may also find
+> the [Dash advanced callbacks docs](https://dash.plotly.com/advanced-callbacks "Dash advanced callbacks documentation")
+> helpful.
+
+To create a reactive component you will need to define a *callback function*
+that describes the component's behavior when interacting with other components.
+
+I'll provide a simple example here for registering callbacks specifically to your
+component.
+
+Remember, all components follow a standard syntax of a `render` function, and
+this takes the page's app (a `DashProxy`) as an input argument.
+
+Here is a simple example with these arguments:
+
+```python
+# contents of your_component.py
+from dash_extensions.enrich import html, DashProxy
+
+def render(app: DashProxy) -> html.Div:
+    ...
+```
+
+You'll notice the type hint is a `DashProxy` object. This is especially important
+when defining callbacks specific to this component. We will need to register these
+callbacks using the same `dash` basic callback syntax, so we pass the app in as an argument
+to decouple the presenter (the `app`) from the component. This provides us with two wonderful features:
+
+1. We now have much greater flexibility
+2. We can avoid having to put `from automappa.app import app` everywhere!
+
+>This also reduces our chances of running into problematic circular imports from
+>having to pass the app around the entire codebase.
+
+Let us proceed...
+
+```python
+# contents of your_component.py
+from automappa.components import ids
+from dash_extensions.enrich import html, DashProxy, Output, Input
+
+def render(app: DashProxy) -> html.Div:
+    @app.callback(
+        Output(ids.YOUR_COMPONENT_ID, "property"),
+        Input(ids.A_COMPONENT_ID, "property"),
+        ...
+    )
+    def callback_function(a_component_id_property):
+        ...
+```
+
+The `app.callback(...)` can take any number of `Input`s and `Output`s.
+Typically we'll only use one `Output` and may have multiple `Input`s to have our
+callback perform a specific task according to the user's interactions.
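+
+For instance, a concrete sketch of one `Output` driven by two `Input`s might look like
+the following (the slider ids here are assumptions for illustration and may not exist
+in `ids.py`):
+
+```python
+# a hypothetical sketch: one Output updated from two Inputs
+from automappa.components import ids
+from dash_extensions.enrich import html, DashProxy, Output, Input
+
+def render(app: DashProxy) -> html.Div:
+    @app.callback(
+        Output(ids.COMPONENT_ID, "children"),         # the div we update
+        Input(ids.GC_CONTENT_RANGE_SLIDER, "value"),  # assumed slider id
+        Input(ids.COVERAGE_RANGE_SLIDER, "value"),    # assumed slider id
+    )
+    def update_selection_summary(gc_content_range, coverage_range):
+        return f"GC% range: {gc_content_range}, coverage range: {coverage_range}"
+
+    return html.Div(id=ids.COMPONENT_ID)
+```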
+
+>NOTE: There are other keywords that may be supplied to `@app.callback` and you can
+find more information on this in the
+[Dash basic callbacks docs](https://dash.plotly.com/basic-callbacks "Dash callbacks documentation")
+and [Dash advanced callbacks docs](https://dash.plotly.com/advanced-callbacks "Dash advanced callbacks documentation").
+
+The `callback_function` to create a reactive component in the app may have an
+arbitrary name, but I tend to stick with something related to the type of
+reactivity of the component and what property is being updated.
+
+You will have to check your particular component's documentation to determine
+the properties that are available (often referred to as `props`).
+
+```python
+from automappa.components import ids
+from dash_extensions.enrich import DashProxy, html, dcc, Output, Input
+
+
+def render(app: DashProxy) -> html.Div:
+    @app.callback(
+        Output(ids.COMPONENT_ID, "<component-prop>"),
+        Input(ids.<other-component-id>, "<component-prop>"),
+    )
+    def your_component_callback(some_input_component_prop):
+        ...
+        return ...
+    ...
+    return html.Div(dcc.Component(id=ids.COMPONENT_ID, ...), ...)
+```
+
+>NOTE: The actual `app` object that will ultimately be passed to this is
+ a `DashBlueprint`, which is a wrapper of `DashProxy` used similarly to flask
+blueprint templates. For more details on this see the respective
+`automappa/pages/<page>/layout.py` file.
+
+#### The `source` argument
+
+Up until now we've only discussed the `app: DashProxy` argument in
+`def render(...)`, but we still require one more argument: a "data source".
+We will use this to access uploaded user data.
+
+The database setup has been isolated to its own directory under:
+`automappa/data/{database,models,schemas,loader}.py`
+
+The respective data source will use these database objects for the respective page.
+
+All of the Automappa pages have their own data source.
+
+For example, if we look at the `mag_refinement` page, we will notice three items:
+a `components` directory containing the page's components and two files, `layout.py` and `source.py`.
+
+The components and data source are implemented, then imported in
+`layout.py` to be instantiated in the page's layout (i.e. in a `render` function)
+
+```bash
+automappa/pages/mag_refinement
+├── __init__.py
+├── components
+├── layout.py
+├── source.py
+└── tests
+
+2 directories, 3 files
+```
+
+The data source handles all of the component methods used to interact with the database.
+
+Here we outline our database, models (w/ schemas), loading and pre-processing
+methods to construct a `DataSource` object that can handle any database operations.
+Furthermore, data sources may be defined for each page, allowing us to avoid one
+"god" class that requires all data operations for all pages and components.
+
+>(What a headache that would be 🤮)
+
+At startup, Automappa will create multiple sources respective to each page, e.g. a `HomeDataSource`, `RefinementDataSource` and `SummaryDataSource`.
+
+> (I've left `MAG` out of the MAG-summary and MAG-refinement page
+>names b/c I don't think they look so nice in CamelCase)
+
+Instead of running into a similar problem to `from automappa.app import app`, we
+can decouple our data source from our components by *dependency inversion*.
+
+We do this by defining our component's data source *protocol* to ensure we have
+all of the methods available in our data source that are required by the component.
+
+For a simple example, let's say our component needs to retrieve the number of contigs
+within a GC% range. We can define the data source protocol with the necessary method
+to use in our component.
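+
+For orientation, a much-simplified sketch of what a `Contig` model might look like with
+SQLModel is shown below (illustrative only; the authoritative definition lives in
+`automappa/data/models.py`, and the exact fields there may differ):
+
+```python
+# illustrative sketch only -- see automappa/data/models.py for the real model
+from typing import Optional
+from sqlmodel import Field, SQLModel
+
+class Contig(SQLModel, table=True):
+    id: Optional[int] = Field(default=None, primary_key=True)
+    header: str                    # contig name
+    gc_content: float              # GC percentage
+    coverage: float
+    length: int
+    cluster: Optional[str] = None  # bin/cluster assignment, if any
+```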
+
+We will need to add a `ComponentDataSource` protocol to access the user's `Contig` data.
+
+>NOTE: For more information on the `Contig` model see: `automappa/data/models.py`
+
+```python
+# contents of automappa/pages/mag_refinement/components/your_component.py
+from typing import Protocol, Tuple
+
+class ComponentDataSource(Protocol):
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        ...
+```
+
+This allows the python typing system to verify that this method is available on
+whatever data source is provided. Notice the arguments and the returned
+object are type-hinted to tell the editor (and type checker) the protocol's behavior.
+
+Now let's see this in action with a fake example broken into bite-size pieces:
+
+```python
+# contents of automappa/pages/mag_refinement/components/your_component.py
+from dash_extensions.enrich import DashProxy, html
+from typing import Protocol, Tuple
+
+class ComponentDataSource(Protocol):
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        ...
+
+def render(app: DashProxy, source: ComponentDataSource) -> html.Div:
+    # More code below
+```
+
+Here we provide our source type using our protocol (`source: ComponentDataSource`), and this allows us
+to continue on with the component's implementation without any data source imports! 🤯
+
+Let's continue with using the `source` in a callback...
+
+> NOTE: I've also added typehints to the callback function below (this was omitted in
+>previous examples for simplicity, but should always be done 🙈).
+
+```python
+# contents of automappa/pages/mag_refinement/components/your_component.py
+from automappa.components import ids
+from dash_extensions.enrich import DashProxy, html, Output, Input
+import dash_mantine_components as dmc
+from typing import Protocol, Tuple
+
+class ComponentDataSource(Protocol):
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        ...
+
+def render(app: DashProxy, source: ComponentDataSource) -> html.Div:
+    @app.callback(
+        Output(ids.COMPONENT_ID, "children"),
+        Input(ids.GC_CONTENT_RANGE_SLIDER, "value"),
+    )
+    def show_contig_count_in_range(gc_content_range: Tuple[float, float]) -> dmc.Text:
+        # we use our data source object here to get our count in the component
+        contig_count = source.get_contig_count_in_gc_content_range(
+            min_max_values=gc_content_range
+        )
+        return dmc.Text(f"{contig_count=}")
+```
+
+Notice we can use our `source.get_contig_count_in_gc_content_range` method
+(possibly without it yet being implemented) in the callback behavior. Of course,
+once all of this is in place, you will need to make sure the actual passed
+`source` object has this method implemented.
+
+>NOTE: You should be able to find the passed `source` object's type by looking
+at the respective layout file:
+>
+>i.e. `automappa/pages/mag_refinement/components/your_component.py` is placed in
+>`automappa/pages/mag_refinement/layout.py`, which gets passed a `RefinementDataSource`
+>in `automappa/components/layout.py`
+ +```mermaid +graph TD; + subgraph MAG-summary Page + A[SummaryDataSource]-->B(SummaryLayoutDataSource); + B(SummaryLayoutDataSource)-->C(CoverageOverviewBoxplotDataSource); + B(SummaryLayoutDataSource)-->D(GcPercentOverviewBoxplotDataSource); + B(SummaryLayoutDataSource)-->E(LengthOverviewBoxplotDataSource); + B(SummaryLayoutDataSource)-->F(ClusterMetricsBoxplotDataSource); + B(SummaryLayoutDataSource)-->G(ClusterDropdownDataSource); + B(SummaryLayoutDataSource)-->H(ClusterStatsTableDataSource); + B(SummaryLayoutDataSource)-->I(ClusterTaxonomyDistributionDataSource); + B(SummaryLayoutDataSource)-->J(ClusterMetricsBarplotDataSource); + end + subgraph MAG-refinement Page + AA[RefinementDataSource]-->BB(RefinementLayoutDataSource); + BB(RefinementLayoutDataSource)-->CC(Scatterplot2dDataSource); + BB(RefinementLayoutDataSource)-->DD(SettingsOffcanvasDataSource); + BB(RefinementLayoutDataSource)-->EE(MagMetricsDataSource); + BB(RefinementLayoutDataSource)-->FF(TaxonomyDistributionDataSource); + BB(RefinementLayoutDataSource)-->GG(Scatterplot3dDataSource); + BB(RefinementLayoutDataSource)-->HH(CoverageBoxplotDataSource); + BB(RefinementLayoutDataSource)-->II(GcPercentBoxplotDataSource); + BB(RefinementLayoutDataSource)-->JJ(LengthBoxplotDataSource); + BB(RefinementLayoutDataSource)-->KK(ContigCytoscapeDataSource); + BB(RefinementLayoutDataSource)-->LL(CoverageRangeSliderDataSource); + end + subgraph Home Page + AAA[HomeDataSource]-->BBB(HomeLayoutDataSource); + BBB(HomeLayoutDataSource)-->CCC(UploadModalButtonDataSource); + BBB(HomeLayoutDataSource)-->DDD(SampleCardsDataSource); + end +``` + +
+ +Home page data source graph + +```mermaid +graph TD; + A[HomeDataSource]-->B(HomeLayoutDataSource); + B(HomeLayoutDataSource)-->C(UploadModalButtonDataSource); + B(HomeLayoutDataSource)-->D(SampleCardsDataSource); +``` + +
+ +
+ +MAG-refinement data source graph + +```mermaid +graph TD; + A[RefinementDataSource]-->B(RefinementLayoutDataSource); + B(RefinementLayoutDataSource)-->C(Scatterplot2dDataSource); + B(RefinementLayoutDataSource)-->D(SettingsOffcanvasDataSource); + B(RefinementLayoutDataSource)-->E(MagMetricsDataSource); + B(RefinementLayoutDataSource)-->F(TaxonomyDistributionDataSource); + B(RefinementLayoutDataSource)-->G(Scatterplot3dDataSource); + B(RefinementLayoutDataSource)-->H(CoverageBoxplotDataSource); + B(RefinementLayoutDataSource)-->I(GcPercentBoxplotDataSource); + B(RefinementLayoutDataSource)-->J(LengthBoxplotDataSource); + B(RefinementLayoutDataSource)-->K(ContigCytoscapeDataSource); + B(RefinementLayoutDataSource)-->L(CoverageRangeSliderDataSource); +``` + +
+ +
+ +MAG-summary data source graph + +```mermaid +graph TD; + A[SummaryDataSource]-->B(SummaryLayoutDataSource); + B(SummaryLayoutDataSource)-->C(CoverageOverviewBoxplotDataSource); + B(SummaryLayoutDataSource)-->D(GcPercentOverviewBoxplotDataSource); + B(SummaryLayoutDataSource)-->E(LengthOverviewBoxplotDataSource); + B(SummaryLayoutDataSource)-->F(ClusterMetricsBoxplotDataSource); + B(SummaryLayoutDataSource)-->G(ClusterDropdownDataSource); + B(SummaryLayoutDataSource)-->H(ClusterStatsTableDataSource); + B(SummaryLayoutDataSource)-->I(ClusterTaxonomyDistributionDataSource); + B(SummaryLayoutDataSource)-->J(ClusterMetricsBarplotDataSource); +``` + +
+
+A placeholder definition may be made within the passed `source` object's
+class while working out the component's behavior and interactions.
+This makes the required method easier to find when returning to the
+data handling aspect of the implementation.
+
+For example, we know we want a `get_contig_count_in_gc_content_range` method, and because
+we are placing our component in the MAG-refinement layout, we can define this in
+`RefinementDataSource`.
+
+>i.e. `RefinementDataSource` is the object that will ultimately
+>be passed to `your_component.render(app, source)`.
+
+```python
+# contents of automappa/pages/mag_refinement/source.py
+class RefinementDataSource(BaseModel):
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        raise NotImplementedError
+```
+
+Let's say we've finalized our component behavior and aesthetics.
+
+Here's the final fake example component in its entirety:
+
+```python
+# contents of automappa/pages/mag_refinement/components/your_component.py
+from automappa.components import ids
+from dash_extensions.enrich import DashProxy, html, Output, Input
+import dash_mantine_components as dmc
+from typing import Protocol, Tuple
+
+class ComponentDataSource(Protocol):
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        ...
+
+def render(app: DashProxy, source: ComponentDataSource) -> html.Div:
+    @app.callback(
+        Output(ids.COMPONENT_ID, "children"),
+        Input(ids.GC_CONTENT_RANGE_SLIDER, "value"),
+    )
+    def show_contig_count_in_range(gc_content_range: Tuple[float, float]) -> dmc.Text:
+        contig_count = source.get_contig_count_in_gc_content_range(
+            min_max_values=gc_content_range
+        )
+        return dmc.Text(f"{contig_count=}")
+
+    return html.Div(
+        id=ids.COMPONENT_ID,
+        children=[
+            """
+            Interact with the GC% slider to see how many contigs are within the
+            selected range.
+            """
+        ],
+    )
+```
+
+We can now return to the data source and should not need to make any further changes to the
+component.
+
+Returning to our data source implementation...
+
+```python
+# contents of automappa/pages/mag_refinement/source.py
+class RefinementDataSource(BaseModel):
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        raise NotImplementedError
+```
+
+...we implement the required database query operation
+
+```python
+# contents of automappa/pages/mag_refinement/source.py
+class RefinementDataSource(BaseModel):
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        min_gc_content, max_gc_content = min_max_values
+        with Session(engine) as session:
+            statement = (
+                select(Contig)  # Here we use the Contig model defined in models.py
+                .where(Contig.gc_content >= min_gc_content, Contig.gc_content <= max_gc_content)
+            )
+            results = session.exec(statement).all()
+            return len(results)
+```
+
+### 5. Import and render your component into the page layout
+
+At this point, the majority of the work has been done. Now all that is left is to place the component
+into the layout of the page. This should correspond to the same page for which the component is implemented:
+
+- `automappa/pages/<page>/layout.py`
+
+Following the example above, this would be `automappa/pages/mag_refinement/layout.py`
+
+```python
+# contents of automappa/pages/mag_refinement/layout.py
+from dash_extensions.enrich import DashProxy, html
+from automappa.pages.mag_refinement.components import your_component  # importing your component
+from typing import Protocol, Tuple
+
+class RefinementLayoutDataSource(Protocol):
+    ...
+    def get_contig_count_in_gc_content_range(self, min_max_values: Tuple[float, float]) -> int:
+        ...
+
+def render(app: DashProxy, source: RefinementLayoutDataSource) -> DashProxy:
+    # ... code above this
+
+    # Including the component within the page layout
+    app.layout = html.Div(your_component.render(app, source))
+
+    return app
+```
+
+### 6. Using `your_component` as an input to existing components
+
+To retrieve information from components while interacting with the application,
+`dash` uses the `@app.callback(Output, Input)` syntax as we have seen.
+
+>NOTE: There are other keywords that may be supplied to `@app.callback` and you can
+find more information on this in the [Dash basic callbacks docs](https://dash.plotly.com/basic-callbacks "Dash callbacks documentation") and [Dash advanced callbacks docs](https://dash.plotly.com/advanced-callbacks "Dash advanced callbacks documentation").
+
+###### [Back to top](#contributing)
+
+## Pages
+
+> If you are not adding a page to Automappa but simply a component to an existing page, you may skip this section.
+
+Automappa uses a dash feature called [`pages`](https://dash.plotly.com/urls "dash pages documentation") to allow multi-page navigation
+*without* having to explicitly define callbacks for rendering each page (more on this later).
+
+Similarly, many useful dash utilities are also available in a package called
+[`dash-extensions`](https://www.dash-extensions.com/ "dash extensions documentation")
+which has been used throughout. Unfortunately these packages are not completely synchronized,
+so the simple approach described in the dash documentation may not be taken. However, some workarounds
+are described in the [dash-extensions docs](https://www.dash-extensions.com/getting_started/enrich).
+
+###### [Back to top](#contributing)
+
+## Automappa services and dependencies
+
+Currently Automappa utilizes multiple services to manage its backend database,
+task-queue and monitoring. You may find additional details on these services with
+their respective docker image builds, Dockerfiles, commands, dependencies and their
+ports in the `docker-compose.yml` file.
+
+These services are:
+
+| Service | Purpose | Description |
+| -: | :- | :- |
+| `postgres` | sample DB | backend database to store/retrieve user uploaded data |
+| `rabbitmq` | task-queue broker | task-queue broker for managing worker tasks |
+| `celery` | task-queue worker | task-queue worker |
+| `redis` | task-queue DB | task-queue broker & worker backend for passing tasks to/from the task-queue |
+| `flower` | task-queue monitor | for monitoring the `celery` and `rabbitmq` task-queue |
+| `web` | App | The automappa web instance |
+| \(Not currently in use\) `prometheus` | service monitoring | service monitoring dashboard |
+| \(Not currently in use\) `grafana` | service monitoring | service monitoring dashboard |
+
+Customization of the URLs to these services may be
+performed by editing the `.env` file, as many of
+these settings are configured from here.
+
+###### [Back to top](#contributing)
+
+### Postgres
+
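+If you need to inspect the sample database while developing, a quick sanity check from
+Python might look like the following (a sketch; the connection string is an assumption --
+substitute the user, password and database name from your `.env`):
+
+```python
+# a sketch: list the tables Automappa has created in postgres
+# (assumes the compose stack is up, with port 5432 published as in docker-compose.yml)
+from sqlalchemy import create_engine, inspect
+
+# placeholders below are hypothetical; use the credentials from your .env
+engine = create_engine("postgresql+psycopg2://<user>:<password>@localhost:5432/<db>")
+print(inspect(engine).get_table_names())
+```
+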
+ +What to see at startup + +```console +The files belonging to this database system will be owned by user "postgres". +This user must also own the server process. +The database cluster will be initialized with locale "en_US.utf8". +The default database encoding has accordingly been set to "UTF8". +The default text search configuration will be set to "english". +Data page checksums are disabled. +fixing permissions on existing directory /var/lib/postgresql/data ... ok +creating subdirectories ... ok +selecting dynamic shared memory implementation ... posix +selecting default max_connections ... 100 +selecting default shared_buffers ... 128MB +selecting default time zone ... Etc/UTC +creating configuration files ... ok +running bootstrap script ... ok +performing post-bootstrap initialization ... ok +syncing data to disk ... ok +Success. You can now start the database server using: + pg_ctl -D /var/lib/postgresql/data -l logfile start +initdb: warning: enabling "trust" authentication for local connections +initdb: hint: You can change this by editing pg_hba.conf or using the option -A, or --auth-local and --auth-host, the next time you run initdb. +waiting for server to start....2023-06-30 16:28:09.344 UTC [49] LOG: starting PostgreSQL 15.3 (Debian 15.3-1.pgdg110+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit +2023-06-30 16:28:09.348 UTC [49] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" +2023-06-30 16:28:09.404 UTC [52] LOG: database system was shut down at 2023-06-30 16:28:08 UTC +2023-06-30 16:28:09.436 UTC [49] LOG: database system is ready to accept connections + done +server started +CREATE DATABASE +/usr/local/bin/docker-entrypoint.sh: ignoring /docker-entrypoint-initdb.d/* +waiting for server to shut down....2023-06-30 16:28:09.728 UTC [49] LOG: received fast shutdown request +2023-06-30 16:28:09.737 UTC [49] LOG: aborting any active transactions +2023-06-30 16:28:09.742 UTC [49] LOG: background worker "logical replication launcher" (PID 55) exited with exit code 1 +2023-06-30 16:28:09.742 UTC [50] LOG: shutting down +2023-06-30 16:28:09.744 UTC [50] LOG: checkpoint starting: shutdown immediate +2023-06-30 16:28:09.845 UTC [50] LOG: checkpoint complete: wrote 918 buffers (5.6%); 0 WAL file(s) added, 0 removed, 0 recycled; write=0.044 s, sync=0.048 s, total=0.103 s; sync files=250, longest=0.017 s, average=0.001 s; distance=4217 kB, estimate=4217 kB +2023-06-30 16:28:09.857 UTC [49] LOG: database system is shut down + done +server stopped +PostgreSQL init process complete; ready for start up. +2023-06-30 16:28:10.158 UTC [1] LOG: starting PostgreSQL 15.3 (Debian 15.3-1.pgdg110+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit +2023-06-30 16:28:10.159 UTC [1] LOG: listening on IPv4 address "0.0.0.0", port 5432 +2023-06-30 16:28:10.159 UTC [1] LOG: listening on IPv6 address "::", port 5432 +2023-06-30 16:28:10.234 UTC [1] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432" +2023-06-30 16:28:10.264 UTC [65] LOG: database system was shut down at 2023-06-30 16:28:09 UTC +2023-06-30 16:28:10.317 UTC [1] LOG: database system is ready to accept connections +``` + +
+ +###### [Back to top](#contributing) + +### RabbitMQ + +
+ +What to see at startup + +```console +2023-06-30 17:34:00.261066+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:00.283893+00:00 [info] <0.221.0> Feature flags: [ ] classic_mirrored_queue_version +2023-06-30 17:34:00.284137+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:00.284185+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:00.284446+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:00.284501+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:00.284579+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:00.284604+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:00.284852+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:01.106535+00:00 [noti] <0.44.0> Application syslog exited with reason: stopped +2023-06-30 17:34:01.106765+00:00 [noti] <0.221.0> Logging: switching to configured handler(s); following messages may not be visible in this log output +2023-06-30 17:34:01.148394+00:00 [notice] <0.221.0> Logging: configured log handlers are now ACTIVE +2023-06-30 17:34:03.546573+00:00 [info] <0.221.0> ra: starting system quorum_queues +2023-06-30 17:34:03.546895+00:00 [info] <0.221.0> starting Ra system: quorum_queues in directory: /var/lib/rabbitmq/mnesia/rabbit@2b6eb27dd567/quorum/rabbit@2b6eb27dd567 +2023-06-30 17:34:03.748480+00:00 [info] <0.259.0> ra system 'quorum_queues' running pre init for 0 registered servers +2023-06-30 17:34:03.805062+00:00 [info] <0.260.0> ra: meta data store initialised for system quorum_queues. 0 record(s) recovered +2023-06-30 17:34:03.831474+00:00 [notice] <0.265.0> WAL: ra_log_wal init, open tbls: ra_log_open_mem_tables, closed tbls: ra_log_closed_mem_tables +2023-06-30 17:34:03.905788+00:00 [info] <0.221.0> ra: starting system coordination +2023-06-30 17:34:03.905986+00:00 [info] <0.221.0> starting Ra system: coordination in directory: /var/lib/rabbitmq/mnesia/rabbit@2b6eb27dd567/coordination/rabbit@2b6eb27dd567 +2023-06-30 17:34:03.964700+00:00 [info] <0.272.0> ra system 'coordination' running pre init for 0 registered servers +2023-06-30 17:34:03.985701+00:00 [info] <0.273.0> ra: meta data store initialised for system coordination. 0 record(s) recovered +2023-06-30 17:34:03.986268+00:00 [notice] <0.278.0> WAL: ra_coordination_log_wal init, open tbls: ra_coordination_log_open_mem_tables, closed tbls: ra_coordination_log_closed_mem_tables +2023-06-30 17:34:03.995172+00:00 [info] <0.221.0> +2023-06-30 17:34:03.995172+00:00 [info] <0.221.0> Starting RabbitMQ 3.10.1 on Erlang 24.3.4 [jit] +2023-06-30 17:34:03.995172+00:00 [info] <0.221.0> Copyright (c) 2007-2022 VMware, Inc. or its affiliates. +2023-06-30 17:34:03.995172+00:00 [info] <0.221.0> Licensed under the MPL 2.0. Website: https://rabbitmq.com + ## ## RabbitMQ 3.10.1 + ## ## + ########## Copyright (c) 2007-2022 VMware, Inc. or its affiliates. + ###### ## + ########## Licensed under the MPL 2.0. 
Website: https://rabbitmq.com + Erlang: 24.3.4 [jit] + TLS Library: OpenSSL - OpenSSL 1.1.1o 3 May 2022 + Doc guides: https://rabbitmq.com/documentation.html + Support: https://rabbitmq.com/contact.html + Tutorials: https://rabbitmq.com/getstarted.html + Monitoring: https://rabbitmq.com/monitoring.html + Logs: /var/log/rabbitmq/rabbit@2b6eb27dd567_upgrade.log + + Config file(s): /etc/rabbitmq/rabbitmq.conf + /etc/rabbitmq/conf.d/10-defaults.conf + Starting broker...2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> node : rabbit@2b6eb27dd567 +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> home dir : /var/lib/rabbitmq +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> config file(s) : /etc/rabbitmq/rabbitmq.conf +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> : /etc/rabbitmq/conf.d/10-defaults.conf +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> cookie hash : 8HBUegOS1ZYW39ARQeqjQw== +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> log(s) : /var/log/rabbitmq/rabbit@2b6eb27dd567_upgrade.log +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> : +2023-06-30 17:34:04.002324+00:00 [info] <0.221.0> database dir : /var/lib/rabbitmq/mnesia/rabbit@2b6eb27dd567 +2023-06-30 17:34:05.276918+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:05.277003+00:00 [info] <0.221.0> Feature flags: [ ] classic_mirrored_queue_version +2023-06-30 17:34:05.277036+00:00 [info] <0.221.0> Feature flags: [ ] drop_unroutable_metric +2023-06-30 17:34:05.277062+00:00 [info] <0.221.0> Feature flags: [ ] empty_basic_get_metric +2023-06-30 17:34:05.277174+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:05.277205+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:05.277256+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:05.277362+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:05.277392+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:05.277414+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:05.277436+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:15.876746+00:00 [info] <0.221.0> Running boot step pre_boot defined by app rabbit +2023-06-30 17:34:15.876861+00:00 [info] <0.221.0> Running boot step rabbit_global_counters defined by app rabbit +2023-06-30 17:34:15.877406+00:00 [info] <0.221.0> Running boot step rabbit_osiris_metrics defined by app rabbit +2023-06-30 17:34:15.878840+00:00 [info] <0.221.0> Running boot step rabbit_core_metrics defined by app rabbit +2023-06-30 17:34:15.884054+00:00 [info] <0.221.0> Running boot step rabbit_alarm defined by app rabbit +2023-06-30 17:34:15.903243+00:00 [info] <0.293.0> Memory high watermark set to 3183 MiB (3338434969 bytes) of 7959 MiB (8346087424 bytes) total +2023-06-30 17:34:15.925580+00:00 [info] <0.295.0> Enabling free disk space monitoring +2023-06-30 17:34:15.926199+00:00 [info] <0.295.0> Disk free limit set to 50MB +2023-06-30 17:34:15.939181+00:00 [info] <0.221.0> Running boot step code_server_cache defined by app rabbit +2023-06-30 17:34:15.939630+00:00 [info] <0.221.0> Running boot step file_handle_cache defined by app rabbit +2023-06-30 17:34:15.940030+00:00 [info] <0.300.0> Limiting to approx 1048479 file handles (943629 sockets) +2023-06-30 17:34:15.940233+00:00 [info] <0.301.0> FHC read buffering: OFF +2023-06-30 17:34:15.940415+00:00 [info] <0.301.0> 
FHC write buffering: ON +2023-06-30 17:34:15.951372+00:00 [info] <0.221.0> Running boot step worker_pool defined by app rabbit +2023-06-30 17:34:15.963058+00:00 [info] <0.280.0> Will use 4 processes for default worker pool +2023-06-30 17:34:15.963293+00:00 [info] <0.280.0> Starting worker pool 'worker_pool' with 4 processes in it +2023-06-30 17:34:15.967054+00:00 [info] <0.221.0> Running boot step database defined by app rabbit +2023-06-30 17:34:15.969249+00:00 [info] <0.221.0> Node database directory at /var/lib/rabbitmq/mnesia/rabbit@2b6eb27dd567 is empty. Assuming we need to join an existing cluster or initialise from scratch... +2023-06-30 17:34:15.969382+00:00 [info] <0.221.0> Configured peer discovery backend: rabbit_peer_discovery_classic_config +2023-06-30 17:34:15.969952+00:00 [info] <0.221.0> Will try to lock with peer discovery backend rabbit_peer_discovery_classic_config +2023-06-30 17:34:15.970220+00:00 [info] <0.221.0> All discovered existing cluster peers: +2023-06-30 17:34:15.970294+00:00 [info] <0.221.0> Discovered no peer nodes to cluster with. Some discovery backends can filter nodes out based on a readiness criteria. Enabling debug logging might help troubleshoot. +2023-06-30 17:34:15.987668+00:00 [notice] <0.44.0> Application mnesia exited with reason: stopped +2023-06-30 17:34:17.120283+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 9 retries left +2023-06-30 17:34:17.120457+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:17.227311+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 9 retries left +2023-06-30 17:34:17.228954+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:17.229093+00:00 [info] <0.221.0> Feature flag `classic_mirrored_queue_version`: supported, attempt to enable... 
+2023-06-30 17:34:17.229312+00:00 [info] <0.221.0> Feature flag `classic_mirrored_queue_version`: mark as enabled=state_changing +2023-06-30 17:34:17.247984+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:17.248114+00:00 [info] <0.221.0> Feature flags: [~] classic_mirrored_queue_version +2023-06-30 17:34:17.248175+00:00 [info] <0.221.0> Feature flags: [ ] drop_unroutable_metric +2023-06-30 17:34:17.248316+00:00 [info] <0.221.0> Feature flags: [ ] empty_basic_get_metric +2023-06-30 17:34:17.248362+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:17.248402+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:17.249184+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:17.249280+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:17.249326+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:17.249362+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:17.249757+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:17.366149+00:00 [info] <0.221.0> Feature flag `classic_mirrored_queue_version`: mark as enabled=true +2023-06-30 17:34:17.371476+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:17.372173+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:17.373462+00:00 [info] <0.221.0> Feature flags: [ ] drop_unroutable_metric +2023-06-30 17:34:17.373578+00:00 [info] <0.221.0> Feature flags: [ ] empty_basic_get_metric +2023-06-30 17:34:17.373652+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:17.376918+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:17.377137+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:17.377231+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:17.377740+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:17.377890+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:17.378438+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:17.467765+00:00 [info] <0.221.0> Feature flag `drop_unroutable_metric`: supported, attempt to enable... 
+2023-06-30 17:34:17.467873+00:00 [info] <0.221.0> Feature flag `drop_unroutable_metric`: mark as enabled=state_changing +2023-06-30 17:34:17.470253+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:17.470443+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:17.470530+00:00 [info] <0.221.0> Feature flags: [~] drop_unroutable_metric +2023-06-30 17:34:17.470602+00:00 [info] <0.221.0> Feature flags: [ ] empty_basic_get_metric +2023-06-30 17:34:17.470699+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:17.470794+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:17.470853+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:17.470908+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:17.470969+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:17.471009+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:17.471050+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:17.578161+00:00 [info] <0.221.0> Feature flag `drop_unroutable_metric`: mark as enabled=true +2023-06-30 17:34:17.586106+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:17.586306+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:17.586371+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:17.586414+00:00 [info] <0.221.0> Feature flags: [ ] empty_basic_get_metric +2023-06-30 17:34:17.586466+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:17.586505+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:17.586671+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:17.586878+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:17.586979+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:17.587169+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:17.587332+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:17.611053+00:00 [info] <0.221.0> Feature flag `empty_basic_get_metric`: supported, attempt to enable... 
+2023-06-30 17:34:17.611249+00:00 [info] <0.221.0> Feature flag `empty_basic_get_metric`: mark as enabled=state_changing +2023-06-30 17:34:17.613076+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:17.613208+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:17.617252+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:17.617363+00:00 [info] <0.221.0> Feature flags: [~] empty_basic_get_metric +2023-06-30 17:34:17.617442+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:17.617486+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:17.617529+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:17.617565+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:17.618385+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:17.618431+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:17.618478+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:17.785959+00:00 [info] <0.221.0> Feature flag `empty_basic_get_metric`: mark as enabled=true +2023-06-30 17:34:17.831807+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:17.831993+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:17.832037+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:17.832065+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:17.832103+00:00 [info] <0.221.0> Feature flags: [ ] implicit_default_bindings +2023-06-30 17:34:17.837501+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:17.837584+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:17.838586+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:17.838693+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:17.838740+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:17.838797+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:17.996578+00:00 [info] <0.221.0> Feature flag `implicit_default_bindings`: supported, attempt to enable... 
+2023-06-30 17:34:17.996729+00:00 [info] <0.221.0> Feature flag `implicit_default_bindings`: mark as enabled=state_changing +2023-06-30 17:34:18.052555+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:18.052958+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:18.053030+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:18.053096+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:18.053138+00:00 [info] <0.221.0> Feature flags: [~] implicit_default_bindings +2023-06-30 17:34:18.053176+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:18.053286+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:18.053349+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:18.053421+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:18.053459+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:18.053500+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:18.188674+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 0 retries left +2023-06-30 17:34:18.189301+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:18.189450+00:00 [info] <0.221.0> Feature flag `implicit_default_bindings`: mark as enabled=true +2023-06-30 17:34:18.249730+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:18.249885+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:18.250009+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:18.250158+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:18.250212+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:18.250251+00:00 [info] <0.221.0> Feature flags: [ ] maintenance_mode_status +2023-06-30 17:34:18.250336+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:18.250380+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:18.250436+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:18.250834+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:18.250971+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:18.312801+00:00 [info] <0.221.0> Feature flag `maintenance_mode_status`: supported, attempt to enable... 
+2023-06-30 17:34:18.313899+00:00 [info] <0.221.0> Feature flag `maintenance_mode_status`: mark as enabled=state_changing +2023-06-30 17:34:18.328203+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:18.328321+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:18.328428+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:18.328699+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:18.328753+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:18.328799+00:00 [info] <0.221.0> Feature flags: [~] maintenance_mode_status +2023-06-30 17:34:18.328943+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:18.329027+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:18.329072+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:18.329111+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:18.329176+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:18.413958+00:00 [info] <0.221.0> Creating table rabbit_node_maintenance_states for feature flag `maintenance_mode_status` +2023-06-30 17:34:18.464988+00:00 [info] <0.221.0> Feature flag `maintenance_mode_status`: mark as enabled=true +2023-06-30 17:34:18.488211+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:18.498343+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:18.498424+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:18.498470+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:18.498518+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:18.498561+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:18.514068+00:00 [info] <0.221.0> Feature flags: [ ] quorum_queue +2023-06-30 17:34:18.514310+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:18.514394+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:18.514458+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:18.514617+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:18.679019+00:00 [info] <0.221.0> Feature flag `quorum_queue`: supported, attempt to enable... 
+2023-06-30 17:34:18.679162+00:00 [info] <0.221.0> Feature flag `quorum_queue`: mark as enabled=state_changing +2023-06-30 17:34:18.681463+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:18.681711+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:18.681775+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:18.681825+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:18.681868+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:18.682082+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:18.762213+00:00 [info] <0.221.0> Feature flags: [~] quorum_queue +2023-06-30 17:34:18.762283+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:18.762357+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:18.762395+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:18.762545+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:18.888555+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 9 retries left +2023-06-30 17:34:18.890845+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:18.890992+00:00 [info] <0.221.0> Feature flag `quorum_queue`: migrating Mnesia table rabbit_queue... +2023-06-30 17:34:19.122993+00:00 [info] <0.221.0> Feature flag `quorum_queue`: migrating Mnesia table rabbit_durable_queue... +2023-06-30 17:34:19.230644+00:00 [info] <0.221.0> Feature flag `quorum_queue`: Mnesia tables migration done +2023-06-30 17:34:19.230874+00:00 [info] <0.221.0> Feature flag `quorum_queue`: mark as enabled=true +2023-06-30 17:34:19.254091+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:19.254198+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:19.254235+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:19.254314+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:19.254344+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:19.254369+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:19.254452+00:00 [info] <0.221.0> Feature flags: [x] quorum_queue +2023-06-30 17:34:19.254758+00:00 [info] <0.221.0> Feature flags: [ ] stream_queue +2023-06-30 17:34:19.254814+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:19.254878+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:19.254922+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:19.358776+00:00 [info] <0.221.0> Feature flag `stream_queue`: supported, attempt to enable... 
+2023-06-30 17:34:19.358927+00:00 [info] <0.221.0> Feature flag `stream_queue`: mark as enabled=state_changing +2023-06-30 17:34:19.362442+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:19.362990+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:19.363061+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:19.363112+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:19.363175+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:19.363241+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:19.363291+00:00 [info] <0.221.0> Feature flags: [x] quorum_queue +2023-06-30 17:34:19.363337+00:00 [info] <0.221.0> Feature flags: [~] stream_queue +2023-06-30 17:34:19.367963+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:19.368021+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:19.369106+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:19.483130+00:00 [info] <0.221.0> Feature flag `stream_queue`: mark as enabled=true +2023-06-30 17:34:19.587581+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:19.589150+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:19.589213+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:19.589254+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:19.589339+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:19.589381+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:19.589421+00:00 [info] <0.221.0> Feature flags: [x] quorum_queue +2023-06-30 17:34:19.589460+00:00 [info] <0.221.0> Feature flags: [x] stream_queue +2023-06-30 17:34:19.589673+00:00 [info] <0.221.0> Feature flags: [ ] user_limits +2023-06-30 17:34:19.597309+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:19.601754+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:19.681271+00:00 [info] <0.221.0> Feature flag `user_limits`: supported, attempt to enable... 
+2023-06-30 17:34:19.681467+00:00 [info] <0.221.0> Feature flag `user_limits`: mark as enabled=state_changing +2023-06-30 17:34:19.724505+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:19.724668+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:19.724728+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:19.724803+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:19.724843+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:19.724885+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:19.725115+00:00 [info] <0.221.0> Feature flags: [x] quorum_queue +2023-06-30 17:34:19.728205+00:00 [info] <0.221.0> Feature flags: [x] stream_queue +2023-06-30 17:34:19.734686+00:00 [info] <0.221.0> Feature flags: [~] user_limits +2023-06-30 17:34:19.734834+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:19.734894+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:19.810254+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 9 retries left +2023-06-30 17:34:19.813713+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:19.935551+00:00 [info] <0.221.0> Feature flag `user_limits`: mark as enabled=true +2023-06-30 17:34:19.952882+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:19.952996+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:19.953032+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:19.953059+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:19.953109+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:19.953136+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:19.953174+00:00 [info] <0.221.0> Feature flags: [x] quorum_queue +2023-06-30 17:34:19.953200+00:00 [info] <0.221.0> Feature flags: [x] stream_queue +2023-06-30 17:34:19.953261+00:00 [info] <0.221.0> Feature flags: [x] user_limits +2023-06-30 17:34:19.953286+00:00 [info] <0.221.0> Feature flags: [ ] virtual_host_metadata +2023-06-30 17:34:19.953311+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:20.078835+00:00 [info] <0.221.0> Feature flag `virtual_host_metadata`: supported, attempt to enable... 
+2023-06-30 17:34:20.079270+00:00 [info] <0.221.0> Feature flag `virtual_host_metadata`: mark as enabled=state_changing +2023-06-30 17:34:20.084762+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:20.084955+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:20.085022+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:20.123619+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:20.123670+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:20.123709+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:20.123795+00:00 [info] <0.221.0> Feature flags: [x] quorum_queue +2023-06-30 17:34:20.123868+00:00 [info] <0.221.0> Feature flags: [x] stream_queue +2023-06-30 17:34:20.123904+00:00 [info] <0.221.0> Feature flags: [x] user_limits +2023-06-30 17:34:20.123938+00:00 [info] <0.221.0> Feature flags: [~] virtual_host_metadata +2023-06-30 17:34:20.129496+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:20.762339+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 9 retries left +2023-06-30 17:34:20.764058+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:20.954333+00:00 [info] <0.221.0> Feature flag `virtual_host_metadata`: mark as enabled=true +2023-06-30 17:34:20.971476+00:00 [info] <0.221.0> Feature flags: list of feature flags found: +2023-06-30 17:34:20.972331+00:00 [info] <0.221.0> Feature flags: [x] classic_mirrored_queue_version +2023-06-30 17:34:20.974129+00:00 [info] <0.221.0> Feature flags: [x] drop_unroutable_metric +2023-06-30 17:34:20.974567+00:00 [info] <0.221.0> Feature flags: [x] empty_basic_get_metric +2023-06-30 17:34:20.975869+00:00 [info] <0.221.0> Feature flags: [x] implicit_default_bindings +2023-06-30 17:34:20.976144+00:00 [info] <0.221.0> Feature flags: [x] maintenance_mode_status +2023-06-30 17:34:20.976385+00:00 [info] <0.221.0> Feature flags: [x] quorum_queue +2023-06-30 17:34:20.985341+00:00 [info] <0.221.0> Feature flags: [x] stream_queue +2023-06-30 17:34:20.985453+00:00 [info] <0.221.0> Feature flags: [x] user_limits +2023-06-30 17:34:20.985503+00:00 [info] <0.221.0> Feature flags: [x] virtual_host_metadata +2023-06-30 17:34:20.985546+00:00 [info] <0.221.0> Feature flags: feature flag states written to disk: yes +2023-06-30 17:34:21.115745+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 9 retries left +2023-06-30 17:34:21.116040+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:21.299833+00:00 [info] <0.221.0> Waiting for Mnesia tables for 30000 ms, 9 retries left +2023-06-30 17:34:21.300111+00:00 [info] <0.221.0> Successfully synced tables from a peer +2023-06-30 17:34:21.300184+00:00 [info] <0.221.0> Peer discovery backend rabbit_peer_discovery_classic_config does not support registration, skipping registration. 
+2023-06-30 17:34:21.300272+00:00 [info] <0.221.0> Will try to unlock with peer discovery backend rabbit_peer_discovery_classic_config +2023-06-30 17:34:21.300468+00:00 [info] <0.221.0> Running boot step database_sync defined by app rabbit +2023-06-30 17:34:21.300694+00:00 [info] <0.221.0> Running boot step feature_flags defined by app rabbit +2023-06-30 17:34:21.338224+00:00 [info] <0.221.0> Running boot step codec_correctness_check defined by app rabbit +2023-06-30 17:34:21.338352+00:00 [info] <0.221.0> Running boot step external_infrastructure defined by app rabbit +2023-06-30 17:34:21.342333+00:00 [info] <0.221.0> Running boot step rabbit_registry defined by app rabbit +2023-06-30 17:34:21.342824+00:00 [info] <0.221.0> Running boot step rabbit_auth_mechanism_cr_demo defined by app rabbit +2023-06-30 17:34:21.343205+00:00 [info] <0.221.0> Running boot step rabbit_queue_location_random defined by app rabbit +2023-06-30 17:34:21.343375+00:00 [info] <0.221.0> Running boot step rabbit_event defined by app rabbit +2023-06-30 17:34:21.352964+00:00 [info] <0.221.0> Running boot step rabbit_auth_mechanism_amqplain defined by app rabbit +2023-06-30 17:34:21.353234+00:00 [info] <0.221.0> Running boot step rabbit_auth_mechanism_plain defined by app rabbit +2023-06-30 17:34:21.353365+00:00 [info] <0.221.0> Running boot step rabbit_exchange_type_direct defined by app rabbit +2023-06-30 17:34:21.362955+00:00 [info] <0.221.0> Running boot step rabbit_exchange_type_fanout defined by app rabbit +2023-06-30 17:34:21.363138+00:00 [info] <0.221.0> Running boot step rabbit_exchange_type_headers defined by app rabbit +2023-06-30 17:34:21.363295+00:00 [info] <0.221.0> Running boot step rabbit_exchange_type_topic defined by app rabbit +2023-06-30 17:34:21.363383+00:00 [info] <0.221.0> Running boot step rabbit_mirror_queue_mode_all defined by app rabbit +2023-06-30 17:34:21.363463+00:00 [info] <0.221.0> Running boot step rabbit_mirror_queue_mode_exactly defined by app rabbit +2023-06-30 17:34:21.363565+00:00 [info] <0.221.0> Running boot step rabbit_mirror_queue_mode_nodes defined by app rabbit +2023-06-30 17:34:21.400801+00:00 [info] <0.221.0> Running boot step rabbit_priority_queue defined by app rabbit +2023-06-30 17:34:21.400929+00:00 [info] <0.221.0> Priority queues enabled, real BQ is rabbit_variable_queue +2023-06-30 17:34:21.401092+00:00 [info] <0.221.0> Running boot step rabbit_queue_location_client_local defined by app rabbit +2023-06-30 17:34:21.401244+00:00 [info] <0.221.0> Running boot step rabbit_queue_location_min_masters defined by app rabbit +2023-06-30 17:34:21.401378+00:00 [info] <0.221.0> Running boot step kernel_ready defined by app rabbit +2023-06-30 17:34:21.401695+00:00 [info] <0.221.0> Running boot step rabbit_sysmon_minder defined by app rabbit +2023-06-30 17:34:21.402464+00:00 [info] <0.221.0> Running boot step rabbit_epmd_monitor defined by app rabbit +2023-06-30 17:34:21.448988+00:00 [info] <0.568.0> epmd monitor knows us, inter-node communication (distribution) port: 25672 +2023-06-30 17:34:21.449413+00:00 [info] <0.221.0> Running boot step guid_generator defined by app rabbit +2023-06-30 17:34:21.480870+00:00 [info] <0.221.0> Running boot step rabbit_node_monitor defined by app rabbit +2023-06-30 17:34:21.481474+00:00 [info] <0.572.0> Starting rabbit_node_monitor +2023-06-30 17:34:21.481865+00:00 [info] <0.221.0> Running boot step delegate_sup defined by app rabbit +2023-06-30 17:34:21.484047+00:00 [info] <0.221.0> Running boot step rabbit_memory_monitor defined by app rabbit 
+2023-06-30 17:34:21.484625+00:00 [info] <0.221.0> Running boot step rabbit_fifo_dlx_sup defined by app rabbit +2023-06-30 17:34:21.484903+00:00 [info] <0.221.0> Running boot step core_initialized defined by app rabbit +2023-06-30 17:34:21.484975+00:00 [info] <0.221.0> Running boot step upgrade_queues defined by app rabbit +2023-06-30 17:34:21.607289+00:00 [info] <0.221.0> message_store upgrades: 1 to apply +2023-06-30 17:34:21.609176+00:00 [info] <0.221.0> message_store upgrades: Applying rabbit_variable_queue:move_messages_to_vhost_store +2023-06-30 17:34:21.609673+00:00 [info] <0.221.0> message_store upgrades: No durable queues found. Skipping message store migration +2023-06-30 17:34:21.609956+00:00 [info] <0.221.0> message_store upgrades: Removing the old message store data +2023-06-30 17:34:21.651076+00:00 [info] <0.221.0> message_store upgrades: All upgrades applied successfully +2023-06-30 17:34:21.775782+00:00 [info] <0.221.0> Running boot step channel_tracking defined by app rabbit +2023-06-30 17:34:21.805945+00:00 [info] <0.221.0> Setting up a table for channel tracking on this node: tracked_channel_on_node_rabbit@2b6eb27dd567 +2023-06-30 17:34:21.837092+00:00 [info] <0.221.0> Setting up a table for channel tracking on this node: tracked_channel_table_per_user_on_node_rabbit@2b6eb27dd567 +2023-06-30 17:34:21.838071+00:00 [info] <0.221.0> Running boot step rabbit_channel_tracking_handler defined by app rabbit +2023-06-30 17:34:21.838988+00:00 [info] <0.221.0> Running boot step connection_tracking defined by app rabbit +2023-06-30 17:34:21.959051+00:00 [info] <0.221.0> Setting up a table for connection tracking on this node: tracked_connection_on_node_rabbit@2b6eb27dd567 +2023-06-30 17:34:22.006458+00:00 [info] <0.221.0> Setting up a table for per-vhost connection counting on this node: tracked_connection_per_vhost_on_node_rabbit@2b6eb27dd567 +2023-06-30 17:34:22.072286+00:00 [info] <0.221.0> Setting up a table for per-user connection counting on this node: tracked_connection_table_per_user_on_node_rabbit@2b6eb27dd567 +2023-06-30 17:34:22.074361+00:00 [info] <0.221.0> Running boot step rabbit_connection_tracking_handler defined by app rabbit +2023-06-30 17:34:22.074797+00:00 [info] <0.221.0> Running boot step rabbit_definitions_hashing defined by app rabbit +2023-06-30 17:34:22.075030+00:00 [info] <0.221.0> Running boot step rabbit_exchange_parameters defined by app rabbit +2023-06-30 17:34:22.075251+00:00 [info] <0.221.0> Running boot step rabbit_mirror_queue_misc defined by app rabbit +2023-06-30 17:34:22.087175+00:00 [info] <0.221.0> Running boot step rabbit_policies defined by app rabbit +2023-06-30 17:34:22.125476+00:00 [info] <0.221.0> Running boot step rabbit_policy defined by app rabbit +2023-06-30 17:34:22.125766+00:00 [info] <0.221.0> Running boot step rabbit_queue_location_validator defined by app rabbit +2023-06-30 17:34:22.125904+00:00 [info] <0.221.0> Running boot step rabbit_quorum_memory_manager defined by app rabbit +2023-06-30 17:34:22.125988+00:00 [info] <0.221.0> Running boot step rabbit_stream_coordinator defined by app rabbit +2023-06-30 17:34:22.126347+00:00 [info] <0.221.0> Running boot step rabbit_vhost_limit defined by app rabbit +2023-06-30 17:34:22.126475+00:00 [info] <0.221.0> Running boot step rabbit_mgmt_reset_handler defined by app rabbitmq_management +2023-06-30 17:34:22.126701+00:00 [info] <0.221.0> Running boot step rabbit_mgmt_db_handler defined by app rabbitmq_management_agent +2023-06-30 17:34:22.126766+00:00 [info] <0.221.0> Management 
plugin: using rates mode 'basic' +2023-06-30 17:34:22.127371+00:00 [info] <0.221.0> Running boot step recovery defined by app rabbit +2023-06-30 17:34:22.138992+00:00 [info] <0.221.0> Running boot step empty_db_check defined by app rabbit +2023-06-30 17:34:22.139442+00:00 [info] <0.221.0> Will seed default virtual host and user... +2023-06-30 17:34:22.168841+00:00 [info] <0.221.0> Adding vhost '/' (description: 'Default virtual host', tags: []) +2023-06-30 17:34:22.329641+00:00 [info] <0.634.0> Making sure data directory '/var/lib/rabbitmq/mnesia/rabbit@2b6eb27dd567/msg_stores/vhosts/628WB79CIFDYO9LJI6DKMI09L' for vhost '/' exists +2023-06-30 17:34:22.379852+00:00 [info] <0.634.0> Setting segment_entry_count for vhost '/' with 0 queues to '2048' +2023-06-30 17:34:22.401937+00:00 [info] <0.634.0> Starting message stores for vhost '/' +2023-06-30 17:34:22.402333+00:00 [info] <0.639.0> Message store "628WB79CIFDYO9LJI6DKMI09L/msg_store_transient": using rabbit_msg_store_ets_index to provide index +2023-06-30 17:34:22.417358+00:00 [info] <0.634.0> Started message store of type transient for vhost '/' +2023-06-30 17:34:22.417698+00:00 [info] <0.643.0> Message store "628WB79CIFDYO9LJI6DKMI09L/msg_store_persistent": using rabbit_msg_store_ets_index to provide index +2023-06-30 17:34:22.433077+00:00 [warning] <0.643.0> Message store "628WB79CIFDYO9LJI6DKMI09L/msg_store_persistent": rebuilding indices from scratch +2023-06-30 17:34:22.438457+00:00 [info] <0.634.0> Started message store of type persistent for vhost '/' +2023-06-30 17:34:22.439242+00:00 [info] <0.634.0> Recovering 0 queues of type rabbit_classic_queue took 55ms +2023-06-30 17:34:22.439365+00:00 [info] <0.634.0> Recovering 0 queues of type rabbit_quorum_queue took 0ms +2023-06-30 17:34:22.439437+00:00 [info] <0.634.0> Recovering 0 queues of type rabbit_stream_queue took 0ms +2023-06-30 17:34:22.459104+00:00 [info] <0.221.0> Created user 'user' +2023-06-30 17:34:22.472005+00:00 [info] <0.221.0> Successfully set user tags for user 'user' to [administrator] +2023-06-30 17:34:22.516067+00:00 [info] <0.221.0> Successfully set permissions for 'user' in virtual host '/' to '.*', '.*', '.*' +2023-06-30 17:34:22.516354+00:00 [info] <0.221.0> Running boot step rabbit_looking_glass defined by app rabbit +2023-06-30 17:34:22.516441+00:00 [info] <0.221.0> Running boot step rabbit_core_metrics_gc defined by app rabbit +2023-06-30 17:34:22.517439+00:00 [info] <0.221.0> Running boot step background_gc defined by app rabbit +2023-06-30 17:34:22.519124+00:00 [info] <0.221.0> Running boot step routing_ready defined by app rabbit +2023-06-30 17:34:22.519203+00:00 [info] <0.221.0> Running boot step pre_flight defined by app rabbit +2023-06-30 17:34:22.519235+00:00 [info] <0.221.0> Running boot step notify_cluster defined by app rabbit +2023-06-30 17:34:22.521005+00:00 [info] <0.221.0> Running boot step networking defined by app rabbit +2023-06-30 17:34:22.521259+00:00 [info] <0.221.0> Running boot step rabbit_maintenance_mode_state defined by app rabbit +2023-06-30 17:34:22.521321+00:00 [info] <0.221.0> Creating table rabbit_node_maintenance_states for feature flag `maintenance_mode_status` +2023-06-30 17:34:22.532493+00:00 [info] <0.221.0> Running boot step definition_import_worker_pool defined by app rabbit +2023-06-30 17:34:22.532964+00:00 [info] <0.280.0> Starting worker pool 'definition_import_pool' with 4 processes in it +2023-06-30 17:34:22.534741+00:00 [info] <0.221.0> Running boot step cluster_name defined by app rabbit +2023-06-30 
17:34:22.535101+00:00 [info] <0.221.0> Initialising internal cluster ID to 'rabbitmq-cluster-id-3EC3aXLvrXWw67gAEbyFaw' +2023-06-30 17:34:22.551935+00:00 [info] <0.221.0> Running boot step direct_client defined by app rabbit +2023-06-30 17:34:22.552259+00:00 [info] <0.221.0> Running boot step rabbit_management_load_definitions defined by app rabbitmq_management +2023-06-30 17:34:22.552438+00:00 [info] <0.677.0> Resetting node maintenance status +2023-06-30 17:34:22.708404+00:00 [info] <0.736.0> Management plugin: HTTP (non-TLS) listener started on port 15672 +2023-06-30 17:34:22.708736+00:00 [info] <0.764.0> Statistics database started. +2023-06-30 17:34:22.711912+00:00 [info] <0.763.0> Starting worker pool 'management_worker_pool' with 3 processes in it +2023-06-30 17:34:22.795797+00:00 [info] <0.778.0> Prometheus metrics: HTTP (non-TLS) listener started on port 15692 +2023-06-30 17:34:22.796227+00:00 [info] <0.677.0> Ready to start client connection listeners +2023-06-30 17:34:22.821136+00:00 [info] <0.822.0> started TCP listener on [::]:5672 + completed with 4 plugins. +2023-06-30 17:34:23.738441+00:00 [info] <0.677.0> Server startup complete; 4 plugins started. +2023-06-30 17:34:23.738441+00:00 [info] <0.677.0> * rabbitmq_prometheus +2023-06-30 17:34:23.738441+00:00 [info] <0.677.0> * rabbitmq_management +2023-06-30 17:34:23.738441+00:00 [info] <0.677.0> * rabbitmq_web_dispatch +2023-06-30 17:34:23.738441+00:00 [info] <0.677.0> * rabbitmq_management_agent +``` + +
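+To follow these logs yourself (a sketch, assuming the compose service is named `rabbitmq` as in `docker-compose.yml`), you can run:
+
+```console
+docker compose logs -f rabbitmq
+```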
+
+###### [Back to top](#contributing)
+
+### Celery task-queue
+
+Celery is currently used to process background jobs via a task queue.
+
+If you are implementing a new task, you will need to restart all task-queue-related
+Automappa services (`redis`, `flower`, `web`, `celery`, `rabbitmq`),
+as tasks are registered with celery at instantiation and will not be
+'hot-reloaded' like other parts of the app (see the example restart command below).
+
+For more information on implementing new tasks see the [task docs](../automappa/pages/home/tasks/README.md)
+
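+A minimal sketch of such a restart (assuming the compose service names above):
+
+```console
+docker compose restart redis flower web celery rabbitmq
+```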
+ +What to see at startup + +```console +automappa-celery-1 | +automappa-celery-1 | -------------- celery@beade5f64d0f v5.3.0 (emerald-rush) +automappa-celery-1 | --- ***** ----- +automappa-celery-1 | -- ******* ---- Linux-5.15.49-linuxkit-x86_64-with-glibc2.31 2023-06-09 11:18:28 +automappa-celery-1 | - *** --- * --- +automappa-celery-1 | - ** ---------- [config] +automappa-celery-1 | - ** ---------- .> app: automappa.tasks:0x7f66305c1070 +automappa-celery-1 | - ** ---------- .> transport: amqp://user:**@rabbitmq:5672// +automappa-celery-1 | - ** ---------- .> results: redis://redis:6379/0 +automappa-celery-1 | - *** --- * --- .> concurrency: 2 (prefork) +automappa-celery-1 | -- ******* ---- .> task events: ON +automappa-celery-1 | --- ***** ----- +automappa-celery-1 | -------------- [queues] +automappa-celery-1 | .> celery exchange=celery(direct) key=celery +automappa-celery-1 | +automappa-celery-1 | +automappa-celery-1 | [tasks] +automappa-celery-1 | . automappa.tasks.aggregate_embeddings +automappa-celery-1 | . automappa.tasks.count_kmer +automappa-celery-1 | . automappa.tasks.embed_kmer +automappa-celery-1 | . automappa.tasks.get_embedding_traces_df +automappa-celery-1 | . automappa.tasks.normalize_kmer +automappa-celery-1 | . automappa.tasks.preprocess_clusters_geom_medians +automappa-celery-1 | . automappa.tasks.preprocess_marker_symbols +automappa-celery-1 | +``` + +
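+If the worker is running but you want to double-check which tasks it registered (a sketch, assuming the compose service is named `celery` and the app is `automappa.tasks` as shown above):
+
+```console
+docker compose exec celery celery -A automappa.tasks inspect registered
+```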
+ +###### [Back to top](#contributing) + +### Redis + +
+ +What to see at startup + +```console +1:C 30 Jun 2023 16:28:07.865 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo +1:C 30 Jun 2023 16:28:07.865 # Redis version=7.0.11, bits=64, commit=00000000, modified=0, pid=1, just started +1:C 30 Jun 2023 16:28:07.865 # Warning: no config file specified, using the default config. In order to specify a config file use redis-server /path/to/redis.conf +1:M 30 Jun 2023 16:28:07.867 * monotonic clock: POSIX clock_gettime +1:M 30 Jun 2023 16:28:07.868 * Running mode=standalone, port=6379. +1:M 30 Jun 2023 16:28:07.868 # Server initialized +1:M 30 Jun 2023 16:28:07.868 * Ready to accept connections +``` + +
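+As a quick connectivity check (assuming the compose service is named `redis`), you can ping the server:
+
+```console
+$ docker compose exec redis redis-cli ping
+PONG
+```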
+ +###### [Back to top](#contributing) + +### Flower + +
+ +What to see on successful startup + +```console +DEBUG:automappa.tasks:celery config: +task_send_sent_event: True +task_track_started: True +worker_concurrency: 2 +worker_prefetch_multiplier: 1 +worker_send_task_events: True +broker_url: 'amqp://user:********@rabbitmq:5672//' +result_backend: 'redis://redis:6379/0' +deprecated_settings: None +INFO:flower.command:Visit me at http://0.0.0.0:5555 +INFO:flower.command:Broker: amqp://user:**@rabbitmq:5672// +INFO:flower.command:Registered tasks: +['automappa.tasks.aggregate_embeddings', + 'automappa.tasks.count_kmer', + 'automappa.tasks.embed_kmer', + 'automappa.tasks.get_embedding_traces_df', + 'automappa.tasks.normalize_kmer', + 'automappa.tasks.preprocess_clusters_geom_medians', + 'automappa.tasks.preprocess_marker_symbols', + 'celery.accumulate', + 'celery.backend_cleanup', + 'celery.chain', + 'celery.chord', + 'celery.chord_unlock', + 'celery.chunks', + 'celery.group', + 'celery.map', + 'celery.starmap'] +INFO:kombu.mixins:Connected to amqp://user:**@rabbitmq:5672// +WARNING:flower.inspector:Inspect method registered failed +WARNING:flower.inspector:Inspect method active failed +WARNING:flower.inspector:Inspect method scheduled failed +WARNING:flower.inspector:Inspect method reserved failed +WARNING:flower.inspector:Inspect method active_queues failed +WARNING:flower.inspector:Inspect method conf failed +WARNING:flower.inspector:Inspect method stats failed +WARNING:flower.inspector:Inspect method revoked failed +``` + +
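+The `flower.inspector` warnings above typically appear while the celery worker is still starting up and should stop once a worker is ready. As an additional check (assuming Flower is exposed on port 5555 as above), you can query its HTTP API:
+
+```console
+curl http://localhost:5555/api/workers
+```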
+
+###### [Back to top](#contributing)
+
+## Development resources
+
+### Libraries
+
+- [dash-extensions docs](https://www.dash-extensions.com/ "dash-extensions documentation")
+- [dash-extensions GitHub](https://github.com/thedirtyfew/dash-extensions "dash-extensions GitHub repository")
+- [plotly Dash docs](https://dash.plotly.com/ "plotly Dash documentation")
+- [dash-bootstrap-components docs](http://dash-bootstrap-components.opensource.faculty.ai/ "dash-bootstrap-components documentation")
+- [dash-mantine-components docs](https://www.dash-mantine-components.com/ "dash-mantine-components documentation")
+- [dash-iconify icons browser](https://icon-sets.iconify.design/ "Iconify icon sets")
+
+### Monitoring and Task-queue Services
+
+#### Networking, backend and task management
+
+- [docker-compose networking docs](https://docs.docker.com/compose/networking/)
+- [live mongoDB dash example]()
+- [plotly dash `dcc.Store` docs](https://dash.plotly.com/dash-core-components/store)
+- [StackOverflow: how to access rabbitmq publicly](https://stackoverflow.com/a/57612615 "StackOverflow: how to access RabbitMQ publicly")
+- [celery rabbitmq tutorial](https://suzannewang.com/celery-rabbitmq-tutorial/)
+
+### Miscellaneous
+
+Note: the dash logger is not supported with pattern-matching callbacks.
+
+#### docker-compose services configuration
+
+> NOTE: The Prometheus and Grafana services are disabled by default. You may enable them by uncommenting their entries in the `docker-compose.yml` file.
+
+***NOTE: All of this assumes you have all docker services running via `make up` or `docker-compose up`***
+
+> ~~Provision grafana from `docker-compose.yml`. See: [Grafana provisioning example data source config file](https://grafana.com/docs/grafana/latest/administration/provisioning/#example-data-source-config-file)~~
+> Found a nice blog post and accompanying GitHub repo to follow:
+>
+> - [Medium blog post](https://medium.com/swlh/easy-grafana-and-docker-compose-setup-d0f6f9fcec13)
+> - [github.com/annea-ai/grafana-infrastructure](https://github.com/annea-ai/grafana-infrastructure)
+> - [Grafana docs on Provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/)
+
+- RabbitMQ management - [http://localhost:15672](http://localhost:15672)
+- Flower link - [http://localhost:5555](http://localhost:5555)
+- Prometheus link - [http://localhost:9090](http://localhost:9090)
+- Grafana link - [http://localhost:3000](http://localhost:3000)
+
+#### Grafana configuration
+
+- flower+prometheus+grafana [add prometheus as a data source in grafana]( "flower+prometheus+grafana add prometheus as a data source in grafana")
+- grafana link - [http://localhost:3000](http://localhost:3000)
+
+When adding Prometheus as a data source in Grafana, set the URL to:
+
+```bash
+http://prometheus:9090
+```
+
+Note that the tutorial mentions `http://localhost:9090`, but since Prometheus runs as a `docker-compose` service, the hostname changes to the
+`prometheus` alias (the name of the service in the `docker-compose.yml` file).
+
+[Back to top](#contributing)
diff --git a/docs/page-overview.md b/docs/page-overview.md
new file mode 100644
index 00000000..f78bca48
--- /dev/null
+++ b/docs/page-overview.md
@@ -0,0 +1,17 @@
+# Automappa overview
+
+- [home](#home-page)
+- [mag-refinement page](#mag-refinement-page)
+- [mag-summary page](#mag-summary-page)
+
+## Home page
+
+![HOME page](static/home-tab.png)
+
+## MAG-refinement page
+
+![refinement page](static/refinement-tab.png)
+
+## MAG-summary page
+
+![summary page](static/summary-tab.png)
diff --git a/docs/static/home-tab.png b/docs/static/home-tab.png
new file mode 100644
index 00000000..97cedb70
Binary files /dev/null and b/docs/static/home-tab.png differ
diff --git a/docs/static/refinement-tab.png b/docs/static/refinement-tab.png
new file mode 100644
index 00000000..fec60399
Binary files /dev/null and b/docs/static/refinement-tab.png differ
diff --git a/docs/static/summary-tab.png b/docs/static/summary-tab.png
new file mode 100644
index 00000000..6c446f35
Binary files /dev/null and b/docs/static/summary-tab.png differ
diff --git a/docs/task-queue.md b/docs/task-queue.md
new file mode 100644
index 00000000..b1a6f813
--- /dev/null
+++ b/docs/task-queue.md
@@ -0,0 +1,100 @@
+# Tasks
+
+## Adding a new task module discovery path
+
+To have the Celery task-queue register a page's tasks, the tasks must be
+discoverable from the task module's import path. Here's a simple example
+(it may be easier to show than to explain).
+
+We have our page's tasks module:
+
+```console
+automappa/pages/home/tasks
+├── README.md
+├── __init__.py
+└── task_status_badge.py
+```
+
+We configure celery for this module in the `celeryconfig.py` file.
+
+```python
+# contents of celeryconfig.py
+imports = ("automappa.pages.home.tasks", "automappa.tasks")
+```
+
+This ***almost*** takes care of having celery check for tasks in the module.
+
+Unfortunately, this is not all: we also need to update
+`automappa/pages/home/tasks/__init__.py` with all of the tasks implemented in the
+module for celery to recognize and register them under the `automappa/pages/home/tasks` module.
+
+For example, say we have a task `set_badge_color` defined in `task_status_badge.py`.
+
+We would need to explicitly add this task to `__init__.py` like so:
+
+```python
+# contents of `automappa/pages/home/tasks/__init__.py`
+from .task_status_badge import set_badge_color
+
+__all__ = ["set_badge_color"]
+```
+
+Voilà, now celery will recognize the task on startup. It should look like this:
+
+ +Celery Startup with `set_badge_color` registered + +```console + + -------------- celery@4bbb963e90ec v5.3.1 (emerald-rush) +--- ***** ----- +-- ******* ---- Linux-5.15.49-linuxkit-pr-x86_64-with-glibc2.31 2023-07-13 18:24:15 +- *** --- * --- +- ** ---------- [config] +- ** ---------- .> app: automappa.tasks:0x7f23e6d778e0 +- ** ---------- .> transport: amqp://user:**@rabbitmq:5672// +- ** ---------- .> results: redis://redis:6379/0 +- *** --- * --- .> concurrency: 2 (prefork) +-- ******* ---- .> task events: ON +--- ***** ----- + -------------- [queues] + .> celery exchange=celery(direct) key=celery + +[tasks] + . automappa.pages.home.tasks.task_status_badge.set_badge_color + . automappa.tasks.aggregate_embeddings + . automappa.tasks.count_kmer + . automappa.tasks.embed_kmer + . automappa.tasks.get_embedding_traces_df + . automappa.tasks.normalize_kmer + . automappa.tasks.preprocess_clusters_geom_medians + . automappa.tasks.preprocess_marker_symbols +[2023-07-13 18:24:16,413: WARNING/MainProcess] /opt/conda/lib/python3.9/site-packages/celery/worker/consumer/consumer.py:498: CPendingDeprecationWarning: The broker_connection_retry configuration setting will no longer determine +whether broker connection retries are made during startup in Celery 6.0 and above. +If you wish to retain the existing behavior for retrying connections on startup, +you should set broker_connection_retry_on_startup to True. + warnings.warn( +[2023-07-13 18:24:16,437: INFO/MainProcess] Connected to amqp://user:**@rabbitmq:5672// +[2023-07-13 18:24:16,439: WARNING/MainProcess] /opt/conda/lib/python3.9/site-packages/celery/worker/consumer/consumer.py:498: CPendingDeprecationWarning: The broker_connection_retry configuration setting will no longer determine +whether broker connection retries are made during startup in Celery 6.0 and above. +If you wish to retain the existing behavior for retrying connections on startup, +you should set broker_connection_retry_on_startup to True. + warnings.warn( +[2023-07-13 18:24:16,455: INFO/MainProcess] mingle: searching for neighbors +[2023-07-13 18:24:17,569: INFO/MainProcess] mingle: all alone +[2023-07-13 18:24:17,627: INFO/MainProcess] celery@4bbb963e90ec ready. +``` + +
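+For reference, a task like `set_badge_color` might be defined along these lines (a hypothetical sketch; the real `task_status_badge.py` may differ):
+
+```python
+# hypothetical sketch of automappa/pages/home/tasks/task_status_badge.py
+from celery import shared_task
+
+
+@shared_task
+def set_badge_color(color: str) -> str:
+    # Work done here runs on the celery worker;
+    # the return value is stored in the redis result backend.
+    return color
+```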
+
+## Adding a new task to an existing page
+
+Celery has trouble discovering newly implemented tasks.
+
+Unfortunately, I have not found a convenient workaround for this,
+but each page has its own `tasks` module with an `__init__.py`,
+where any tasks implemented under this module must be imported into
+`__init__.py` and specified in the `__all__` dunder variable.
+
+You can avoid a lot of headaches by recalling this...
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index 914bbe61..44853bbb 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,18 +1,34 @@
 name: automappa
 channels:
-  - plotly
   - conda-forge
   - bioconda
+  - plotly
   - defaults
 dependencies:
   - autometa
-  - flask
-  - celery[redis]
-  - dash
+  - dash==2.10.2
   - dash-bootstrap-components
-  - dash-daq
-  - dash-extensions==0.0.71
+  - dash_cytoscape==0.2.0
+  - flask
+  - flower
   - msgpack-python
-  - numpy
+  - numpy==1.20.0
   - pandas
   - plotly
+  - psycopg2
+  - python-dotenv
+  - python==3.9.*
+  - pydantic
+  - scipy
+  - scikit-learn==0.24
+  - sqlalchemy==1.4.41
+  - sqlmodel==0.0.8
+  - pip
+  - pip:
+    - dash-extensions==1.*
+    - dash-mantine-components==0.12
+    - dash-ag-grid>=2.2
+    - dash-iconify
+    - celery[redis]
+    - geom-median
+    - dash-uploader==0.6
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 00000000..3652c52a
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,3 @@
+black
+pytest
+mypy
\ No newline at end of file
diff --git a/scripts/test_environment.py b/scripts/test_environment.py
deleted file mode 100644
index 993fc422..00000000
--- a/scripts/test_environment.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import dash_table
-from dash_extensions import Download
-from dash_extensions.snippets import send_data_frame
-from dash.dependencies import Input, Output, State
-from dash.exceptions import PreventUpdate
-import dash_daq as daq
-from plotly import graph_objs as go
-import dash_core_components as dcc
-import dash_html_components as html
-import pandas as pd
-import flask
-import dash
-import base64
-import io
diff --git a/setup.py b/setup.py
index 3bc64bf0..5516ad57 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-"""Setup for installation of Autometa."""
+"""Setup for installation of Automappa."""
 
 import os
 
@@ -20,7 +20,7 @@ def read(fname):
     python_requires=">=3.7",
     version=version,
     packages=find_packages(exclude=["tests"]),
-    package_data={"": ["app/assets/*"]},
+    package_data={"": ["assets/*"]},
     entry_points={
         "console_scripts": [
             "automappa = automappa.__main__:main",
@@ -32,6 +32,10 @@ def read(fname):
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/WiscEvan/Automappa",
+    project_urls={
+        "Bug Tracker": "https://github.com/WiscEvan/Automappa/issues",
+        "Bioconda": "https://anaconda.org/bioconda/automappa",
+    },
     license="GNU Affero General Public License v3 or later (AGPLv3+)",
     classifiers=[
         "Programming Language :: Python",