From 7fd7d6d4c7a6d23165cf1ff8c84d29192432e5bc Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Wed, 22 Feb 2023 09:56:08 -0600 Subject: [PATCH 01/62] Migrate files --- .cspell.json | 62 +++ .gitignore | 74 +++ .pre-commit-config.yaml | 58 +++ CHANGELOG.md | 10 + CODE_OF_CONDUCT.md | 132 +++++ CONTRIBUTING.md | 5 + README.md | 19 + docker/Dockerfile | 25 + docker/apt_requirements.txt | 2 + docker/setup.sh | 35 ++ notebooks/01-Configure.ipynb | 241 +++++++++ notebooks/02-WorkflowStructure_Optional.ipynb | 462 ++++++++++++++++++ notebooks/03_Explore.ipynb | 461 +++++++++++++++++ notebooks/04-Drop_Optional.ipynb | 120 +++++ notebooks/py_scripts/01-Configure.py | 114 +++++ ...9af2735c243-e118-4ee3-b586-4b8636f6c322.py | 9 + notebooks/py_scripts/01_Explore_Workflow.py | 287 +++++++++++ .../02-WorkflowStructure_Optional.py | 125 +++++ notebooks/py_scripts/03_Explore.py | 265 ++++++++++ notebooks/py_scripts/04-Drop_Optional.py | 67 +++ pyproject.toml | 17 + requirements.txt | 7 + requirements_dev.txt | 2 + setup.py | 30 ++ tests/conftest.py | 225 +++++++++ tests/test_ingest.py | 4 + tests/test_pipeline_generation.py | 18 + user_data/sessions.csv | 2 + user_data/subjects.csv | 2 + workflow_volume/__init__.py | 6 + workflow_volume/ingest.py | 48 ++ workflow_volume/paths.py | 41 ++ workflow_volume/pipeline.py | 62 +++ workflow_volume/reference.py | 27 + workflow_volume/version.py | 2 + 35 files changed, 3066 insertions(+) create mode 100644 .cspell.json create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 CHANGELOG.md create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md create mode 100644 README.md create mode 100644 docker/Dockerfile create mode 100644 docker/apt_requirements.txt create mode 100644 docker/setup.sh create mode 100644 notebooks/01-Configure.ipynb create mode 100644 notebooks/02-WorkflowStructure_Optional.ipynb create mode 100644 notebooks/03_Explore.ipynb create mode 100644 notebooks/04-Drop_Optional.ipynb create mode 100644 notebooks/py_scripts/01-Configure.py create mode 100644 notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py create mode 100644 notebooks/py_scripts/01_Explore_Workflow.py create mode 100644 notebooks/py_scripts/02-WorkflowStructure_Optional.py create mode 100644 notebooks/py_scripts/03_Explore.py create mode 100644 notebooks/py_scripts/04-Drop_Optional.py create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100755 requirements_dev.txt create mode 100644 setup.py create mode 100644 tests/conftest.py create mode 100644 tests/test_ingest.py create mode 100644 tests/test_pipeline_generation.py create mode 100644 user_data/sessions.csv create mode 100644 user_data/subjects.csv create mode 100644 workflow_volume/__init__.py create mode 100644 workflow_volume/ingest.py create mode 100644 workflow_volume/paths.py create mode 100644 workflow_volume/pipeline.py create mode 100644 workflow_volume/reference.py create mode 100644 workflow_volume/version.py diff --git a/.cspell.json b/.cspell.json new file mode 100644 index 0000000..74024db --- /dev/null +++ b/.cspell.json @@ -0,0 +1,62 @@ +// cSpell Settings +//https://github.com/streetsidesoftware/vscode-spell-checker +{ + "version": "0.2", // Version of the setting file. 
Always 0.2 + "language": "en", // language - current active spelling language + "enabledLanguageIds": [ + "markdown", + "yaml", + "python" + ], + // flagWords - list of words to be always considered incorrect + // This is useful for offensive words and common spelling errors. + // For example "hte" should be "the" + "flagWords": [], + "allowCompoundWords": true, + "ignorePaths": [ + "./element_*.egg-info/*", + "./images/*" + ], + "words": [ + "asarray", + "astype", + "Berens", + "bossdb", + "CICD", + "connectome", + "Connectomics", + "DBURLs", + "djarchive", + "DJARCHIVE", + "Ecker", + "elif", + "Ephys", + "genotyping", + "Hoenselaar", + "IACUC", + "inlinehilite", + "Kasthuri", + "linenums", + "mkdocs", + "mkdocstrings", + "numpy", + "pymdownx", + "pyproject", + "pytest", + "Reimer", + "Roboto", + "RRID", + "Rxiv", + "Sasaki", + "segmentations", + "Shen", + "Siapas", + "Sinz", + "Sitonic", + "Tolias", + "voxel", + "witvliet", + "Yatsenko", + "Zuckerman" + ] +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..130f191 --- /dev/null +++ b/.gitignore @@ -0,0 +1,74 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution, packaging, PyInstaller +.Python +env/ +build/ +*egg*/ +*dist/ +downloads/ +lib*/ +parts/ +var/ +wheels/ +.installed.cfg +*.egg +*.manifest +*.spec +pip-*.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +*.cov* +.cache +nosetests.xml +coverage.xml +.hypothesis/ +.pytest_cache/ +docker-compose.y*ml + +# C extension, Translations +# editors: vscode, emacs, Mac +*.so +*.mo +*.pot +.vscode +**/*~ +**/#*# +**/.#* +.DS_Store + +# Django, Flask, Scrapy, Sphinx, mkdocs +# PyBuilder, Jupyter, SageMath, celery beat +*.log +local_settings.py +instance/ +.webassets-cache +.scrapy +scratchpaper.* +docs/_build/ +/site +target/ +.*checkpoints +celerybeat-schedule +*.sage.py + +# dotenv, virtualenv, pyenv, mypy +.*env +venv/ +ENV/ +.python-version +.mypy_cache/ + +# Spyder/Rope project settings +.spy*project +.ropeproject + +# datajoint, notes, nwb export +dj_local_c*.json +temp* +*nwb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..29eab3b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,58 @@ +default_stages: [commit, push] +exclude: (^.github/|^docs/|^images/|^notebooks/py_scripts/) + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files # prevent giant files from being committed + - id: requirements-txt-fixer + - id: mixed-line-ending + args: ["--fix=lf"] + description: Forces to replace line ending by the UNIX 'lf' character. + + # black + - repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + - id: black-jupyter + args: + - --line-length=88 + + # isort + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + args: ["--profile", "black"] + description: Sorts imports in an alphabetical order + + # flake8 + - repo: https://github.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + args: # arguments to configure flake8 + # making isort line length compatible with black + - "--max-line-length=88" + - "--max-complexity=18" + - "--select=B,C,E,F,W,T4,B9" + + # these are errors that will be ignored by flake8 + # https://www.flake8rules.com/rules/{code}.html + - "--ignore=E203,E501,W503,W605,E402" + # E203 - Colons should not have any space before them. 
+ # Needed for list indexing + # E501 - Line lengths are recommended to be no greater than 79 characters. + # Needed as we conform to 88 + # W503 - Line breaks should occur after the binary operator. + # Needed because not compatible with black + # W605 - a backslash-character pair that is not a valid escape sequence now + # generates a DeprecationWarning. This will eventually become a SyntaxError. + # Needed because we use \d as an escape sequence + # E402 - Place module level import at the top. + # Needed to prevent circular import error diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..35047ef --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,10 @@ +# Changelog + +Observes [Semantic Versioning](https://semver.org/spec/v2.0.0.html) standard and +[Keep a Changelog](https://keepachangelog.com/en/1.0.0/) convention. + +## [0.0.0] - Unreleased + ++ Add - Workflow pipeline + +[0.0.0]: https://github.com/datajoint/workflow-session/releases/tag/0.0.0 diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..0502528 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. 
+Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +[Support@DataJoint.com](mailto:support@datajoint.com). +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. 
+ +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e04d170 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,5 @@ +# Contribution Guidelines + +This project follows the +[DataJoint Contribution Guidelines](https://datajoint.com/docs/community/contribute/). +Please reference the link for more full details. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b211863 --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# Workflow for volumetric data + +This directory provides an example workflow to save the information related to +volumetric data management, using the following DataJoint Elements + ++ [element-lab](https://github.com/datajoint/element-lab) ++ [element-animal](https://github.com/datajoint/element-animal) ++ [element-session](https://github.com/datajoint/element-session) ++ [element-volume](https://github.com/datajoint/element-volume) + +This repository provides a demonstration for setting up a workflow using these Elements +in the [pipeline script](workflow_session/pipeline.py)). + +See the [Element Volume documentation](https://datajoint.com/docs/elements/element-volume/) +for the background information and development timeline. + +For more information on the DataJoint Elements project, please visit our +[general documentation](https://datajoint.com/docs/elements/). This work is supported by +the National Institutes of Health. diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..cb1ef61 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,25 @@ +FROM datajoint/djbase:py3.9-debian-8eb1715 + +USER anaconda:anaconda + +COPY ./workflow-volume/docker/apt_requirements.txt /tmp/ +RUN /entrypoint.sh echo "Installed dependencies." + +WORKDIR /main/workflow-volume + +# Always move local - conditional install in setup.sh +COPY --chown=anaconda:anaconda ./element-lab/ /main/element-lab/ +COPY --chown=anaconda:anaconda ./element-animal/ /main/element-animal/ +COPY --chown=anaconda:anaconda ./element-session/ /main/element-session/ +COPY --chown=anaconda:anaconda ./element-event/ /main/element-event/ +COPY --chown=anaconda:anaconda ./element-interface/ /main/element-interface/ +COPY --chown=anaconda:anaconda ./element-volume/ /main/element-volume/ +COPY --chown=anaconda:anaconda ./workflow-volume/ /main/workflow-volume/ + +# Conditional install - local-all, local-dlc, or git +COPY --chown=anaconda:anaconda ./workflow-volume/docker/setup.sh /main/ +COPY --chown=anaconda:anaconda ./workflow-volume/docker/.env /main/ +RUN chmod 755 /main/setup.sh +RUN chmod 755 /main/.env +RUN /main/setup.sh +RUN rm -f ./dj_local_conf.json diff --git a/docker/apt_requirements.txt b/docker/apt_requirements.txt new file mode 100644 index 0000000..3505bb3 --- /dev/null +++ b/docker/apt_requirements.txt @@ -0,0 +1,2 @@ +git +locales-all diff --git a/docker/setup.sh b/docker/setup.sh new file mode 100644 index 0000000..373f4c0 --- /dev/null +++ b/docker/setup.sh @@ -0,0 +1,35 @@ +#! 
/bin/bash +alias ll='ls -aGg' +export $(grep -v '^#' /main/.env | xargs) + +cd /main/ +echo "INSALL OPTION:" $INSTALL_OPTION + +# Always get djarchive +pip install --no-deps git+https://github.com/datajoint/djarchive-client.git + +if [ "$INSTALL_OPTION" == "local-all" ]; then # all local installs, mapped from host + for f in lab animal session interface; do + pip install -e ./element-${f} + done + pip install -e ./element-array-ephys[nwb] + pip install -e ./workflow-array-ephys +else # all except this repo pip installed + for f in lab animal session interface; do + pip install git+https://github.com/${GITHUB_USERNAME}/element-${f}.git + done + if [ "$INSTALL_OPTION" == "local-ephys" ]; then # only array-ephys items from local + pip install -e ./element-array-ephys[nwb] + pip install -e ./workflow-array-ephys + elif [ "$INSTALL_OPTION" == "git" ]; then # all from github + pip install git+https://github.com/${GITHUB_USERNAME}/element-array-ephys.git + pip install git+https://github.com/${GITHUB_USERNAME}/workflow-array-ephys.git + fi +fi + +# If test cmd contains pytest, install +if [[ "$TEST_CMD" == *pytest* ]]; then + pip install pytest + pip install pytest-cov + pip install opencv-python +fi diff --git a/notebooks/01-Configure.ipynb b/notebooks/01-Configure.ipynb new file mode 100644 index 0000000..250c250 --- /dev/null +++ b/notebooks/01-Configure.ipynb @@ -0,0 +1,241 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# DataJoint U24 - Workflow Volume" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## Configure DataJoint" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "- To run an Element workflow, we need to set up a DataJoint config file, called `dj_local_conf.json`, unique to each machine.\n", + "\n", + "- To upload to BossDB, you'd need to configure an `intern.cfg`.\n", + "\n", + "- These configs only need to be set up once. If you already have them, skip to [02-Workflow-Structure](./02-WorkflowStructure_Optional.ipynb).\n", + "\n", + "- By convention, we set a local config in the workflow directory. You may be interested in [setting a global config](https://docs.datajoint.org/python/setup/01-Install-and-Connect.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# change to the upper level folder to detect dj_local_conf.json\n", + "if os.path.basename(os.getcwd()) == \"notebooks\":\n", + " os.chdir(\"..\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure database host address and credentials" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can set up credentials following [instructions here](https://tutorials.datajoint.io/setting-up/get-database.html)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import datajoint as dj\n", + "import getpass\n", + "\n", + "dj.config[\"database.host\"] = \"{YOUR_HOST}\"\n", + "dj.config[\"database.user\"] = \"{YOUR_USERNAME}\"\n", + "dj.config[\"database.password\"] = getpass.getpass() # enter the password securely" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You should be able to connect to the database at this stage." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dj.conn()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure the `custom` field" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Prefix" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A schema prefix can help manage privelages on a server. Teams who work on the same schemas should use the same prefix.\n", + "\n", + "Setting the prefix to `neuro_` means that every schema we then create will start with `neuro_` (e.g. `neuro_lab`, `neuro_subject`, `neuro_model` etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dj.config[\"custom\"] = {\"database.prefix\": \"neuro_\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Root directory" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`vol_root_data_dir` sets the root path(s) for the Element. Given multiple, the Element will always figure out which root to use based on the files it expects there. This should be the directory shared across all volumetric data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dj.config[\"custom\"] = {\"vol_root_data_dir\": [\"/tmp/test_data/\", \"/tmp/example/\"]}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save the DataJoint config as a json\n", + "\n", + "Once set, the config can either be saved locally or globally. \n", + "\n", + "- The local config would be saved as `dj_local_conf.json` in the workflow directory. This is usefull for managing multiple (demo) pipelines.\n", + "- A global config would be saved as `datajoint_config.json` in the home directory.\n", + "\n", + "When imported, DataJoint will first check for a local config. If none, it will check for a global config." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dj.config.save_local()\n", + "# dj.config.save_global()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuring `intern`" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Please refer [BossDB resources](https://www.youtube.com/watch?v=eVNr6Pzxoh8) for\n", + "information on generating an account and configuring `intern`.\n", + "\n", + "Importantly, you'll need an `intern` config file at your root directory with your BossDB api token as follows:\n", + "\n", + "```cfg\n", + " # ~/.intern/intern.cfg\n", + " [Default]\n", + " protocol = https\n", + " host = api.bossdb.io\n", + " token = \n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the [next notebook](./02-WorkflowStructure_Optional.ipynb) notebook, we'll explore the workflow structure." 
+ ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "ele", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/02-WorkflowStructure_Optional.ipynb b/notebooks/02-WorkflowStructure_Optional.ipynb new file mode 100644 index 0000000..2832cc5 --- /dev/null +++ b/notebooks/02-WorkflowStructure_Optional.ipynb @@ -0,0 +1,462 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# DataJoint U24 - Workflow Volume" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook introduces some useful DataJoint concepts for exploring pipelines featuring Element Volume.\n", + "\n", + "+ DataJoint needs to be configured before running this notebook (see [01-Configure](./01-Configure.ipynb)).\n", + "+ Those familiar with the structure of DataJoint workflows can skip to [03-Explore](./03-Explore.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To load the local config, we move to the package root." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "if os.path.basename(os.getcwd()) == \"notebooks\":\n", + " os.chdir(\"..\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Schemas, Diagrams and Tables" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Schemas are conceptually related sets of tables. By importing schemas from `workflow_volume.pipeline`, we'll declare the tables on the server with the prefix in the config (if we have permission to do so). If these tables are already declared, we'll gain access. \n", + "\n", + "- `dj.list_schemas()` lists all schemas a user has access to in the current database\n", + "- `.schema.list_tables()` will provide names for each table in the format used under the hood." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import datajoint as dj\n", + "from workflow_volume.pipeline import lab, subject, session, volume, bossdb\n", + "\n", + "# dj.list_schemas()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['#resolution',\n", + " '#connectome_paramset',\n", + " '#segmentation_paramset',\n", + " '_connectome__connection',\n", + " 'volume',\n", + " 'segmentation_task',\n", + " '_segmentation',\n", + " '_segmentation__cell',\n", + " 'connectome_task',\n", + " '__cell_mapping',\n", + " '_connectome']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "volume.schema.list_tables()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`dj.Diagram()` plots tables and dependencies in a schema. 
To see additional upstream or downstream connections, add `- N` or `+ N`.\n", + "\n", + "- `volume`: Tables related to volumetric data\n", + "- `bossdb`: Schema to manage BossDB urls for each data type. This could be replaced by a similar schema featuring URLs to another endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "title": "`dj.Diagram()`: plot tables and dependencies" + }, + "outputs": [ + { + "data": { + "image/svg+xml": "\n\n\n\n\n20\n\n20\n\n\n\nvolume.Connectome.Connection\n\n\nvolume.Connectome.Connection\n\n\n\n\n\n20->volume.Connectome.Connection\n\n\n\n\n21\n\n21\n\n\n\n21->volume.Connectome.Connection\n\n\n\n\nbossdb.BossDBURLs.Segmentation\n\n\nbossdb.BossDBURLs.Segmentation\n\n\n\n\n\nvolume.SegmentationTask\n\n\nvolume.SegmentationTask\n\n\n\n\n\nbossdb.BossDBURLs.Segmentation->volume.SegmentationTask\n\n\n\n\nbossdb.BossDBURLs.Connectome\n\n\nbossdb.BossDBURLs.Connectome\n\n\n\n\n\nvolume.ConnectomeTask\n\n\nvolume.ConnectomeTask\n\n\n\n\n\nbossdb.BossDBURLs.Connectome->volume.ConnectomeTask\n\n\n\n\nvolume.SegmentationParamset\n\n\nvolume.SegmentationParamset\n\n\n\n\n\nvolume.SegmentationParamset->volume.SegmentationTask\n\n\n\n\nvolume.ConnectomeParamset\n\n\nvolume.ConnectomeParamset\n\n\n\n\n\nvolume.ConnectomeParamset->volume.ConnectomeTask\n\n\n\n\nvolume.Segmentation.Cell\n\n\nvolume.Segmentation.Cell\n\n\n\n\n\nvolume.Segmentation.Cell->20\n\n\n\n\nvolume.Segmentation.Cell->21\n\n\n\n\nvolume.CellMapping\n\n\nvolume.CellMapping\n\n\n\n\n\nvolume.Segmentation.Cell->volume.CellMapping\n\n\n\n\nvolume.Connectome\n\n\nvolume.Connectome\n\n\n\n\n\nbossdb.BossDBURLs.Volume\n\n\nbossdb.BossDBURLs.Volume\n\n\n\n\n\nvolume.Volume\n\n\nvolume.Volume\n\n\n\n\n\nbossdb.BossDBURLs.Volume->volume.Volume\n\n\n\n\nvolume.ConnectomeTask->volume.Connectome\n\n\n\n\nvolume.Segmentation\n\n\nvolume.Segmentation\n\n\n\n\n\nvolume.Segmentation->volume.Segmentation.Cell\n\n\n\n\nvolume.Segmentation->volume.ConnectomeTask\n\n\n\n\nvolume.Resolution\n\n\nvolume.Resolution\n\n\n\n\n\nvolume.Resolution->volume.Volume\n\n\n\n\nvolume.SegmentationTask->volume.Segmentation\n\n\n\n\nbossdb.BossDBURLs\n\n\nbossdb.BossDBURLs\n\n\n\n\n\nbossdb.BossDBURLs->bossdb.BossDBURLs.Segmentation\n\n\n\n\nbossdb.BossDBURLs->bossdb.BossDBURLs.Volume\n\n\n\n\nvolume.Volume->volume.SegmentationTask\n\n\n\n", + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dj.Diagram(volume) + dj.Diagram(bossdb)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`volume.Volume` is a central table where volumetric data can be ingested, either from images on disk or downloaded from BossDB. The various *task* tables can be used to cue up analysis or ingestion of the various subsequent data types (i.e., segmentation and connectome data. Each segmented cell can be matched with data from another source (e.g., Element Calcium Imaging's `Segmentation.Mask`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dj.Diagram(volume) - 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Table Types\n", + "\n", + "- **Manual table**: green box, manually inserted table, expect new entries daily, e.g. Subject, ProbeInsertion. \n", + "- **Lookup table**: gray box, pre inserted table, commonly used for general facts or parameters. e.g. Strain, ClusteringMethod, ClusteringParamSet. 
\n", + "- **Imported table**: blue oval, auto-processing table, the processing depends on the importing of external files. e.g. process of Clustering requires output files from kilosort2. \n", + "- **Computed table**: red circle, auto-processing table, the processing does not depend on files external to the database, commonly used for \n", + "- **Part table**: plain text, as an appendix to the master table, all the part entries of a given master entry represent a intact set of the master entry. e.g. Unit of a CuratedClustering.\n", + "\n", + "### Table Links\n", + "\n", + "- **One-to-one primary**: thick solid line, share the exact same primary key, meaning the child table inherits all the primary key fields from the parent table as its own primary key. \n", + "- **One-to-many primary**: thin solid line, inherit the primary key from the parent table, but have additional field(s) as part of the primary key as well\n", + "- **Secondary dependency**: dashed line, the child table inherits the primary key fields from parent table as its own secondary attribute." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Common Table Functions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "- `()` show table contents\n", + "- `heading` shows attribute definitions\n", + "- `describe()` show table defintiion with foreign key references" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "title": "Each datajoint table class inside the module corresponds to a table inside the schema. For example, the class `ephys.EphysRecording` correponds to the table `_ephys_recording` in the schema `neuro_ephys` in the database." + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " Resolution of stored data\n", + "
resolution_id (Shorthand for convention), voxel_unit (e.g., nanometers), voxel_z_size / voxel_y_size / voxel_x_size (size of one voxel in voxel_units), downsampling (iterations relative to raw data); row: 990nm, micrometers, 1.0, 0.5, 0.5, 0; Total: 1
\n", + " " + ], + "text/plain": [ + "*resolution_id voxel_unit voxel_z_size voxel_y_size voxel_x_size downsampling \n", + "+------------+ +------------+ +------------+ +------------+ +------------+ +------------+\n", + "990nm micrometers 1.0 0.5 0.5 0 \n", + " (Total: 1)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "volume.Resolution()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "title": "`heading`: show table attributes regardless of foreign key references." + }, + "outputs": [ + { + "data": { + "text/plain": [ + "# Dataset of a contiguous volume\n", + "volume_id : varchar(32) # shorthand for this volume\n", + "resolution_id : varchar(32) # Shorthand for convention\n", + "---\n", + "subject=null : varchar(8) # \n", + "session_id=null : int # \n", + "z_size : int # total number of voxels in z dimension\n", + "y_size : int # total number of voxels in y dimension\n", + "x_size : int # total number of voxels in x dimension\n", + "slicing_dimension=\"z\" : enum('x','y','z') # perspective of slices\n", + "channel : varchar(64) # data type or modality\n", + "collection_experiment=null : varchar(64) # \n", + "url=null : varchar(64) # \n", + "volume_data=null : longblob # Upload assumes (Z, Y, X) np.array" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "volume.Volume.heading" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'-> volume.SegmentationTask\\n---\\nsegmentation_data=null : longblob \\n'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "volume.Segmentation.describe()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "title": "ephys" + }, + "source": [ + "## Other Elements installed with the workflow\n", + "\n", + "- [`lab`](https://github.com/datajoint/element-lab): lab management related information, such as Lab, User, Project, Protocol, Source.\n", + "- [`subject`](https://github.com/datajoint/element-animal): general animal information, User, Genetic background, Death etc.\n", + "- [`session`](https://github.com/datajoint/element-session): general information of experimental sessions.\n", + "- [`calcium-imaging`](https://github.com/datajoint/element-calcium-imaging): imaging schema for generating activity traces. These can be mapped to cells in `volume.Connectome`\n", + "\n", + "For more information about these Elements, see [workflow session](https://github.com/datajoint/workflow-session) or [workflow calcium imaging](https://github.com/datajoint/workflow-calcium-imaging)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dj.Diagram(lab) + dj.Diagram(subject) + dj.Diagram(session)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "title": "[session](https://github.com/datajoint/element-session): experimental session information" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-> subject.Subject\n", + "session_datetime : datetime \n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "'-> subject.Subject\\nsession_datetime : datetime \\n'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "session.Session.describe()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary and next step\n", + "\n", + "- This notebook introduced the overall structures of the schemas and tables in the workflow and relevant tools to explore the schema structure and table definitions.\n", + "\n", + "- The [next notebook](./03-Explore.ipynb) will introduce the detailed steps to run through `workflow-volume`." + ] + } + ], + "metadata": { + "jupytext": { + "encoding": "# -*- coding: utf-8 -*-", + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "ele", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/03_Explore.ipynb b/notebooks/03_Explore.ipynb new file mode 100644 index 0000000..43304cf --- /dev/null +++ b/notebooks/03_Explore.ipynb @@ -0,0 +1,461 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# DataJoint U24 - Workflow Volume\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## Interactively run the workflow\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- If you haven't configured your set up, refer to [01-Configure](./01-Configure.ipynb).\n", + "- For an overview of the schema, refer to [02-WorkflowStructure](02-WorkflowStructure_Optional.ipynb).\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's change the directory to load the local config, `dj_local_conf.json`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# change to the upper level folder to detect dj_local_conf.json\n", + "if os.path.basename(os.getcwd()) == \"notebooks\":\n", + " os.chdir(\"..\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`pipeline.py` activates the various schema and declares other required tables.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Connecting cbroz@dss-db.datajoint.io:3306\n" + ] + } + ], + "source": [ + "import datajoint as dj\n", + "from datetime import datetime\n", + "from 
workflow_volume.pipeline import (\n", + " lab,\n", + " subject,\n", + " session,\n", + " volume,\n", + " bossdb,\n", + " get_session_directory,\n", + " get_vol_root_data_dir,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "## Manually Inserting Entries\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upstream tables\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can insert entries into `dj.Manual` tables (green in diagrams) by providing values as a dictionary or a list of dictionaries.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "# \n", + "subject : varchar(32) # \n", + "session_datetime : datetime(3) # " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "subject.Subject.insert1(\n", + " dict(subject=\"sub1\", sex=\"M\", subject_birth_date=datetime.now()),\n", + " skip_duplicates=True,\n", + ")\n", + "session_key = (subject.Subject & \"subject='sub1'\").fetch1(\"KEY\")\n", + "session.Session.insert1(\n", + " dict(\n", + " **session_key,\n", + " session_id=1,\n", + " session_datetime=datetime.now(),\n", + " ),\n", + " skip_duplicates=True,\n", + ")\n", + "session.SessionDirectory.insert1(\n", + " dict(**session.Session.fetch1(\"KEY\"), session_dir=\"\"),\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`get_session_directory` will fetch your relative directory path form this `SessionDirectory` table.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from element_interface.utils import find_full_path\n", + "\n", + "data_path = find_full_path(get_vol_root_data_dir(), get_session_directory(session_key))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Element Volume Tables\n", + "\n", + "#### Uploading\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Resolution` table keeps track details related to data collection, including units and size in each dimension. `downsampling` indicates number of times the dataset has been compressed by taking every other pixel. Within BossDB, resolution 3 data (here, `downsampling` 3) reflects every 8th pixel, for example.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "volume.Resolution.insert1(\n", + " dict(\n", + " resolution_id=\"990nm\",\n", + " voxel_unit=\"micrometers\",\n", + " voxel_z_size=1,\n", + " voxel_y_size=0.5,\n", + " voxel_x_size=0.5,\n", + " downsampling=0,\n", + " ),\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "BossDB operates with a hierarchy of collections, experiments, and channels. A collection spans multiple experiments. An experiment may collect one or more channels, including electron micrioscopy data, segmentation annotations, and connectome data. These form the portions of a BossDB URL.\n", + "\n", + "Here, we choose some example values. 
With the proper permissions, we can create a BossDB dataset right from our Python environment.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "collection, experiment, volume, segmentation = (\n", + " \"DataJointTest\",\n", + " \"test\",\n", + " \"CalciumImaging\",\n", + " \"Segmented\",\n", + ")\n", + "\n", + "bossdb.BossDBURLs.load_bossdb_info(\n", + " collection=collection,\n", + " experiment=experiment,\n", + " volume=volume,\n", + " segmentation=segmentation,\n", + " skip_duplicates=True,\n", + ")\n", + "url_key = (\n", + " bossdb.BossDBURLs.Volume & dict(collection_experiment=f\"{collection}/{experiment}\")\n", + ").fetch1()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `load_sample_data` function below provides a template for loading a multi-page tif file and saving it into individual Z-axis images.\n", + "\n", + "In the next step, we can choose to upload to BossDB either with individual images in a directory or through an image volume in memory. To store the volume data in the table, replace the contents below with a function that loads your data.\n", + "\n", + "Note: BossDB only accepts image data as `uint8` or `uint16` numpy arrays.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "def load_sample_data():\n", + " from tifffile import TiffFile\n", + " from PIL import Image\n", + " from pathlib import Path\n", + "\n", + " root_dir = get_vol_root_data_dir()[0]\n", + " image_fp = root_dir + \"/.tif\"\n", + " png_fp = root_dir + \"sample/Z%02d.png\" # Z-plane\n", + " image_sample = TiffFile(image_fp).asarray()\n", + "\n", + " image_sample = image_sample.astype(\"uint16\")\n", + " if not Path(png_fp % 0).exists():\n", + " for z in range(20):\n", + " Image.fromarray(image_sample[z]).save(png_fp % z)\n", + " return image_sample" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we can insert into the `Volume` table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "raw_data = load_sample_data()\n", + "raw_data_shape = raw_data.shape\n", + "volume_key = dict(volume_id=\"Thy1\", resolution_id=\"990nm\")\n", + "volume.Volume.insert1(\n", + " dict(\n", + " **volume_key,\n", + " session_id=1,\n", + " z_size=raw_data_shape[0],\n", + " y_size=raw_data_shape[1],\n", + " x_size=raw_data_shape[2],\n", + " channel=volume,\n", + " **url_key,\n", + " volume_data=raw_data,\n", + " ),\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can upload our data either from the data stored in the table or a path to images. 
If this entry is already associated with a `SessionDirectory` entry, we'll look for images in this path.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For other optional parameters, see additional docstring info here:\n", + "# element_volume.export.bossdb.BossDBUpload\n", + "volume.Volume.upload(volume_key, upload_from=\"table\")\n", + "# volume.Volume.upload(volume_key, upload_from=\"dir\", data_extension=\"*pattern*.png\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Volume` and `BossDBURLs` tables offer additional class methods for downloading BossDB data or returning objects for interacting with the data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.BossDBURLs.load_bossdb_info(\n", + " collection=\"Kasthuri\",\n", + " experiment=\"ac4\",\n", + " volume=\"em\",\n", + " segmentation=\"neuron\",\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For other optional parameters, see additional docstring info here:\n", + "# element_volume.readers.bossdb.BossDBInterface.load_data_into_element\n", + "volume.Volume.download(\n", + " \"bossdb://witvliet2020/Dataset_1/em\",\n", + " downsampling=3,\n", + " slice_key=\"[100:120,1000:1500,1000:1500]\",\n", + " save_images=True,\n", + " save_ndarray=True,\n", + " image_mode=\"P\",\n", + " skip_duplicates=True,\n", + ")\n", + "data = volume.Volume.return_bossdb_data(\n", + " volume_key=dict(volume_id=\"witvliet2020/Dataset_1\")\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To load segmentation data, we can set the `task_mode` to load and add additional pararameters to the `SegmentationParamset` table." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "volume.SegmentationParamset.insert_new_params(\n", + " segmentation_method=\"bossdb\",\n", + " paramset_idx=1,\n", + " params=dict(\n", + " slice_key=\"[100:120,1000:1500,1000:1500]\",\n", + " save_images=True,\n", + " save_ndarray=True,\n", + " image_mode=\"P\",\n", + " skip_duplicates=True,\n", + " ),\n", + ")\n", + "volume.SegmentationTask.insert1(\n", + " dict(\n", + " volume_id=\"witvliet2020/Dataset_1\",\n", + " resolution_id=0,\n", + " task_mode=\"load\",\n", + " paramset_idx=1,\n", + " **(\n", + " bossdb.BossDBURLs.Segmentation & \"collection_experiment LIKE 'wit%'\"\n", + " ).fetch1(),\n", + " )\n", + ")\n", + "volume.Segmentation.populate()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the [next notebook](./04-Drop.ipynb), we'll touch on how to drop these various schemas for development.\n" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3.9.13 ('ele')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/04-Drop_Optional.ipynb b/notebooks/04-Drop_Optional.ipynb new file mode 100644 index 0000000..c431aef --- /dev/null +++ b/notebooks/04-Drop_Optional.ipynb @@ -0,0 +1,120 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# DataJoint U24 - Workflow Volume\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change into the parent directory to find the `dj_local_conf.json` file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "import datajoint as dj\n", + "from pathlib import Path\n", + "\n", + "# change to the upper level folder to detect dj_local_conf.json\n", + "if os.path.basename(os.getcwd()) == \"notebooks\":\n", + " os.chdir(\"..\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from workflow_volume.pipeline import (\n", + " imaging_report,\n", + " volume,\n", + " bossdb,\n", + " imaging,\n", + " scan,\n", + " Device,\n", + " session,\n", + " subject,\n", + " surgery,\n", + " lab,\n", + ")\n", + "\n", + "dj.config[\"safemode\"] = True # Set to false to turn off drop confirmation" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Drop schemas\n", + "\n", + "- Schemas are not typically dropped in a production workflow with real data in it.\n", + "- At the developmental phase, it might be required for the table redesign.\n", + "- When dropping all schemas is needed, drop items starting with the most downstream.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# imaging_report.schema.drop()\n", + "# volume.schema.drop()\n", + "# bossdb.schema.drop()\n", + "# imaging.schema.drop()\n", + "# 
scan.schema.drop()\n", + "# Device.drop_quick()\n", + "# session.schema.drop()\n", + "# subject.schema.drop()\n", + "# surgery.schema.drop()\n", + "# lab.schema.drop()" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "ele", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/py_scripts/01-Configure.py b/notebooks/py_scripts/01-Configure.py new file mode 100644 index 0000000..9bf1493 --- /dev/null +++ b/notebooks/py_scripts/01-Configure.py @@ -0,0 +1,114 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.14.1 +# kernelspec: +# display_name: ele +# language: python +# name: python3 +# --- + +# %% [markdown] tags=[] +# # DataJoint U24 - Workflow Volume + +# %% [markdown] tags=[] +# ## Configure DataJoint + +# %% [markdown] tags=[] +# - To run an Element workflow, we need to set up a DataJoint config file, called `dj_local_conf.json`, unique to each machine. +# +# - To upload to BossDB, you'd need to configure an `intern.cfg`. +# +# - These configs only need to be set up once. If you already have them, skip to [02-Workflow-Structure](./02-WorkflowStructure_Optional.ipynb). +# +# - By convention, we set a local config in the workflow directory. You may be interested in [setting a global config](https://docs.datajoint.org/python/setup/01-Install-and-Connect.html). + +# %% +import os + +# change to the upper level folder to detect dj_local_conf.json +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") + +# %% [markdown] +# ### Configure database host address and credentials + +# %% [markdown] +# Now we can set up credentials following [instructions here](https://tutorials.datajoint.io/setting-up/get-database.html). + +# %% +import datajoint as dj +import getpass + +dj.config["database.host"] = "{YOUR_HOST}" +dj.config["database.user"] = "{YOUR_USERNAME}" +dj.config["database.password"] = getpass.getpass() # enter the password securely + +# %% [markdown] +# You should be able to connect to the database at this stage. + +# %% +dj.conn() + +# %% [markdown] +# ### Configure the `custom` field + +# %% [markdown] +# #### Prefix + +# %% [markdown] +# A schema prefix can help manage privelages on a server. Teams who work on the same schemas should use the same prefix. +# +# Setting the prefix to `neuro_` means that every schema we then create will start with `neuro_` (e.g. `neuro_lab`, `neuro_subject`, `neuro_model` etc.) + +# %% +dj.config["custom"] = {"database.prefix": "neuro_"} + +# %% [markdown] +# #### Root directory + +# %% [markdown] +# `vol_root_data_dir` sets the root path(s) for the Element. Given multiple, the Element will always figure out which root to use based on the files it expects there. This should be the directory shared across all volumetric data. 
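+#
+# As a minimal sketch (the relative path below is hypothetical), the root list set in
+# the next cell can later be resolved against a session directory with the workflow's
+# `find_full_path` helper, which returns the full path under whichever root actually
+# contains the files:
+#
+# ```python
+# from element_interface.utils import find_full_path
+# from workflow_volume.paths import get_vol_root_data_dir
+#
+# # "subject1/session1" stands in for a relative session directory under one of the roots
+# data_path = find_full_path(get_vol_root_data_dir(), "subject1/session1")
+# ```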
+ +# %% +dj.config["custom"] = {"vol_root_data_dir": ["/tmp/test_data/", "/tmp/example/"]} + +# %% [markdown] +# ## Save the DataJoint config as a json +# +# Once set, the config can either be saved locally or globally. +# +# - The local config would be saved as `dj_local_conf.json` in the workflow directory. This is usefull for managing multiple (demo) pipelines. +# - A global config would be saved as `datajoint_config.json` in the home directory. +# +# When imported, DataJoint will first check for a local config. If none, it will check for a global config. + +# %% +dj.config.save_local() +# dj.config.save_global() + +# %% [markdown] +# ## Configuring `intern` + +# %% [markdown] +# Please refer [BossDB resources](https://www.youtube.com/watch?v=eVNr6Pzxoh8) for +# information on generating an account and configuring `intern`. +# +# Importantly, you'll need an `intern` config file at your root directory with your BossDB api token as follows: +# +# ```cfg +# # ~/.intern/intern.cfg +# [Default] +# protocol = https +# host = api.bossdb.io +# token = +# ``` +# + +# %% [markdown] +# In the [next notebook](./02-WorkflowStructure_Optional.ipynb) notebook, we'll explore the workflow structure. diff --git a/notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py b/notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py new file mode 100644 index 0000000..e44f060 --- /dev/null +++ b/notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py @@ -0,0 +1,9 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.14.4 +# --- diff --git a/notebooks/py_scripts/01_Explore_Workflow.py b/notebooks/py_scripts/01_Explore_Workflow.py new file mode 100644 index 0000000..c3c23a1 --- /dev/null +++ b/notebooks/py_scripts/01_Explore_Workflow.py @@ -0,0 +1,287 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.14.1 +# kernelspec: +# display_name: ele +# language: python +# name: python3 +# --- + +# DataJoint U24 - Workflow Volume +# + +# ### Intro +# + +# This notebook will describe the steps to use Element Volume for interacting with BossDB. +# Prior to using this notebook, please refer to documentation for +# [Element installation instructions](https://datajoint.com/docs/elements/user-guide/) and refer to [BossDB resources](https://www.youtube.com/watch?v=eVNr6Pzxoh8) for information on generating an account and configuring `intern`. +# +# Importantly, you'll need an `intern` config file, which should look like this: +# +# ```cfg +# # ~/.intern/intern.cfg +# [Default] +# protocol = https +# host = api.bossdb.io +# token = +# ``` +# + +# + +import datajoint as dj +import os + +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") +dj.conn() + +# + +dj.config["custom"]["database.prefix"] = "cbroz_wfboss_" +dj.config["custom"][ + "vol_root_data_dir" +] = "/Users/cb/Documents/data/U24_SampleData/boss/" +from workflow_volume.pipeline import volume, BossDBInterface, bossdb + +# volume.Volume.delete_quick() +# - + +volume.Volume() + +# `BossDBInterface` works much like `intern.array`, but with additional functionality for managing records in your Element Volume schema. 
We can optionally link this dataset to a session in our pipeline via a session key. +# +# Note, however, that we'll have to change our notation slightly. Whereas we can directly index into a dataset to get slices, we'll need to either provide slices as a string or a tuple. +# + +# ### Testing +# + +data = BossDBInterface( + "bossdb://takemura/takemura13/image", resolution=4, session_key={} +) + +# Using `intern` notion, we can look at Z slice 300, from Y voxels 200-500, and X voxels 0 to 700. +# + +data[300, 200:501, 0:701] + +# The same data can be downloaded and loaded into Element Volume using either of the following commands. +# +# Without a session directory provided via `get_session_directory` in `workflow_volume.paths`, we will infer an output directory based on the BossDB path from `get_vol_root_data_dir`. +# + +# data.download(slice_key=(300,slice(200,501),slice(0,701))) +data.download(slice_key="[300,200:501,0:701]") + +# Our volume is stored in the `Volume` + +volume.Volume() + +# With `Slice` corresponding to slices + +volume.Volume.Slice() + +# Each BossDB resolution will have a unique entry in the `Resolution` table + +volume.Resolution() + +# And, the `Zoom` table retain information about the X/Y windows we use. + +volume.Zoom() + +# Changing any of these pieces of information would download different data. + +data.download(slice_key=(slice(300, 311), slice(100, 401), slice(100, 401))) + +# + +import logging +import numpy as np +from workflow_volume.pipeline import volume, bossdb, session, subject +from workflow_volume.paths import get_vol_root_data_dir +from element_volume.volume import * + +# from workflow_volume.pipeline import BossDBInterface + +# em_data = BossDBInterface("bossdb://Kasthuri/ac4/em", resolution=0) +# seg_data = BossDBInterface("bossdb://Kasthuri/ac4/neuron", resolution=0) +# em_data = BossDBInterface("bossdb://witvliet2020/Dataset_1/em", resolution=0) +# seg_data = BossDBInterface("bossdb://witvliet2020/Dataset_1/segmentation", resolution=0) + +logger = logging.getLogger("datajoint") + +volume_key = dict(volume_id="Thy1") + + +def drop_schemas(): + from datajoint_utilities.dj_search.lists import drop_schemas + + prefix = dj.config["custom"]["database.prefix"] + drop_schemas(prefix, dry_run=False, force_drop=True) + + +def drop_tables(): + tables = [ + volume.Connectome, + volume.ConnectomeTask, + volume.ConnectomeParamset, + volume.Segmentation, + volume.Segmentation.Cell, + volume.CellMapping, + volume.SegmentationTask, + volume.SegmentationParamset, + ] + for t in tables: + t.drop_quick() + + +class upload: + @classmethod + def manual_entry(cls): + from datetime import datetime + + subject.Subject.insert1( + dict(subject="sub1", sex="M", subject_birth_date=datetime.now()), + skip_duplicates=True, + ) + session.Session.insert1( + dict( + **(subject.Subject & "subject='sub1'").fetch1("KEY"), + session_id=1, + session_datetime=datetime.now(), + ), + skip_duplicates=True, + ) + session.SessionDirectory.insert1( + dict(**session.Session.fetch1("KEY"), session_dir="sample"), + skip_duplicates=True, + ) + volume.Resolution.insert1( + dict( + resolution_id="990nm", + voxel_unit="micrometers", + voxel_z_size=1, + voxel_y_size=0.5, + voxel_x_size=0.5, + downsampling=0, + ), + skip_duplicates=True, + ) + + coll, exp, chann, seg = ( + "DataJointTest", + "test", + "CalciumImaging", + "Segmentation", + ) + + bossdb.BossDBURLs.load_bossdb_info( + collection=coll, + experiment=exp, + volume=chann, + segmentation=seg, + skip_duplicates=True, + ) + url_key = ( + 
bossdb.BossDBURLs.Volume & dict(collection_experiment=f"{coll}/{exp}") + ).fetch1() + + raw_data = cls.load_sample_data() + raw_data_shape = raw_data.shape + + volume.Volume.insert1( + dict( + volume_id="Thy1", + resolution_id="990nm", + session_id=1, + z_size=raw_data_shape[0], + y_size=raw_data_shape[1], + x_size=raw_data_shape[2], + channel=chann, + **url_key, + volume_data=raw_data, + ), + skip_duplicates=True, + ) + + def load_sample_data(): + from tifffile import TiffFile + from PIL import Image + from pathlib import Path + + root_dir = get_vol_root_data_dir()[0] + image_fp = root_dir + "sample/zstack_Gcamp_00001_00012.tif" + png_fp = root_dir + "sample/Z%02d.png" + image_sample = TiffFile(image_fp).asarray()[250:270, 1000:1246, :] + if not Path(png_fp % 0).exists(): + for z in range(20): + Image.fromarray(image_sample[z]).save(png_fp % z) + return image_sample + + def upload_from_volume(): + volume.Volume.upload(volume_key) + # Error uploading chunk 0-20: ndarray is not C-contiguous + + +class download: + def add_manual_boss_url(): + bossdb.BossDBURLs.load_bossdb_info( + collection="Kasthuri", + experiment="ac4", + volume="em", + segmentation="neuron", + skip_duplicates=True, + ) + bossdb.BossDBURLs.load_bossdb_info( + collection="witvliet2020", + experiment="Dataset_1", + volume="em", + segmentation="segmentation", + skip_duplicates=True, + ) + + def download_volume_via_classmeth(): + volume.Volume.download( + url="bossdb://witvliet2020/Dataset_1/em", + slice_key="[100:120,1000:1500,1000:1500]", + save_images=True, + save_ndarray=True, + image_mode="P", + skip_duplicates=True, + ) + + def download_seg_via_classmeth(): + volume.SegmentationParamset.insert_new_params( + segmentation_method="bossdb", + paramset_idx=1, + params=dict( + slice_key="[100:120,1000:1500,1000:1500]", + save_images=True, + save_ndarray=True, + image_mode="P", + skip_duplicates=True, + ), + ) + volume.SegmentationTask.insert1( + dict( + volume_id="witvliet2020/Dataset_1", + resolution_id=0, + task_mode="load", + paramset_idx=1, + **( + bossdb.BossDBURLs.Segmentation & "collection_experiment LIKE 'wit%'" + ).fetch1(), + ) + ) + volume.Segmentation.populate() + + @classmethod + def run_all(cls): + cls.add_manual_boss_url() + cls.download_volume_via_classmeth() + cls.download_seg_via_classmeth() + diff --git a/notebooks/py_scripts/02-WorkflowStructure_Optional.py b/notebooks/py_scripts/02-WorkflowStructure_Optional.py new file mode 100644 index 0000000..0d9384e --- /dev/null +++ b/notebooks/py_scripts/02-WorkflowStructure_Optional.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.14.1 +# kernelspec: +# display_name: ele +# language: python +# name: python3 +# --- + +# %% [markdown] tags=[] +# # DataJoint U24 - Workflow Volume + +# %% [markdown] +# ## Introduction + +# %% [markdown] +# This notebook introduces some useful DataJoint concepts for exploring pipelines featuring Element Volume. +# +# + DataJoint needs to be configured before running this notebook (see [01-Configure](./01-Configure.ipynb)). +# + Those familiar with the structure of DataJoint workflows can skip to [03-Explore](./03-Explore.ipynb). + +# %% [markdown] +# To load the local config, we move to the package root. 
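+#
+# If you'd rather not change directories, the same local config can be loaded explicitly. A sketch using DataJoint's config loader:
+#
+# ```python
+# import datajoint as dj
+#
+# dj.config.load("dj_local_conf.json")  # read the local config from an explicit path
+# ```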
+
+# %%
+import os
+
+if os.path.basename(os.getcwd()) == "notebooks":
+    os.chdir("..")
+
+# %% [markdown]
+# ## Schemas, Diagrams and Tables
+
+# %% [markdown]
+# Schemas are conceptually related sets of tables. By importing schemas from `workflow_volume.pipeline`, we'll declare the tables on the server with the prefix in the config (if we have permission to do so). If these tables are already declared, we'll gain access.
+#
+# - `dj.list_schemas()` lists all schemas a user has access to in the current database
+# - `<schema>.schema.list_tables()` will provide names for each table in the format used under the hood.
+
+# %%
+import datajoint as dj
+from workflow_volume.pipeline import lab, subject, session, volume, bossdb
+
+# dj.list_schemas()
+
+# %%
+volume.schema.list_tables()
+
+# %% [markdown]
+# `dj.Diagram()` plots tables and dependencies in a schema. To see additional upstream or downstream connections, add `- N` or `+ N`.
+#
+# - `volume`: Tables related to volumetric data
+# - `bossdb`: Schema to manage BossDB URLs for each data type. This could be replaced by a similar schema featuring URLs to another endpoint.
+
+# %% `dj.Diagram()`: plot tables and dependencies
+dj.Diagram(volume) + dj.Diagram(bossdb)
+
+# %% [markdown]
+# `volume.Volume` is a central table where volumetric data can be ingested, either from images on disk or downloaded from BossDB. The various *task* tables can be used to cue up analysis or ingestion of the subsequent data types (i.e., segmentation and connectome data). Each segmented cell can be matched with data from another source (e.g., Element Calcium Imaging's `Segmentation.Mask`).
+
+# %%
+dj.Diagram(volume) - 1
+
+# %% [markdown]
+# ### Table Types
+#
+# - **Manual table**: green box, manually inserted table, expect new entries daily, e.g. Subject, ProbeInsertion.
+# - **Lookup table**: gray box, pre-inserted table, commonly used for general facts or parameters, e.g. Strain, ClusteringMethod, ClusteringParamSet.
+# - **Imported table**: blue oval, auto-processing table, where processing depends on importing external files, e.g. Clustering requires output files from kilosort2.
+# - **Computed table**: red circle, auto-processing table, where processing does not depend on files external to the database, commonly used for results computed entirely from data already in the pipeline.
+# - **Part table**: plain text, an appendix to its master table; all the part entries of a given master entry represent an intact set for that master entry, e.g. Unit of a CuratedClustering.
+#
+# ### Table Links
+#
+# - **One-to-one primary**: thick solid line, shares the exact same primary key, meaning the child table inherits all the primary key fields from the parent table as its own primary key.
+# - **One-to-many primary**: thin solid line, inherits the primary key from the parent table, but has additional field(s) as part of the primary key as well.
+# - **Secondary dependency**: dashed line, the child table inherits the primary key fields from the parent table as its own secondary attribute.
+
+# %% [markdown]
+# ## Common Table Functions
+
+# %% [markdown]
+#
+# - `<table>()` shows table contents
+# - `heading` shows attribute definitions
+# - `describe()` shows the table definition with foreign key references
+
+# %% Each datajoint table class inside the module corresponds to a table inside the schema. For example, the class `ephys.EphysRecording` corresponds to the table `_ephys_recording` in the schema `neuro_ephys` in the database.
+volume.Resolution()
+
+# %% `heading`: show table attributes regardless of foreign key references.
+volume.Volume.heading + +# %% +volume.Segmentation.describe() + +# %% ephys [markdown] +# ## Other Elements installed with the workflow +# +# - [`lab`](https://github.com/datajoint/element-lab): lab management related information, such as Lab, User, Project, Protocol, Source. +# - [`subject`](https://github.com/datajoint/element-animal): general animal information, User, Genetic background, Death etc. +# - [`session`](https://github.com/datajoint/element-session): general information of experimental sessions. +# - [`calcium-imaging`](https://github.com/datajoint/element-calcium-imaging): imaging schema for generating activity traces. These can be mapped to cells in `volume.Connectome` +# +# For more information about these Elements, see [workflow session](https://github.com/datajoint/workflow-session) or [workflow calcium imaging](https://github.com/datajoint/workflow-calcium-imaging). + +# %% +dj.Diagram(lab) + dj.Diagram(subject) + dj.Diagram(session) + +# %% [session](https://github.com/datajoint/element-session): experimental session information +session.Session.describe() + +# %% [markdown] +# ## Summary and next step +# +# - This notebook introduced the overall structures of the schemas and tables in the workflow and relevant tools to explore the schema structure and table definitions. +# +# - The [next notebook](./03-Explore.ipynb) will introduce the detailed steps to run through `workflow-volume`. diff --git a/notebooks/py_scripts/03_Explore.py b/notebooks/py_scripts/03_Explore.py new file mode 100644 index 0000000..ee2cbb7 --- /dev/null +++ b/notebooks/py_scripts/03_Explore.py @@ -0,0 +1,265 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.14.1 +# kernelspec: +# display_name: Python 3.9.13 ('ele') +# language: python +# name: python3 +# --- + +# %% [markdown] tags=[] +# # DataJoint U24 - Workflow Volume +# + +# %% [markdown] tags=[] +# ## Interactively run the workflow +# + +# %% [markdown] +# - If you haven't configured your set up, refer to [01-Configure](./01-Configure.ipynb). +# - For an overview of the schema, refer to [02-WorkflowStructure](02-WorkflowStructure_Optional.ipynb). +# + +# %% [markdown] +# Let's change the directory to load the local config, `dj_local_conf.json`. +# + +# %% +import os + +# change to the upper level folder to detect dj_local_conf.json +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") + +# %% [markdown] +# `pipeline.py` activates the various schema and declares other required tables. +# + +# %% +import datajoint as dj +from datetime import datetime +from workflow_volume.pipeline import ( + lab, + subject, + session, + volume, + bossdb, + get_session_directory, + get_vol_root_data_dir, +) + +# %% [markdown] tags=[] +# ## Manually Inserting Entries +# + +# %% [markdown] +# ### Upstream tables +# + +# %% [markdown] +# We can insert entries into `dj.Manual` tables (green in diagrams) by providing values as a dictionary or a list of dictionaries. 
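+#
+# Several rows can also go in a single call by passing a list of dictionaries to `insert` (a sketch; the subject IDs below are made up):
+#
+# ```python
+# subject.Subject.insert(
+#     [
+#         dict(subject="subA", sex="F", subject_birth_date="2023-01-01"),
+#         dict(subject="subB", sex="M", subject_birth_date="2023-01-02"),
+#     ],
+#     skip_duplicates=True,
+# )
+# ```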
+# + +# %% +subject.Subject.insert1( + dict(subject="sub1", sex="M", subject_birth_date=datetime.now()), + skip_duplicates=True, +) +session_key = (subject.Subject & "subject='sub1'").fetch1("KEY") +session.Session.insert1( + dict( + **session_key, + session_id=1, + session_datetime=datetime.now(), + ), + skip_duplicates=True, +) +session.SessionDirectory.insert1( + dict(**session.Session.fetch1("KEY"), session_dir=""), + skip_duplicates=True, +) + +# %% [markdown] +# `get_session_directory` will fetch your relative directory path form this `SessionDirectory` table. +# + +# %% +from element_interface.utils import find_full_path + +data_path = find_full_path(get_vol_root_data_dir(), get_session_directory(session_key)) + +# %% [markdown] tags=[] +# ### Element Volume Tables +# +# #### Uploading +# + +# %% [markdown] +# The `Resolution` table keeps track details related to data collection, including units and size in each dimension. `downsampling` indicates number of times the dataset has been compressed by taking every other pixel. Within BossDB, resolution 3 data (here, `downsampling` 3) reflects every 8th pixel, for example. +# + +# %% +volume.Resolution.insert1( + dict( + resolution_id="990nm", + voxel_unit="micrometers", + voxel_z_size=1, + voxel_y_size=0.5, + voxel_x_size=0.5, + downsampling=0, + ), + skip_duplicates=True, +) + +# %% [markdown] +# BossDB operates with a hierarchy of collections, experiments, and channels. A collection spans multiple experiments. An experiment may collect one or more channels, including electron micrioscopy data, segmentation annotations, and connectome data. These form the portions of a BossDB URL. +# +# Here, we choose some example values. With the proper permissions, we can create a BossDB dataset right from our Python environment. +# + +# %% +collection, experiment, volume, segmentation = ( + "DataJointTest", + "test", + "CalciumImaging", + "Segmented", +) + +bossdb.BossDBURLs.load_bossdb_info( + collection=collection, + experiment=experiment, + volume=volume, + segmentation=segmentation, + skip_duplicates=True, +) +url_key = ( + bossdb.BossDBURLs.Volume & dict(collection_experiment=f"{collection}/{experiment}") +).fetch1() + + +# %% [markdown] +# The `load_sample_data` function below provides a template for loading a multi-page tif file and saving it into individual Z-axis images. +# +# In the next step, we can choose to upload to BossDB either with individual images in a directory or through an image volume in memory. To store the volume data in the table, replace the contents below with a function that loads your data. +# +# Note: BossDB only accepts image data as `uint8` or `uint16` numpy arrays. +# + +# %% +def load_sample_data(): + from tifffile import TiffFile + from PIL import Image + from pathlib import Path + + root_dir = get_vol_root_data_dir()[0] + image_fp = root_dir + "/.tif" + png_fp = root_dir + "sample/Z%02d.png" # Z-plane + image_sample = TiffFile(image_fp).asarray() + + image_sample = image_sample.astype("uint16") + if not Path(png_fp % 0).exists(): + for z in range(20): + Image.fromarray(image_sample[z]).save(png_fp % z) + return image_sample + + +# %% [markdown] +# Now, we can insert into the `Volume` table. 
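+#
+# Note that the `volume_data` attribute expects a (Z, Y, X) `numpy` array (see the `Upload assumes (Z, Y, X) np.array` comment in `Volume.heading`). If your stack is saved in (X, Y, Z) order, transpose it first; a minimal sketch (the array names are illustrative):
+#
+# ```python
+# import numpy as np
+#
+# zyx_stack = np.transpose(xyz_stack, (2, 1, 0))  # reorder axes from (X, Y, Z) to (Z, Y, X)
+# ```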
+ +# %% +raw_data = load_sample_data() +raw_data_shape = raw_data.shape +volume_key = dict(volume_id="Thy1", resolution_id="990nm") +volume.Volume.insert1( + dict( + **volume_key, + session_id=1, + z_size=raw_data_shape[0], + y_size=raw_data_shape[1], + x_size=raw_data_shape[2], + channel=volume, + **url_key, + volume_data=raw_data, + ), + skip_duplicates=True, +) + +# %% [markdown] +# Finally, we can upload our data either from the data stored in the table or a path to images. If this entry is already associated with a `SessionDirectory` entry, we'll look for images in this path. +# + +# %% +# For other optional parameters, see additional docstring info here: +# element_volume.export.bossdb.BossDBUpload +volume.Volume.upload(volume_key, upload_from="table") +# volume.Volume.upload(volume_key, upload_from="dir", data_extension="*pattern*.png") + +# %% [markdown] +# #### Download + +# %% [markdown] +# The `Volume` and `BossDBURLs` tables offer additional class methods for downloading BossDB data or returning objects for interacting with the data. +# + +# %% +bossdb.BossDBURLs.load_bossdb_info( + collection="Kasthuri", + experiment="ac4", + volume="em", + segmentation="neuron", + skip_duplicates=True, +) + +# %% +# For other optional parameters, see additional docstring info here: +# element_volume.readers.bossdb.BossDBInterface.load_data_into_element +volume.Volume.download( + "bossdb://witvliet2020/Dataset_1/em", + downsampling=3, + slice_key="[100:120,1000:1500,1000:1500]", + save_images=True, + save_ndarray=True, + image_mode="P", + skip_duplicates=True, +) +data = volume.Volume.return_bossdb_data( + volume_key=dict(volume_id="witvliet2020/Dataset_1") +) + +# %% [markdown] +# To load segmentation data, we can set the `task_mode` to load and add additional pararameters to the `SegmentationParamset` table. + +# %% +volume.SegmentationParamset.insert_new_params( + segmentation_method="bossdb", + paramset_idx=1, + params=dict( + slice_key="[100:120,1000:1500,1000:1500]", + save_images=True, + save_ndarray=True, + image_mode="P", + skip_duplicates=True, + ), +) +volume.SegmentationTask.insert1( + dict( + volume_id="witvliet2020/Dataset_1", + resolution_id=0, + task_mode="load", + paramset_idx=1, + **( + bossdb.BossDBURLs.Segmentation & "collection_experiment LIKE 'wit%'" + ).fetch1(), + ) +) +volume.Segmentation.populate() + +# %% [markdown] +# In the [next notebook](./04-Drop.ipynb), we'll touch on how to drop these various schemas for development. +# diff --git a/notebooks/py_scripts/04-Drop_Optional.py b/notebooks/py_scripts/04-Drop_Optional.py new file mode 100644 index 0000000..f8ae415 --- /dev/null +++ b/notebooks/py_scripts/04-Drop_Optional.py @@ -0,0 +1,67 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:percent +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.14.1 +# kernelspec: +# display_name: ele +# language: python +# name: python3 +# --- + +# %% [markdown] tags=[] +# # DataJoint U24 - Workflow Volume +# + +# %% [markdown] +# Change into the parent directory to find the `dj_local_conf.json` file. 
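+#
+# Once connected, it can also help to confirm which schemas carry your prefix before dropping anything (a sketch; `neuro_` is the example prefix from the first notebook):
+#
+# ```python
+# import datajoint as dj
+#
+# [s for s in dj.list_schemas() if s.startswith("neuro_")]
+# ```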
+# + +# %% tags=[] +import os +import datajoint as dj +from pathlib import Path + +# change to the upper level folder to detect dj_local_conf.json +if os.path.basename(os.getcwd()) == "notebooks": + os.chdir("..") + +# %% +from workflow_volume.pipeline import ( + imaging_report, + volume, + bossdb, + imaging, + scan, + Device, + session, + subject, + surgery, + lab, +) + +dj.config["safemode"] = True # Set to false to turn off drop confirmation + +# %% [markdown] +# ## Drop schemas +# +# - Schemas are not typically dropped in a production workflow with real data in it. +# - At the developmental phase, it might be required for the table redesign. +# - When dropping all schemas is needed, drop items starting with the most downstream. +# + +# %% +# imaging_report.schema.drop() +# volume.schema.drop() +# bossdb.schema.drop() +# imaging.schema.drop() +# scan.schema.drop() +# Device.drop_quick() +# session.schema.drop() +# subject.schema.drop() +# surgery.schema.drop() +# lab.schema.drop() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9cc021d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,17 @@ +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "--capture=tee-sys -p no:warnings --dj-teardown False --dj-verbose True --sw --cov=element_volume --cov-report term-missing" +# Verbosity: -v for pytest more verbose +# Warnings: -p no:warnings to disable +# Stepwise: --sw to restart pytest at last failure point +# Debug: --pdb enter debug mode on first failure +# Capturing output: -s for none, --capture=tee-sys for both stdout and stderr +# Coverage: --cov={package} - package for which we're measuring coverage +# Coverage report: --cov-report term-missing send report to stdout with line numbers of missed +# Custom teardown: --dj-teardown {True, False} to teardown tables, +# Custom verbose: --dj-verbose {True,False} print out dj info like table inserts +testpaths = [ + "tests", +] +norecursedirs = ["docs", "*.egg-info", ".git"] +# PYTHONDONTWRITEBYTECODE=1 # Setting this env variable will speed up pytest diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3abe6c3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +datajoint>=0.13.0 +element-animal +element-lab +element-session +element-volume +intern +# boss-ingest diff --git a/requirements_dev.txt b/requirements_dev.txt new file mode 100755 index 0000000..9955dec --- /dev/null +++ b/requirements_dev.txt @@ -0,0 +1,2 @@ +pytest +pytest-cov diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..032205d --- /dev/null +++ b/setup.py @@ -0,0 +1,30 @@ +from os import path + +from setuptools import find_packages, setup + +pkg_name = "workflow_volume" +here = path.abspath(path.dirname(__file__)) + +long_description = """ +# Workflow for volumetric data +""" + +with open(path.join(here, "requirements.txt")) as f: + requirements = f.read().splitlines() + +with open(path.join(here, pkg_name, "version.py")) as f: + exec(f.read()) + +setup( + name="workflow-volume", + version=__version__, # noqa: F821 + description="DataJoint Workflow for Element Volume", + long_description=long_description, + author="DataJoint", + author_email="info@datajoint.com", + license="MIT", + url="https://github.com/datajoint/workflow-volume", + keywords="neuroscience volumetric BossDB datajoint", + packages=find_packages(exclude=["contrib", "docs", "tests*"]), + install_requires=requirements, +) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..26efb8f --- /dev/null 
+++ b/tests/conftest.py @@ -0,0 +1,225 @@ +import logging +import os +import pathlib +from contextlib import nullcontext +from pathlib import Path + +import datajoint as dj +import pytest +from element_interface.utils import QuietStdOut, find_full_path, value_to_bool + +from workflow_volume.ingest import ingest_sessions +from workflow_volume.paths import get_vol_root_data_dir + +# ------------------- SOME CONSTANTS ------------------- + + +logger = logging.getLogger("datajoint") + +pathlib.Path("./tests/user_data").mkdir(exist_ok=True) +pathlib.Path("./tests/user_data/lab").mkdir(exist_ok=True) + +sessions_dirs = [ + "subject1/session1", + "subject2/session1", + "subject2/session2", + "subject3/session1", + "subject4/experiment1", + "subject5/session1", + "subject6/session1", +] + + +def pytest_addoption(parser): + """ + Permit constants when calling pytest at command line e.g., pytest --dj-verbose False + + Arguments: + --dj-verbose (bool): Default True. Pass print statements from Elements. + --dj-teardown (bool): Default True. Delete pipeline on close. + --dj-datadir (str): Default ./tests/user_data. Relative path of test CSV data. + """ + parser.addoption( + "--dj-verbose", + action="store", + default="True", + help="Verbose for dj items: True or False", + choices=("True", "False"), + ) + parser.addoption( + "--dj-teardown", + action="store", + default="True", + help="Verbose for dj items: True or False", + choices=("True", "False"), + ) + parser.addoption( + "--dj-datadir", + action="store", + default="./tests/user_data", + help="Relative path for saving tests data", + ) + + +@pytest.fixture(autouse=True, scope="session") +def setup(request): + """Take passed commandline variables, set as global""" + global verbose, _tear_down, test_user_data_dir, verbose_context + + verbose = value_to_bool(request.config.getoption("--dj-verbose")) + _tear_down = value_to_bool(request.config.getoption("--dj-teardown")) + test_user_data_dir = Path(request.config.getoption("--dj-datadir")) + test_user_data_dir.mkdir(exist_ok=True) + + if not verbose: + logging.getLogger("datajoint").setLevel(logging.CRITICAL) + + verbose_context = nullcontext() if verbose else QuietStdOut() + + yield verbose_context, verbose + + +# -------------------- HELPER CLASS -------------------- + + +def null_function(*args, **kwargs): + pass + + +# ---------------------- FIXTURES ---------------------- + + +@pytest.fixture(autouse=True, scope="session") +def dj_config(setup): + """If dj_local_config exists, load""" + if pathlib.Path("./dj_local_conf.json").exists(): + dj.config.load("./dj_local_conf.json") + dj.config.update( + { + "safemode": False, + "database.host": os.environ.get("DJ_HOST") or dj.config["database.host"], + "database.password": os.environ.get("DJ_PASS") + or dj.config["database.password"], + "database.user": os.environ.get("DJ_USER") or dj.config["database.user"], + "custom": { + "ephys_mode": ( + os.environ.get("EPHYS_MODE") or dj.config["custom"]["ephys_mode"] + ), + "database.prefix": ( + os.environ.get("DATABASE_PREFIX") + or dj.config["custom"]["database.prefix"] + ), + "ephys_root_data_dir": ( + os.environ.get("EPHYS_ROOT_DATA_DIR").split(",") + if os.environ.get("EPHYS_ROOT_DATA_DIR") + else dj.config["custom"]["ephys_root_data_dir"] + ), + }, + } + ) + return + + +@pytest.fixture(autouse=True, scope="session") +def test_data(dj_config): + """If data does not exist or partial data is present, + attempt download with DJArchive to the first listed root directory""" + test_data_exists = True + + for p in 
sessions_dirs: + try: + find_full_path(get_vol_root_data_dir(), p) + except FileNotFoundError: + test_data_exists = False # If data not found + break + + if not test_data_exists: # attempt to djArchive dowload + try: + dj.config["custom"].update( + { + "djarchive.client.endpoint": os.environ[ + "DJARCHIVE_CLIENT_ENDPOINT" + ], + "djarchive.client.bucket": os.environ["DJARCHIVE_CLIENT_BUCKET"], + "djarchive.client.access_key": os.environ[ + "DJARCHIVE_CLIENT_ACCESSKEY" + ], + "djarchive.client.secret_key": os.environ[ + "DJARCHIVE_CLIENT_SECRETKEY" + ], + } + ) + except KeyError as e: + raise FileNotFoundError( + "Full test data not available.\nAttempting to download from DJArchive," + + " but no credentials found in environment variables.\nError:" + + str(e) + ) + + import djarchive_client + + client = djarchive_client.client() + + test_data_dir = get_vol_root_data_dir() + if isinstance(test_data_dir, list): # if multiple root dirs, first + test_data_dir = test_data_dir[0] + + client.download( + "workflow-array-ephys-benchmark", + "v2", + str(test_data_dir), + create_target=False, + ) + return + + +@pytest.fixture(autouse=True, scope="session") +def pipeline(): + from workflow_volume import pipeline + + yield { + "subject": pipeline.subject, + "lab": pipeline.lab, + "ephys": pipeline.ephys, + "probe": pipeline.probe, + "ephys_report": pipeline.ephys_report, + "session": pipeline.session, + "get_vol_root_data_dir": pipeline.get_vol_root_data_dir, + "ephys_mode": pipeline.ephys_mode, + } + + if _tear_down: + with verbose_context: + pipeline.subject.Subject.delete() + + +@pytest.fixture(scope="session") +def ingest_data(setup, pipeline, test_data): + """For each input, generates csv in test_user_data_dir and ingests in schema""" + # CSV as list of 3: filename, relevant tables, content + all_csvs = { + "file.csv": { + "func": null_function, + "args": {}, + "content": ["header,one,two", "info,a,b"], + }, + "session.csv": { + "func": ingest_sessions, + "args": {}, + "content": ["header,one,two", "info,a,b"], + }, + } + # If data in last table, presume didn't tear down last time, skip insert + if len(pipeline["ephys"].Clustering()) == 0: + for csv_filename, csv_dict in all_csvs.items(): + csv_path = test_user_data_dir / csv_filename # add prefix for rel path + Path(csv_path).write_text("\n".join(csv_dict["content"]) + "\n") + csv_dict["func"](verbose=verbose, skip_duplicates=True, **csv_dict["args"]) + + yield all_csvs + + if _tear_down: + with verbose_context: + for csv in all_csvs: + csv_path = test_user_data_dir / csv + csv_path.unlink() diff --git a/tests/test_ingest.py b/tests/test_ingest.py new file mode 100644 index 0000000..916c3a7 --- /dev/null +++ b/tests/test_ingest.py @@ -0,0 +1,4 @@ +def test_ingest_volume(pipeline, ingest_data): + """Check length of various Volume schema tables""" + volume = pipeline["Volume"] + assert len(volume.Volume()) == 2, f"Check Volume: len={len(volume.Volume())}" diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py new file mode 100644 index 0000000..7e7d740 --- /dev/null +++ b/tests/test_pipeline_generation.py @@ -0,0 +1,18 @@ +def test_generate_pipeline(pipeline): + session = pipeline["session"] + volume = pipeline["volume"] + + volume_children = volume.volume.children() + assert volume.Volume.full_table_name in volume_children + assert session.Session.volume.full_table_name in volume_children + + # test connection Subject -> schema children + session_children_links = session.Session.children() + session_children_list = [ + 
volume.Volume, + ] + + for child in session_children_list: + assert ( + child.full_table_name in session_children_links + ), f"session.Session.children() did not include {child.full_table_name}" diff --git a/user_data/sessions.csv b/user_data/sessions.csv new file mode 100644 index 0000000..1d3ccb1 --- /dev/null +++ b/user_data/sessions.csv @@ -0,0 +1,2 @@ +subject,session_dir,session_id,session_datetime +subject8,,1,2022-05-05 12:13:14 diff --git a/user_data/subjects.csv b/user_data/subjects.csv new file mode 100644 index 0000000..a2e14a6 --- /dev/null +++ b/user_data/subjects.csv @@ -0,0 +1,2 @@ +subject,sex,subject_birth_date,subject_description +subject8,F,2023-03-03,EM data collection diff --git a/workflow_volume/__init__.py b/workflow_volume/__init__.py new file mode 100644 index 0000000..111bd2f --- /dev/null +++ b/workflow_volume/__init__.py @@ -0,0 +1,6 @@ +import datajoint as dj + +if "custom" not in dj.config: + dj.config["custom"] = {} + +db_prefix = dj.config["custom"].get("database.prefix", "") diff --git a/workflow_volume/ingest.py b/workflow_volume/ingest.py new file mode 100644 index 0000000..8706caa --- /dev/null +++ b/workflow_volume/ingest.py @@ -0,0 +1,48 @@ +from element_interface.utils import ingest_csv_to_table + +from .pipeline import session, subject + + +def ingest_subjects( + subject_csv_path: str = "./user_data/subjects.csv", + skip_duplicates: bool = True, + verbose: bool = True, +): + """Inserts ./user_data/subject.csv data into corresponding subject schema tables + + Args: + subject_csv_path (str): relative path of subject csv + skip_duplicates (bool): Default True. Passed to DataJoint insert + verbose (bool): Display number of entries inserted when ingesting + """ + csvs = [subject_csv_path] + tables = [subject.Subject()] + ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) + + +def ingest_sessions( + session_csv_path: str = "./user_data/sessions.csv", + skip_duplicates: bool = True, + verbose: bool = True, +): + """ + Inserts data from a sessions csv into corresponding session schema tables + By default, uses data from workflow_session/user_data/session/ + session_csv_path (str): relative path of session csv + skip_duplicates (bool): Default True. See DataJoint `insert` function + verbose (bool): Print number inserted (i.e., table length change) + """ + csvs = [ + session_csv_path, + session_csv_path, + ] + tables = [ + session.Session(), + session.SessionDirectory(), + ] + + ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) + + +if __name__ == "__main__": + ingest_sessions() diff --git a/workflow_volume/paths.py b/workflow_volume/paths.py new file mode 100644 index 0000000..21e44be --- /dev/null +++ b/workflow_volume/paths.py @@ -0,0 +1,41 @@ +from collections import Sequence +from typing import List + +import datajoint as dj + + +def get_session_directory(session_key: dict) -> str: + """Return relative path from SessionDirectory table given key + + Args: + session_key (dict): Key uniquely identifying a session + + Returns: + path (str): Relative path of session directory + """ + from .pipeline import session + + # NOTE: fetch (vs. 
fetch1) permits dir to not exist, may be the case when saving + # slices directly from from BossDB into inferred dir based on BossDB structure + session_dir = (session.SessionDirectory & session_key).fetch("session_dir") + + if len(session_dir) > 1: + raise ValueError( + f"Found >1 directory for this key:\n\t{session_key}\n\t{session_dir}" + ) + elif len(session_dir) == 1: + return session_dir[0] + else: + return None + + +def get_vol_root_data_dir() -> List[str]: + """Return root directory for ephys from 'vol_root_data_dir' in dj.config + + Returns: + path (any): List of path(s) if available or None + """ + roots = dj.config.get("custom", {}).get("vol_root_data_dir", None) + if not isinstance(roots, Sequence): + roots = [roots] + return roots diff --git a/workflow_volume/pipeline.py b/workflow_volume/pipeline.py new file mode 100644 index 0000000..c8f3999 --- /dev/null +++ b/workflow_volume/pipeline.py @@ -0,0 +1,62 @@ +from element_animal import subject, surgery +from element_animal.subject import Subject # Dependency for session +from element_animal.surgery import BrainRegion # Dependency for imaging +from element_calcium_imaging import imaging, imaging_report, scan +from element_lab import lab +from element_lab.lab import Lab, Project, Protocol, Source, User # Deps for Subject +from element_session import session_with_id as session +from element_session.session_with_id import Session, SessionDirectory +from element_volume import bossdb, volume +from element_volume.bossdb import BossDBURLs +from element_volume.readers.bossdb import BossDBInterface + +from . import db_prefix +from .paths import get_session_directory, get_vol_root_data_dir +from .reference import Device + +__all__ = [ + "db_prefix", + "lab", + "scan", + "imaging", + "imaging_report", + "session", + "subject", + "surgery", + "bossdb", + "volume", + "Device", + "Lab", + "Project", + "Protocol", + "User", + "Source", + "Session", + "SessionDirectory", + "Subject", + "BrainRegion", + "BossDBURLs", + "BossDBInterface", + "get_session_directory", + "get_vol_root_data_dir", +] + +# ---------------------------------- Activate schemas ---------------------------------- + +lab.activate(db_prefix + "lab") + +# subject.activate(db_prefix + "subject", linking_module=__name__) +surgery.activate(db_prefix + "subject", db_prefix + "surgery", linking_module=__name__) + +Experimenter = lab.User +session.activate(db_prefix + "session", linking_module=__name__) + +Equipment = Device +Location = BrainRegion +imaging.activate(db_prefix + "imaging", db_prefix + "scan", linking_module=__name__) + +bossdb.activate(db_prefix + "bossdb") + +URLs = bossdb.BossDBURLs +Mask = imaging.Segmentation.Mask +volume.activate(db_prefix + "volume", linking_module=__name__) diff --git a/workflow_volume/reference.py b/workflow_volume/reference.py new file mode 100644 index 0000000..1bfde38 --- /dev/null +++ b/workflow_volume/reference.py @@ -0,0 +1,27 @@ +import datajoint as dj + +from . import db_prefix + +schema = dj.Schema(db_prefix + "reference") + + +@schema +class Device(dj.Lookup): + """Table for managing lab equipment. + + Attributes: + device ( varchar(32) ): Device short name. + modality ( varchar(64) ): Modality for which this device is used. + description ( varchar(256) ): Optional. Description of device. 
+ """ + + definition = """ + device : varchar(32) + --- + modality : varchar(64) + description=null : varchar(256) + """ + contents = [ + ["scanner1", "calcium imaging", ""], + ["scanner2", "calcium imaging", ""], + ] diff --git a/workflow_volume/version.py b/workflow_volume/version.py new file mode 100644 index 0000000..bbc2222 --- /dev/null +++ b/workflow_volume/version.py @@ -0,0 +1,2 @@ +"""Package metadata.""" +__version__ = "0.0.0" From caa282355afc77f7a0dea8b6b990384d63d27a1d Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 13 Apr 2023 13:57:34 -0500 Subject: [PATCH 02/62] Make workflow compatible with latest version of the element --- .gitignore | 4 + notebooks/01-Configure.ipynb | 28 +- notebooks/02-WorkflowStructure_Optional.ipynb | 259 +++++------------ notebooks/03_Explore.ipynb | 267 +++++------------- notebooks/04-Drop_Optional.ipynb | 120 -------- requirements.txt | 4 +- .../__init__.py | 0 .../ingest.py | 0 {workflow_volume => workflow_zstack}/paths.py | 2 +- .../pipeline.py | 15 +- .../reference.py | 0 .../version.py | 0 12 files changed, 174 insertions(+), 525 deletions(-) delete mode 100644 notebooks/04-Drop_Optional.ipynb rename {workflow_volume => workflow_zstack}/__init__.py (100%) rename {workflow_volume => workflow_zstack}/ingest.py (100%) rename {workflow_volume => workflow_zstack}/paths.py (96%) rename {workflow_volume => workflow_zstack}/pipeline.py (86%) rename {workflow_volume => workflow_zstack}/reference.py (100%) rename {workflow_volume => workflow_zstack}/version.py (100%) diff --git a/.gitignore b/.gitignore index 130f191..c454817 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# User files and data directories +example_data/ +test.ipynb + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/notebooks/01-Configure.ipynb b/notebooks/01-Configure.ipynb index 250c250..fee3bac 100644 --- a/notebooks/01-Configure.ipynb +++ b/notebooks/01-Configure.ipynb @@ -85,10 +85,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2023-03-16 10:42:37,445][INFO]: Connecting root@localhost:3306\n", + "[2023-03-16 10:42:37,466][INFO]: Connected root@localhost:3306\n" + ] + }, + { + "data": { + "text/plain": [ + "DataJoint connection (connected) root@localhost:3306" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "import datajoint as dj \n", "dj.conn()" ] }, @@ -146,7 +166,7 @@ "metadata": {}, "outputs": [], "source": [ - "dj.config[\"custom\"] = {\"vol_root_data_dir\": [\"/tmp/test_data/\", \"/tmp/example/\"]}" + "dj.config[\"custom\"] = {\"vol_root_data_dir\": [\"C:/Users/kusha/elementsRepos/workflow-bossdb/example_data\"]}" ] }, { @@ -228,7 +248,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.12" }, "vscode": { "interpreter": { diff --git a/notebooks/02-WorkflowStructure_Optional.ipynb b/notebooks/02-WorkflowStructure_Optional.ipynb index 2832cc5..b47a275 100644 --- a/notebooks/02-WorkflowStructure_Optional.ipynb +++ b/notebooks/02-WorkflowStructure_Optional.ipynb @@ -67,42 +67,38 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2023-03-28 10:43:18,127][INFO]: Connecting 
kushalbakshitest@tutorial-db.datajoint.io:3306\n", + "[2023-03-28 10:43:18,619][INFO]: Connected kushalbakshitest@tutorial-db.datajoint.io:3306\n" + ] + } + ], "source": [ "import datajoint as dj\n", - "from workflow_volume.pipeline import lab, subject, session, volume, bossdb\n", - "\n", + "from workflow_volume.pipeline import lab, subject, session, scan, volume\n", "# dj.list_schemas()" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['#resolution',\n", - " '#connectome_paramset',\n", - " '#segmentation_paramset',\n", - " '_connectome__connection',\n", - " 'volume',\n", - " 'segmentation_task',\n", - " '_segmentation',\n", - " '_segmentation__cell',\n", - " 'connectome_task',\n", - " '__cell_mapping',\n", - " '_connectome']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], + "source": [ + "dj.list_schemas()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "volume.schema.list_tables()" ] @@ -120,25 +116,43 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from element_volume.bossdb import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from element_volume.volume import *" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": { "title": "`dj.Diagram()`: plot tables and dependencies" }, "outputs": [ { "data": { - "image/svg+xml": "\n\n\n\n\n20\n\n20\n\n\n\nvolume.Connectome.Connection\n\n\nvolume.Connectome.Connection\n\n\n\n\n\n20->volume.Connectome.Connection\n\n\n\n\n21\n\n21\n\n\n\n21->volume.Connectome.Connection\n\n\n\n\nbossdb.BossDBURLs.Segmentation\n\n\nbossdb.BossDBURLs.Segmentation\n\n\n\n\n\nvolume.SegmentationTask\n\n\nvolume.SegmentationTask\n\n\n\n\n\nbossdb.BossDBURLs.Segmentation->volume.SegmentationTask\n\n\n\n\nbossdb.BossDBURLs.Connectome\n\n\nbossdb.BossDBURLs.Connectome\n\n\n\n\n\nvolume.ConnectomeTask\n\n\nvolume.ConnectomeTask\n\n\n\n\n\nbossdb.BossDBURLs.Connectome->volume.ConnectomeTask\n\n\n\n\nvolume.SegmentationParamset\n\n\nvolume.SegmentationParamset\n\n\n\n\n\nvolume.SegmentationParamset->volume.SegmentationTask\n\n\n\n\nvolume.ConnectomeParamset\n\n\nvolume.ConnectomeParamset\n\n\n\n\n\nvolume.ConnectomeParamset->volume.ConnectomeTask\n\n\n\n\nvolume.Segmentation.Cell\n\n\nvolume.Segmentation.Cell\n\n\n\n\n\nvolume.Segmentation.Cell->20\n\n\n\n\nvolume.Segmentation.Cell->21\n\n\n\n\nvolume.CellMapping\n\n\nvolume.CellMapping\n\n\n\n\n\nvolume.Segmentation.Cell->volume.CellMapping\n\n\n\n\nvolume.Connectome\n\n\nvolume.Connectome\n\n\n\n\n\nbossdb.BossDBURLs.Volume\n\n\nbossdb.BossDBURLs.Volume\n\n\n\n\n\nvolume.Volume\n\n\nvolume.Volume\n\n\n\n\n\nbossdb.BossDBURLs.Volume->volume.Volume\n\n\n\n\nvolume.ConnectomeTask->volume.Connectome\n\n\n\n\nvolume.Segmentation\n\n\nvolume.Segmentation\n\n\n\n\n\nvolume.Segmentation->volume.Segmentation.Cell\n\n\n\n\nvolume.Segmentation->volume.ConnectomeTask\n\n\n\n\nvolume.Resolution\n\n\nvolume.Resolution\n\n\n\n\n\nvolume.Resolution->volume.Volume\n\n\n\n\nvolume.SegmentationTask->volume.Segmentation\n\n\n\n\nbossdb.BossDBURLs\n\n\nbossdb.BossDBURLs\n\n\n\n\n\nbossdb.BossDBURLs->bossdb.BossDBURLs.Segmentation\n\n\n\n\nbossdb.BossDBURLs->bossdb.BossDBURLs.Volume\n\n\n\n\nvolume.Volume->volume.SegmentationTask\n\n\n\n", + 
"image/svg+xml": "\n\n\n\n\nvolume.Volume\n\n\nvolume.Volume\n\n\n\n\n\nvolume.SegmentationTask\n\n\nvolume.SegmentationTask\n\n\n\n\n\nvolume.Volume->volume.SegmentationTask\n\n\n\n\nvolume.BossDBURLs\n\n\nvolume.BossDBURLs\n\n\n\n\n\nvolume.Volume->volume.BossDBURLs\n\n\n\n\nvolume.Segmentation\n\n\nvolume.Segmentation\n\n\n\n\n\nvolume.Segmentation.Mask\n\n\nvolume.Segmentation.Mask\n\n\n\n\n\nvolume.Segmentation->volume.Segmentation.Mask\n\n\n\n\nvolume.SegmentationParamset\n\n\nvolume.SegmentationParamset\n\n\n\n\n\nvolume.SegmentationParamset->volume.SegmentationTask\n\n\n\n\nsession.Session\n\n\nsession.Session\n\n\n\n\n\nscan.Scan\n\n\nscan.Scan\n\n\n\n\n\nsession.Session->scan.Scan\n\n\n\n\nscan.Scan->volume.Volume\n\n\n\n\nvolume.SegmentationTask->volume.Segmentation\n\n\n\n", "text/plain": [ - "" + "" ] }, - "execution_count": 5, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dj.Diagram(volume) + dj.Diagram(bossdb)" + "dj.Diagram(session.Session) + dj.Diagram(scan.Scan) + dj.Diagram(volume)" ] }, { @@ -149,6 +163,15 @@ "`volume.Volume` is a central table where volumetric data can be ingested, either from images on disk or downloaded from BossDB. The various *task* tables can be used to cue up analysis or ingestion of the various subsequent data types (i.e., segmentation and connectome data. Each segmented cell can be matched with data from another source (e.g., Element Calcium Imaging's `Segmentation.Mask`)." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.schema" + ] + }, { "cell_type": "code", "execution_count": null, @@ -196,165 +219,31 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "title": "Each datajoint table class inside the module corresponds to a table inside the schema. For example, the class `ephys.EphysRecording` correponds to the table `_ephys_recording` in the schema `neuro_ephys` in the database." }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " Resolution of stored data\n", - "
\n", - "
\n", - " \n", - " \n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

resolution_id

\n", - " Shorthand for convention\n", - "
\n", - "

voxel_unit

\n", - " e.g., nanometers\n", - "
\n", - "

voxel_z_size

\n", - " size of one z dimension voxel in voxel_units\n", - "
\n", - "

voxel_y_size

\n", - " size of one y dimension voxel in voxel_units\n", - "
\n", - "

voxel_x_size

\n", - " size of one x dimension voxel in voxel_units\n", - "
\n", - "

downsampling

\n", - " Downsampling iterations relative to raw data\n", - "
990nmmicrometers1.00.50.50
\n", - " \n", - "

Total: 1

\n", - " " - ], - "text/plain": [ - "*resolution_id voxel_unit voxel_z_size voxel_y_size voxel_x_size downsampling \n", - "+------------+ +------------+ +------------+ +------------+ +------------+ +------------+\n", - "990nm micrometers 1.0 0.5 0.5 0 \n", - " (Total: 1)" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "volume.Resolution()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "title": "`heading`: show table attributes regardless of foreign key references." }, - "outputs": [ - { - "data": { - "text/plain": [ - "# Dataset of a contiguous volume\n", - "volume_id : varchar(32) # shorthand for this volume\n", - "resolution_id : varchar(32) # Shorthand for convention\n", - "---\n", - "subject=null : varchar(8) # \n", - "session_id=null : int # \n", - "z_size : int # total number of voxels in z dimension\n", - "y_size : int # total number of voxels in y dimension\n", - "x_size : int # total number of voxels in x dimension\n", - "slicing_dimension=\"z\" : enum('x','y','z') # perspective of slices\n", - "channel : varchar(64) # data type or modality\n", - "collection_experiment=null : varchar(64) # \n", - "url=null : varchar(64) # \n", - "volume_data=null : longblob # Upload assumes (Z, Y, X) np.array" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "volume.Volume.heading" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'-> volume.SegmentationTask\\n---\\nsegmentation_data=null : longblob \\n'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "volume.Segmentation.describe()" ] @@ -387,31 +276,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": { "title": "[session](https://github.com/datajoint/element-session): experimental session information" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-> subject.Subject\n", - "session_datetime : datetime \n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "'-> subject.Subject\\nsession_datetime : datetime \\n'" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "session.Session.describe()" ] @@ -449,7 +318,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.9.12" }, "vscode": { "interpreter": { diff --git a/notebooks/03_Explore.ipynb b/notebooks/03_Explore.ipynb index 43304cf..6ab7d9c 100644 --- a/notebooks/03_Explore.ipynb +++ b/notebooks/03_Explore.ipynb @@ -64,22 +64,27 @@ "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Connecting cbroz@dss-db.datajoint.io:3306\n" + "ename": "ModuleNotFoundError", + "evalue": "No module named 'element_volume'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[2], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mdatajoint\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mdj\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m 
\u001b[39mdatetime\u001b[39;00m \u001b[39mimport\u001b[39;00m datetime\n\u001b[1;32m----> 3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39melement_volume\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mexport\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mbossdb\u001b[39;00m \u001b[39mimport\u001b[39;00m BossDBUpload\n\u001b[0;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mworkflow_volume\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpipeline\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 5\u001b[0m lab,\n\u001b[0;32m 6\u001b[0m subject,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 11\u001b[0m get_vol_root_data_dir,\n\u001b[0;32m 12\u001b[0m )\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'element_volume'" ] } ], "source": [ "import datajoint as dj\n", "from datetime import datetime\n", + "from element_volume.export.bossdb import BossDBUpload\n", "from workflow_volume.pipeline import (\n", " lab,\n", " subject,\n", " session,\n", + " scan,\n", " volume,\n", - " bossdb,\n", " get_session_directory,\n", " get_vol_root_data_dir,\n", ")" @@ -115,37 +120,35 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "# \n", - "subject : varchar(32) # \n", - "session_datetime : datetime(3) # " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "subject.Subject.insert1(\n", " dict(subject=\"sub1\", sex=\"M\", subject_birth_date=datetime.now()),\n", " skip_duplicates=True,\n", ")\n", - "session_key = (subject.Subject & \"subject='sub1'\").fetch1(\"KEY\")\n", + "session_key = dict(\n", + " subject=\"sub1\",\n", + " session_id=1,\n", + ")\n", "session.Session.insert1(\n", " dict(\n", - " **session_key,\n", - " session_id=1,\n", + " session_key,\n", " session_datetime=datetime.now(),\n", " ),\n", " skip_duplicates=True,\n", ")\n", "session.SessionDirectory.insert1(\n", - " dict(**session.Session.fetch1(\"KEY\"), session_dir=\"\"),\n", + " dict(**session.Session.fetch1(\"KEY\"), session_dir=\"\"),\n", " skip_duplicates=True,\n", - ")" + ")\n", + "scan.Scan.insert1(\n", + " dict(\n", + " session_key,\n", + " scan_id=1,\n", + " acq_software=\"ScanImage\",\n", + " ), skip_duplicates=True\n", + ")\n", + "scan_key = (scan.Scan & \"subject = 'sub1'\").fetch1(\"KEY\")" ] }, { @@ -167,53 +170,12 @@ "data_path = find_full_path(get_vol_root_data_dir(), get_session_directory(session_key))" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "### Element Volume Tables\n", - "\n", - "#### Uploading\n" - ] - }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "The `Resolution` table keeps track details related to data collection, including units and size in each dimension. `downsampling` indicates number of times the dataset has been compressed by taking every other pixel. 
Within BossDB, resolution 3 data (here, `downsampling` 3) reflects every 8th pixel, for example.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "volume.Resolution.insert1(\n", - " dict(\n", - " resolution_id=\"990nm\",\n", - " voxel_unit=\"micrometers\",\n", - " voxel_z_size=1,\n", - " voxel_y_size=0.5,\n", - " voxel_x_size=0.5,\n", - " downsampling=0,\n", - " ),\n", - " skip_duplicates=True,\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "BossDB operates with a hierarchy of collections, experiments, and channels. A collection spans multiple experiments. An experiment may collect one or more channels, including electron micrioscopy data, segmentation annotations, and connectome data. These form the portions of a BossDB URL.\n", - "\n", - "Here, we choose some example values. With the proper permissions, we can create a BossDB dataset right from our Python environment.\n" + "Now we can populate the `Volume` table." ] }, { @@ -222,23 +184,7 @@ "metadata": {}, "outputs": [], "source": [ - "collection, experiment, volume, segmentation = (\n", - " \"DataJointTest\",\n", - " \"test\",\n", - " \"CalciumImaging\",\n", - " \"Segmented\",\n", - ")\n", - "\n", - "bossdb.BossDBURLs.load_bossdb_info(\n", - " collection=collection,\n", - " experiment=experiment,\n", - " volume=volume,\n", - " segmentation=segmentation,\n", - " skip_duplicates=True,\n", - ")\n", - "url_key = (\n", - " bossdb.BossDBURLs.Volume & dict(collection_experiment=f\"{collection}/{experiment}\")\n", - ").fetch1()" + "volume.Volume.populate(display_progress=True)" ] }, { @@ -246,42 +192,31 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `load_sample_data` function below provides a template for loading a multi-page tif file and saving it into individual Z-axis images.\n", - "\n", - "In the next step, we can choose to upload to BossDB either with individual images in a directory or through an image volume in memory. To store the volume data in the table, replace the contents below with a function that loads your data.\n", - "\n", - "Note: BossDB only accepts image data as `uint8` or `uint16` numpy arrays.\n" + "Next, we'll define a pameter set for segmentation with cellpose in the\n", + "`SegmentationParamset` table, and insert an entry into the `SegmentationTask`\n", + "table. " ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "def load_sample_data():\n", - " from tifffile import TiffFile\n", - " from PIL import Image\n", - " from pathlib import Path\n", - "\n", - " root_dir = get_vol_root_data_dir()[0]\n", - " image_fp = root_dir + \"/.tif\"\n", - " png_fp = root_dir + \"sample/Z%02d.png\" # Z-plane\n", - " image_sample = TiffFile(image_fp).asarray()\n", - "\n", - " image_sample = image_sample.astype(\"uint16\")\n", - " if not Path(png_fp % 0).exists():\n", - " for z in range(20):\n", - " Image.fromarray(image_sample[z]).save(png_fp % z)\n", - " return image_sample" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we can insert into the `Volume` table." 
+ "volume.SegmentationParamset.insert_new_params(\n", + " segmentation_method=\"cellpose\",\n", + " paramset_idx=1,\n", + " params=dict(\n", + " diameter = 8,\n", + " min_size = 2,\n", + " do_3d = False,\n", + " anisotropy = 0.5,\n", + " model_type = \"nuclei\",\n", + " channels = [[0, 0]],\n", + " z_axis = 0,\n", + " skip_duplicates=True,\n", + " ),\n", + ")" ] }, { @@ -290,21 +225,12 @@ "metadata": {}, "outputs": [], "source": [ - "raw_data = load_sample_data()\n", - "raw_data_shape = raw_data.shape\n", - "volume_key = dict(volume_id=\"Thy1\", resolution_id=\"990nm\")\n", - "volume.Volume.insert1(\n", + "volume.SegmentationTask.insert1(\n", " dict(\n", - " **volume_key,\n", - " session_id=1,\n", - " z_size=raw_data_shape[0],\n", - " y_size=raw_data_shape[1],\n", - " x_size=raw_data_shape[2],\n", - " channel=volume,\n", - " **url_key,\n", - " volume_data=raw_data,\n", - " ),\n", - " skip_duplicates=True,\n", + " scan_key,\n", + " paramset_idx=1,\n", + " task_mode=\"trigger\",\n", + " ), skip_duplicates=True\n", ")" ] }, @@ -313,7 +239,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, we can upload our data either from the data stored in the table or a path to images. If this entry is already associated with a `SessionDirectory` entry, we'll look for images in this path.\n" + "Now, we can popluate the `Segmentation` table." ] }, { @@ -322,18 +248,7 @@ "metadata": {}, "outputs": [], "source": [ - "# For other optional parameters, see additional docstring info here:\n", - "# element_volume.export.bossdb.BossDBUpload\n", - "volume.Volume.upload(volume_key, upload_from=\"table\")\n", - "# volume.Volume.upload(volume_key, upload_from=\"dir\", data_extension=\"*pattern*.png\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Download" + "volume.Segmentation.populate()" ] }, { @@ -341,7 +256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `Volume` and `BossDBURLs` tables offer additional class methods for downloading BossDB data or returning objects for interacting with the data.\n" + "Finally, we can upload our data either from the data stored in the table or a path to images. If this entry is already associated with a `SessionDirectory` entry, we'll look for images in this path.\n" ] }, { @@ -350,13 +265,7 @@ "metadata": {}, "outputs": [], "source": [ - "bossdb.BossDBURLs.load_bossdb_info(\n", - " collection=\"Kasthuri\",\n", - " experiment=\"ac4\",\n", - " volume=\"em\",\n", - " segmentation=\"neuron\",\n", - " skip_duplicates=True,\n", - ")" + "import numpy as np" ] }, { @@ -365,59 +274,24 @@ "metadata": {}, "outputs": [], "source": [ - "# For other optional parameters, see additional docstring info here:\n", - "# element_volume.readers.bossdb.BossDBInterface.load_data_into_element\n", - "volume.Volume.download(\n", - " \"bossdb://witvliet2020/Dataset_1/em\",\n", - " downsampling=3,\n", - " slice_key=\"[100:120,1000:1500,1000:1500]\",\n", - " save_images=True,\n", - " save_ndarray=True,\n", - " image_mode=\"P\",\n", - " skip_duplicates=True,\n", - ")\n", - "data = volume.Volume.return_bossdb_data(\n", - " volume_key=dict(volume_id=\"witvliet2020/Dataset_1\")\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To load segmentation data, we can set the `task_mode` to load and add additional pararameters to the `SegmentationParamset` table." 
+ "BossDBUpload(url='bossdb://dataJointTestUpload/CaImaging/test1-seg', data_dir=r\"C:\\Users\\kusha\\elementsRepos\\workflow-bossdb\\example_data\\Animal1_day1_tdtomato_seg-001.npy\", voxel_size=[1, 1, 1], voxel_units=\"nanometers\", shape_zyx=[536, 709, 600], raw_data=np.load(\"./example_data/Animal1_day1_tdtomato_seg-001.npy\", allow_pickle=True).item().get('masks')[0], data_type=\"annotation\")\n" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "volume.SegmentationParamset.insert_new_params(\n", - " segmentation_method=\"bossdb\",\n", - " paramset_idx=1,\n", - " params=dict(\n", - " slice_key=\"[100:120,1000:1500,1000:1500]\",\n", - " save_images=True,\n", - " save_ndarray=True,\n", - " image_mode=\"P\",\n", - " skip_duplicates=True,\n", - " ),\n", - ")\n", - "volume.SegmentationTask.insert1(\n", - " dict(\n", - " volume_id=\"witvliet2020/Dataset_1\",\n", - " resolution_id=0,\n", - " task_mode=\"load\",\n", - " paramset_idx=1,\n", - " **(\n", - " bossdb.BossDBURLs.Segmentation & \"collection_experiment LIKE 'wit%'\"\n", - " ).fetch1(),\n", - " )\n", - ")\n", - "volume.Segmentation.populate()" + "def get_neuroglancer_url(collection, experiment, channel):\n", + " base_url = f\"boss://https://api.bossdb.io/{collection}/{experiment}/{channel}\"\n", + " return (\n", + " \"https://neuroglancer.bossdb.io/#!{'layers':{'\"\n", + " + f'{experiment}'\n", + " + \"':{'source':'\"\n", + " + base_url + \"','name':'\"\n", + " + f'{channel}' + \"'}}}\"\n", + " )" ] }, { @@ -425,7 +299,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the [next notebook](./04-Drop.ipynb), we'll touch on how to drop these various schemas for development.\n" + "Explain how to get permission from APL to upload data. \n", + "\n", + "Create a schema to automatically generate neuroglancer link and insert into DJ\n", + "table. \n", + "\n", + "Include BossDBUpload in BossDBURLs as a computed/imported table. 
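The `get_neuroglancer_url` helper above assembles the viewer state by string concatenation. An alternative sketch that builds the same layer structure as JSON and URL-encodes it is shown below; the exact state format accepted by `neuroglancer.bossdb.io` should be verified against the viewer, and the resulting link can be opened with, for example, Python's `webbrowser.open`.

```python
# Sketch of an alternative construction; mirrors the layer structure used above.
import json
from urllib.parse import quote


def neuroglancer_link(collection: str, experiment: str, channel: str) -> str:
    source = f"boss://https://api.bossdb.io/{collection}/{experiment}/{channel}"
    state = {"layers": {experiment: {"source": source, "name": channel}}}
    return "https://neuroglancer.bossdb.io/#!" + quote(json.dumps(state), safe="")


# e.g. neuroglancer_link("dataJointTestUpload", "CaImaging", "test10")
```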
" ] } ], @@ -448,7 +327,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.10.10" }, "vscode": { "interpreter": { diff --git a/notebooks/04-Drop_Optional.ipynb b/notebooks/04-Drop_Optional.ipynb deleted file mode 100644 index c431aef..0000000 --- a/notebooks/04-Drop_Optional.ipynb +++ /dev/null @@ -1,120 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# DataJoint U24 - Workflow Volume\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Change into the parent directory to find the `dj_local_conf.json` file.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import os\n", - "import datajoint as dj\n", - "from pathlib import Path\n", - "\n", - "# change to the upper level folder to detect dj_local_conf.json\n", - "if os.path.basename(os.getcwd()) == \"notebooks\":\n", - " os.chdir(\"..\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from workflow_volume.pipeline import (\n", - " imaging_report,\n", - " volume,\n", - " bossdb,\n", - " imaging,\n", - " scan,\n", - " Device,\n", - " session,\n", - " subject,\n", - " surgery,\n", - " lab,\n", - ")\n", - "\n", - "dj.config[\"safemode\"] = True # Set to false to turn off drop confirmation" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Drop schemas\n", - "\n", - "- Schemas are not typically dropped in a production workflow with real data in it.\n", - "- At the developmental phase, it might be required for the table redesign.\n", - "- When dropping all schemas is needed, drop items starting with the most downstream.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# imaging_report.schema.drop()\n", - "# volume.schema.drop()\n", - "# bossdb.schema.drop()\n", - "# imaging.schema.drop()\n", - "# scan.schema.drop()\n", - "# Device.drop_quick()\n", - "# session.schema.drop()\n", - "# subject.schema.drop()\n", - "# surgery.schema.drop()\n", - "# lab.schema.drop()" - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "ele", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "vscode": { - "interpreter": { - "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/requirements.txt b/requirements.txt index 3abe6c3..6519223 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ datajoint>=0.13.0 element-animal element-lab element-session -element-volume +element-calcium-imaging +element-bossdb intern -# boss-ingest diff --git a/workflow_volume/__init__.py b/workflow_zstack/__init__.py similarity index 100% rename from workflow_volume/__init__.py rename to workflow_zstack/__init__.py diff --git a/workflow_volume/ingest.py b/workflow_zstack/ingest.py similarity index 100% rename from workflow_volume/ingest.py rename to workflow_zstack/ingest.py diff --git a/workflow_volume/paths.py 
b/workflow_zstack/paths.py similarity index 96% rename from workflow_volume/paths.py rename to workflow_zstack/paths.py index 21e44be..6048da2 100644 --- a/workflow_volume/paths.py +++ b/workflow_zstack/paths.py @@ -1,4 +1,4 @@ -from collections import Sequence +from collections.abc import Sequence from typing import List import datajoint as dj diff --git a/workflow_volume/pipeline.py b/workflow_zstack/pipeline.py similarity index 86% rename from workflow_volume/pipeline.py rename to workflow_zstack/pipeline.py index c8f3999..a581b7e 100644 --- a/workflow_volume/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -6,9 +6,8 @@ from element_lab.lab import Lab, Project, Protocol, Source, User # Deps for Subject from element_session import session_with_id as session from element_session.session_with_id import Session, SessionDirectory -from element_volume import bossdb, volume -from element_volume.bossdb import BossDBURLs -from element_volume.readers.bossdb import BossDBInterface +from element_zstack import volume +from element_zstack.export import bossdb from . import db_prefix from .paths import get_session_directory, get_vol_root_data_dir @@ -23,8 +22,8 @@ "session", "subject", "surgery", - "bossdb", "volume", + "bossdb", "Device", "Lab", "Project", @@ -35,8 +34,6 @@ "SessionDirectory", "Subject", "BrainRegion", - "BossDBURLs", - "BossDBInterface", "get_session_directory", "get_vol_root_data_dir", ] @@ -55,8 +52,8 @@ Location = BrainRegion imaging.activate(db_prefix + "imaging", db_prefix + "scan", linking_module=__name__) -bossdb.activate(db_prefix + "bossdb") - -URLs = bossdb.BossDBURLs Mask = imaging.Segmentation.Mask +Scan = scan.Scan + volume.activate(db_prefix + "volume", linking_module=__name__) +bossdb.activate(db_prefix + "bossdb", linking_module=__name__) diff --git a/workflow_volume/reference.py b/workflow_zstack/reference.py similarity index 100% rename from workflow_volume/reference.py rename to workflow_zstack/reference.py diff --git a/workflow_volume/version.py b/workflow_zstack/version.py similarity index 100% rename from workflow_volume/version.py rename to workflow_zstack/version.py From e698194c323b87b0c1f158d0fdede23f9dc6b07c Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 18 Apr 2023 18:27:51 -0500 Subject: [PATCH 03/62] Updates for changes in the corresponding element --- notebooks/03_Explore.ipynb | 130 ++++++++++++++++++++---------------- workflow_zstack/paths.py | 46 +++++++++++-- workflow_zstack/pipeline.py | 22 ++---- 3 files changed, 119 insertions(+), 79 deletions(-) diff --git a/notebooks/03_Explore.ipynb b/notebooks/03_Explore.ipynb index 6ab7d9c..52f471c 100644 --- a/notebooks/03_Explore.ipynb +++ b/notebooks/03_Explore.ipynb @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -60,33 +60,22 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'element_volume'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[2], line 3\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mdatajoint\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mdj\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mdatetime\u001b[39;00m 
\u001b[39mimport\u001b[39;00m datetime\n\u001b[1;32m----> 3\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39melement_volume\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mexport\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mbossdb\u001b[39;00m \u001b[39mimport\u001b[39;00m BossDBUpload\n\u001b[0;32m 4\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mworkflow_volume\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpipeline\u001b[39;00m \u001b[39mimport\u001b[39;00m (\n\u001b[0;32m 5\u001b[0m lab,\n\u001b[0;32m 6\u001b[0m subject,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 11\u001b[0m get_vol_root_data_dir,\n\u001b[0;32m 12\u001b[0m )\n", - "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'element_volume'" - ] - } - ], + "outputs": [], "source": [ "import datajoint as dj\n", "from datetime import datetime\n", - "from element_volume.export.bossdb import BossDBUpload\n", - "from workflow_volume.pipeline import (\n", + "from element_zstack.export.bossdb import BossDBUpload\n", + "from workflow_zstack.pipeline import (\n", " lab,\n", " subject,\n", " session,\n", " scan,\n", " volume,\n", + " bossdb,\n", " get_session_directory,\n", - " get_vol_root_data_dir,\n", + " get_volume_root_data_dir,\n", ")" ] }, @@ -151,33 +140,6 @@ "scan_key = (scan.Scan & \"subject = 'sub1'\").fetch1(\"KEY\")" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`get_session_directory` will fetch your relative directory path form this `SessionDirectory` table.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from element_interface.utils import find_full_path\n", - "\n", - "data_path = find_full_path(get_vol_root_data_dir(), get_session_directory(session_key))" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can populate the `Volume` table." 
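The removed cells above resolved the raw-data location by combining the root directory with the relative `SessionDirectory` path. The same resolution now lives in `workflow_zstack/paths.py` (see the diff later in this patch); a sketch of the lookup, assuming the pipeline is activated and the session key exists, looks like this:

```python
# Sketch of the path resolution performed by the workflow's helpers.
from element_interface.utils import find_full_path

from workflow_zstack.paths import get_volume_root_data_dir
from workflow_zstack.pipeline import session

session_key = dict(subject="sub1", session_id=1)
relative_dir = (session.SessionDirectory & session_key).fetch1("session_dir")
session_full_path = find_full_path(get_volume_root_data_dir(), relative_dir)
print(session_full_path)
```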
- ] - }, { "cell_type": "code", "execution_count": null, @@ -265,7 +227,71 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np" + "bossdb.UploadParamSet.insert_new_params(\n", + " paramset_idx=1,\n", + " paramset_desc=\"test params\",\n", + " params=dict(\n", + " voxel_units=\"micrometers\",\n", + " voxel_size=[1,1,1],\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.VolumeUploadTask()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "col_name = \"dataJointTestUpload\"\n", + "exp_name = \"CaImaging\"\n", + "chn_name = \"test10\"\n", + "bossdb.VolumeUploadTask.update1(\n", + " dict(\n", + " scan_key,\n", + " paramset_idx=1,\n", + " collection_name=col_name,\n", + " experiment_name=exp_name,\n", + " channel_name=chn_name,\n", + " upload_type=\"image\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.VolumeUploadTask()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "upload_key = (bossdb.VolumeUploadTask & scan_key & \"channel_name = 'test10'\").fetch(\"KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "upload_key" ] }, { @@ -274,7 +300,7 @@ "metadata": {}, "outputs": [], "source": [ - "BossDBUpload(url='bossdb://dataJointTestUpload/CaImaging/test1-seg', data_dir=r\"C:\\Users\\kusha\\elementsRepos\\workflow-bossdb\\example_data\\Animal1_day1_tdtomato_seg-001.npy\", voxel_size=[1, 1, 1], voxel_units=\"nanometers\", shape_zyx=[536, 709, 600], raw_data=np.load(\"./example_data/Animal1_day1_tdtomato_seg-001.npy\", allow_pickle=True).item().get('masks')[0], data_type=\"annotation\")\n" + "bossdb.BossDBURLs.populate(upload_key)" ] }, { @@ -283,15 +309,7 @@ "metadata": {}, "outputs": [], "source": [ - "def get_neuroglancer_url(collection, experiment, channel):\n", - " base_url = f\"boss://https://api.bossdb.io/{collection}/{experiment}/{channel}\"\n", - " return (\n", - " \"https://neuroglancer.bossdb.io/#!{'layers':{'\"\n", - " + f'{experiment}'\n", - " + \"':{'source':'\"\n", - " + base_url + \"','name':'\"\n", - " + f'{channel}' + \"'}}}\"\n", - " )" + "(bossdb.BossDBURLs & scan_key).fetch1(\"neuroglancer_url\")" ] }, { diff --git a/workflow_zstack/paths.py b/workflow_zstack/paths.py index 6048da2..ba8c52a 100644 --- a/workflow_zstack/paths.py +++ b/workflow_zstack/paths.py @@ -1,7 +1,9 @@ from collections.abc import Sequence from typing import List - +import pathlib import datajoint as dj +from element_interface.utils import find_full_path +from element_session import session_with_id as session def get_session_directory(session_key: dict) -> str: @@ -29,13 +31,45 @@ def get_session_directory(session_key: dict) -> str: return None -def get_vol_root_data_dir() -> List[str]: +def get_volume_root_data_dir() -> List[str]: """Return root directory for ephys from 'vol_root_data_dir' in dj.config Returns: path (any): List of path(s) if available or None """ - roots = dj.config.get("custom", {}).get("vol_root_data_dir", None) - if not isinstance(roots, Sequence): - roots = [roots] - return roots + vol_root_dirs = dj.config.get("custom", {}).get("vol_root_data_dir", None) + if not vol_root_dirs: + return None + elif not isinstance(vol_root_dirs, Sequence): + return list(vol_root_dirs) + else: + return 
pathlib.Path(vol_root_dirs[0]) + + +def _find_files_by_type(scan_key, filetype: str): + """Uses roots + relative SessionDirectory, returns list of files with filetype""" + sess_dir = find_full_path( + get_volume_root_data_dir(), + pathlib.Path((session.SessionDirectory & scan_key).fetch1("session_dir")), + ) + return sess_dir, [fp.as_posix() for fp in sess_dir.rglob(filetype)] + + +def get_volume_tif_file(scan_key): + """Retrieve the list of ScanImage files associated with a given Scan. + + Args: + scan_key (dict): Primary key from Scan. + + Returns: + path (list): Absolute path(s) of the scan files. + + Raises: + FileNotFoundError: If the session directory or tiff files are not found. + """ + # Folder structure: root / subject / session / .tif (raw) + sess_dir, tiff_filepaths = _find_files_by_type(scan_key, "*_stitched.tif") + if tiff_filepaths: + return tiff_filepaths + else: + raise FileNotFoundError(f"No tiff file found in {sess_dir}") diff --git a/workflow_zstack/pipeline.py b/workflow_zstack/pipeline.py index a581b7e..630f6cf 100644 --- a/workflow_zstack/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -1,16 +1,12 @@ from element_animal import subject, surgery -from element_animal.subject import Subject # Dependency for session -from element_animal.surgery import BrainRegion # Dependency for imaging from element_calcium_imaging import imaging, imaging_report, scan from element_lab import lab -from element_lab.lab import Lab, Project, Protocol, Source, User # Deps for Subject from element_session import session_with_id as session -from element_session.session_with_id import Session, SessionDirectory from element_zstack import volume from element_zstack.export import bossdb from . import db_prefix -from .paths import get_session_directory, get_vol_root_data_dir +from .paths import get_session_directory, get_volume_root_data_dir, get_volume_tif_file from .reference import Device __all__ = [ @@ -25,17 +21,9 @@ "volume", "bossdb", "Device", - "Lab", - "Project", - "Protocol", - "User", - "Source", - "Session", - "SessionDirectory", - "Subject", - "BrainRegion", "get_session_directory", - "get_vol_root_data_dir", + "get_volume_root_data_dir", + "get_volume_tif_file", ] # ---------------------------------- Activate schemas ---------------------------------- @@ -49,11 +37,11 @@ session.activate(db_prefix + "session", linking_module=__name__) Equipment = Device -Location = BrainRegion imaging.activate(db_prefix + "imaging", db_prefix + "scan", linking_module=__name__) Mask = imaging.Segmentation.Mask +Session = session.Session +SessionDirectory = session.SessionDirectory Scan = scan.Scan - volume.activate(db_prefix + "volume", linking_module=__name__) bossdb.activate(db_prefix + "bossdb", linking_module=__name__) From fb3d2612062d3f15b8798ae9017ca34f5ce8829d Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 20 Apr 2023 17:08:48 -0500 Subject: [PATCH 04/62] Initial tutorial notebook created --- notebooks/01-Configure.ipynb | 261 ----- notebooks/02-WorkflowStructure_Optional.ipynb | 331 ------ notebooks/03_Explore.ipynb | 358 ------- notebooks/py_scripts/01-Configure.py | 114 -- ...9af2735c243-e118-4ee3-b586-4b8636f6c322.py | 9 - notebooks/py_scripts/01_Explore_Workflow.py | 287 ----- .../02-WorkflowStructure_Optional.py | 125 --- notebooks/py_scripts/03_Explore.py | 265 ----- notebooks/py_scripts/04-Drop_Optional.py | 67 -- notebooks/tutorial.ipynb | 996 ++++++++++++++++++ workflow_zstack/pipeline.py | 20 +- 11 files changed, 1008 insertions(+), 1825 deletions(-) delete mode 100644 
notebooks/01-Configure.ipynb delete mode 100644 notebooks/02-WorkflowStructure_Optional.ipynb delete mode 100644 notebooks/03_Explore.ipynb delete mode 100644 notebooks/py_scripts/01-Configure.py delete mode 100644 notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py delete mode 100644 notebooks/py_scripts/01_Explore_Workflow.py delete mode 100644 notebooks/py_scripts/02-WorkflowStructure_Optional.py delete mode 100644 notebooks/py_scripts/03_Explore.py delete mode 100644 notebooks/py_scripts/04-Drop_Optional.py create mode 100644 notebooks/tutorial.ipynb diff --git a/notebooks/01-Configure.ipynb b/notebooks/01-Configure.ipynb deleted file mode 100644 index fee3bac..0000000 --- a/notebooks/01-Configure.ipynb +++ /dev/null @@ -1,261 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# DataJoint U24 - Workflow Volume" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Configure DataJoint" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "- To run an Element workflow, we need to set up a DataJoint config file, called `dj_local_conf.json`, unique to each machine.\n", - "\n", - "- To upload to BossDB, you'd need to configure an `intern.cfg`.\n", - "\n", - "- These configs only need to be set up once. If you already have them, skip to [02-Workflow-Structure](./02-WorkflowStructure_Optional.ipynb).\n", - "\n", - "- By convention, we set a local config in the workflow directory. You may be interested in [setting a global config](https://docs.datajoint.org/python/setup/01-Install-and-Connect.html)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# change to the upper level folder to detect dj_local_conf.json\n", - "if os.path.basename(os.getcwd()) == \"notebooks\":\n", - " os.chdir(\"..\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Configure database host address and credentials" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we can set up credentials following [instructions here](https://tutorials.datajoint.io/setting-up/get-database.html)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import datajoint as dj\n", - "import getpass\n", - "\n", - "dj.config[\"database.host\"] = \"{YOUR_HOST}\"\n", - "dj.config[\"database.user\"] = \"{YOUR_USERNAME}\"\n", - "dj.config[\"database.password\"] = getpass.getpass() # enter the password securely" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You should be able to connect to the database at this stage." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2023-03-16 10:42:37,445][INFO]: Connecting root@localhost:3306\n", - "[2023-03-16 10:42:37,466][INFO]: Connected root@localhost:3306\n" - ] - }, - { - "data": { - "text/plain": [ - "DataJoint connection (connected) root@localhost:3306" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import datajoint as dj \n", - "dj.conn()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Configure the `custom` field" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Prefix" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A schema prefix can help manage privelages on a server. Teams who work on the same schemas should use the same prefix.\n", - "\n", - "Setting the prefix to `neuro_` means that every schema we then create will start with `neuro_` (e.g. `neuro_lab`, `neuro_subject`, `neuro_model` etc.)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dj.config[\"custom\"] = {\"database.prefix\": \"neuro_\"}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Root directory" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`vol_root_data_dir` sets the root path(s) for the Element. Given multiple, the Element will always figure out which root to use based on the files it expects there. This should be the directory shared across all volumetric data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dj.config[\"custom\"] = {\"vol_root_data_dir\": [\"C:/Users/kusha/elementsRepos/workflow-bossdb/example_data\"]}" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Save the DataJoint config as a json\n", - "\n", - "Once set, the config can either be saved locally or globally. \n", - "\n", - "- The local config would be saved as `dj_local_conf.json` in the workflow directory. This is usefull for managing multiple (demo) pipelines.\n", - "- A global config would be saved as `datajoint_config.json` in the home directory.\n", - "\n", - "When imported, DataJoint will first check for a local config. If none, it will check for a global config." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dj.config.save_local()\n", - "# dj.config.save_global()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Configuring `intern`" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Please refer [BossDB resources](https://www.youtube.com/watch?v=eVNr6Pzxoh8) for\n", - "information on generating an account and configuring `intern`.\n", - "\n", - "Importantly, you'll need an `intern` config file at your root directory with your BossDB api token as follows:\n", - "\n", - "```cfg\n", - " # ~/.intern/intern.cfg\n", - " [Default]\n", - " protocol = https\n", - " host = api.bossdb.io\n", - " token = \n", - "```\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the [next notebook](./02-WorkflowStructure_Optional.ipynb) notebook, we'll explore the workflow structure." 
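Before attempting an upload, it can help to confirm that the `intern.cfg` described above is present and carries a token. A minimal check using only the standard library, following the section and field names in the snippet above:

```python
# Sanity-check the intern config; section/field names follow the cfg snippet above.
import configparser
from pathlib import Path

cfg_path = Path.home() / ".intern" / "intern.cfg"
config = configparser.ConfigParser()

if not config.read(cfg_path):
    raise FileNotFoundError(f"No intern config found at {cfg_path}")
if not config["Default"].get("token"):
    raise ValueError("intern.cfg is present but no API token is set")
print(f"intern config looks usable: {cfg_path}")
```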
- ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "ele", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - }, - "vscode": { - "interpreter": { - "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/02-WorkflowStructure_Optional.ipynb b/notebooks/02-WorkflowStructure_Optional.ipynb deleted file mode 100644 index b47a275..0000000 --- a/notebooks/02-WorkflowStructure_Optional.ipynb +++ /dev/null @@ -1,331 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# DataJoint U24 - Workflow Volume" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Introduction" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook introduces some useful DataJoint concepts for exploring pipelines featuring Element Volume.\n", - "\n", - "+ DataJoint needs to be configured before running this notebook (see [01-Configure](./01-Configure.ipynb)).\n", - "+ Those familiar with the structure of DataJoint workflows can skip to [03-Explore](./03-Explore.ipynb)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To load the local config, we move to the package root." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if os.path.basename(os.getcwd()) == \"notebooks\":\n", - " os.chdir(\"..\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Schemas, Diagrams and Tables" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Schemas are conceptually related sets of tables. By importing schemas from `workflow_volume.pipeline`, we'll declare the tables on the server with the prefix in the config (if we have permission to do so). If these tables are already declared, we'll gain access. \n", - "\n", - "- `dj.list_schemas()` lists all schemas a user has access to in the current database\n", - "- `.schema.list_tables()` will provide names for each table in the format used under the hood." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2023-03-28 10:43:18,127][INFO]: Connecting kushalbakshitest@tutorial-db.datajoint.io:3306\n", - "[2023-03-28 10:43:18,619][INFO]: Connected kushalbakshitest@tutorial-db.datajoint.io:3306\n" - ] - } - ], - "source": [ - "import datajoint as dj\n", - "from workflow_volume.pipeline import lab, subject, session, scan, volume\n", - "# dj.list_schemas()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dj.list_schemas()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "volume.schema.list_tables()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`dj.Diagram()` plots tables and dependencies in a schema. 
To see additional upstream or downstream connections, add `- N` or `+ N`.\n", - "\n", - "- `volume`: Tables related to volumetric data\n", - "- `bossdb`: Schema to manage BossDB urls for each data type. This could be replaced by a similar schema featuring URLs to another endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from element_volume.bossdb import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from element_volume.volume import *" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "title": "`dj.Diagram()`: plot tables and dependencies" - }, - "outputs": [ - { - "data": { - "image/svg+xml": "\n\n\n\n\nvolume.Volume\n\n\nvolume.Volume\n\n\n\n\n\nvolume.SegmentationTask\n\n\nvolume.SegmentationTask\n\n\n\n\n\nvolume.Volume->volume.SegmentationTask\n\n\n\n\nvolume.BossDBURLs\n\n\nvolume.BossDBURLs\n\n\n\n\n\nvolume.Volume->volume.BossDBURLs\n\n\n\n\nvolume.Segmentation\n\n\nvolume.Segmentation\n\n\n\n\n\nvolume.Segmentation.Mask\n\n\nvolume.Segmentation.Mask\n\n\n\n\n\nvolume.Segmentation->volume.Segmentation.Mask\n\n\n\n\nvolume.SegmentationParamset\n\n\nvolume.SegmentationParamset\n\n\n\n\n\nvolume.SegmentationParamset->volume.SegmentationTask\n\n\n\n\nsession.Session\n\n\nsession.Session\n\n\n\n\n\nscan.Scan\n\n\nscan.Scan\n\n\n\n\n\nsession.Session->scan.Scan\n\n\n\n\nscan.Scan->volume.Volume\n\n\n\n\nvolume.SegmentationTask->volume.Segmentation\n\n\n\n", - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dj.Diagram(session.Session) + dj.Diagram(scan.Scan) + dj.Diagram(volume)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`volume.Volume` is a central table where volumetric data can be ingested, either from images on disk or downloaded from BossDB. The various *task* tables can be used to cue up analysis or ingestion of the various subsequent data types (i.e., segmentation and connectome data. Each segmented cell can be matched with data from another source (e.g., Element Calcium Imaging's `Segmentation.Mask`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "volume.schema" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dj.Diagram(volume) - 1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Table Types\n", - "\n", - "- **Manual table**: green box, manually inserted table, expect new entries daily, e.g. Subject, ProbeInsertion. \n", - "- **Lookup table**: gray box, pre inserted table, commonly used for general facts or parameters. e.g. Strain, ClusteringMethod, ClusteringParamSet. \n", - "- **Imported table**: blue oval, auto-processing table, the processing depends on the importing of external files. e.g. process of Clustering requires output files from kilosort2. \n", - "- **Computed table**: red circle, auto-processing table, the processing does not depend on files external to the database, commonly used for \n", - "- **Part table**: plain text, as an appendix to the master table, all the part entries of a given master entry represent a intact set of the master entry. e.g. 
Unit of a CuratedClustering.\n", - "\n", - "### Table Links\n", - "\n", - "- **One-to-one primary**: thick solid line, share the exact same primary key, meaning the child table inherits all the primary key fields from the parent table as its own primary key. \n", - "- **One-to-many primary**: thin solid line, inherit the primary key from the parent table, but have additional field(s) as part of the primary key as well\n", - "- **Secondary dependency**: dashed line, the child table inherits the primary key fields from parent table as its own secondary attribute." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Common Table Functions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "- `()` show table contents\n", - "- `heading` shows attribute definitions\n", - "- `describe()` show table defintiion with foreign key references" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "title": "Each datajoint table class inside the module corresponds to a table inside the schema. For example, the class `ephys.EphysRecording` correponds to the table `_ephys_recording` in the schema `neuro_ephys` in the database." - }, - "outputs": [], - "source": [ - "volume.Resolution()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "title": "`heading`: show table attributes regardless of foreign key references." - }, - "outputs": [], - "source": [ - "volume.Volume.heading" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "volume.Segmentation.describe()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "title": "ephys" - }, - "source": [ - "## Other Elements installed with the workflow\n", - "\n", - "- [`lab`](https://github.com/datajoint/element-lab): lab management related information, such as Lab, User, Project, Protocol, Source.\n", - "- [`subject`](https://github.com/datajoint/element-animal): general animal information, User, Genetic background, Death etc.\n", - "- [`session`](https://github.com/datajoint/element-session): general information of experimental sessions.\n", - "- [`calcium-imaging`](https://github.com/datajoint/element-calcium-imaging): imaging schema for generating activity traces. These can be mapped to cells in `volume.Connectome`\n", - "\n", - "For more information about these Elements, see [workflow session](https://github.com/datajoint/workflow-session) or [workflow calcium imaging](https://github.com/datajoint/workflow-calcium-imaging)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dj.Diagram(lab) + dj.Diagram(subject) + dj.Diagram(session)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "title": "[session](https://github.com/datajoint/element-session): experimental session information" - }, - "outputs": [], - "source": [ - "session.Session.describe()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summary and next step\n", - "\n", - "- This notebook introduced the overall structures of the schemas and tables in the workflow and relevant tools to explore the schema structure and table definitions.\n", - "\n", - "- The [next notebook](./03-Explore.ipynb) will introduce the detailed steps to run through `workflow-volume`." 
- ] - } - ], - "metadata": { - "jupytext": { - "encoding": "# -*- coding: utf-8 -*-", - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "ele", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.12" - }, - "vscode": { - "interpreter": { - "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/03_Explore.ipynb b/notebooks/03_Explore.ipynb deleted file mode 100644 index 52f471c..0000000 --- a/notebooks/03_Explore.ipynb +++ /dev/null @@ -1,358 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "# DataJoint U24 - Workflow Volume\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Interactively run the workflow\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- If you haven't configured your set up, refer to [01-Configure](./01-Configure.ipynb).\n", - "- For an overview of the schema, refer to [02-WorkflowStructure](02-WorkflowStructure_Optional.ipynb).\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's change the directory to load the local config, `dj_local_conf.json`.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "# change to the upper level folder to detect dj_local_conf.json\n", - "if os.path.basename(os.getcwd()) == \"notebooks\":\n", - " os.chdir(\"..\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`pipeline.py` activates the various schema and declares other required tables.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import datajoint as dj\n", - "from datetime import datetime\n", - "from element_zstack.export.bossdb import BossDBUpload\n", - "from workflow_zstack.pipeline import (\n", - " lab,\n", - " subject,\n", - " session,\n", - " scan,\n", - " volume,\n", - " bossdb,\n", - " get_session_directory,\n", - " get_volume_root_data_dir,\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": { - "tags": [] - }, - "source": [ - "## Manually Inserting Entries\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Upstream tables\n" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can insert entries into `dj.Manual` tables (green in diagrams) by providing values as a dictionary or a list of dictionaries.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "subject.Subject.insert1(\n", - " dict(subject=\"sub1\", sex=\"M\", subject_birth_date=datetime.now()),\n", - " skip_duplicates=True,\n", - ")\n", - "session_key = dict(\n", - " subject=\"sub1\",\n", - " session_id=1,\n", - ")\n", - "session.Session.insert1(\n", - " dict(\n", - " session_key,\n", - " session_datetime=datetime.now(),\n", - " ),\n", - " skip_duplicates=True,\n", - ")\n", - "session.SessionDirectory.insert1(\n", - " 
dict(**session.Session.fetch1(\"KEY\"), session_dir=\"\"),\n", - " skip_duplicates=True,\n", - ")\n", - "scan.Scan.insert1(\n", - " dict(\n", - " session_key,\n", - " scan_id=1,\n", - " acq_software=\"ScanImage\",\n", - " ), skip_duplicates=True\n", - ")\n", - "scan_key = (scan.Scan & \"subject = 'sub1'\").fetch1(\"KEY\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "volume.Volume.populate(display_progress=True)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we'll define a pameter set for segmentation with cellpose in the\n", - "`SegmentationParamset` table, and insert an entry into the `SegmentationTask`\n", - "table. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "volume.SegmentationParamset.insert_new_params(\n", - " segmentation_method=\"cellpose\",\n", - " paramset_idx=1,\n", - " params=dict(\n", - " diameter = 8,\n", - " min_size = 2,\n", - " do_3d = False,\n", - " anisotropy = 0.5,\n", - " model_type = \"nuclei\",\n", - " channels = [[0, 0]],\n", - " z_axis = 0,\n", - " skip_duplicates=True,\n", - " ),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "volume.SegmentationTask.insert1(\n", - " dict(\n", - " scan_key,\n", - " paramset_idx=1,\n", - " task_mode=\"trigger\",\n", - " ), skip_duplicates=True\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now, we can popluate the `Segmentation` table." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "volume.Segmentation.populate()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we can upload our data either from the data stored in the table or a path to images. 
If this entry is already associated with a `SessionDirectory` entry, we'll look for images in this path.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bossdb.UploadParamSet.insert_new_params(\n", - " paramset_idx=1,\n", - " paramset_desc=\"test params\",\n", - " params=dict(\n", - " voxel_units=\"micrometers\",\n", - " voxel_size=[1,1,1],\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bossdb.VolumeUploadTask()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "col_name = \"dataJointTestUpload\"\n", - "exp_name = \"CaImaging\"\n", - "chn_name = \"test10\"\n", - "bossdb.VolumeUploadTask.update1(\n", - " dict(\n", - " scan_key,\n", - " paramset_idx=1,\n", - " collection_name=col_name,\n", - " experiment_name=exp_name,\n", - " channel_name=chn_name,\n", - " upload_type=\"image\",\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bossdb.VolumeUploadTask()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "upload_key = (bossdb.VolumeUploadTask & scan_key & \"channel_name = 'test10'\").fetch(\"KEY\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "upload_key" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bossdb.BossDBURLs.populate(upload_key)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "(bossdb.BossDBURLs & scan_key).fetch1(\"neuroglancer_url\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Explain how to get permission from APL to upload data. \n", - "\n", - "Create a schema to automatically generate neuroglancer link and insert into DJ\n", - "table. \n", - "\n", - "Include BossDBUpload in BossDBURLs as a computed/imported table. " - ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,py:percent" - }, - "kernelspec": { - "display_name": "Python 3.9.13 ('ele')", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - }, - "vscode": { - "interpreter": { - "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/notebooks/py_scripts/01-Configure.py b/notebooks/py_scripts/01-Configure.py deleted file mode 100644 index 9bf1493..0000000 --- a/notebooks/py_scripts/01-Configure.py +++ /dev/null @@ -1,114 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.14.1 -# kernelspec: -# display_name: ele -# language: python -# name: python3 -# --- - -# %% [markdown] tags=[] -# # DataJoint U24 - Workflow Volume - -# %% [markdown] tags=[] -# ## Configure DataJoint - -# %% [markdown] tags=[] -# - To run an Element workflow, we need to set up a DataJoint config file, called `dj_local_conf.json`, unique to each machine. 
-# -# - To upload to BossDB, you'd need to configure an `intern.cfg`. -# -# - These configs only need to be set up once. If you already have them, skip to [02-Workflow-Structure](./02-WorkflowStructure_Optional.ipynb). -# -# - By convention, we set a local config in the workflow directory. You may be interested in [setting a global config](https://docs.datajoint.org/python/setup/01-Install-and-Connect.html). - -# %% -import os - -# change to the upper level folder to detect dj_local_conf.json -if os.path.basename(os.getcwd()) == "notebooks": - os.chdir("..") - -# %% [markdown] -# ### Configure database host address and credentials - -# %% [markdown] -# Now we can set up credentials following [instructions here](https://tutorials.datajoint.io/setting-up/get-database.html). - -# %% -import datajoint as dj -import getpass - -dj.config["database.host"] = "{YOUR_HOST}" -dj.config["database.user"] = "{YOUR_USERNAME}" -dj.config["database.password"] = getpass.getpass() # enter the password securely - -# %% [markdown] -# You should be able to connect to the database at this stage. - -# %% -dj.conn() - -# %% [markdown] -# ### Configure the `custom` field - -# %% [markdown] -# #### Prefix - -# %% [markdown] -# A schema prefix can help manage privelages on a server. Teams who work on the same schemas should use the same prefix. -# -# Setting the prefix to `neuro_` means that every schema we then create will start with `neuro_` (e.g. `neuro_lab`, `neuro_subject`, `neuro_model` etc.) - -# %% -dj.config["custom"] = {"database.prefix": "neuro_"} - -# %% [markdown] -# #### Root directory - -# %% [markdown] -# `vol_root_data_dir` sets the root path(s) for the Element. Given multiple, the Element will always figure out which root to use based on the files it expects there. This should be the directory shared across all volumetric data. - -# %% -dj.config["custom"] = {"vol_root_data_dir": ["/tmp/test_data/", "/tmp/example/"]} - -# %% [markdown] -# ## Save the DataJoint config as a json -# -# Once set, the config can either be saved locally or globally. -# -# - The local config would be saved as `dj_local_conf.json` in the workflow directory. This is usefull for managing multiple (demo) pipelines. -# - A global config would be saved as `datajoint_config.json` in the home directory. -# -# When imported, DataJoint will first check for a local config. If none, it will check for a global config. - -# %% -dj.config.save_local() -# dj.config.save_global() - -# %% [markdown] -# ## Configuring `intern` - -# %% [markdown] -# Please refer [BossDB resources](https://www.youtube.com/watch?v=eVNr6Pzxoh8) for -# information on generating an account and configuring `intern`. -# -# Importantly, you'll need an `intern` config file at your root directory with your BossDB api token as follows: -# -# ```cfg -# # ~/.intern/intern.cfg -# [Default] -# protocol = https -# host = api.bossdb.io -# token = -# ``` -# - -# %% [markdown] -# In the [next notebook](./02-WorkflowStructure_Optional.ipynb) notebook, we'll explore the workflow structure. 
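The configuration steps spread across the deleted notebook and script above reduce to a short setup block. The host, username, and root paths below are placeholders taken from the examples above; the `custom` keys match what `workflow_zstack.paths` reads.

```python
# Sketch consolidating the configuration steps above; values are placeholders.
import getpass

import datajoint as dj

dj.config["database.host"] = "{YOUR_HOST}"
dj.config["database.user"] = "{YOUR_USERNAME}"
dj.config["database.password"] = getpass.getpass()

dj.config["custom"] = {
    "database.prefix": "neuro_",
    "vol_root_data_dir": ["/tmp/test_data/", "/tmp/example/"],
}

dj.config.save_local()  # writes dj_local_conf.json in the current directory
```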
diff --git a/notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py b/notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py deleted file mode 100644 index e44f060..0000000 --- a/notebooks/py_scripts/01_Explore_Workflow-jvsc-a94162bf-4da6-4df9-a725-a0c52d99e9af2735c243-e118-4ee3-b586-4b8636f6c322.py +++ /dev/null @@ -1,9 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: light -# format_version: '1.5' -# jupytext_version: 1.14.4 -# --- diff --git a/notebooks/py_scripts/01_Explore_Workflow.py b/notebooks/py_scripts/01_Explore_Workflow.py deleted file mode 100644 index c3c23a1..0000000 --- a/notebooks/py_scripts/01_Explore_Workflow.py +++ /dev/null @@ -1,287 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: light -# format_version: '1.5' -# jupytext_version: 1.14.1 -# kernelspec: -# display_name: ele -# language: python -# name: python3 -# --- - -# DataJoint U24 - Workflow Volume -# - -# ### Intro -# - -# This notebook will describe the steps to use Element Volume for interacting with BossDB. -# Prior to using this notebook, please refer to documentation for -# [Element installation instructions](https://datajoint.com/docs/elements/user-guide/) and refer to [BossDB resources](https://www.youtube.com/watch?v=eVNr6Pzxoh8) for information on generating an account and configuring `intern`. -# -# Importantly, you'll need an `intern` config file, which should look like this: -# -# ```cfg -# # ~/.intern/intern.cfg -# [Default] -# protocol = https -# host = api.bossdb.io -# token = -# ``` -# - -# + -import datajoint as dj -import os - -if os.path.basename(os.getcwd()) == "notebooks": - os.chdir("..") -dj.conn() - -# + -dj.config["custom"]["database.prefix"] = "cbroz_wfboss_" -dj.config["custom"][ - "vol_root_data_dir" -] = "/Users/cb/Documents/data/U24_SampleData/boss/" -from workflow_volume.pipeline import volume, BossDBInterface, bossdb - -# volume.Volume.delete_quick() -# - - -volume.Volume() - -# `BossDBInterface` works much like `intern.array`, but with additional functionality for managing records in your Element Volume schema. We can optionally link this dataset to a session in our pipeline via a session key. -# -# Note, however, that we'll have to change our notation slightly. Whereas we can directly index into a dataset to get slices, we'll need to either provide slices as a string or a tuple. -# - -# ### Testing -# - -data = BossDBInterface( - "bossdb://takemura/takemura13/image", resolution=4, session_key={} -) - -# Using `intern` notion, we can look at Z slice 300, from Y voxels 200-500, and X voxels 0 to 700. -# - -data[300, 200:501, 0:701] - -# The same data can be downloaded and loaded into Element Volume using either of the following commands. -# -# Without a session directory provided via `get_session_directory` in `workflow_volume.paths`, we will infer an output directory based on the BossDB path from `get_vol_root_data_dir`. -# - -# data.download(slice_key=(300,slice(200,501),slice(0,701))) -data.download(slice_key="[300,200:501,0:701]") - -# Our volume is stored in the `Volume` - -volume.Volume() - -# With `Slice` corresponding to slices - -volume.Volume.Slice() - -# Each BossDB resolution will have a unique entry in the `Resolution` table - -volume.Resolution() - -# And, the `Zoom` table retain information about the X/Y windows we use. 
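The `BossDBInterface` slicing shown above mirrors plain `intern` usage. A sketch of the equivalent cutout with `intern` directly, assuming the config above is in place and that the convenience `array` API accepts a `resolution` argument:

```python
# Sketch: read a cutout from a public BossDB channel with intern directly.
from intern import array

em = array("bossdb://witvliet2020/Dataset_1/em", resolution=0)
cutout = em[100:120, 1000:1500, 1000:1500]  # indexed as Z, Y, X; returns a numpy array
print(cutout.shape, cutout.dtype)
```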
- -volume.Zoom() - -# Changing any of these pieces of information would download different data. - -data.download(slice_key=(slice(300, 311), slice(100, 401), slice(100, 401))) - -# + -import logging -import numpy as np -from workflow_volume.pipeline import volume, bossdb, session, subject -from workflow_volume.paths import get_vol_root_data_dir -from element_volume.volume import * - -# from workflow_volume.pipeline import BossDBInterface - -# em_data = BossDBInterface("bossdb://Kasthuri/ac4/em", resolution=0) -# seg_data = BossDBInterface("bossdb://Kasthuri/ac4/neuron", resolution=0) -# em_data = BossDBInterface("bossdb://witvliet2020/Dataset_1/em", resolution=0) -# seg_data = BossDBInterface("bossdb://witvliet2020/Dataset_1/segmentation", resolution=0) - -logger = logging.getLogger("datajoint") - -volume_key = dict(volume_id="Thy1") - - -def drop_schemas(): - from datajoint_utilities.dj_search.lists import drop_schemas - - prefix = dj.config["custom"]["database.prefix"] - drop_schemas(prefix, dry_run=False, force_drop=True) - - -def drop_tables(): - tables = [ - volume.Connectome, - volume.ConnectomeTask, - volume.ConnectomeParamset, - volume.Segmentation, - volume.Segmentation.Cell, - volume.CellMapping, - volume.SegmentationTask, - volume.SegmentationParamset, - ] - for t in tables: - t.drop_quick() - - -class upload: - @classmethod - def manual_entry(cls): - from datetime import datetime - - subject.Subject.insert1( - dict(subject="sub1", sex="M", subject_birth_date=datetime.now()), - skip_duplicates=True, - ) - session.Session.insert1( - dict( - **(subject.Subject & "subject='sub1'").fetch1("KEY"), - session_id=1, - session_datetime=datetime.now(), - ), - skip_duplicates=True, - ) - session.SessionDirectory.insert1( - dict(**session.Session.fetch1("KEY"), session_dir="sample"), - skip_duplicates=True, - ) - volume.Resolution.insert1( - dict( - resolution_id="990nm", - voxel_unit="micrometers", - voxel_z_size=1, - voxel_y_size=0.5, - voxel_x_size=0.5, - downsampling=0, - ), - skip_duplicates=True, - ) - - coll, exp, chann, seg = ( - "DataJointTest", - "test", - "CalciumImaging", - "Segmentation", - ) - - bossdb.BossDBURLs.load_bossdb_info( - collection=coll, - experiment=exp, - volume=chann, - segmentation=seg, - skip_duplicates=True, - ) - url_key = ( - bossdb.BossDBURLs.Volume & dict(collection_experiment=f"{coll}/{exp}") - ).fetch1() - - raw_data = cls.load_sample_data() - raw_data_shape = raw_data.shape - - volume.Volume.insert1( - dict( - volume_id="Thy1", - resolution_id="990nm", - session_id=1, - z_size=raw_data_shape[0], - y_size=raw_data_shape[1], - x_size=raw_data_shape[2], - channel=chann, - **url_key, - volume_data=raw_data, - ), - skip_duplicates=True, - ) - - def load_sample_data(): - from tifffile import TiffFile - from PIL import Image - from pathlib import Path - - root_dir = get_vol_root_data_dir()[0] - image_fp = root_dir + "sample/zstack_Gcamp_00001_00012.tif" - png_fp = root_dir + "sample/Z%02d.png" - image_sample = TiffFile(image_fp).asarray()[250:270, 1000:1246, :] - if not Path(png_fp % 0).exists(): - for z in range(20): - Image.fromarray(image_sample[z]).save(png_fp % z) - return image_sample - - def upload_from_volume(): - volume.Volume.upload(volume_key) - # Error uploading chunk 0-20: ndarray is not C-contiguous - - -class download: - def add_manual_boss_url(): - bossdb.BossDBURLs.load_bossdb_info( - collection="Kasthuri", - experiment="ac4", - volume="em", - segmentation="neuron", - skip_duplicates=True, - ) - bossdb.BossDBURLs.load_bossdb_info( - 
collection="witvliet2020", - experiment="Dataset_1", - volume="em", - segmentation="segmentation", - skip_duplicates=True, - ) - - def download_volume_via_classmeth(): - volume.Volume.download( - url="bossdb://witvliet2020/Dataset_1/em", - slice_key="[100:120,1000:1500,1000:1500]", - save_images=True, - save_ndarray=True, - image_mode="P", - skip_duplicates=True, - ) - - def download_seg_via_classmeth(): - volume.SegmentationParamset.insert_new_params( - segmentation_method="bossdb", - paramset_idx=1, - params=dict( - slice_key="[100:120,1000:1500,1000:1500]", - save_images=True, - save_ndarray=True, - image_mode="P", - skip_duplicates=True, - ), - ) - volume.SegmentationTask.insert1( - dict( - volume_id="witvliet2020/Dataset_1", - resolution_id=0, - task_mode="load", - paramset_idx=1, - **( - bossdb.BossDBURLs.Segmentation & "collection_experiment LIKE 'wit%'" - ).fetch1(), - ) - ) - volume.Segmentation.populate() - - @classmethod - def run_all(cls): - cls.add_manual_boss_url() - cls.download_volume_via_classmeth() - cls.download_seg_via_classmeth() - diff --git a/notebooks/py_scripts/02-WorkflowStructure_Optional.py b/notebooks/py_scripts/02-WorkflowStructure_Optional.py deleted file mode 100644 index 0d9384e..0000000 --- a/notebooks/py_scripts/02-WorkflowStructure_Optional.py +++ /dev/null @@ -1,125 +0,0 @@ -# -*- coding: utf-8 -*- -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.14.1 -# kernelspec: -# display_name: ele -# language: python -# name: python3 -# --- - -# %% [markdown] tags=[] -# # DataJoint U24 - Workflow Volume - -# %% [markdown] -# ## Introduction - -# %% [markdown] -# This notebook introduces some useful DataJoint concepts for exploring pipelines featuring Element Volume. -# -# + DataJoint needs to be configured before running this notebook (see [01-Configure](./01-Configure.ipynb)). -# + Those familiar with the structure of DataJoint workflows can skip to [03-Explore](./03-Explore.ipynb). - -# %% [markdown] -# To load the local config, we move to the package root. - -# %% -import os - -if os.path.basename(os.getcwd()) == "notebooks": - os.chdir("..") - -# %% [markdown] -# ## Schemas, Diagrams and Tables - -# %% [markdown] -# Schemas are conceptually related sets of tables. By importing schemas from `workflow_volume.pipeline`, we'll declare the tables on the server with the prefix in the config (if we have permission to do so). If these tables are already declared, we'll gain access. -# -# - `dj.list_schemas()` lists all schemas a user has access to in the current database -# - `.schema.list_tables()` will provide names for each table in the format used under the hood. - -# %% -import datajoint as dj -from workflow_volume.pipeline import lab, subject, session, volume, bossdb - -# dj.list_schemas() - -# %% -volume.schema.list_tables() - -# %% [markdown] -# `dj.Diagram()` plots tables and dependencies in a schema. To see additional upstream or downstream connections, add `- N` or `+ N`. -# -# - `volume`: Tables related to volumetric data -# - `bossdb`: Schema to manage BossDB urls for each data type. This could be replaced by a similar schema featuring URLs to another endpoint. - -# %% `dj.Diagram()`: plot tables and dependencies -dj.Diagram(volume) + dj.Diagram(bossdb) - -# %% [markdown] -# `volume.Volume` is a central table where volumetric data can be ingested, either from images on disk or downloaded from BossDB. 
The various *task* tables can be used to cue up analysis or ingestion of the various subsequent data types (i.e., segmentation and connectome data. Each segmented cell can be matched with data from another source (e.g., Element Calcium Imaging's `Segmentation.Mask`). - -# %% -dj.Diagram(volume) - 1 - -# %% [markdown] -# ### Table Types -# -# - **Manual table**: green box, manually inserted table, expect new entries daily, e.g. Subject, ProbeInsertion. -# - **Lookup table**: gray box, pre inserted table, commonly used for general facts or parameters. e.g. Strain, ClusteringMethod, ClusteringParamSet. -# - **Imported table**: blue oval, auto-processing table, the processing depends on the importing of external files. e.g. process of Clustering requires output files from kilosort2. -# - **Computed table**: red circle, auto-processing table, the processing does not depend on files external to the database, commonly used for -# - **Part table**: plain text, as an appendix to the master table, all the part entries of a given master entry represent a intact set of the master entry. e.g. Unit of a CuratedClustering. -# -# ### Table Links -# -# - **One-to-one primary**: thick solid line, share the exact same primary key, meaning the child table inherits all the primary key fields from the parent table as its own primary key. -# - **One-to-many primary**: thin solid line, inherit the primary key from the parent table, but have additional field(s) as part of the primary key as well -# - **Secondary dependency**: dashed line, the child table inherits the primary key fields from parent table as its own secondary attribute. - -# %% [markdown] -# ## Common Table Functions - -# %% [markdown] -# -# - `
()` show table contents -# - `heading` shows attribute definitions -# - `describe()` show table defintiion with foreign key references - -# %% Each datajoint table class inside the module corresponds to a table inside the schema. For example, the class `ephys.EphysRecording` correponds to the table `_ephys_recording` in the schema `neuro_ephys` in the database. -volume.Resolution() - -# %% `heading`: show table attributes regardless of foreign key references. -volume.Volume.heading - -# %% -volume.Segmentation.describe() - -# %% ephys [markdown] -# ## Other Elements installed with the workflow -# -# - [`lab`](https://github.com/datajoint/element-lab): lab management related information, such as Lab, User, Project, Protocol, Source. -# - [`subject`](https://github.com/datajoint/element-animal): general animal information, User, Genetic background, Death etc. -# - [`session`](https://github.com/datajoint/element-session): general information of experimental sessions. -# - [`calcium-imaging`](https://github.com/datajoint/element-calcium-imaging): imaging schema for generating activity traces. These can be mapped to cells in `volume.Connectome` -# -# For more information about these Elements, see [workflow session](https://github.com/datajoint/workflow-session) or [workflow calcium imaging](https://github.com/datajoint/workflow-calcium-imaging). - -# %% -dj.Diagram(lab) + dj.Diagram(subject) + dj.Diagram(session) - -# %% [session](https://github.com/datajoint/element-session): experimental session information -session.Session.describe() - -# %% [markdown] -# ## Summary and next step -# -# - This notebook introduced the overall structures of the schemas and tables in the workflow and relevant tools to explore the schema structure and table definitions. -# -# - The [next notebook](./03-Explore.ipynb) will introduce the detailed steps to run through `workflow-volume`. diff --git a/notebooks/py_scripts/03_Explore.py b/notebooks/py_scripts/03_Explore.py deleted file mode 100644 index ee2cbb7..0000000 --- a/notebooks/py_scripts/03_Explore.py +++ /dev/null @@ -1,265 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.14.1 -# kernelspec: -# display_name: Python 3.9.13 ('ele') -# language: python -# name: python3 -# --- - -# %% [markdown] tags=[] -# # DataJoint U24 - Workflow Volume -# - -# %% [markdown] tags=[] -# ## Interactively run the workflow -# - -# %% [markdown] -# - If you haven't configured your set up, refer to [01-Configure](./01-Configure.ipynb). -# - For an overview of the schema, refer to [02-WorkflowStructure](02-WorkflowStructure_Optional.ipynb). -# - -# %% [markdown] -# Let's change the directory to load the local config, `dj_local_conf.json`. -# - -# %% -import os - -# change to the upper level folder to detect dj_local_conf.json -if os.path.basename(os.getcwd()) == "notebooks": - os.chdir("..") - -# %% [markdown] -# `pipeline.py` activates the various schema and declares other required tables. -# - -# %% -import datajoint as dj -from datetime import datetime -from workflow_volume.pipeline import ( - lab, - subject, - session, - volume, - bossdb, - get_session_directory, - get_vol_root_data_dir, -) - -# %% [markdown] tags=[] -# ## Manually Inserting Entries -# - -# %% [markdown] -# ### Upstream tables -# - -# %% [markdown] -# We can insert entries into `dj.Manual` tables (green in diagrams) by providing values as a dictionary or a list of dictionaries. 
-# - -# %% -subject.Subject.insert1( - dict(subject="sub1", sex="M", subject_birth_date=datetime.now()), - skip_duplicates=True, -) -session_key = (subject.Subject & "subject='sub1'").fetch1("KEY") -session.Session.insert1( - dict( - **session_key, - session_id=1, - session_datetime=datetime.now(), - ), - skip_duplicates=True, -) -session.SessionDirectory.insert1( - dict(**session.Session.fetch1("KEY"), session_dir=""), - skip_duplicates=True, -) - -# %% [markdown] -# `get_session_directory` will fetch your relative directory path form this `SessionDirectory` table. -# - -# %% -from element_interface.utils import find_full_path - -data_path = find_full_path(get_vol_root_data_dir(), get_session_directory(session_key)) - -# %% [markdown] tags=[] -# ### Element Volume Tables -# -# #### Uploading -# - -# %% [markdown] -# The `Resolution` table keeps track details related to data collection, including units and size in each dimension. `downsampling` indicates number of times the dataset has been compressed by taking every other pixel. Within BossDB, resolution 3 data (here, `downsampling` 3) reflects every 8th pixel, for example. -# - -# %% -volume.Resolution.insert1( - dict( - resolution_id="990nm", - voxel_unit="micrometers", - voxel_z_size=1, - voxel_y_size=0.5, - voxel_x_size=0.5, - downsampling=0, - ), - skip_duplicates=True, -) - -# %% [markdown] -# BossDB operates with a hierarchy of collections, experiments, and channels. A collection spans multiple experiments. An experiment may collect one or more channels, including electron micrioscopy data, segmentation annotations, and connectome data. These form the portions of a BossDB URL. -# -# Here, we choose some example values. With the proper permissions, we can create a BossDB dataset right from our Python environment. -# - -# %% -collection, experiment, volume, segmentation = ( - "DataJointTest", - "test", - "CalciumImaging", - "Segmented", -) - -bossdb.BossDBURLs.load_bossdb_info( - collection=collection, - experiment=experiment, - volume=volume, - segmentation=segmentation, - skip_duplicates=True, -) -url_key = ( - bossdb.BossDBURLs.Volume & dict(collection_experiment=f"{collection}/{experiment}") -).fetch1() - - -# %% [markdown] -# The `load_sample_data` function below provides a template for loading a multi-page tif file and saving it into individual Z-axis images. -# -# In the next step, we can choose to upload to BossDB either with individual images in a directory or through an image volume in memory. To store the volume data in the table, replace the contents below with a function that loads your data. -# -# Note: BossDB only accepts image data as `uint8` or `uint16` numpy arrays. -# - -# %% -def load_sample_data(): - from tifffile import TiffFile - from PIL import Image - from pathlib import Path - - root_dir = get_vol_root_data_dir()[0] - image_fp = root_dir + "/.tif" - png_fp = root_dir + "sample/Z%02d.png" # Z-plane - image_sample = TiffFile(image_fp).asarray() - - image_sample = image_sample.astype("uint16") - if not Path(png_fp % 0).exists(): - for z in range(20): - Image.fromarray(image_sample[z]).save(png_fp % z) - return image_sample - - -# %% [markdown] -# Now, we can insert into the `Volume` table. 
- -# %% -raw_data = load_sample_data() -raw_data_shape = raw_data.shape -volume_key = dict(volume_id="Thy1", resolution_id="990nm") -volume.Volume.insert1( - dict( - **volume_key, - session_id=1, - z_size=raw_data_shape[0], - y_size=raw_data_shape[1], - x_size=raw_data_shape[2], - channel=volume, - **url_key, - volume_data=raw_data, - ), - skip_duplicates=True, -) - -# %% [markdown] -# Finally, we can upload our data either from the data stored in the table or a path to images. If this entry is already associated with a `SessionDirectory` entry, we'll look for images in this path. -# - -# %% -# For other optional parameters, see additional docstring info here: -# element_volume.export.bossdb.BossDBUpload -volume.Volume.upload(volume_key, upload_from="table") -# volume.Volume.upload(volume_key, upload_from="dir", data_extension="*pattern*.png") - -# %% [markdown] -# #### Download - -# %% [markdown] -# The `Volume` and `BossDBURLs` tables offer additional class methods for downloading BossDB data or returning objects for interacting with the data. -# - -# %% -bossdb.BossDBURLs.load_bossdb_info( - collection="Kasthuri", - experiment="ac4", - volume="em", - segmentation="neuron", - skip_duplicates=True, -) - -# %% -# For other optional parameters, see additional docstring info here: -# element_volume.readers.bossdb.BossDBInterface.load_data_into_element -volume.Volume.download( - "bossdb://witvliet2020/Dataset_1/em", - downsampling=3, - slice_key="[100:120,1000:1500,1000:1500]", - save_images=True, - save_ndarray=True, - image_mode="P", - skip_duplicates=True, -) -data = volume.Volume.return_bossdb_data( - volume_key=dict(volume_id="witvliet2020/Dataset_1") -) - -# %% [markdown] -# To load segmentation data, we can set the `task_mode` to load and add additional pararameters to the `SegmentationParamset` table. - -# %% -volume.SegmentationParamset.insert_new_params( - segmentation_method="bossdb", - paramset_idx=1, - params=dict( - slice_key="[100:120,1000:1500,1000:1500]", - save_images=True, - save_ndarray=True, - image_mode="P", - skip_duplicates=True, - ), -) -volume.SegmentationTask.insert1( - dict( - volume_id="witvliet2020/Dataset_1", - resolution_id=0, - task_mode="load", - paramset_idx=1, - **( - bossdb.BossDBURLs.Segmentation & "collection_experiment LIKE 'wit%'" - ).fetch1(), - ) -) -volume.Segmentation.populate() - -# %% [markdown] -# In the [next notebook](./04-Drop.ipynb), we'll touch on how to drop these various schemas for development. -# diff --git a/notebooks/py_scripts/04-Drop_Optional.py b/notebooks/py_scripts/04-Drop_Optional.py deleted file mode 100644 index f8ae415..0000000 --- a/notebooks/py_scripts/04-Drop_Optional.py +++ /dev/null @@ -1,67 +0,0 @@ -# --- -# jupyter: -# jupytext: -# formats: ipynb,py:percent -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.14.1 -# kernelspec: -# display_name: ele -# language: python -# name: python3 -# --- - -# %% [markdown] tags=[] -# # DataJoint U24 - Workflow Volume -# - -# %% [markdown] -# Change into the parent directory to find the `dj_local_conf.json` file. 
-# - -# %% tags=[] -import os -import datajoint as dj -from pathlib import Path - -# change to the upper level folder to detect dj_local_conf.json -if os.path.basename(os.getcwd()) == "notebooks": - os.chdir("..") - -# %% -from workflow_volume.pipeline import ( - imaging_report, - volume, - bossdb, - imaging, - scan, - Device, - session, - subject, - surgery, - lab, -) - -dj.config["safemode"] = True # Set to false to turn off drop confirmation - -# %% [markdown] -# ## Drop schemas -# -# - Schemas are not typically dropped in a production workflow with real data in it. -# - At the developmental phase, it might be required for the table redesign. -# - When dropping all schemas is needed, drop items starting with the most downstream. -# - -# %% -# imaging_report.schema.drop() -# volume.schema.drop() -# bossdb.schema.drop() -# imaging.schema.drop() -# scan.schema.drop() -# Device.drop_quick() -# session.schema.drop() -# subject.schema.drop() -# surgery.schema.drop() -# lab.schema.drop() diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb new file mode 100644 index 0000000..d450e00 --- /dev/null +++ b/notebooks/tutorial.ipynb @@ -0,0 +1,996 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "# Process volumetric microscopic calcium imaging data with DataJoint Elements\n", + "\n", + "This notebook will walk through processing volumetric two-photon calcium imaging data collected\n", + "from ScanImage and segmented with cellpose. While anyone can work through this\n", + "notebook to process volumetric microscopic calcium imaging data through DataJoint's\n", + "`element-zstack` pipeline, for a detailed tutorial about the fundamentals of\n", + "DataJoint including table types, make functions, and querying, please see the\n", + "[DataJoint Tutorial](https://github.com/datajoint/datajoint-tutorials).\n", + "\n", + "The DataJoint Python API and Element Calcium Imaging offer a lot of features to\n", + "support collaboration, automation, reproducibility, and visualizations.\n", + "For more information on these topics, please visit our documentation: \n", + " \n", + "- [DataJoint Core](https://datajoint.com/docs/core/): General principles\n", + "\n", + "- DataJoint [Python](https://datajoint.com/docs/core/datajoint-python/) and\n", + " [MATLAB](https://datajoint.com/docs/core/datajoint-matlab/) APIs: in-depth reviews of\n", + " specifics\n", + "\n", + "- [DataJoint Element ZStack](https://datajoint.com/docs/elements/element-zstack/):\n", + " A modular pipeline for volumetric calcium imaging data analysis\n", + "\n", + "\n", + "Let's start by importing the packages necessary to run this tutorial." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# change to the upper level folder to detect dj_local_conf.json\n", + "if os.path.basename(os.getcwd()) == \"notebooks\":\n", + " os.chdir(\"..\")\n", + "\n", + "import datajoint as dj\n", + "import datetime\n", + "import numpy as np" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The Basics:\n", + "\n", + "Any DataJoint workflow can be broken down into basic 3 parts:\n", + "\n", + "- `Insert`\n", + "- `Populate` (or process)\n", + "- `Query`\n", + "\n", + "In this demo we will:\n", + "- `Insert` metadata about an animal subject, recording session, and \n", + " parameters related to processing calcium imaging data through Suite2p.\n", + "- `Populate` tables with outputs of image processing including motion correction,\n", + " segmentation, mask classification, fluorescence traces and deconvolved activity traces.\n", + "- `Query` the processed data from the database and plot calcium activity traces.\n", + "\n", + "Each of these topics will be explained thoroughly in this notebook." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Workflow diagram\n", + "\n", + "This workflow is assembled from 5 DataJoint elements:\n", + "+ [element-lab](https://github.com/datajoint/element-lab)\n", + "+ [element-animal](https://github.com/datajoint/element-animal)\n", + "+ [element-session](https://github.com/datajoint/element-session)\n", + "+ [element-calcium-imaging](https://github.com/datajoint/element-calcium-imaging)\n", + "+ [element-zstack](https://github.com/datajoint/element-zstack)\n", + "\n", + "Each element declares its own schema in the database. These schemas can be imported like\n", + "any other Python package. This workflow is composed of schemas from each of the Elements\n", + "above and correspond to a module within `workflow_zstack.pipeline`.\n", + "\n", + "The schema diagram is a good reference for understanding the order of the tables\n", + "within the workflow, as well as the corresponding table type.\n", + "Let's activate the elements and view the schema diagram." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2023-04-20 15:28:53,314][WARNING]: lab.Project and related tables will be removed in a future version of Element Lab. 
Please use the project schema.\n", + "[2023-04-20 15:28:53,339][INFO]: Connecting kushalbakshi2@tutorial-db.datajoint.io:3306\n", + "[2023-04-20 15:28:53,788][INFO]: Connected kushalbakshi2@tutorial-db.datajoint.io:3306\n" + ] + } + ], + "source": [ + "from workflow_zstack.pipeline import lab, subject, session, scan, volume, volume_matching, bossdb, get_session_directory, get_volume_root_data_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatch\n", + "\n", + "\n", + "volume_matching.VolumeMatch\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatch.Transformation\n", + "\n", + "\n", + "volume_matching.VolumeMatch.Transformation\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatch->volume_matching.VolumeMatch.Transformation\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatch.CommonMask\n", + "\n", + "\n", + "volume_matching.VolumeMatch.CommonMask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatch.VolumeMask\n", + "\n", + "\n", + "volume_matching.VolumeMatch.VolumeMask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatch.CommonMask->volume_matching.VolumeMatch.VolumeMask\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask.Volume\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask.Volume\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.Transformation\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.VolumeMask\n", + "\n", + "\n", + "\n", + "\n", + "volume.Segmentation.Mask\n", + "\n", + "\n", + "volume.Segmentation.Mask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume.Segmentation.Mask->volume_matching.VolumeMatch.VolumeMask\n", + "\n", + "\n", + "\n", + "\n", + "volume.Volume\n", + "\n", + "\n", + "volume.Volume\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "bossdb.VolumeUploadTask\n", + "\n", + "\n", + "bossdb.VolumeUploadTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume.Volume->bossdb.VolumeUploadTask\n", + "\n", + "\n", + "\n", + "\n", + "volume.SegmentationTask\n", + "\n", + "\n", + "volume.SegmentationTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume.Volume->volume.SegmentationTask\n", + "\n", + "\n", + "\n", + "\n", + "volume.SegmentationParamset\n", + "\n", + "\n", + "volume.SegmentationParamset\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume.SegmentationParamset->volume.SegmentationTask\n", + "\n", + "\n", + "\n", + "\n", + "bossdb.BossDBURLs\n", + "\n", + "\n", + "bossdb.BossDBURLs\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "bossdb.VolumeUploadTask->bossdb.BossDBURLs\n", + "\n", + "\n", + "\n", + "\n", + "volume.Segmentation\n", + "\n", + "\n", + "volume.Segmentation\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume.Segmentation->volume_matching.VolumeMatchTask.Volume\n", + "\n", + "\n", + "\n", + "\n", + "volume.Segmentation->volume.Segmentation.Mask\n", + "\n", + "\n", + "\n", + "\n", + "bossdb.UploadParamSet\n", + "\n", + "\n", + "bossdb.UploadParamSet\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "bossdb.UploadParamSet->bossdb.VolumeUploadTask\n", + "\n", + "\n", + "\n", + "\n", + "scan.Scan\n", + "\n", + "\n", + "scan.Scan\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "scan.Scan->volume.Volume\n", + "\n", + "\n", + "\n", + "\n", + 
"subject.Subject\n", + "\n", + "\n", + "subject.Subject\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "session.Session\n", + "\n", + "\n", + "session.Session\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "subject.Subject->session.Session\n", + "\n", + "\n", + "\n", + "\n", + "volume.SegmentationTask->volume.Segmentation\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask->volume_matching.VolumeMatch\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask->volume_matching.VolumeMatchTask.Volume\n", + "\n", + "\n", + "\n", + "\n", + "session.Session->scan.Scan\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dj.Diagram(subject.Subject) + dj.Diagram(session.Session) + dj.Diagram(scan.Scan) + dj.Diagram(volume) + dj.Diagram(volume_matching) + dj.Diagram(bossdb)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Diagram Breakdown\n", + "\n", + "While the diagram above seems complex at first, it becomes more clear when it's\n", + "approached as a hierarchy of tables that **define the order** in which the\n", + "workflow **expects to receive data** in each of its tables. \n", + "\n", + "- Tables with a green, or rectangular shape expect to receive data manually using the\n", + "`insert()` function. \n", + "- The tables higher up in the diagram such as `subject.Subject()`\n", + "should be the first to receive data. This ensures data integrity by preventing orphaned\n", + "data within DataJoint schemas. \n", + "- Tables with a purple oval or red circle can be automatically filled with relevant data\n", + " by calling `populate()`. For example `volume.Segmentation` and its part-table\n", + " `volume.Segmentation.Mask` are both populated with `volume.Segmentation.populate()`.\n", + "- Tables connected by a solid line depend on attributes (entries) in the table\n", + " above it.\n", + "\n", + "#### Table Types\n", + "\n", + "There are 5 table types in DataJoint. Each of these appear in the diagram above.\n", + "\n", + "- **Manual table**: green box, manually inserted table, expect new entries daily, e.g. `Subject`, `Scan`. \n", + "- **Lookup table**: gray box, pre inserted table, commonly used for general facts or parameters. e.g. `bossdb.UploadParamset`, `volume.SegmentationParamset`. \n", + "- **Imported table**: blue oval, auto-processing table, the processing depends\n", + " on the importing of external files. e.g. process of obtaining the `Volume` data requires\n", + " raw data stored outside the database. \n", + "- **Computed table**: red circle, auto-processing table, the processing does not\n", + " depend on files external to the database, commonly used for computations such\n", + " as `volume.Segmentation`, `volume_match.VolumeMatch`. \n", + "- **Part table**: plain text, as an appendix to the master table, all the part\n", + " entries of a given master entry represent a intact set of the master entry.\n", + " e.g. Masks of `Segmentation`." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Starting the workflow: Insert\n", + "\n", + "### Insert entries into manual tables\n", + "\n", + "To view details about a table's dependencies and attributes, use functions `.describe()`\n", + "and `.heading`, respectively.\n", + "\n", + "Let's start with the first table in the schema diagram (the `subject` table) and view\n", + "the table attributes we need to insert. There are two ways you can do this: *run each\n", + "of the two cells below*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "subject.Subject.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "subject.Subject.heading" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (2346125880.py, line 6)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[4], line 6\u001b[1;36m\u001b[0m\n\u001b[1;33m subject_description=),\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "subject.Subject.insert1(\n", + " dict(\n", + " subject=\"subject1\",\n", + " sex=\"M\",\n", + " subject_birth_date=\"2023-01-01\",\n", + " subject_description=\"Cellpose segmentation of volumetric data.\"),\n", + " skip_duplicates=True,\n", + ")\n", + "subject.Subject()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's repeat the steps above for the `Session` table and see how the output varies between `.describe` and `.heading`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session.Session.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session.Session.heading" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The cells above show the dependencies and attributes for the `session.Session` table.\n", + "Notice that `describe` shows the dependencies of the table on upstream tables. The\n", + "`Session` table depends on the upstream `Subject` table. \n", + "\n", + "Whereas `heading` lists all the attributes of the `Session` table, regardless of\n", + "whether they are declared in an upstream table. \n", + "\n", + "Here we will demonstrate a very useful way of inserting data by assigning the dictionary\n", + "to a variable `session_key`. This variable can be used to insert entries into tables that\n", + "contain the `Session` table as one of its attributes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session_key = dict(\n", + " subject=\"subject1\",\n", + " session_id=1,\n", + ")\n", + "session.Session.insert1(\n", + " dict(\n", + " session_key,\n", + " session_datetime=datetime.now(),\n", + " ),\n", + " skip_duplicates=True,\n", + ")\n", + "session.Session()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `SessionDirectory` table locates the relevant data files in a directory path\n", + "relative to the root directory defined in your `dj.config[\"custom\"]`. 
More\n", + "information about `dj.config` is provided at the end of this tutorial and is\n", + "particularly useful for local deployments of this workflow." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session.SessionDirectory.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session.SessionDirectory.heading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "session.SessionDirectory.insert1(\n", + " dict(**session.Session.fetch1(\"KEY\"), session_dir=\"\"),\n", + " skip_duplicates=True,\n", + ")\n", + "session.SessionDirectory()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll use `describe` and `heading` for the Scan table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scan.Scan.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scan.Scan.heading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scan.Scan.insert1(\n", + " dict(\n", + " session_key,\n", + " scan_id=0,\n", + " acq_software=\"ScanImage\",\n", + " ),\n", + " skip_duplicates=True,\n", + ")\n", + "scan_key = (scan.Scan & \"subject = 'sub1'\").fetch1(\"KEY\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Populate\n", + "\n", + "### Automatically populate tables\n", + "\n", + "`volume.Volume` is the first table in the pipeline that can be populated automatically.\n", + "If a table contains a part table, this part table is also populated during the\n", + "`populate()` call. `populate()` takes several arguments including the a session\n", + "key. This key restricts `populate()` to performing the operation on the session\n", + "of interest rather than all possible sessions which could be a time-intensive\n", + "process for databases with lots of entries.\n", + "\n", + "Let's view the `volume.Volume` and populate it using the `populate()` call." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.Volume.heading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.Volume()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.Volume.populate(scan_key, display_progress=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's view the information was entered into this table:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.Volume()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We're almost ready to perform volume segmentation with `cellpose`. An important step before\n", + "processing is managing the parameters which will be used in that step. To do so, we will\n", + "insert parameters required by cellpose into a DataJoint table\n", + "`SegmentationParamSet`. This table keeps track of all combinations of your image\n", + "processing parameters. 
You can choose which parameters are used during\n", + "processing in a later step.\n", + "\n", + "Let's view the attributes and insert data into `volume.SegmentationParamSet`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.SegmentationParamset.heading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.SegmentationParamSet.insert_new_params(\n", + " segmentation_method=\"cellpose\",\n", + " paramset_idx=1,\n", + " params=dict(\n", + " diameter=8,\n", + " min_size=2,\n", + " do_3d=False,\n", + " anisotropy=0.5,\n", + " model_type=\"nuclei\",\n", + " channels=[[0, 0]],\n", + " z_axis=0,\n", + " skip_duplicates=True,\n", + " ),\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we've inserted cellpose parameters into the `SegmentationParamSet` table,\n", + "we're almost ready to run image processing. DataJoint uses a `SegmentationTask` table to\n", + "manage which `Volume` and `SegmentationParamSet` should be used during processing. \n", + "\n", + "This table is important for defining several important aspects of\n", + "downstream processing. Let's view the attributes to get a better understanding. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.SegmentationTask.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.SegmentationTask.heading" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `SegmentationTask` table contains two important attributes: \n", + "+ `paramset_idx`\n", + "+ `task_mode`\n", + "\n", + "The `paramset_idx` attribute is tracks\n", + "your segmentation parameter sets. You can choose the parameter set on which\n", + "you want to run segmentation analysis based on this attribute. This\n", + "attribute tells the `Segmentation` table which set of parameters you are\n", + "processing in a given `populate()`.\n", + "\n", + "The `task_mode` attribute can be set to either `load` or `trigger`. When set to `trigger`, the\n", + "segmentation step will run cellpose on the raw data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.SegmentationTask.insert1(\n", + " dict(\n", + " scan_key,\n", + " paramset_idx=1,\n", + " task_mode=\"trigger\",\n", + " ),\n", + " skip_duplicates=True,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For now, Element ZStack only supports triggering cellpose. Now, we can popluate\n", + "the `Segmentation` table. This step may take several hours, depending on your\n", + "computer's capabilities." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "volume.Segmentation.populate(scan_key, display_progress=True)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we can upload our data to BossDB. The `bossdb` schema contains three\n", + "tables to define the upload parameters, the upload tasks, and execute the\n", + "upload. The structure of these tables mirrors the `volume` schema. \n", + "\n", + "Let's begin by viewing the upload parameter table and inserting upload\n", + "parameters into it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.UploadParamSet.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.UploadParamSet.heading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.UploadParamSet.insert_new_params(\n", + " paramset_idx=1,\n", + " paramset_desc=\"test params\",\n", + " params=dict(\n", + " voxel_units=\"micrometers\",\n", + " voxel_size=[1, 1, 1],\n", + " ),\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next step is to define the upload task by naming the collection, experiment,\n", + "and channel where the data should be uploaded. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.VolumeUploadTask.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.VolumeUploadTask.heading" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "col_name = \"dataJointTestUpload\"\n", + "exp_name = \"CaImaging\"\n", + "chn_name = \"test10\"\n", + "bossdb.VolumeUploadTask.update1(\n", + " dict(\n", + " scan_key,\n", + " paramset_idx=1,\n", + " collection_name=col_name,\n", + " experiment_name=exp_name,\n", + " channel_name=chn_name,\n", + " upload_type=\"image\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "upload_key = (bossdb.VolumeUploadTask & scan_key & \"channel_name = 'test10'\").fetch(\n", + " \"KEY\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.BossDBURLs.populate(upload_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(bossdb.BossDBURLs & scan_key).fetch1(\"neuroglancer_url\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Explain how to get permission from APL to upload data. \n", + "\n", + "Create a schema to automatically generate neuroglancer link and insert into DJ\n", + "table. \n", + "\n", + "Include BossDBUpload in BossDBURLs as a computed/imported table. 
" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:percent" + }, + "kernelspec": { + "display_name": "Python 3.9.13 ('ele')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + }, + "vscode": { + "interpreter": { + "hash": "d00c4ad21a7027bf1726d6ae3a9a6ef39c8838928eca5a3d5f51f3eb68720410" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/workflow_zstack/pipeline.py b/workflow_zstack/pipeline.py index 630f6cf..ba153a0 100644 --- a/workflow_zstack/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -1,8 +1,11 @@ -from element_animal import subject, surgery -from element_calcium_imaging import imaging, imaging_report, scan +import datajoint as dj from element_lab import lab +from element_lab.lab import Lab, Location, Project, Protocol, Source, User +from element_animal import subject, surgery +from element_animal.subject import Subject from element_session import session_with_id as session -from element_zstack import volume +from element_calcium_imaging import imaging, imaging_report, scan +from element_zstack import volume, volume_matching from element_zstack.export import bossdb from . import db_prefix @@ -19,6 +22,7 @@ "subject", "surgery", "volume", + "volume_matching", "bossdb", "Device", "get_session_directory", @@ -29,19 +33,19 @@ # ---------------------------------- Activate schemas ---------------------------------- lab.activate(db_prefix + "lab") - -# subject.activate(db_prefix + "subject", linking_module=__name__) -surgery.activate(db_prefix + "subject", db_prefix + "surgery", linking_module=__name__) +subject.activate(db_prefix + "subject", linking_module=__name__) +surgery.activate(db_prefix + "surgery", linking_module=__name__) Experimenter = lab.User session.activate(db_prefix + "session", linking_module=__name__) Equipment = Device +Session = session.Session +SessionDirectory = session.SessionDirectory imaging.activate(db_prefix + "imaging", db_prefix + "scan", linking_module=__name__) Mask = imaging.Segmentation.Mask -Session = session.Session -SessionDirectory = session.SessionDirectory Scan = scan.Scan volume.activate(db_prefix + "volume", linking_module=__name__) +volume_matching.activate(db_prefix + "volume_matching") bossdb.activate(db_prefix + "bossdb", linking_module=__name__) From 9c1eea2bb4f089c082795e850e1151674efd58a1 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 20 Apr 2023 17:29:29 -0500 Subject: [PATCH 05/62] Add pytest and devcontainer files --- .devcontainer/Dockerfile | 43 +++ .devcontainer/devcontainer.json | 26 ++ .devcontainer/docker-compose.yaml | 12 + .devcontainer/local/devcontainer.json | 26 ++ .devcontainer/local/docker-compose.yaml | 13 + .gitignore | 1 - tests/__init__.py | 5 + tests/conftest.py | 295 +++++++++++++++++++- tests/test_export.py | 150 ++++++++++ tests/test_ingest.py | 113 +++++++- tests/test_pipeline_generation.py | 34 ++- tests/test_populate.py | 348 ++++++++++++++++++++++++ 12 files changed, 1036 insertions(+), 30 deletions(-) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/docker-compose.yaml create mode 100644 .devcontainer/local/devcontainer.json create mode 100644 .devcontainer/local/docker-compose.yaml create mode 100644 tests/__init__.py create mode 100644 
tests/test_export.py create mode 100644 tests/test_populate.py diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..a65d696 --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,43 @@ +FROM python:3.9-slim + +RUN \ + adduser --system --disabled-password --shell /bin/bash vscode && \ + # install docker + apt-get update && \ + apt-get install ca-certificates curl gnupg lsb-release -y && \ + mkdir -m 0755 -p /etc/apt/keyrings && \ + curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \ + apt-get update && \ + apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y && \ + usermod -aG docker vscode && \ + apt-get clean + +RUN \ + # dev setup + apt update && \ + apt-get install sudo git bash-completion graphviz default-mysql-client s3fs procps -y && \ + usermod -aG sudo vscode && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \ + pip install --no-cache-dir --upgrade black pip && \ + echo '. /etc/bash_completion' >> /home/vscode/.bashrc && \ + echo 'export PS1="\[\e[32;1m\]\u\[\e[m\]@\[\e[34;1m\]\H\[\e[m\]:\[\e[33;1m\]\w\[\e[m\]$ "' >> /home/vscode/.bashrc && \ + # dircolors -b >> /home/vscode/.bashrc && \ # somehow fix colors + apt-get clean +COPY ./requirements.txt /tmp/ +RUN \ + # workflow dependencies + apt-get install gcc ffmpeg libsm6 libxext6 -y && \ + pip install --no-cache-dir -r /tmp/requirements.txt && \ + # clean up + rm /tmp/requirements.txt && \ + apt-get clean + +ENV DJ_HOST fakeservices.datajoint.io +ENV DJ_USER root +ENV DJ_PASS simple + +ENV DATABASE_PREFIX neuro_ + +USER vscode +CMD bash -c "sudo rm /var/run/docker.pid; sudo dockerd" \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..47d92ee --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,26 @@ +{ + "name": "Tutorial", + "dockerComposeFile": "docker-compose.yaml", + "service": "app", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + "onCreateCommand": "pip install -e . 
&& MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f", + "hostRequirements": { + "cpus": 4, + "memory": "8gb", + "storage": "32gb" + }, + "forwardPorts": [ + 3306 + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml new file mode 100644 index 0000000..28db3c5 --- /dev/null +++ b/.devcontainer/docker-compose.yaml @@ -0,0 +1,12 @@ +version: "3" +services: + app: + extends: + file: ./local-test/docker-compose.yaml + service: app + devices: + - /dev/fuse + cap_add: + - SYS_ADMIN + security_opt: + - apparmor:unconfined \ No newline at end of file diff --git a/.devcontainer/local/devcontainer.json b/.devcontainer/local/devcontainer.json new file mode 100644 index 0000000..af0d917 --- /dev/null +++ b/.devcontainer/local/devcontainer.json @@ -0,0 +1,26 @@ +{ + "name": "Local", + "dockerComposeFile": "docker-compose.yaml", + "service": "app", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + "onCreateCommand": "pip install -r ./requirements_dev.txt && pip install -e . && pip install -e ../element-zstack && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f", + "hostRequirements": { + "cpus": 4, + "memory": "8gb", + "storage": "32gb" + }, + "forwardPorts": [ + 3306 + ], + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/local/docker-compose.yaml b/.devcontainer/local/docker-compose.yaml new file mode 100644 index 0000000..0dd6db4 --- /dev/null +++ b/.devcontainer/local/docker-compose.yaml @@ -0,0 +1,13 @@ +version: "3" +services: + app: + cpus: 4 + mem_limit: 8g + build: + context: ../.. + dockerfile: ./.devcontainer/Dockerfile + extra_hosts: + - fakeservices.datajoint.io:127.0.0.1 + volumes: + - ../../..:/workspaces + privileged: true # only because of dind \ No newline at end of file diff --git a/.gitignore b/.gitignore index c454817..560b576 100644 --- a/.gitignore +++ b/.gitignore @@ -33,7 +33,6 @@ nosetests.xml coverage.xml .hypothesis/ .pytest_cache/ -docker-compose.y*ml # C extension, Translations # editors: vscode, emacs, Mac diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..6f83d15 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,5 @@ +""" +See pyproject.toml for config options. 
+Run all: pytest tests/ +Run one: pytest tests/test_SCRIPT.py -k test_name +""" diff --git a/tests/conftest.py b/tests/conftest.py index 26efb8f..0d95ea5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,8 +8,8 @@ import pytest from element_interface.utils import QuietStdOut, find_full_path, value_to_bool -from workflow_volume.ingest import ingest_sessions -from workflow_volume.paths import get_vol_root_data_dir +from workflow_array_ephys.ingest import ingest_lab, ingest_sessions, ingest_subjects +from workflow_array_ephys.paths import get_ephys_root_data_dir # ------------------- SOME CONSTANTS ------------------- @@ -120,7 +120,7 @@ def dj_config(setup): return -@pytest.fixture(autouse=True, scope="session") +@pytest.fixture(scope="session") def test_data(dj_config): """If data does not exist or partial data is present, attempt download with DJArchive to the first listed root directory""" @@ -128,7 +128,7 @@ def test_data(dj_config): for p in sessions_dirs: try: - find_full_path(get_vol_root_data_dir(), p) + find_full_path(get_ephys_root_data_dir(), p) except FileNotFoundError: test_data_exists = False # If data not found break @@ -160,7 +160,7 @@ def test_data(dj_config): client = djarchive_client.client() - test_data_dir = get_vol_root_data_dir() + test_data_dir = get_ephys_root_data_dir() if isinstance(test_data_dir, list): # if multiple root dirs, first test_data_dir = test_data_dir[0] @@ -175,7 +175,7 @@ def test_data(dj_config): @pytest.fixture(autouse=True, scope="session") def pipeline(): - from workflow_volume import pipeline + from workflow_array_ephys import pipeline yield { "subject": pipeline.subject, @@ -184,7 +184,7 @@ def pipeline(): "probe": pipeline.probe, "ephys_report": pipeline.ephys_report, "session": pipeline.session, - "get_vol_root_data_dir": pipeline.get_vol_root_data_dir, + "get_ephys_root_data_dir": pipeline.get_ephys_root_data_dir, "ephys_mode": pipeline.ephys_mode, } @@ -198,15 +198,122 @@ def ingest_data(setup, pipeline, test_data): """For each input, generates csv in test_user_data_dir and ingests in schema""" # CSV as list of 3: filename, relevant tables, content all_csvs = { - "file.csv": { + "lab/labs.csv": { "func": null_function, "args": {}, - "content": ["header,one,two", "info,a,b"], + "content": [ + "lab,lab_name,organization,org_name,address," + + "time_zone,location,location_description", + "LabA,The Example Lab,Uni1,Example Uni,'221B Baker St,London NW1 6XE,UK'," + + "UTC+0,Example Building,'2nd floor lab dedicated to all fictional experiments.'", + "LabB,The Other Lab,Uni2,Other Uni,'Oxford OX1 2JD, United Kingdom'," + + "UTC+0,Other Building,'fictional campus dedicated to imaginaryexperiments.'", + ], }, - "session.csv": { - "func": ingest_sessions, + "lab/projects.csv": { + "func": null_function, + "args": {}, + "content": [ + "project,project_description,project_title,project_start_date," + + "repository_url,repository_name,codeurl", + "ProjA,Example project to populate element-lab," + + "Example project to populate element-lab,2020-01-01," + + "https://github.com/datajoint/element-lab/," + + "element-lab,https://github.com/datajoint/element" + + "-lab/tree/main/element_lab", + "ProjB,Other example project to populate element-lab," + + "Other example project to populate element-lab,2020-01-02," + + "https://github.com/datajoint/element-session/," + + "element-session,https://github.com/datajoint/" + + "element-session/tree/main/element_session", + ], + }, + "lab/project_users.csv": { + "func": null_function, + "args": {}, + 
"content": [ + "user,project", + "Sherlock,ProjA", + "Sherlock,ProjB", + "Watson,ProjB", + "Dr. Candace Pert,ProjA", + "User1,ProjA", + ], + }, + "lab/publications.csv": { + "func": null_function, "args": {}, - "content": ["header,one,two", "info,a,b"], + "content": [ + "project,publication", + "ProjA,arXiv:1807.11104", + "ProjA,arXiv:1807.11104v1", + ], + }, + "lab/keywords.csv": { + "func": null_function, + "args": {}, + "content": [ + "project,keyword", + "ProjA,Study", + "ProjA,Example", + "ProjB,Alternate", + ], + }, + "lab/protocols.csv": { + "func": null_function, + "args": {}, + "content": [ + "protocol,protocol_type,protocol_description", + "ProtA,IRB expedited review,Protocol for managing " + "data ingestion", + "ProtB,Alternative Method,Limited protocol for " + "piloting only", + ], + }, + "lab/users.csv": { + "func": ingest_lab, + "args": { + "lab_csv_path": f"{test_user_data_dir}/lab/labs.csv", + "project_csv_path": f"{test_user_data_dir}/lab/projects.csv", + "publication_csv_path": f"{test_user_data_dir}/lab/publications.csv", + "keyword_csv_path": f"{test_user_data_dir}/lab/keywords.csv", + "protocol_csv_path": f"{test_user_data_dir}/lab/protocols.csv", + "users_csv_path": f"{test_user_data_dir}/lab/users.csv", + "project_user_csv_path": f"{test_user_data_dir}/lab/project_users.csv", + }, + "content": [ + "lab,user,user_role,user_email,user_cellphone", + "LabA,Sherlock,PI,Sherlock@BakerSt.com," + "+44 20 7946 0344", + "LabA,Watson,Dr,DrWatson@BakerSt.com,+44 73 8389 1763", + "LabB,Dr. Candace Pert,PI,Pert@gmail.com," + "+44 74 4046 5899", + "LabA,User1,Lab Tech,fake@email.com,+44 1632 960103", + "LabB,User2,Lab Tech,fake2@email.com,+44 1632 960102", + ], + }, + "subjects.csv": { + "func": ingest_subjects, + "args": {"subject_csv_path": f"{test_user_data_dir}/subjects.csv"}, + "content": [ + "subject,sex,subject_birth_date,subject_description", + "subject1,F,2020-01-01 00:00:01,dl56", + "subject2,M,2020-01-01 00:00:01,SC035", + "subject3,M,2020-01-01 00:00:01,SC038", + "subject4,M,2020-01-01 00:00:01,oe_talab", + "subject5,F,2020-01-01 00:00:01,rich", + "subject6,F,2020-01-01 00:00:01,manuel", + ], + }, + "sessions.csv": { + "func": ingest_sessions, + "args": {"session_csv_path": f"{test_user_data_dir}/sessions.csv"}, + "content": [ + "subject,session_dir,session_note,user", + f"subject1,{sessions_dirs[0]},Data collection notes,User2", + f"subject2,{sessions_dirs[1]},Data collection notes,User2", + f"subject2,{sessions_dirs[2]},Interrupted session,User2", + f"subject3,{sessions_dirs[3]},Data collection notes,User1", + f"subject4,{sessions_dirs[4]},Successful data collection,User2", + f"subject5,{sessions_dirs[5]},Successful data collection,User1", + f"subject6,{sessions_dirs[6]},Ambient temp abnormally low,User2", + ], }, } # If data in last table, presume didn't tear down last time, skip insert @@ -223,3 +330,167 @@ def ingest_data(setup, pipeline, test_data): for csv in all_csvs: csv_path = test_user_data_dir / csv csv_path.unlink() + + +@pytest.fixture(scope="session") +def testdata_paths(): + """Paths for test data 'subjectX/sessionY/probeZ/etc'""" + return { + "npx3A-p1-ks": "subject5/session1/probe_1/ks2.1_01", + "npx3A-p2-ks": "subject5/session1/probe_2/ks2.1_01", + "oe_npx3B-ks": "subject4/experiment1/recording1/continuous/" + + "Neuropix-PXI-100.0/ks", + "sglx_npx3A-p1": "subject5/session1/probe_1", + "oe_npx3B": "subject4/experiment1/recording1/continuous/" + + "Neuropix-PXI-100.0", + "sglx_npx3B-p1": "subject6/session1/towersTask_g0_imec0", + "npx3B-p1-ks": 
"subject6/session1/towersTask_g0_imec0", + } + + +@pytest.fixture(scope="session") +def ephys_insertionlocation(pipeline, ingest_data): + """Insert probe location into ephys.InsertionLocation""" + ephys = pipeline["ephys"] + + for probe_insertion_key in ephys.ProbeInsertion.fetch("KEY"): + ephys.InsertionLocation.insert1( + dict( + **probe_insertion_key, + skull_reference="Bregma", + ap_location=0, + ml_location=0, + depth=0, + theta=0, + phi=0, + beta=0, + ), + skip_duplicates=True, + ) + yield + + if _tear_down: + with verbose_context: + ephys.InsertionLocation.delete() + + +@pytest.fixture(scope="session") +def kilosort_paramset(pipeline): + """Insert kilosort parameters into ephys.ClusteringParamset""" + ephys = pipeline["ephys"] + + params_ks = { + "fs": 30000, + "fshigh": 150, + "minfr_goodchannels": 0.1, + "Th": [10, 4], + "lam": 10, + "AUCsplit": 0.9, + "minFR": 0.02, + "momentum": [20, 400], + "sigmaMask": 30, + "ThPr": 8, + "spkTh": -6, + "reorder": 1, + "nskip": 25, + "GPU": 1, + "Nfilt": 1024, + "nfilt_factor": 4, + "ntbuff": 64, + "whiteningRange": 32, + "nSkipCov": 25, + "scaleproc": 200, + "nPCs": 3, + "useRAM": 0, + } + + # Insert here, since most of the test will require this paramset inserted + ephys.ClusteringParamSet.insert_new_params( + clustering_method="kilosort2.5", + paramset_desc="Spike sorting using Kilosort2.5", + params=params_ks, + paramset_idx=0, + ) + + yield params_ks + + if _tear_down: + with verbose_context: + (ephys.ClusteringParamSet & "paramset_idx = 0").delete() + + +@pytest.fixture(scope="session") +def ephys_recordings(pipeline, ingest_data): + """Populate ephys.EphysRecording""" + ephys = pipeline["ephys"] + + ephys.EphysRecording.populate() + + yield + + if _tear_down: + with verbose_context: + ephys.EphysRecording.delete() + + +@pytest.fixture(scope="session") +def clustering_tasks(pipeline, kilosort_paramset, ephys_recordings): + """Insert keys from ephys.EphysRecording into ephys.Clustering""" + ephys = pipeline["ephys"] + + for ephys_rec_key in (ephys.EphysRecording - ephys.ClusteringTask).fetch("KEY"): + ephys_file_path = pathlib.Path( + ((ephys.EphysRecording.EphysFile & ephys_rec_key).fetch("file_path"))[0] + ) + ephys_file = find_full_path(get_ephys_root_data_dir(), ephys_file_path) + recording_dir = ephys_file.parent + kilosort_dir = next(recording_dir.rglob("spike_times.npy")).parent + ephys.ClusteringTask.insert1( + { + **ephys_rec_key, + "paramset_idx": 0, + "task_mode": "load", + "clustering_output_dir": kilosort_dir.as_posix(), + }, + skip_duplicates=True, + ) + + yield + + if _tear_down: + with verbose_context: + ephys.ClusteringTask.delete() + + +@pytest.fixture(scope="session") +def clustering(clustering_tasks, pipeline): + """Populate ephys.Clustering""" + ephys = pipeline["ephys"] + + ephys.Clustering.populate() + + yield + + if _tear_down: + with verbose_context: + ephys.Clustering.delete() + + +@pytest.fixture(scope="session") +def curations(clustering, pipeline): + """Insert keys from ephys.ClusteringTask into ephys.Curation""" + ephys_mode = pipeline["ephys_mode"] + + if ephys_mode == "no-curation": + yield + else: + ephys = pipeline["ephys"] + + for key in (ephys.ClusteringTask - ephys.Curation).fetch("KEY"): + ephys.Curation().create1_from_clustering_task(key) + + yield + + if _tear_down: + with verbose_context: + ephys.Curation.delete() diff --git a/tests/test_export.py b/tests/test_export.py new file mode 100644 index 0000000..6328eab --- /dev/null +++ b/tests/test_export.py @@ -0,0 +1,150 @@ +import datetime +import 
time + +from element_interface.utils import find_full_path, find_root_directory +from pynwb.ecephys import ElectricalSeries + +from workflow_array_ephys.export import ( + ecephys_session_to_nwb, + session_to_nwb, + write_nwb, +) + + +def test_session_to_nwb(setup, pipeline, ingest_data): + verbose_context, _ = setup + + with verbose_context: + nwbfile = session_to_nwb( + **{ + "session_key": { + "subject": "subject5", + "session_datetime": datetime.datetime(2018, 7, 3, 20, 32, 28), + }, + "lab_key": {"lab": "LabA"}, + "protocol_key": {"protocol": "ProtA"}, + "project_key": {"project": "ProjA"}, + } + ) + + assert nwbfile.session_id == "subject5_2018-07-03T20:32:28" + assert nwbfile.session_description == "Successful data collection" + # when saved in NWB, converts local to UTC + assert nwbfile.session_start_time == datetime.datetime( + 2018, 7, 3, 20, 32, 28 + ).astimezone(datetime.timezone.utc) + assert nwbfile.experimenter == ["User1"] + + assert nwbfile.subject.subject_id == "subject5" + assert nwbfile.subject.sex == "F" + + assert nwbfile.institution == "Example Uni" + assert nwbfile.lab == "The Example Lab" + + assert nwbfile.protocol == "ProtA" + assert nwbfile.notes == "Protocol for managing data ingestion" + + assert nwbfile.experiment_description == "Example project to populate element-lab" + + +def test_write_to_nwb( + setup, + pipeline, + ingest_data, + ephys_insertionlocation, + kilosort_paramset, + ephys_recordings, + clustering_tasks, + clustering, + curations, +): + verbose_context, verbose = setup + ephys = pipeline["ephys"] + + session_key = dict(subject="subject5", session_datetime="2018-07-03 20:32:28") + + ephys.LFP.populate(session_key, display_progress=verbose) + ephys.CuratedClustering.populate(session_key, display_progress=verbose) + ephys.WaveformSet.populate(session_key, display_progress=verbose) + + ecephys_kwargs = { + "session_key": session_key, + "raw": True, + "spikes": True, + "lfp": "dj", + "end_frame": 250, + } + + with verbose_context: + nwbfile = ecephys_session_to_nwb(**ecephys_kwargs) + + root_dirs = pipeline["get_ephys_root_data_dir"]() + root_dir = find_root_directory( + root_dirs, + find_full_path( + root_dirs, + (pipeline["session"].SessionDirectory & session_key).fetch1("session_dir"), + ), + ) + + write_nwb(nwbfile, root_dir / time.strftime("_test_%Y%m%d-%H%M%S.nwb")) + + +def test_convert_to_nwb( + setup, + pipeline, + ingest_data, + ephys_insertionlocation, + kilosort_paramset, + ephys_recordings, + clustering_tasks, + clustering, + curations, +): + verbose_context, verbose = setup + ephys = pipeline["ephys"] + + session_key = dict(subject="subject5", session_datetime="2018-07-03 20:32:28") + + ephys.CuratedClustering.populate(session_key, display_progress=verbose) + ephys.WaveformSet.populate(session_key, display_progress=verbose) + + ecephys_kwargs = { + "session_key": session_key, + "end_frame": 250, + "spikes": True, + "lab_key": {"lab": "LabA"}, + "protocol_key": {"protocol": "ProtA"}, + "project_key": {"project": "ProjA"}, + } + + with verbose_context: + nwbfile = ecephys_session_to_nwb(**ecephys_kwargs) + + for x in ("262716621", "714000838"): + assert x in nwbfile.devices + + assert len(nwbfile.electrodes) == 1920 + for col in ("shank", "shank_row", "shank_col"): + assert col in nwbfile.electrodes + + for es_name in ("ElectricalSeries1", "ElectricalSeries2"): + es = nwbfile.acquisition[es_name] + assert isinstance(es, ElectricalSeries) + assert es.conversion == 2.34375e-06 + + # make sure the ElectricalSeries objects don't share 
electrodes + assert not set(nwbfile.acquisition["ElectricalSeries1"].electrodes.data) & set( + nwbfile.acquisition["ElectricalSeries2"].electrodes.data + ) + + assert len(nwbfile.units) == 499 + + for col in ("cluster_quality_label", "spike_depths"): + assert col in nwbfile.units + + for es_name in ("ElectricalSeries1", "ElectricalSeries2"): + es = nwbfile.processing["ecephys"].data_interfaces["LFP"][es_name] + assert isinstance(es, ElectricalSeries) + assert es.conversion == 4.6875e-06 + assert es.rate == 2500.0 diff --git a/tests/test_ingest.py b/tests/test_ingest.py index 916c3a7..52b133d 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -1,4 +1,109 @@ -def test_ingest_volume(pipeline, ingest_data): - """Check length of various Volume schema tables""" - volume = pipeline["Volume"] - assert len(volume.Volume()) == 2, f"Check Volume: len={len(volume.Volume())}" +import os +import pathlib +import sys + +from element_interface.utils import find_full_path, find_root_directory + +docker_root = "/main/test_data/workflow_ephys_data1" + + +def test_ingest_subjects(pipeline, ingest_data): + """Check number of subjects inserted into the `subject.Subject` table""" + subject = pipeline["subject"] + assert len(subject.Subject()) == 6 + + +def test_ingest_sessions(pipeline, ingest_data): + ephys = pipeline["ephys"] + probe = pipeline["probe"] + session = pipeline["session"] + + assert len(session.Session()) == 7 + assert len(probe.Probe()) == 9 + assert len(ephys.ProbeInsertion()) == 13 + + session_info = ingest_data["sessions.csv"]["content"][1].split(",") + + assert (session.SessionDirectory & {"subject": session_info[0]}).fetch1( + "session_dir" + ) == session_info[1] + + +def test_find_valid_full_path(pipeline, ingest_data): + + if not os.environ.get("IS_DOCKER", False): + return # It doesn't make sense to assert the root testing locally + + get_ephys_root_data_dir = pipeline["get_ephys_root_data_dir"] + ephys_root_data_dir = ( + [get_ephys_root_data_dir()] + if not isinstance(get_ephys_root_data_dir(), list) + else get_ephys_root_data_dir() + ) + + # add more options for root directories + if sys.platform == "win32": # win32 even if Windows 64-bit + ephys_root_data_dir = ephys_root_data_dir + ["J:/", "M:/"] + else: + ephys_root_data_dir = ephys_root_data_dir + ["mnt/j", "mnt/m"] + + # test: providing relative-path: correctly search for the full-path + session_info = ingest_data["sessions.csv"]["content"][1].split(",") + + session_full_path = find_full_path(ephys_root_data_dir, session_info[1]) + + full_path = pathlib.Path(docker_root, "subject1/session1") + + assert full_path == session_full_path, str( + "Session path does not match docker root:" + + f"\n\t{full_path}\n\t{session_full_path}" + ) + + +def test_find_root_directory(pipeline, ingest_data): + """ + Test that `find_root_directory` works correctly. 
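+    Given a session's full path, it should return the member of the configured
+    root-directory list that contains that path.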
+ """ + + get_ephys_root_data_dir = pipeline["get_ephys_root_data_dir"] + ephys_root_data_dir = ( + [get_ephys_root_data_dir()] + if not isinstance(get_ephys_root_data_dir(), list) + else get_ephys_root_data_dir() + ) + # add more options for root directories + if sys.platform == "win32": + ephys_root_data_dir = ephys_root_data_dir + ["J:/", "M:/"] + else: + ephys_root_data_dir = ephys_root_data_dir + ["mnt/j", "mnt/m"] + + ephys_root_data_dir = [pathlib.Path(p) for p in ephys_root_data_dir] + + # test: providing full-path: correctly search for the root_dir + session_info = ingest_data["sessions.csv"]["content"][1].split(",") + + if os.environ.get("IS_DOCKER", False): + session_full_path = pathlib.Path(docker_root, session_info[1]) + root_dir = find_root_directory(ephys_root_data_dir, session_full_path) + assert ( + root_dir.as_posix() == docker_root + ), f"Root path does not match: {docker_root}" + else: + session_full_path = find_full_path(get_ephys_root_data_dir(), session_info[1]) + root_dir = find_root_directory(ephys_root_data_dir, session_full_path) + assert root_dir in ephys_root_data_dir, "Problems finding root dir" + + +def test_paramset_insert(kilosort_paramset, pipeline): + ephys = pipeline["ephys"] + from element_interface.utils import dict_to_uuid + + method, desc, paramset_hash = ( + ephys.ClusteringParamSet & {"paramset_idx": 0} + ).fetch1("clustering_method", "paramset_desc", "param_set_hash") + assert method == "kilosort2.5" + assert desc == "Spike sorting using Kilosort2.5" + assert ( + dict_to_uuid({**kilosort_paramset, "clustering_method": method}) + == paramset_hash + ) diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py index 7e7d740..9311a20 100644 --- a/tests/test_pipeline_generation.py +++ b/tests/test_pipeline_generation.py @@ -1,18 +1,26 @@ def test_generate_pipeline(pipeline): + subject = pipeline["subject"] session = pipeline["session"] - volume = pipeline["volume"] + ephys = pipeline["ephys"] + probe = pipeline["probe"] + ephys_report = pipeline["ephys_report"] - volume_children = volume.volume.children() - assert volume.Volume.full_table_name in volume_children - assert session.Session.volume.full_table_name in volume_children + # test elements connection from lab, subject to Session + assert subject.Subject.full_table_name in session.Session.parents() - # test connection Subject -> schema children - session_children_links = session.Session.children() - session_children_list = [ - volume.Volume, - ] + # test elements connection from Session to probe, ephys, ephys_report + assert session.Session.full_table_name in ephys.ProbeInsertion.parents() + assert probe.Probe.full_table_name in ephys.ProbeInsertion.parents() + assert "spike_times" in (ephys.CuratedClustering.Unit.heading.secondary_attributes) - for child in session_children_list: - assert ( - child.full_table_name in session_children_links - ), f"session.Session.children() did not include {child.full_table_name}" + assert all( + [ + ephys.CuratedClustering.full_table_name + in ephys_report.ProbeLevelReport.parents(), + ephys.CuratedClustering.Unit.full_table_name + in ephys_report.UnitLevelReport.parents(), + ] + ) + + # test the connection between quality metric tables + assert ephys.QualityMetrics.full_table_name in ephys_report.QualityMetricSet.parents() \ No newline at end of file diff --git a/tests/test_populate.py b/tests/test_populate.py new file mode 100644 index 0000000..40dea92 --- /dev/null +++ b/tests/test_populate.py @@ -0,0 +1,348 @@ +import json + +import 
numpy as np +import pandas as pd + + +def test_ephys_recording_populate(pipeline, ephys_recordings): + ephys = pipeline["ephys"] + assert len(ephys.EphysRecording()) == 13 + + +def test_LFP_populate_npx3B_OpenEphys(testdata_paths, pipeline, ephys_recordings): + """ + Populate ephys.LFP with OpenEphys items, + recording Neuropixels Phase 3B (Neuropixels 1.0) probe + """ + ephys = pipeline["ephys"] + rel_path = testdata_paths["oe_npx3B"] + rec_key = ( + ephys.EphysRecording + & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}"') + ).fetch1("KEY") + ephys.LFP.populate(rec_key) + + lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") + assert len(lfp_mean) == 520054 + + electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") + assert np.array_equal( + electrodes, + np.array( + [ + 5, + 14, + 23, + 32, + 41, + 50, + 59, + 68, + 77, + 86, + 95, + 104, + 113, + 122, + 131, + 140, + 149, + 158, + 167, + 176, + 185, + 194, + 203, + 212, + 221, + 230, + 239, + 248, + 257, + 266, + 275, + 284, + 293, + 302, + 311, + 320, + 329, + 338, + 347, + 356, + 365, + 374, + 383, + ] + ), + ) + + +def test_LFP_populate_npx3A_SpikeGLX(testdata_paths, pipeline, ephys_recordings): + """Populate ephys.LFP with SpikeGLX items, recording Neuropixels Phase 3A probe""" + ephys = pipeline["ephys"] + + rel_path = testdata_paths["sglx_npx3A-p1"] + rec_key = ( + ephys.EphysRecording + & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}%"') + ).fetch1("KEY") + ephys.LFP.populate(rec_key) + + lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") + assert len(lfp_mean) == 846666 + + electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") + assert np.array_equal( + electrodes, + np.array( + [ + 5, + 14, + 23, + 32, + 41, + 50, + 59, + 68, + 77, + 86, + 95, + 104, + 113, + 122, + 131, + 140, + 149, + 158, + 167, + 176, + 185, + 194, + 203, + 212, + 221, + 230, + 239, + 248, + 257, + 266, + 275, + 284, + 293, + 302, + 311, + 320, + 329, + 338, + 347, + 356, + 365, + 374, + 383, + ] + ), + ) + + +def test_LFP_populate_npx3B_SpikeGLX(testdata_paths, pipeline, ephys_recordings): + """ + Populate ephys.LFP with SpikeGLX items, + recording Neuropixels Phase 3B (Neuropixels 1.0) probe + """ + + ephys = pipeline["ephys"] + + rel_path = testdata_paths["sglx_npx3B-p1"] + rec_key = ( + ephys.EphysRecording + & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}%"') + ).fetch1("KEY") + ephys.LFP.populate(rec_key) + + lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") + assert len(lfp_mean) == 4769946 + + electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") + assert np.array_equal( + electrodes, + np.array( + [ + 5, + 14, + 23, + 32, + 41, + 50, + 59, + 68, + 77, + 86, + 95, + 104, + 113, + 122, + 131, + 140, + 149, + 158, + 167, + 176, + 185, + 194, + 203, + 212, + 221, + 230, + 239, + 248, + 257, + 266, + 275, + 284, + 293, + 302, + 311, + 320, + 329, + 338, + 347, + 356, + 365, + 374, + 383, + ] + ), + ) + + +def test_clustering_populate(clustering, pipeline): + ephys = pipeline["ephys"] + assert len(ephys.Clustering()) == 13 + + +def test_curated_clustering_populate(curations, pipeline, testdata_paths): + """Populate ephys.CuratedClustering with multiple recordings""" + ephys = pipeline["ephys"] + + rel_path = testdata_paths["npx3A-p1-ks"] + curation_key = _get_curation_key(rel_path, pipeline) + ephys.CuratedClustering.populate(curation_key) + assert ( + len( + ephys.CuratedClustering.Unit + & curation_key + & 'cluster_quality_label = "good"' + ) + == 76 + ) + + rel_path 
= testdata_paths["oe_npx3B-ks"] + curation_key = _get_curation_key(rel_path, pipeline) + ephys.CuratedClustering.populate(curation_key) + assert ( + len( + ephys.CuratedClustering.Unit + & curation_key + & 'cluster_quality_label = "good"' + ) + == 68 + ) + + rel_path = testdata_paths["npx3B-p1-ks"] + curation_key = _get_curation_key(rel_path, pipeline) + ephys.CuratedClustering.populate(curation_key) + assert ( + len( + ephys.CuratedClustering.Unit + & curation_key + & 'cluster_quality_label = "good"' + ) + == 55 + ) + + +def test_waveform_populate_npx3B_OpenEphys(curations, pipeline, testdata_paths): + """ + Populate ephys.WaveformSet with OpenEphys + Neuropixels Phase 3B (Neuropixels 1.0) probe + """ + ephys = pipeline["ephys"] + rel_path = testdata_paths["oe_npx3B-ks"] + curation_key = _get_curation_key(rel_path, pipeline) + ephys.CuratedClustering.populate(curation_key) + ephys.WaveformSet.populate(curation_key) + + waveforms = np.vstack( + (ephys.WaveformSet.PeakWaveform & curation_key).fetch("peak_electrode_waveform") + ) + + assert waveforms.shape == (204, 64) + + +def test_waveform_populate_npx3B_SpikeGLX(curations, pipeline, testdata_paths): + """ + Populate ephys.WaveformSet with SpikeGLX + Neuropixels Phase 3B (Neuropixels 1.0) probe + """ + + ephys = pipeline["ephys"] + + rel_path = testdata_paths["npx3B-p1-ks"] + curation_key = _get_curation_key(rel_path, pipeline) + ephys.CuratedClustering.populate(curation_key) + ephys.WaveformSet.populate(curation_key) + + waveforms = np.vstack( + (ephys.WaveformSet.PeakWaveform & curation_key).fetch("peak_electrode_waveform") + ) + + assert waveforms.shape == (150, 64) + + +def test_build_electrode_layouts(pipeline): + """ + Test build_electrode_layouts function in probe.py + """ + + # Load probe configuration + f = open("user_data/neuropixels_probes_config.json") + probe_configs = json.load(f) + # Load ground truth table for each probe type + truth_df = pd.read_csv("user_data/probe_type_electrode.csv") + + probe = pipeline["probe"] + + for probe_type, config in probe_configs.items(): + + test_df = pd.DataFrame(probe.build_electrode_layouts(probe_type, **config)) + + test_arr = np.array(test_df.drop(columns=["probe_type"]), dtype=np.int16) + truth_arr = np.array( + truth_df.loc[truth_df["probe_type"] == probe_type].drop( + columns=["probe_type"] + ), + dtype=np.int16, + ) + assert np.array_equal( + test_arr, truth_arr + ), f"probe type '{probe_type}' electrode layout does not match" + + +# ---- HELPER FUNCTIONS ---- + + +def _get_curation_key(output_relative_path, pipeline): + ephys = pipeline["ephys"] + ephys_mode = pipeline["ephys_mode"] + + if ephys_mode == "no-curation": + EphysCuration = ephys.ClusteringTask + output_dir_attr_name = "clustering_output_dir" + else: + EphysCuration = ephys.Curation + output_dir_attr_name = "curation_output_dir" + + return ( + EphysCuration & f'{output_dir_attr_name} LIKE "%{output_relative_path}"' + ).fetch1("KEY") From b3be149e93ae07a894d3f4bd1f3103fa194f74d9 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 27 Apr 2023 11:30:26 -0500 Subject: [PATCH 06/62] General code cleanup and update notebooks --- notebooks/tutorial.ipynb | 435 +++++++++++++++--------------- pyproject.toml | 2 +- requirements.txt | 2 +- setup.py | 8 +- tests/conftest.py | 404 ++++----------------------- tests/test_export.py | 166 ++---------- tests/test_ingest.py | 109 -------- tests/test_pipeline_generation.py | 26 +- tests/test_populate.py | 348 ------------------------ workflow_zstack/paths.py | 2 +- 
workflow_zstack/pipeline.py | 3 +- 11 files changed, 317 insertions(+), 1188 deletions(-) delete mode 100644 tests/test_ingest.py delete mode 100644 tests/test_populate.py diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index d450e00..60af172 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -7,16 +7,21 @@ "tags": [] }, "source": [ - "# Process volumetric microscopic calcium imaging data with DataJoint Elements\n", + "# Process volumetric fluorescent microscopy data with DataJoint Elements\n", "\n", "This notebook will walk through processing volumetric two-photon calcium imaging data collected\n", "from ScanImage and segmented with cellpose. While anyone can work through this\n", - "notebook to process volumetric microscopic calcium imaging data through DataJoint's\n", + "notebook to process volumetric fluorescent microscopy data through DataJoint's\n", "`element-zstack` pipeline, for a detailed tutorial about the fundamentals of\n", "DataJoint including table types, make functions, and querying, please see the\n", "[DataJoint Tutorial](https://github.com/datajoint/datajoint-tutorials).\n", "\n", - "The DataJoint Python API and Element Calcium Imaging offer a lot of features to\n", + "**Please note that uploading data to BossDB via this pipeline requires an API\n", + "token which can be obtained by creating an account at\n", + "[api.bossdb.io](https://api.bossdb.io). You will also need resource manager\n", + "permissions from the team at [BossDB](https://bossdb.org).**\n", + "\n", + "The DataJoint Python API and Element ZStack offer a lot of features to\n", "support collaboration, automation, reproducibility, and visualizations.\n", "For more information on these topics, please visit our documentation: \n", " \n", @@ -98,21 +103,19 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2023-04-20 15:28:53,314][WARNING]: lab.Project and related tables will be removed in a future version of Element Lab. 
Please use the project schema.\n", - "[2023-04-20 15:28:53,339][INFO]: Connecting kushalbakshi2@tutorial-db.datajoint.io:3306\n", - "[2023-04-20 15:28:53,788][INFO]: Connected kushalbakshi2@tutorial-db.datajoint.io:3306\n" - ] - } - ], + "outputs": [], "source": [ - "from workflow_zstack.pipeline import lab, subject, session, scan, volume, volume_matching, bossdb, get_session_directory, get_volume_root_data_dir" + "from workflow_zstack.pipeline import (\n", + " lab,\n", + " subject,\n", + " session,\n", + " scan,\n", + " volume,\n", + " volume_matching,\n", + " bossdb,\n", + ")" ] }, { @@ -123,206 +126,216 @@ { "data": { "image/svg+xml": [ - "\n", + "\n", "\n", - "\n", - "\n", + "\n", + "\n", "\n", - "volume_matching.VolumeMatch\n", - "\n", - "\n", - "volume_matching.VolumeMatch\n", + "volume_matching.VolumeMatch.CommonMask\n", + "\n", + "\n", + "volume_matching.VolumeMatch.CommonMask\n", "\n", "\n", "\n", - "\n", - "\n", - "volume_matching.VolumeMatch.Transformation\n", - "\n", - "\n", - "volume_matching.VolumeMatch.Transformation\n", + "\n", + "\n", + "volume_matching.VolumeMatch.VolumeMask\n", + "\n", + "\n", + "volume_matching.VolumeMatch.VolumeMask\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "volume_matching.VolumeMatch->volume_matching.VolumeMatch.Transformation\n", - "\n", + "volume_matching.VolumeMatch.CommonMask->volume_matching.VolumeMatch.VolumeMask\n", + "\n", "\n", - "\n", + "\n", "\n", - "volume_matching.VolumeMatch.CommonMask\n", - "\n", - "\n", - "volume_matching.VolumeMatch.CommonMask\n", + "volume_matching.VolumeMatchTask\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask\n", "\n", "\n", "\n", - "\n", - "\n", - "volume_matching.VolumeMatch.VolumeMask\n", - "\n", - "\n", - "volume_matching.VolumeMatch.VolumeMask\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask.Volume\n", + "\n", + "\n", + "volume_matching.VolumeMatchTask.Volume\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "volume_matching.VolumeMatch.CommonMask->volume_matching.VolumeMatch.VolumeMask\n", - "\n", + "volume_matching.VolumeMatchTask->volume_matching.VolumeMatchTask.Volume\n", + "\n", "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask.Volume\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask.Volume\n", + "\n", + "\n", + "volume_matching.VolumeMatch\n", + "\n", + "\n", + "volume_matching.VolumeMatch\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.Transformation\n", - "\n", + "volume_matching.VolumeMatchTask->volume_matching.VolumeMatch\n", + "\n", + "\n", + "\n", + "\n", + "volume.Segmentation\n", + "\n", + "\n", + "volume.Segmentation\n", + "\n", "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.VolumeMask\n", - "\n", "\n", "\n", "\n", "volume.Segmentation.Mask\n", "\n", - "\n", - "volume.Segmentation.Mask\n", + "\n", + "volume.Segmentation.Mask\n", "\n", "\n", "\n", - "\n", + "\n", + "\n", + "volume.Segmentation->volume.Segmentation.Mask\n", + "\n", + "\n", + "\n", "\n", + "volume.Segmentation->volume_matching.VolumeMatchTask.Volume\n", + "\n", + "\n", + "\n", + "\n", "volume.Segmentation.Mask->volume_matching.VolumeMatch.VolumeMask\n", - "\n", + "\n", "\n", "\n", "\n", "volume.Volume\n", "\n", - "\n", - "volume.Volume\n", + "\n", + "volume.Volume\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "volume.VoxelSize\n", + "\n", + "\n", + "volume.VoxelSize\n", "\n", "\n", "\n", + "\n", + "\n", + "volume.Volume->volume.VoxelSize\n", + "\n", + "\n", "\n", "\n", 
"bossdb.VolumeUploadTask\n", - "\n", - "\n", - "bossdb.VolumeUploadTask\n", + "\n", + "\n", + "bossdb.VolumeUploadTask\n", "\n", "\n", "\n", "\n", - "\n", + "\n", "volume.Volume->bossdb.VolumeUploadTask\n", - "\n", + "\n", "\n", "\n", - "\n", + "\n", "volume.SegmentationTask\n", - "\n", - "\n", - "volume.SegmentationTask\n", + "\n", + "\n", + "volume.SegmentationTask\n", "\n", "\n", "\n", "\n", - "\n", + "\n", "volume.Volume->volume.SegmentationTask\n", - "\n", + "\n", "\n", - "\n", - "\n", - "volume.SegmentationParamset\n", - "\n", - "\n", - "volume.SegmentationParamset\n", + "\n", + "\n", + "volume_matching.VolumeMatch.Transformation\n", + "\n", + "\n", + "volume_matching.VolumeMatch.Transformation\n", "\n", "\n", "\n", - "\n", - "\n", - "volume.SegmentationParamset->volume.SegmentationTask\n", - "\n", - "\n", "\n", - "\n", + "\n", "bossdb.BossDBURLs\n", - "\n", - "\n", - "bossdb.BossDBURLs\n", + "\n", + "\n", + "bossdb.BossDBURLs\n", "\n", "\n", "\n", "\n", - "\n", - "bossdb.VolumeUploadTask->bossdb.BossDBURLs\n", - "\n", - "\n", - "\n", - "\n", - "volume.Segmentation\n", - "\n", - "\n", - "volume.Segmentation\n", - "\n", - "\n", - "\n", - "\n", "\n", - "volume.Segmentation->volume_matching.VolumeMatchTask.Volume\n", - "\n", + "bossdb.VolumeUploadTask->bossdb.BossDBURLs\n", + "\n", "\n", - "\n", + "\n", "\n", - "volume.Segmentation->volume.Segmentation.Mask\n", - "\n", - "\n", - "\n", - "\n", - "bossdb.UploadParamSet\n", - "\n", - "\n", - "bossdb.UploadParamSet\n", - "\n", - "\n", + "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.Transformation\n", + "\n", "\n", - "\n", + "\n", "\n", - "bossdb.UploadParamSet->bossdb.VolumeUploadTask\n", - "\n", + "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.VolumeMask\n", + "\n", "\n", - "\n", - "\n", - "scan.Scan\n", - "\n", - "\n", - "scan.Scan\n", + "\n", + "\n", + "volume.SegmentationParamSet\n", + "\n", + "\n", + "volume.SegmentationParamSet\n", "\n", "\n", "\n", - "\n", + "\n", "\n", - "scan.Scan->volume.Volume\n", - "\n", + "volume.SegmentationParamSet->volume.SegmentationTask\n", + "\n", + "\n", + "\n", + "\n", + "volume_matching.VolumeMatch->volume_matching.VolumeMatch.Transformation\n", + "\n", + "\n", + "\n", + "\n", + "volume.SegmentationTask->volume.Segmentation\n", + "\n", "\n", "\n", - "\n", + "\n", "subject.Subject\n", - "\n", - "\n", - "subject.Subject\n", + "\n", + "\n", + "subject.Subject\n", "\n", "\n", "\n", @@ -330,50 +343,40 @@ "\n", "session.Session\n", "\n", - "\n", - "session.Session\n", + "\n", + "session.Session\n", "\n", "\n", "\n", "\n", - "\n", + "\n", "subject.Subject->session.Session\n", - "\n", + "\n", "\n", - "\n", - "\n", - "volume.SegmentationTask->volume.Segmentation\n", - "\n", - "\n", - "\n", + "\n", "\n", - "volume_matching.VolumeMatchTask\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask\n", + "scan.Scan\n", + "\n", + "\n", + "scan.Scan\n", "\n", "\n", "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask->volume_matching.VolumeMatch\n", - "\n", - "\n", - "\n", + "\n", "\n", - "volume_matching.VolumeMatchTask->volume_matching.VolumeMatchTask.Volume\n", - "\n", + "scan.Scan->volume.Volume\n", + "\n", "\n", "\n", "\n", "session.Session->scan.Scan\n", - "\n", + "\n", "\n", "\n", "" ], "text/plain": [ - "" + "" ] }, "execution_count": 3, @@ -382,7 +385,14 @@ } ], "source": [ - "dj.Diagram(subject.Subject) + dj.Diagram(session.Session) + dj.Diagram(scan.Scan) + dj.Diagram(volume) + dj.Diagram(volume_matching) + dj.Diagram(bossdb)" + "(\n", + " 
dj.Diagram(subject.Subject)\n", + " + dj.Diagram(session.Session)\n", + " + dj.Diagram(scan.Scan)\n", + " + dj.Diagram(volume)\n", + " + dj.Diagram(volume_matching)\n", + " + dj.Diagram(bossdb)\n", + ")" ] }, { @@ -461,25 +471,17 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid syntax (2346125880.py, line 6)", - "output_type": "error", - "traceback": [ - "\u001b[1;36m Cell \u001b[1;32mIn[4], line 6\u001b[1;36m\u001b[0m\n\u001b[1;33m subject_description=),\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" - ] - } - ], + "outputs": [], "source": [ "subject.Subject.insert1(\n", " dict(\n", " subject=\"subject1\",\n", " sex=\"M\",\n", " subject_birth_date=\"2023-01-01\",\n", - " subject_description=\"Cellpose segmentation of volumetric data.\"),\n", + " subject_description=\"Cellpose segmentation of volumetric data.\",\n", + " ),\n", " skip_duplicates=True,\n", ")\n", "subject.Subject()" @@ -535,12 +537,12 @@ "source": [ "session_key = dict(\n", " subject=\"subject1\",\n", - " session_id=1,\n", + " session_id=0,\n", ")\n", "session.Session.insert1(\n", " dict(\n", " session_key,\n", - " session_datetime=datetime.now(),\n", + " session_datetime=datetime.datetime.now(),\n", " ),\n", " skip_duplicates=True,\n", ")\n", @@ -583,10 +585,10 @@ "outputs": [], "source": [ "session.SessionDirectory.insert1(\n", - " dict(**session.Session.fetch1(\"KEY\"), session_dir=\"\"),\n", + " dict(session_key, session_dir=\"sub1\"),\n", " skip_duplicates=True,\n", ")\n", - "session.SessionDirectory()\n" + "session.SessionDirectory()" ] }, { @@ -594,7 +596,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next, we'll use `describe` and `heading` for the Scan table." + "Each volume requires an entry in the `Scan` table from\n", + "`element-calcium-imaging`. Here, we'll use `describe` and `heading` for the Scan\n", + "table and insert an entry for the current session." ] }, { @@ -629,7 +633,7 @@ " ),\n", " skip_duplicates=True,\n", ")\n", - "scan_key = (scan.Scan & \"subject = 'sub1'\").fetch1(\"KEY\")" + "scan_key = (scan.Scan & \"subject = 'subject1'\").fetch1(\"KEY\")" ] }, { @@ -716,7 +720,7 @@ "metadata": {}, "outputs": [], "source": [ - "volume.SegmentationParamset.heading" + "volume.SegmentationParamSet.heading" ] }, { @@ -831,21 +835,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, we can upload our data to BossDB. The `bossdb` schema contains three\n", - "tables to define the upload parameters, the upload tasks, and execute the\n", - "upload. The structure of these tables mirrors the `volume` schema. \n", + "Finally, we can upload our data to BossDB. The `bossdb` schema contains two\n", + "tables to the upload tasks, and execute the upload. The structure of these\n", + "tables mirrors the `volume` schema.\n", "\n", - "Let's begin by viewing the upload parameter table and inserting upload\n", - "parameters into it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bossdb.UploadParamSet.describe()" + "Volumetric data uploaded to BossDB requires information about voxel size. The\n", + "DataJoint table `volume.VoxelSize` can be used to insert this information for a\n", + "given dataset." 
] }, { @@ -854,7 +850,7 @@ "metadata": {}, "outputs": [], "source": [ - "bossdb.UploadParamSet.heading" + "volume.VoxelSize.heading" ] }, { @@ -863,14 +859,7 @@ "metadata": {}, "outputs": [], "source": [ - "bossdb.UploadParamSet.insert_new_params(\n", - " paramset_idx=1,\n", - " paramset_desc=\"test params\",\n", - " params=dict(\n", - " voxel_units=\"micrometers\",\n", - " voxel_size=[1, 1, 1],\n", - " ),\n", - ")" + "volume.VoxelSize.insert1(dict(scan_key, width=0.001, height=0.001, depth=0.001))" ] }, { @@ -878,7 +867,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The next step is to define the upload task by naming the collection, experiment,\n", + "Now, we can define the upload task by naming the collection, experiment,\n", "and channel where the data should be uploaded. " ] }, @@ -907,16 +896,15 @@ "outputs": [], "source": [ "col_name = \"dataJointTestUpload\"\n", - "exp_name = \"CaImaging\"\n", - "chn_name = \"test10\"\n", - "bossdb.VolumeUploadTask.update1(\n", + "exp_name = \"CaImagingFinal\"\n", + "chn_name = \"test1-seg\"\n", + "bossdb.VolumeUploadTask.insert1(\n", " dict(\n", " scan_key,\n", - " paramset_idx=1,\n", " collection_name=col_name,\n", " experiment_name=exp_name,\n", " channel_name=chn_name,\n", - " upload_type=\"image\",\n", + " upload_type=\"annotation\",\n", " )\n", ")" ] @@ -927,9 +915,16 @@ "metadata": {}, "outputs": [], "source": [ - "upload_key = (bossdb.VolumeUploadTask & scan_key & \"channel_name = 'test10'\").fetch(\n", - " \"KEY\"\n", - ")" + "bossdb.VolumeUploadTask()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "upload_key = (bossdb.VolumeUploadTask & scan_key & \"upload_type = 'image'\").fetch(\"KEY\")" ] }, { @@ -941,6 +936,24 @@ "bossdb.BossDBURLs.populate(upload_key)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bossdb.BossDBURLs()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To visualize the volumetric data, import the neuroglancer URL and paste it into\n", + "your browser." 
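+    ,
+    "\n",
+    "\n",
+    "A minimal sketch of doing this from Python is shown below. The attribute name\n",
+    "`neuroglancer_url` is an assumption for illustration only, not a confirmed part\n",
+    "of the `bossdb.BossDBURLs` API; check `bossdb.BossDBURLs.heading` for the actual\n",
+    "attribute before running it.\n",
+    "\n",
+    "```python\n",
+    "import webbrowser\n",
+    "\n",
+    "# Hypothetical attribute name; inspect bossdb.BossDBURLs.heading for the real one\n",
+    "url = (bossdb.BossDBURLs & upload_key).fetch1(\"neuroglancer_url\")\n",
+    "webbrowser.open(url)  # open the uploaded volume in Neuroglancer\n",
+    "```"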
+ ] + }, { "cell_type": "code", "execution_count": null, diff --git a/pyproject.toml b/pyproject.toml index 9cc021d..4298ca6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.pytest.ini_options] minversion = "6.0" -addopts = "--capture=tee-sys -p no:warnings --dj-teardown False --dj-verbose True --sw --cov=element_volume --cov-report term-missing" +addopts = "--capture=tee-sys -p no:warnings --dj-teardown False --dj-verbose True --sw --cov=element_zstack --cov-report term-missing" # Verbosity: -v for pytest more verbose # Warnings: -p no:warnings to disable # Stepwise: --sw to restart pytest at last failure point diff --git a/requirements.txt b/requirements.txt index 6519223..77ae707 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ element-animal element-lab element-session element-calcium-imaging -element-bossdb +element-zstack intern diff --git a/setup.py b/setup.py index 032205d..23a50c0 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import find_packages, setup -pkg_name = "workflow_volume" +pkg_name = "workflow_zstack" here = path.abspath(path.dirname(__file__)) long_description = """ @@ -16,14 +16,14 @@ exec(f.read()) setup( - name="workflow-volume", + name="workflow-zstack", version=__version__, # noqa: F821 - description="DataJoint Workflow for Element Volume", + description="DataJoint Workflow for Element ZStack", long_description=long_description, author="DataJoint", author_email="info@datajoint.com", license="MIT", - url="https://github.com/datajoint/workflow-volume", + url="https://github.com/datajoint/workflow-zstack", keywords="neuroscience volumetric BossDB datajoint", packages=find_packages(exclude=["contrib", "docs", "tests*"]), install_requires=requirements, diff --git a/tests/conftest.py b/tests/conftest.py index 0d95ea5..0b3e7ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,25 +8,17 @@ import pytest from element_interface.utils import QuietStdOut, find_full_path, value_to_bool -from workflow_array_ephys.ingest import ingest_lab, ingest_sessions, ingest_subjects -from workflow_array_ephys.paths import get_ephys_root_data_dir +from workflow_zstack.paths import get_volume_root_data_dir # ------------------- SOME CONSTANTS ------------------- logger = logging.getLogger("datajoint") -pathlib.Path("./tests/user_data").mkdir(exist_ok=True) -pathlib.Path("./tests/user_data/lab").mkdir(exist_ok=True) +pathlib.Path("../example_data").mkdir(exist_ok=True) sessions_dirs = [ - "subject1/session1", - "subject2/session1", - "subject2/session2", - "subject3/session1", - "subject4/experiment1", - "subject5/session1", - "subject6/session1", + "sub1", ] @@ -102,17 +94,14 @@ def dj_config(setup): or dj.config["database.password"], "database.user": os.environ.get("DJ_USER") or dj.config["database.user"], "custom": { - "ephys_mode": ( - os.environ.get("EPHYS_MODE") or dj.config["custom"]["ephys_mode"] - ), "database.prefix": ( os.environ.get("DATABASE_PREFIX") or dj.config["custom"]["database.prefix"] ), - "ephys_root_data_dir": ( - os.environ.get("EPHYS_ROOT_DATA_DIR").split(",") - if os.environ.get("EPHYS_ROOT_DATA_DIR") - else dj.config["custom"]["ephys_root_data_dir"] + "volume_root_data_dir": ( + os.environ.get("VOLUME_ROOT_DATA_DIR").split(",") + if os.environ.get("VOLUME_ROOT_DATA_DIR") + else dj.config["custom"]["volume_root_data_dir"] ), }, } @@ -122,70 +111,28 @@ def dj_config(setup): @pytest.fixture(scope="session") def test_data(dj_config): - """If data does not exist or partial data is present, - 
attempt download with DJArchive to the first listed root directory""" test_data_exists = True for p in sessions_dirs: try: - find_full_path(get_ephys_root_data_dir(), p) + find_full_path(get_volume_root_data_dir, p).as_posix() except FileNotFoundError: - test_data_exists = False # If data not found + test_data_exists = False break - if not test_data_exists: # attempt to djArchive dowload - try: - dj.config["custom"].update( - { - "djarchive.client.endpoint": os.environ[ - "DJARCHIVE_CLIENT_ENDPOINT" - ], - "djarchive.client.bucket": os.environ["DJARCHIVE_CLIENT_BUCKET"], - "djarchive.client.access_key": os.environ[ - "DJARCHIVE_CLIENT_ACCESSKEY" - ], - "djarchive.client.secret_key": os.environ[ - "DJARCHIVE_CLIENT_SECRETKEY" - ], - } - ) - except KeyError as e: - raise FileNotFoundError( - "Full test data not available.\nAttempting to download from DJArchive," - + " but no credentials found in environment variables.\nError:" - + str(e) - ) - - import djarchive_client - - client = djarchive_client.client() - - test_data_dir = get_ephys_root_data_dir() - if isinstance(test_data_dir, list): # if multiple root dirs, first - test_data_dir = test_data_dir[0] - - client.download( - "workflow-array-ephys-benchmark", - "v2", - str(test_data_dir), - create_target=False, - ) - return - @pytest.fixture(autouse=True, scope="session") def pipeline(): - from workflow_array_ephys import pipeline + from workflow_zstack import pipeline yield { "subject": pipeline.subject, "lab": pipeline.lab, - "ephys": pipeline.ephys, - "probe": pipeline.probe, - "ephys_report": pipeline.ephys_report, "session": pipeline.session, - "get_ephys_root_data_dir": pipeline.get_ephys_root_data_dir, - "ephys_mode": pipeline.ephys_mode, + "scan": pipeline.scan, + "volume": pipeline.volume, + "volume_matching": pipeline.volume_matching, + "bossdb": pipeline.bossdb, } if _tear_down: @@ -193,304 +140,61 @@ def pipeline(): pipeline.subject.Subject.delete() -@pytest.fixture(scope="session") -def ingest_data(setup, pipeline, test_data): - """For each input, generates csv in test_user_data_dir and ingests in schema""" - # CSV as list of 3: filename, relevant tables, content - all_csvs = { - "lab/labs.csv": { - "func": null_function, - "args": {}, - "content": [ - "lab,lab_name,organization,org_name,address," - + "time_zone,location,location_description", - "LabA,The Example Lab,Uni1,Example Uni,'221B Baker St,London NW1 6XE,UK'," - + "UTC+0,Example Building,'2nd floor lab dedicated to all fictional experiments.'", - "LabB,The Other Lab,Uni2,Other Uni,'Oxford OX1 2JD, United Kingdom'," - + "UTC+0,Other Building,'fictional campus dedicated to imaginaryexperiments.'", - ], - }, - "lab/projects.csv": { - "func": null_function, - "args": {}, - "content": [ - "project,project_description,project_title,project_start_date," - + "repository_url,repository_name,codeurl", - "ProjA,Example project to populate element-lab," - + "Example project to populate element-lab,2020-01-01," - + "https://github.com/datajoint/element-lab/," - + "element-lab,https://github.com/datajoint/element" - + "-lab/tree/main/element_lab", - "ProjB,Other example project to populate element-lab," - + "Other example project to populate element-lab,2020-01-02," - + "https://github.com/datajoint/element-session/," - + "element-session,https://github.com/datajoint/" - + "element-session/tree/main/element_session", - ], - }, - "lab/project_users.csv": { - "func": null_function, - "args": {}, - "content": [ - "user,project", - "Sherlock,ProjA", - "Sherlock,ProjB", - "Watson,ProjB", 
- "Dr. Candace Pert,ProjA", - "User1,ProjA", - ], - }, - "lab/publications.csv": { - "func": null_function, - "args": {}, - "content": [ - "project,publication", - "ProjA,arXiv:1807.11104", - "ProjA,arXiv:1807.11104v1", - ], - }, - "lab/keywords.csv": { - "func": null_function, - "args": {}, - "content": [ - "project,keyword", - "ProjA,Study", - "ProjA,Example", - "ProjB,Alternate", - ], - }, - "lab/protocols.csv": { - "func": null_function, - "args": {}, - "content": [ - "protocol,protocol_type,protocol_description", - "ProtA,IRB expedited review,Protocol for managing " + "data ingestion", - "ProtB,Alternative Method,Limited protocol for " + "piloting only", - ], - }, - "lab/users.csv": { - "func": ingest_lab, - "args": { - "lab_csv_path": f"{test_user_data_dir}/lab/labs.csv", - "project_csv_path": f"{test_user_data_dir}/lab/projects.csv", - "publication_csv_path": f"{test_user_data_dir}/lab/publications.csv", - "keyword_csv_path": f"{test_user_data_dir}/lab/keywords.csv", - "protocol_csv_path": f"{test_user_data_dir}/lab/protocols.csv", - "users_csv_path": f"{test_user_data_dir}/lab/users.csv", - "project_user_csv_path": f"{test_user_data_dir}/lab/project_users.csv", - }, - "content": [ - "lab,user,user_role,user_email,user_cellphone", - "LabA,Sherlock,PI,Sherlock@BakerSt.com," + "+44 20 7946 0344", - "LabA,Watson,Dr,DrWatson@BakerSt.com,+44 73 8389 1763", - "LabB,Dr. Candace Pert,PI,Pert@gmail.com," + "+44 74 4046 5899", - "LabA,User1,Lab Tech,fake@email.com,+44 1632 960103", - "LabB,User2,Lab Tech,fake2@email.com,+44 1632 960102", - ], - }, - "subjects.csv": { - "func": ingest_subjects, - "args": {"subject_csv_path": f"{test_user_data_dir}/subjects.csv"}, - "content": [ - "subject,sex,subject_birth_date,subject_description", - "subject1,F,2020-01-01 00:00:01,dl56", - "subject2,M,2020-01-01 00:00:01,SC035", - "subject3,M,2020-01-01 00:00:01,SC038", - "subject4,M,2020-01-01 00:00:01,oe_talab", - "subject5,F,2020-01-01 00:00:01,rich", - "subject6,F,2020-01-01 00:00:01,manuel", - ], - }, - "sessions.csv": { - "func": ingest_sessions, - "args": {"session_csv_path": f"{test_user_data_dir}/sessions.csv"}, - "content": [ - "subject,session_dir,session_note,user", - f"subject1,{sessions_dirs[0]},Data collection notes,User2", - f"subject2,{sessions_dirs[1]},Data collection notes,User2", - f"subject2,{sessions_dirs[2]},Interrupted session,User2", - f"subject3,{sessions_dirs[3]},Data collection notes,User1", - f"subject4,{sessions_dirs[4]},Successful data collection,User2", - f"subject5,{sessions_dirs[5]},Successful data collection,User1", - f"subject6,{sessions_dirs[6]},Ambient temp abnormally low,User2", - ], - }, - } - # If data in last table, presume didn't tear down last time, skip insert - if len(pipeline["ephys"].Clustering()) == 0: - for csv_filename, csv_dict in all_csvs.items(): - csv_path = test_user_data_dir / csv_filename # add prefix for rel path - Path(csv_path).write_text("\n".join(csv_dict["content"]) + "\n") - csv_dict["func"](verbose=verbose, skip_duplicates=True, **csv_dict["args"]) - - yield all_csvs - - if _tear_down: - with verbose_context: - for csv in all_csvs: - csv_path = test_user_data_dir / csv - csv_path.unlink() - - @pytest.fixture(scope="session") def testdata_paths(): - """Paths for test data 'subjectX/sessionY/probeZ/etc'""" - return { - "npx3A-p1-ks": "subject5/session1/probe_1/ks2.1_01", - "npx3A-p2-ks": "subject5/session1/probe_2/ks2.1_01", - "oe_npx3B-ks": "subject4/experiment1/recording1/continuous/" - + "Neuropix-PXI-100.0/ks", - "sglx_npx3A-p1": 
"subject5/session1/probe_1", - "oe_npx3B": "subject4/experiment1/recording1/continuous/" - + "Neuropix-PXI-100.0", - "sglx_npx3B-p1": "subject6/session1/towersTask_g0_imec0", - "npx3B-p1-ks": "subject6/session1/towersTask_g0_imec0", - } - + return {"test1_stitched": "sub1"} @pytest.fixture(scope="session") -def ephys_insertionlocation(pipeline, ingest_data): - """Insert probe location into ephys.InsertionLocation""" - ephys = pipeline["ephys"] - - for probe_insertion_key in ephys.ProbeInsertion.fetch("KEY"): - ephys.InsertionLocation.insert1( - dict( - **probe_insertion_key, - skull_reference="Bregma", - ap_location=0, - ml_location=0, - depth=0, - theta=0, - phi=0, - beta=0, - ), - skip_duplicates=True, - ) - yield +def insert_upstream(pipeline): + import datetime - if _tear_down: - with verbose_context: - ephys.InsertionLocation.delete() + subject = pipeline["subject"] + session = pipeline["session"] + scan = pipeline["scan"] -@pytest.fixture(scope="session") -def kilosort_paramset(pipeline): - """Insert kilosort parameters into ephys.ClusteringParamset""" - ephys = pipeline["ephys"] - - params_ks = { - "fs": 30000, - "fshigh": 150, - "minfr_goodchannels": 0.1, - "Th": [10, 4], - "lam": 10, - "AUCsplit": 0.9, - "minFR": 0.02, - "momentum": [20, 400], - "sigmaMask": 30, - "ThPr": 8, - "spkTh": -6, - "reorder": 1, - "nskip": 25, - "GPU": 1, - "Nfilt": 1024, - "nfilt_factor": 4, - "ntbuff": 64, - "whiteningRange": 32, - "nSkipCov": 25, - "scaleproc": 200, - "nPCs": 3, - "useRAM": 0, - } - - # Insert here, since most of the test will require this paramset inserted - ephys.ClusteringParamSet.insert_new_params( - clustering_method="kilosort2.5", - paramset_desc="Spike sorting using Kilosort2.5", - params=params_ks, - paramset_idx=0, + subject.Subject.insert1( + dict( + subject="subject1", + sex="M", + subject_birth_date="2023-01-01", + subject_description="Cellpose segmentation of volumetric data."), + skip_duplicates=True, ) - yield params_ks - - if _tear_down: - with verbose_context: - (ephys.ClusteringParamSet & "paramset_idx = 0").delete() - - -@pytest.fixture(scope="session") -def ephys_recordings(pipeline, ingest_data): - """Populate ephys.EphysRecording""" - ephys = pipeline["ephys"] - - ephys.EphysRecording.populate() - - yield - - if _tear_down: - with verbose_context: - ephys.EphysRecording.delete() - + session_key = dict( + subject="subject1", + session_id=0, + ) + session.Session.insert1( + dict( + session_key, + session_datetime=datetime.datetime.now(), + ), + skip_duplicates=True, + ) -@pytest.fixture(scope="session") -def clustering_tasks(pipeline, kilosort_paramset, ephys_recordings): - """Insert keys from ephys.EphysRecording into ephys.Clustering""" - ephys = pipeline["ephys"] - - for ephys_rec_key in (ephys.EphysRecording - ephys.ClusteringTask).fetch("KEY"): - ephys_file_path = pathlib.Path( - ((ephys.EphysRecording.EphysFile & ephys_rec_key).fetch("file_path"))[0] - ) - ephys_file = find_full_path(get_ephys_root_data_dir(), ephys_file_path) - recording_dir = ephys_file.parent - kilosort_dir = next(recording_dir.rglob("spike_times.npy")).parent - ephys.ClusteringTask.insert1( - { - **ephys_rec_key, - "paramset_idx": 0, - "task_mode": "load", - "clustering_output_dir": kilosort_dir.as_posix(), - }, - skip_duplicates=True, - ) + session.SessionDirectory.insert1( + dict(session_key, session_dir="sub1"), + skip_duplicates=True, + ) + scan.Scan.insert1( + dict( + session_key, + scan_id=0, + acq_software="ScanImage", + ), + skip_duplicates=True, + ) yield - if _tear_down: - with 
verbose_context: - ephys.ClusteringTask.delete() - @pytest.fixture(scope="session") -def clustering(clustering_tasks, pipeline): - """Populate ephys.Clustering""" - ephys = pipeline["ephys"] +def volume_volume(pipeline): + volume = pipeline["volume"] - ephys.Clustering.populate() + volume.Volume.populate() yield - - if _tear_down: - with verbose_context: - ephys.Clustering.delete() - - -@pytest.fixture(scope="session") -def curations(clustering, pipeline): - """Insert keys from ephys.ClusteringTask into ephys.Curation""" - ephys_mode = pipeline["ephys_mode"] - - if ephys_mode == "no-curation": - yield - else: - ephys = pipeline["ephys"] - - for key in (ephys.ClusteringTask - ephys.Curation).fetch("KEY"): - ephys.Curation().create1_from_clustering_task(key) - - yield - - if _tear_down: - with verbose_context: - ephys.Curation.delete() + \ No newline at end of file diff --git a/tests/test_export.py b/tests/test_export.py index 6328eab..3112180 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -1,150 +1,20 @@ -import datetime -import time - -from element_interface.utils import find_full_path, find_root_directory -from pynwb.ecephys import ElectricalSeries - -from workflow_array_ephys.export import ( - ecephys_session_to_nwb, - session_to_nwb, - write_nwb, -) - - -def test_session_to_nwb(setup, pipeline, ingest_data): - verbose_context, _ = setup - - with verbose_context: - nwbfile = session_to_nwb( - **{ - "session_key": { - "subject": "subject5", - "session_datetime": datetime.datetime(2018, 7, 3, 20, 32, 28), - }, - "lab_key": {"lab": "LabA"}, - "protocol_key": {"protocol": "ProtA"}, - "project_key": {"project": "ProjA"}, - } - ) - - assert nwbfile.session_id == "subject5_2018-07-03T20:32:28" - assert nwbfile.session_description == "Successful data collection" - # when saved in NWB, converts local to UTC - assert nwbfile.session_start_time == datetime.datetime( - 2018, 7, 3, 20, 32, 28 - ).astimezone(datetime.timezone.utc) - assert nwbfile.experimenter == ["User1"] - - assert nwbfile.subject.subject_id == "subject5" - assert nwbfile.subject.sex == "F" - - assert nwbfile.institution == "Example Uni" - assert nwbfile.lab == "The Example Lab" - - assert nwbfile.protocol == "ProtA" - assert nwbfile.notes == "Protocol for managing data ingestion" - - assert nwbfile.experiment_description == "Example project to populate element-lab" - - -def test_write_to_nwb( - setup, - pipeline, - ingest_data, - ephys_insertionlocation, - kilosort_paramset, - ephys_recordings, - clustering_tasks, - clustering, - curations, -): - verbose_context, verbose = setup - ephys = pipeline["ephys"] - - session_key = dict(subject="subject5", session_datetime="2018-07-03 20:32:28") - - ephys.LFP.populate(session_key, display_progress=verbose) - ephys.CuratedClustering.populate(session_key, display_progress=verbose) - ephys.WaveformSet.populate(session_key, display_progress=verbose) - - ecephys_kwargs = { - "session_key": session_key, - "raw": True, - "spikes": True, - "lfp": "dj", - "end_frame": 250, - } - - with verbose_context: - nwbfile = ecephys_session_to_nwb(**ecephys_kwargs) - - root_dirs = pipeline["get_ephys_root_data_dir"]() - root_dir = find_root_directory( - root_dirs, - find_full_path( - root_dirs, - (pipeline["session"].SessionDirectory & session_key).fetch1("session_dir"), - ), +def test_export(pipeline): + scan = pipeline["scan"] + bossdb = pipeline["bossdb"] + + scan_key = (scan.Scan & "subject = 'subject1'").fetch1("KEY") + col_name = "dataJointTestUpload" + exp_name = "CaImagingFinal" + 
chn_name = "test1" + + bossdb.VolumeUploadTask.insert1( + dict( + scan_key, + collection_name=col_name, + experiment_name=exp_name, + channel_name=chn_name, + upload_type="image", + ), skip_duplicates=True ) - write_nwb(nwbfile, root_dir / time.strftime("_test_%Y%m%d-%H%M%S.nwb")) - - -def test_convert_to_nwb( - setup, - pipeline, - ingest_data, - ephys_insertionlocation, - kilosort_paramset, - ephys_recordings, - clustering_tasks, - clustering, - curations, -): - verbose_context, verbose = setup - ephys = pipeline["ephys"] - - session_key = dict(subject="subject5", session_datetime="2018-07-03 20:32:28") - - ephys.CuratedClustering.populate(session_key, display_progress=verbose) - ephys.WaveformSet.populate(session_key, display_progress=verbose) - - ecephys_kwargs = { - "session_key": session_key, - "end_frame": 250, - "spikes": True, - "lab_key": {"lab": "LabA"}, - "protocol_key": {"protocol": "ProtA"}, - "project_key": {"project": "ProjA"}, - } - - with verbose_context: - nwbfile = ecephys_session_to_nwb(**ecephys_kwargs) - - for x in ("262716621", "714000838"): - assert x in nwbfile.devices - - assert len(nwbfile.electrodes) == 1920 - for col in ("shank", "shank_row", "shank_col"): - assert col in nwbfile.electrodes - - for es_name in ("ElectricalSeries1", "ElectricalSeries2"): - es = nwbfile.acquisition[es_name] - assert isinstance(es, ElectricalSeries) - assert es.conversion == 2.34375e-06 - - # make sure the ElectricalSeries objects don't share electrodes - assert not set(nwbfile.acquisition["ElectricalSeries1"].electrodes.data) & set( - nwbfile.acquisition["ElectricalSeries2"].electrodes.data - ) - - assert len(nwbfile.units) == 499 - - for col in ("cluster_quality_label", "spike_depths"): - assert col in nwbfile.units - - for es_name in ("ElectricalSeries1", "ElectricalSeries2"): - es = nwbfile.processing["ecephys"].data_interfaces["LFP"][es_name] - assert isinstance(es, ElectricalSeries) - assert es.conversion == 4.6875e-06 - assert es.rate == 2500.0 + bossdb.BossDBURLs.populate(scan_key) \ No newline at end of file diff --git a/tests/test_ingest.py b/tests/test_ingest.py deleted file mode 100644 index 52b133d..0000000 --- a/tests/test_ingest.py +++ /dev/null @@ -1,109 +0,0 @@ -import os -import pathlib -import sys - -from element_interface.utils import find_full_path, find_root_directory - -docker_root = "/main/test_data/workflow_ephys_data1" - - -def test_ingest_subjects(pipeline, ingest_data): - """Check number of subjects inserted into the `subject.Subject` table""" - subject = pipeline["subject"] - assert len(subject.Subject()) == 6 - - -def test_ingest_sessions(pipeline, ingest_data): - ephys = pipeline["ephys"] - probe = pipeline["probe"] - session = pipeline["session"] - - assert len(session.Session()) == 7 - assert len(probe.Probe()) == 9 - assert len(ephys.ProbeInsertion()) == 13 - - session_info = ingest_data["sessions.csv"]["content"][1].split(",") - - assert (session.SessionDirectory & {"subject": session_info[0]}).fetch1( - "session_dir" - ) == session_info[1] - - -def test_find_valid_full_path(pipeline, ingest_data): - - if not os.environ.get("IS_DOCKER", False): - return # It doesn't make sense to assert the root testing locally - - get_ephys_root_data_dir = pipeline["get_ephys_root_data_dir"] - ephys_root_data_dir = ( - [get_ephys_root_data_dir()] - if not isinstance(get_ephys_root_data_dir(), list) - else get_ephys_root_data_dir() - ) - - # add more options for root directories - if sys.platform == "win32": # win32 even if Windows 64-bit - ephys_root_data_dir = 
ephys_root_data_dir + ["J:/", "M:/"] - else: - ephys_root_data_dir = ephys_root_data_dir + ["mnt/j", "mnt/m"] - - # test: providing relative-path: correctly search for the full-path - session_info = ingest_data["sessions.csv"]["content"][1].split(",") - - session_full_path = find_full_path(ephys_root_data_dir, session_info[1]) - - full_path = pathlib.Path(docker_root, "subject1/session1") - - assert full_path == session_full_path, str( - "Session path does not match docker root:" - + f"\n\t{full_path}\n\t{session_full_path}" - ) - - -def test_find_root_directory(pipeline, ingest_data): - """ - Test that `find_root_directory` works correctly. - """ - - get_ephys_root_data_dir = pipeline["get_ephys_root_data_dir"] - ephys_root_data_dir = ( - [get_ephys_root_data_dir()] - if not isinstance(get_ephys_root_data_dir(), list) - else get_ephys_root_data_dir() - ) - # add more options for root directories - if sys.platform == "win32": - ephys_root_data_dir = ephys_root_data_dir + ["J:/", "M:/"] - else: - ephys_root_data_dir = ephys_root_data_dir + ["mnt/j", "mnt/m"] - - ephys_root_data_dir = [pathlib.Path(p) for p in ephys_root_data_dir] - - # test: providing full-path: correctly search for the root_dir - session_info = ingest_data["sessions.csv"]["content"][1].split(",") - - if os.environ.get("IS_DOCKER", False): - session_full_path = pathlib.Path(docker_root, session_info[1]) - root_dir = find_root_directory(ephys_root_data_dir, session_full_path) - assert ( - root_dir.as_posix() == docker_root - ), f"Root path does not match: {docker_root}" - else: - session_full_path = find_full_path(get_ephys_root_data_dir(), session_info[1]) - root_dir = find_root_directory(ephys_root_data_dir, session_full_path) - assert root_dir in ephys_root_data_dir, "Problems finding root dir" - - -def test_paramset_insert(kilosort_paramset, pipeline): - ephys = pipeline["ephys"] - from element_interface.utils import dict_to_uuid - - method, desc, paramset_hash = ( - ephys.ClusteringParamSet & {"paramset_idx": 0} - ).fetch1("clustering_method", "paramset_desc", "param_set_hash") - assert method == "kilosort2.5" - assert desc == "Spike sorting using Kilosort2.5" - assert ( - dict_to_uuid({**kilosort_paramset, "clustering_method": method}) - == paramset_hash - ) diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py index 9311a20..df584dc 100644 --- a/tests/test_pipeline_generation.py +++ b/tests/test_pipeline_generation.py @@ -1,26 +1,26 @@ def test_generate_pipeline(pipeline): subject = pipeline["subject"] session = pipeline["session"] - ephys = pipeline["ephys"] - probe = pipeline["probe"] - ephys_report = pipeline["ephys_report"] + scan = pipeline["scan"] + volume = pipeline["volume"] + volume_matching = pipeline["volume_matching"] + bossdb = pipeline["bossdb"] # test elements connection from lab, subject to Session assert subject.Subject.full_table_name in session.Session.parents() # test elements connection from Session to probe, ephys, ephys_report - assert session.Session.full_table_name in ephys.ProbeInsertion.parents() - assert probe.Probe.full_table_name in ephys.ProbeInsertion.parents() - assert "spike_times" in (ephys.CuratedClustering.Unit.heading.secondary_attributes) + assert session.Session.full_table_name in scan.Scan.parents() + assert scan.Scan.full_table_name in volume.Volume.parents() + assert "mask_npix" in (volume.Segmentation.Mask.heading.secondary_attributes) assert all( [ - ephys.CuratedClustering.full_table_name - in ephys_report.ProbeLevelReport.parents(), - 
ephys.CuratedClustering.Unit.full_table_name - in ephys_report.UnitLevelReport.parents(), + bossdb.VolumeUploadTask.full_table_name in bossdb.BossDBURLs.parents(), + volume.Volume.full_table_name in bossdb.VolumeUploadTask.parents(), ] ) - - # test the connection between quality metric tables - assert ephys.QualityMetrics.full_table_name in ephys_report.QualityMetricSet.parents() \ No newline at end of file + + assert "confidence" in ( + volume_matching.VolumeMatch.VolumeMask.heading.secondary_attributes + ) diff --git a/tests/test_populate.py b/tests/test_populate.py deleted file mode 100644 index 40dea92..0000000 --- a/tests/test_populate.py +++ /dev/null @@ -1,348 +0,0 @@ -import json - -import numpy as np -import pandas as pd - - -def test_ephys_recording_populate(pipeline, ephys_recordings): - ephys = pipeline["ephys"] - assert len(ephys.EphysRecording()) == 13 - - -def test_LFP_populate_npx3B_OpenEphys(testdata_paths, pipeline, ephys_recordings): - """ - Populate ephys.LFP with OpenEphys items, - recording Neuropixels Phase 3B (Neuropixels 1.0) probe - """ - ephys = pipeline["ephys"] - rel_path = testdata_paths["oe_npx3B"] - rec_key = ( - ephys.EphysRecording - & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}"') - ).fetch1("KEY") - ephys.LFP.populate(rec_key) - - lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") - assert len(lfp_mean) == 520054 - - electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") - assert np.array_equal( - electrodes, - np.array( - [ - 5, - 14, - 23, - 32, - 41, - 50, - 59, - 68, - 77, - 86, - 95, - 104, - 113, - 122, - 131, - 140, - 149, - 158, - 167, - 176, - 185, - 194, - 203, - 212, - 221, - 230, - 239, - 248, - 257, - 266, - 275, - 284, - 293, - 302, - 311, - 320, - 329, - 338, - 347, - 356, - 365, - 374, - 383, - ] - ), - ) - - -def test_LFP_populate_npx3A_SpikeGLX(testdata_paths, pipeline, ephys_recordings): - """Populate ephys.LFP with SpikeGLX items, recording Neuropixels Phase 3A probe""" - ephys = pipeline["ephys"] - - rel_path = testdata_paths["sglx_npx3A-p1"] - rec_key = ( - ephys.EphysRecording - & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}%"') - ).fetch1("KEY") - ephys.LFP.populate(rec_key) - - lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") - assert len(lfp_mean) == 846666 - - electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") - assert np.array_equal( - electrodes, - np.array( - [ - 5, - 14, - 23, - 32, - 41, - 50, - 59, - 68, - 77, - 86, - 95, - 104, - 113, - 122, - 131, - 140, - 149, - 158, - 167, - 176, - 185, - 194, - 203, - 212, - 221, - 230, - 239, - 248, - 257, - 266, - 275, - 284, - 293, - 302, - 311, - 320, - 329, - 338, - 347, - 356, - 365, - 374, - 383, - ] - ), - ) - - -def test_LFP_populate_npx3B_SpikeGLX(testdata_paths, pipeline, ephys_recordings): - """ - Populate ephys.LFP with SpikeGLX items, - recording Neuropixels Phase 3B (Neuropixels 1.0) probe - """ - - ephys = pipeline["ephys"] - - rel_path = testdata_paths["sglx_npx3B-p1"] - rec_key = ( - ephys.EphysRecording - & (ephys.EphysRecording.EphysFile & f'file_path LIKE "%{rel_path}%"') - ).fetch1("KEY") - ephys.LFP.populate(rec_key) - - lfp_mean = (ephys.LFP & rec_key).fetch1("lfp_mean") - assert len(lfp_mean) == 4769946 - - electrodes = (ephys.LFP.Electrode & rec_key).fetch("electrode") - assert np.array_equal( - electrodes, - np.array( - [ - 5, - 14, - 23, - 32, - 41, - 50, - 59, - 68, - 77, - 86, - 95, - 104, - 113, - 122, - 131, - 140, - 149, - 158, - 167, - 176, - 185, - 194, - 203, - 212, - 221, - 230, - 
239, - 248, - 257, - 266, - 275, - 284, - 293, - 302, - 311, - 320, - 329, - 338, - 347, - 356, - 365, - 374, - 383, - ] - ), - ) - - -def test_clustering_populate(clustering, pipeline): - ephys = pipeline["ephys"] - assert len(ephys.Clustering()) == 13 - - -def test_curated_clustering_populate(curations, pipeline, testdata_paths): - """Populate ephys.CuratedClustering with multiple recordings""" - ephys = pipeline["ephys"] - - rel_path = testdata_paths["npx3A-p1-ks"] - curation_key = _get_curation_key(rel_path, pipeline) - ephys.CuratedClustering.populate(curation_key) - assert ( - len( - ephys.CuratedClustering.Unit - & curation_key - & 'cluster_quality_label = "good"' - ) - == 76 - ) - - rel_path = testdata_paths["oe_npx3B-ks"] - curation_key = _get_curation_key(rel_path, pipeline) - ephys.CuratedClustering.populate(curation_key) - assert ( - len( - ephys.CuratedClustering.Unit - & curation_key - & 'cluster_quality_label = "good"' - ) - == 68 - ) - - rel_path = testdata_paths["npx3B-p1-ks"] - curation_key = _get_curation_key(rel_path, pipeline) - ephys.CuratedClustering.populate(curation_key) - assert ( - len( - ephys.CuratedClustering.Unit - & curation_key - & 'cluster_quality_label = "good"' - ) - == 55 - ) - - -def test_waveform_populate_npx3B_OpenEphys(curations, pipeline, testdata_paths): - """ - Populate ephys.WaveformSet with OpenEphys - Neuropixels Phase 3B (Neuropixels 1.0) probe - """ - ephys = pipeline["ephys"] - rel_path = testdata_paths["oe_npx3B-ks"] - curation_key = _get_curation_key(rel_path, pipeline) - ephys.CuratedClustering.populate(curation_key) - ephys.WaveformSet.populate(curation_key) - - waveforms = np.vstack( - (ephys.WaveformSet.PeakWaveform & curation_key).fetch("peak_electrode_waveform") - ) - - assert waveforms.shape == (204, 64) - - -def test_waveform_populate_npx3B_SpikeGLX(curations, pipeline, testdata_paths): - """ - Populate ephys.WaveformSet with SpikeGLX - Neuropixels Phase 3B (Neuropixels 1.0) probe - """ - - ephys = pipeline["ephys"] - - rel_path = testdata_paths["npx3B-p1-ks"] - curation_key = _get_curation_key(rel_path, pipeline) - ephys.CuratedClustering.populate(curation_key) - ephys.WaveformSet.populate(curation_key) - - waveforms = np.vstack( - (ephys.WaveformSet.PeakWaveform & curation_key).fetch("peak_electrode_waveform") - ) - - assert waveforms.shape == (150, 64) - - -def test_build_electrode_layouts(pipeline): - """ - Test build_electrode_layouts function in probe.py - """ - - # Load probe configuration - f = open("user_data/neuropixels_probes_config.json") - probe_configs = json.load(f) - # Load ground truth table for each probe type - truth_df = pd.read_csv("user_data/probe_type_electrode.csv") - - probe = pipeline["probe"] - - for probe_type, config in probe_configs.items(): - - test_df = pd.DataFrame(probe.build_electrode_layouts(probe_type, **config)) - - test_arr = np.array(test_df.drop(columns=["probe_type"]), dtype=np.int16) - truth_arr = np.array( - truth_df.loc[truth_df["probe_type"] == probe_type].drop( - columns=["probe_type"] - ), - dtype=np.int16, - ) - assert np.array_equal( - test_arr, truth_arr - ), f"probe type '{probe_type}' electrode layout does not match" - - -# ---- HELPER FUNCTIONS ---- - - -def _get_curation_key(output_relative_path, pipeline): - ephys = pipeline["ephys"] - ephys_mode = pipeline["ephys_mode"] - - if ephys_mode == "no-curation": - EphysCuration = ephys.ClusteringTask - output_dir_attr_name = "clustering_output_dir" - else: - EphysCuration = ephys.Curation - output_dir_attr_name = 
"curation_output_dir" - - return ( - EphysCuration & f'{output_dir_attr_name} LIKE "%{output_relative_path}"' - ).fetch1("KEY") diff --git a/workflow_zstack/paths.py b/workflow_zstack/paths.py index ba8c52a..40cab54 100644 --- a/workflow_zstack/paths.py +++ b/workflow_zstack/paths.py @@ -37,7 +37,7 @@ def get_volume_root_data_dir() -> List[str]: Returns: path (any): List of path(s) if available or None """ - vol_root_dirs = dj.config.get("custom", {}).get("vol_root_data_dir", None) + vol_root_dirs = dj.config.get("custom", {}).get("volume_root_data_dir", None) if not vol_root_dirs: return None elif not isinstance(vol_root_dirs, Sequence): diff --git a/workflow_zstack/pipeline.py b/workflow_zstack/pipeline.py index ba153a0..db2dce1 100644 --- a/workflow_zstack/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -5,8 +5,7 @@ from element_animal.subject import Subject from element_session import session_with_id as session from element_calcium_imaging import imaging, imaging_report, scan -from element_zstack import volume, volume_matching -from element_zstack.export import bossdb +from element_zstack import volume, volume_matching, bossdb from . import db_prefix from .paths import get_session_directory, get_volume_root_data_dir, get_volume_tif_file From 33093b75634d30f17ac6993f1dd50a6628309960 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 27 Apr 2023 11:35:27 -0500 Subject: [PATCH 07/62] Add CICD, update CHANGELOG + version --- .../anchored_u24_workflow_before_release.yaml | 29 ++++++++++ .../anchored_u24_workflow_release_call.yaml | 46 +++++++++++++++ .../anchored_u24_workflow_tag_to_release.yaml | 26 +++++++++ .github/.staging_workflows/normalize.sh | 14 +++++ .github/.test/.secrets | 4 ++ .github/.test/README.md | 3 + .github/ISSUE_TEMPLATE/bug_report.md | 39 +++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 5 ++ .github/ISSUE_TEMPLATE/feature_request.md | 57 +++++++++++++++++++ .github/make-dev.sh | 2 + .github/make-prod.sh | 2 + .github/make-test.sh | 2 + .github/run-act.sh | 7 +++ .../u24_workflow_before_release.yaml | 18 ++++++ .../workflows/u24_workflow_release_call.yaml | 20 +++++++ .../u24_workflow_tag_to_release.yaml | 15 +++++ CHANGELOG.md | 6 +- workflow_zstack/version.py | 2 +- 18 files changed, 293 insertions(+), 4 deletions(-) create mode 100644 .github/.staging_workflows/anchored_u24_workflow_before_release.yaml create mode 100644 .github/.staging_workflows/anchored_u24_workflow_release_call.yaml create mode 100644 .github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml create mode 100644 .github/.staging_workflows/normalize.sh create mode 100644 .github/.test/.secrets create mode 100644 .github/.test/README.md create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/config.yml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md create mode 100644 .github/make-dev.sh create mode 100644 .github/make-prod.sh create mode 100644 .github/make-test.sh create mode 100644 .github/run-act.sh create mode 100644 .github/workflows/u24_workflow_before_release.yaml create mode 100644 .github/workflows/u24_workflow_release_call.yaml create mode 100644 .github/workflows/u24_workflow_tag_to_release.yaml diff --git a/.github/.staging_workflows/anchored_u24_workflow_before_release.yaml b/.github/.staging_workflows/anchored_u24_workflow_before_release.yaml new file mode 100644 index 0000000..76ea65f --- /dev/null +++ b/.github/.staging_workflows/anchored_u24_workflow_before_release.yaml @@ -0,0 +1,29 @@ +name: 
u24_workflow_before_release_0.0.1 +on: + pull_request: + push: + branches: + - '**' + tags-ignore: + - '**' + workflow_dispatch: + +anchor-dev-build-call: &dev-build-call + uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + +anchor-test-build-call: &test-build-call + uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + +anchor-prod-build-call: &prod-build-call + uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + +jobs: + call_context_check: + uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main + + call_u24_workflow_build_debian: + !!merge <<: *$STAGE-build-call + with: + jhub_ver: 1.4.2 + py_ver: 3.9 + dist: debian \ No newline at end of file diff --git a/.github/.staging_workflows/anchored_u24_workflow_release_call.yaml b/.github/.staging_workflows/anchored_u24_workflow_release_call.yaml new file mode 100644 index 0000000..d9fc757 --- /dev/null +++ b/.github/.staging_workflows/anchored_u24_workflow_release_call.yaml @@ -0,0 +1,46 @@ +name: u24_workflow_release_call_0.0.1 + +on: + workflow_run: + workflows: ["u24_workflow_tag_to_release_0.0.1"] + types: + - completed + +anchor-dev-release-call: &dev-release-call + uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_release.yaml@main + +anchor-test-release-call: &test-release-call + uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_release.yaml@main + +anchor-prod-release-call: &prod-release-call + uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_release.yaml@main + +anchor-dev-release-if: &dev-release-if + if: >- + github.event.workflow_run.conclusion == 'success' && + github.repository_owner == 'yambottle' + +anchor-test-release-if: &test-release-if + if: >- + github.event.workflow_run.conclusion == 'success' && + github.repository_owner == 'yambottle' + +anchor-prod-release-if: &prod-release-if + if: >- + github.event.workflow_run.conclusion == 'success' && + github.repository_owner == 'datajoint' + +jobs: + call_context_check: + uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main + + call_u24_workflow_release_debian: + !!merge <<: *$STAGE-release-if + !!merge <<: *$STAGE-release-call + with: + jhub_ver: 1.4.2 + py_ver: 3.9 + dist: debian + secrets: + REGISTRY_USERNAME: ${{secrets.DOCKER_USERNAME}} + REGISTRY_PASSWORD: ${{secrets.DOCKER_PASSWORD}} \ No newline at end of file diff --git a/.github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml b/.github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml new file mode 100644 index 0000000..982f287 --- /dev/null +++ b/.github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml @@ -0,0 +1,26 @@ +name: u24_workflow_tag_to_release_0.0.1 + +on: + push: + tags: + - '*.*.*' + - 'test*.*.*' + +anchor-dev-build-call: &dev-build-call + uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + +anchor-test-build-call: &test-build-call + uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + +anchor-prod-build-call: &prod-build-call + uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + +jobs: + call_context_check: + uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main + call_u24_workflow_build_debian: + !!merge <<: *$STAGE-build-call + with: + jhub_ver: 1.4.2 + py_ver: 3.9 + dist: debian \ No newline at end of file diff --git a/.github/.staging_workflows/normalize.sh b/.github/.staging_workflows/normalize.sh new file mode 100644 
index 0000000..08619ee --- /dev/null +++ b/.github/.staging_workflows/normalize.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# For Github Action that doesn't support anchor yet... +# https://github.com/actions/runner/issues/1182 + +STAGE=$1 +# .yaml in .staging_workflows has to be named using a prefix 'anchored_', this will be removed when normalizing +PREFIX="anchored_" +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +for source in $(ls $SCRIPT_DIR | grep yaml) +do + target=${source#$PREFIX} + export STAGE + envsubst '${STAGE}' < $SCRIPT_DIR/$source | yq e 'explode(.) | del(.anchor-*)' > $SCRIPT_DIR/../workflows/$target +done \ No newline at end of file diff --git a/.github/.test/.secrets b/.github/.test/.secrets new file mode 100644 index 0000000..0cdd0de --- /dev/null +++ b/.github/.test/.secrets @@ -0,0 +1,4 @@ +RAW_DEPLOY_KEY= +GITHUB_TOKEN= +REGISTRY_USERNAME= +REGISTRY_PASSWORD= \ No newline at end of file diff --git a/.github/.test/README.md b/.github/.test/README.md new file mode 100644 index 0000000..3ffc63a --- /dev/null +++ b/.github/.test/README.md @@ -0,0 +1,3 @@ +For local testing using act, you need to create few things: +- make a `.test/artifacts` dir for `act --artifact-server-path ./.test/artifacts/` +- make a `.test/.secrets` file similar as `.env` for `act --secret-file ./.test/.secrets` \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..31fe9fc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,39 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: 'bug' +assignees: '' + +--- + +## Bug Report + +### Description + +A clear and concise description of what is the overall operation that is intended to be +performed that resulted in an error. + +### Reproducibility +Include: +- OS (WIN | MACOS | Linux) +- DataJoint Element Version +- MySQL Version +- MySQL Deployment Strategy (local-native | local-docker | remote) +- Minimum number of steps to reliably reproduce the issue +- Complete error stack as a result of evaluating the above steps + +### Expected Behavior +A clear and concise description of what you expected to happen. + +### Screenshots +If applicable, add screenshots to help explain your problem. + +### Additional Research and Context +Add any additional research or context that was conducted in creating this report. + +For example: +- Related GitHub issues and PR's either within this repository or in other relevant + repositories. +- Specific links to specific lines or a focus within source code. +- Relevant summary of Maintainers development meetings, milestones, projects, etc. 
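The `normalize.sh` script added above is terse, so a rough Python illustration of the transformation it performs may help: substitute the `${STAGE}` placeholder, resolve the YAML anchors, and drop the top-level `anchor-*` scaffolding keys before writing the result into `.github/workflows/`. This is a sketch only; the actual pipeline uses `envsubst` and `yq`, and PyYAML plus the simplified snippet below are assumed purely for demonstration (the real staged files use the explicit `!!merge` tag).

```python
# Illustrative re-implementation of normalize.sh; PyYAML is assumed here and is
# not part of this repository's requirements. The real script pipes the staged
# file through `envsubst '${STAGE}'` and `yq e 'explode(.) | del(.anchor-*)'`.
import yaml

STAGED = """\
anchor-dev-build-call: &dev-build-call
  uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main

jobs:
  call_u24_workflow_build_debian:
    <<: *${STAGE}-build-call
    with:
      py_ver: "3.9"
"""


def normalize(text: str, stage: str) -> str:
    # Step 1 (envsubst): fill in the stage chosen by make-dev/test/prod.sh.
    text = text.replace("${STAGE}", stage)
    # Step 2 (yq explode): loading with PyYAML resolves anchors and merge keys.
    doc = yaml.safe_load(text)
    # Step 3 (yq del(.anchor-*)): drop the top-level anchor-* scaffolding keys.
    doc = {key: value for key, value in doc.items() if not key.startswith("anchor-")}
    return yaml.safe_dump(doc, sort_keys=False)


print(normalize(STAGED, "dev"))
```

Committing the expanded copies under `.github/workflows/` is what lets GitHub Actions run them, since the runner does not yet support YAML anchors (see the actions/runner issue referenced in the script header).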
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..d31fbac --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: DataJoint Contribution Guideline + url: https://docs.datajoint.org/python/community/02-Contribute.html + about: Please make sure to review the DataJoint Contribution Guidelines \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..1f2b784 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,57 @@ +--- +name: Feature request +about: Suggest an idea for a new feature +title: '' +labels: 'enhancement' +assignees: '' + +--- + +## Feature Request + +### Problem + +A clear and concise description how this idea has manifested and the context. Elaborate +on the need for this feature and/or what could be improved. Ex. I'm always frustrated +when [...] + +### Requirements + +A clear and concise description of the requirements to satisfy the new feature. Detail +what you expect from a successful implementation of the feature. Ex. When using this +feature, it should [...] + +### Justification + +Provide the key benefits in making this a supported feature. Ex. Adding support for this +feature would ensure [...] + +### Alternative Considerations + +Do you currently have a work-around for this? Provide any alternative solutions or +features you've considered. + +### Related Errors +Add any errors as a direct result of not exposing this feature. + +Please include steps to reproduce provided errors as follows: +- OS (WIN | MACOS | Linux) +- DataJoint Element Version +- MySQL Version +- MySQL Deployment Strategy (local-native | local-docker | remote) +- Minimum number of steps to reliably reproduce the issue +- Complete error stack as a result of evaluating the above steps + +### Screenshots +If applicable, add screenshots to help explain your feature. + +### Additional Research and Context +Add any additional research or context that was conducted in creating this feature request. + +For example: +- Related GitHub issues and PR's either within this repository or in other relevant + repositories. +- Specific links to specific lines or a focus within source code. +- Relevant summary of Maintainers development meetings, milestones, projects, etc. +- Any additional supplemental web references or links that would further justify this + feature request. 
diff --git a/.github/make-dev.sh b/.github/make-dev.sh new file mode 100644 index 0000000..e9bdb38 --- /dev/null +++ b/.github/make-dev.sh @@ -0,0 +1,2 @@ +#!/bin/bash +bash ./.staging_workflows/normalize.sh dev \ No newline at end of file diff --git a/.github/make-prod.sh b/.github/make-prod.sh new file mode 100644 index 0000000..806e39e --- /dev/null +++ b/.github/make-prod.sh @@ -0,0 +1,2 @@ +#!/bin/bash +bash ./.staging_workflows/normalize.sh prod \ No newline at end of file diff --git a/.github/make-test.sh b/.github/make-test.sh new file mode 100644 index 0000000..3870e77 --- /dev/null +++ b/.github/make-test.sh @@ -0,0 +1,2 @@ +#!/bin/bash +bash ./.staging_workflows/normalize.sh test \ No newline at end of file diff --git a/.github/run-act.sh b/.github/run-act.sh new file mode 100644 index 0000000..4ae9cdf --- /dev/null +++ b/.github/run-act.sh @@ -0,0 +1,7 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +bash $SCRIPT_DIR/.staging_workflows/normalize.sh dev +cd .. +act -P ubuntu-latest=drewyangdev/ubuntu:act-latest \ + --secret-file $SCRIPT_DIR/.test/.secrets \ + --artifact-server-path $SCRIPT_DIR/.test/artifacts/ \ No newline at end of file diff --git a/.github/workflows/u24_workflow_before_release.yaml b/.github/workflows/u24_workflow_before_release.yaml new file mode 100644 index 0000000..28a5ff5 --- /dev/null +++ b/.github/workflows/u24_workflow_before_release.yaml @@ -0,0 +1,18 @@ +name: u24_workflow_before_release_0.0.1 +on: + pull_request: + push: + branches: + - '**' + tags-ignore: + - '**' + workflow_dispatch: +jobs: + call_context_check: + uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main + call_u24_workflow_build_debian: + uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + with: + jhub_ver: 1.4.2 + py_ver: 3.9 + dist: debian diff --git a/.github/workflows/u24_workflow_release_call.yaml b/.github/workflows/u24_workflow_release_call.yaml new file mode 100644 index 0000000..8196673 --- /dev/null +++ b/.github/workflows/u24_workflow_release_call.yaml @@ -0,0 +1,20 @@ +name: u24_workflow_release_call_0.0.1 +on: + workflow_run: + workflows: ["u24_workflow_tag_to_release_0.0.1"] + types: + - completed +jobs: + call_context_check: + uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main + call_u24_workflow_release_debian: + if: >- + github.event.workflow_run.conclusion == 'success' && github.repository_owner == 'datajoint' + uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_release.yaml@main + with: + jhub_ver: 1.4.2 + py_ver: 3.9 + dist: debian + secrets: + REGISTRY_USERNAME: ${{secrets.DOCKER_USERNAME}} + REGISTRY_PASSWORD: ${{secrets.DOCKER_PASSWORD}} diff --git a/.github/workflows/u24_workflow_tag_to_release.yaml b/.github/workflows/u24_workflow_tag_to_release.yaml new file mode 100644 index 0000000..3a6ce58 --- /dev/null +++ b/.github/workflows/u24_workflow_tag_to_release.yaml @@ -0,0 +1,15 @@ +name: u24_workflow_tag_to_release_0.0.1 +on: + push: + tags: + - '*.*.*' + - 'test*.*.*' +jobs: + call_context_check: + uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main + call_u24_workflow_build_debian: + uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main + with: + jhub_ver: 1.4.2 + py_ver: 3.9 + dist: debian diff --git a/CHANGELOG.md b/CHANGELOG.md index 35047ef..e324533 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,8 @@ Observes [Semantic Versioning](https://semver.org/spec/v2.0.0.html) standard and 
[Keep a Changelog](https://keepachangelog.com/en/1.0.0/) convention. -## [0.0.0] - Unreleased +## [0.1.0] - Unreleased -+ Add - Workflow pipeline ++ Add - Workflow pipeline, pytests, CICD -[0.0.0]: https://github.com/datajoint/workflow-session/releases/tag/0.0.0 +[0.1.0]: https://github.com/datajoint/workflow-session/releases/tag/0.1.0 diff --git a/workflow_zstack/version.py b/workflow_zstack/version.py index bbc2222..ee6de92 100644 --- a/workflow_zstack/version.py +++ b/workflow_zstack/version.py @@ -1,2 +1,2 @@ """Package metadata.""" -__version__ = "0.0.0" +__version__ = "0.1.0" From eb01692083c0767b18de949356ebe3685c61e687 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 13:06:20 -0500 Subject: [PATCH 08/62] Update CHANGELOG.md Co-authored-by: Kabilar Gunalan --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e324533..76b45af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,4 +7,4 @@ Observes [Semantic Versioning](https://semver.org/spec/v2.0.0.html) standard and + Add - Workflow pipeline, pytests, CICD -[0.1.0]: https://github.com/datajoint/workflow-session/releases/tag/0.1.0 +[0.1.0]: https://github.com/datajoint/workflow-zstack/releases/tag/0.1.0 From 8ce86601115353c1ba7ab0fbd72479054e09cd87 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 13:11:25 -0500 Subject: [PATCH 09/62] Update .devcontainer/Dockerfile Co-authored-by: Kabilar Gunalan --- .devcontainer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index a65d696..7ea8513 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -37,6 +37,7 @@ ENV DJ_HOST fakeservices.datajoint.io ENV DJ_USER root ENV DJ_PASS simple +ENV VOLUME_ROOT_DATA_DIR /workspaces/workflow-zstack/example_data ENV DATABASE_PREFIX neuro_ USER vscode From 90044d390cf0d877d110dd25d723f29c6c1ce10b Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 13:11:42 -0500 Subject: [PATCH 10/62] Update .github/ISSUE_TEMPLATE/config.yml Co-authored-by: Kabilar Gunalan --- .github/ISSUE_TEMPLATE/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index d31fbac..b3d197d 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,5 +1,5 @@ blank_issues_enabled: false contact_links: - name: DataJoint Contribution Guideline - url: https://docs.datajoint.org/python/community/02-Contribute.html + url: https://datajoint.com/docs/community/contribute/ about: Please make sure to review the DataJoint Contribution Guidelines \ No newline at end of file From 59c6f63df58e95ae4eb53196fd3ea78837e60105 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 13:12:25 -0500 Subject: [PATCH 11/62] Update tests/test_pipeline_generation.py Co-authored-by: Kabilar Gunalan --- tests/test_pipeline_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py index df584dc..9d05345 100644 --- a/tests/test_pipeline_generation.py +++ b/tests/test_pipeline_generation.py @@ -6,7 +6,7 @@ def test_generate_pipeline(pipeline): volume_matching = pipeline["volume_matching"] bossdb = 
pipeline["bossdb"] - # test elements connection from lab, subject to Session + # Test connection from Subject to Session assert subject.Subject.full_table_name in session.Session.parents() # test elements connection from Session to probe, ephys, ephys_report From 1ef1edd126103196cda8b0d9a06777e03666d568 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 13:13:31 -0500 Subject: [PATCH 12/62] Apply suggestions from code review Co-authored-by: Kabilar Gunalan --- user_data/sessions.csv | 2 -- user_data/subjects.csv | 2 -- workflow_zstack/ingest.py | 48 ------------------------------------ workflow_zstack/pipeline.py | 2 +- workflow_zstack/reference.py | 3 +-- 5 files changed, 2 insertions(+), 55 deletions(-) diff --git a/user_data/sessions.csv b/user_data/sessions.csv index 1d3ccb1..e69de29 100644 --- a/user_data/sessions.csv +++ b/user_data/sessions.csv @@ -1,2 +0,0 @@ -subject,session_dir,session_id,session_datetime -subject8,,1,2022-05-05 12:13:14 diff --git a/user_data/subjects.csv b/user_data/subjects.csv index a2e14a6..e69de29 100644 --- a/user_data/subjects.csv +++ b/user_data/subjects.csv @@ -1,2 +0,0 @@ -subject,sex,subject_birth_date,subject_description -subject8,F,2023-03-03,EM data collection diff --git a/workflow_zstack/ingest.py b/workflow_zstack/ingest.py index 8706caa..e69de29 100644 --- a/workflow_zstack/ingest.py +++ b/workflow_zstack/ingest.py @@ -1,48 +0,0 @@ -from element_interface.utils import ingest_csv_to_table - -from .pipeline import session, subject - - -def ingest_subjects( - subject_csv_path: str = "./user_data/subjects.csv", - skip_duplicates: bool = True, - verbose: bool = True, -): - """Inserts ./user_data/subject.csv data into corresponding subject schema tables - - Args: - subject_csv_path (str): relative path of subject csv - skip_duplicates (bool): Default True. Passed to DataJoint insert - verbose (bool): Display number of entries inserted when ingesting - """ - csvs = [subject_csv_path] - tables = [subject.Subject()] - ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) - - -def ingest_sessions( - session_csv_path: str = "./user_data/sessions.csv", - skip_duplicates: bool = True, - verbose: bool = True, -): - """ - Inserts data from a sessions csv into corresponding session schema tables - By default, uses data from workflow_session/user_data/session/ - session_csv_path (str): relative path of session csv - skip_duplicates (bool): Default True. See DataJoint `insert` function - verbose (bool): Print number inserted (i.e., table length change) - """ - csvs = [ - session_csv_path, - session_csv_path, - ] - tables = [ - session.Session(), - session.SessionDirectory(), - ] - - ingest_csv_to_table(csvs, tables, skip_duplicates=skip_duplicates, verbose=verbose) - - -if __name__ == "__main__": - ingest_sessions() diff --git a/workflow_zstack/pipeline.py b/workflow_zstack/pipeline.py index db2dce1..1d40ee2 100644 --- a/workflow_zstack/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -4,7 +4,7 @@ from element_animal import subject, surgery from element_animal.subject import Subject from element_session import session_with_id as session -from element_calcium_imaging import imaging, imaging_report, scan +from element_calcium_imaging import imaging, scan from element_zstack import volume, volume_matching, bossdb from . 
import db_prefix diff --git a/workflow_zstack/reference.py b/workflow_zstack/reference.py index 1bfde38..0dfad14 100644 --- a/workflow_zstack/reference.py +++ b/workflow_zstack/reference.py @@ -22,6 +22,5 @@ class Device(dj.Lookup): description=null : varchar(256) """ contents = [ - ["scanner1", "calcium imaging", ""], - ["scanner2", "calcium imaging", ""], + ["scanner1", "fluorescence microscope", ""], ] From d39e3b4aa76b2a1d1913a729cfdba9366345f9cd Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 27 Apr 2023 13:34:17 -0500 Subject: [PATCH 13/62] Update CICD folder, version-pin requirements --- .../anchored_u24_workflow_before_release.yaml | 29 ------------ .../anchored_u24_workflow_release_call.yaml | 46 ------------------- .../anchored_u24_workflow_tag_to_release.yaml | 26 ----------- .github/.staging_workflows/normalize.sh | 14 ------ .github/.test/.secrets | 4 -- .github/.test/README.md | 3 -- .github/make-dev.sh | 2 - .github/make-prod.sh | 2 - .github/make-test.sh | 2 - .github/run-act.sh | 7 --- requirements.txt | 13 +++--- workflow_zstack/ingest.py | 0 12 files changed, 7 insertions(+), 141 deletions(-) delete mode 100644 .github/.staging_workflows/anchored_u24_workflow_before_release.yaml delete mode 100644 .github/.staging_workflows/anchored_u24_workflow_release_call.yaml delete mode 100644 .github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml delete mode 100644 .github/.staging_workflows/normalize.sh delete mode 100644 .github/.test/.secrets delete mode 100644 .github/.test/README.md delete mode 100644 .github/make-dev.sh delete mode 100644 .github/make-prod.sh delete mode 100644 .github/make-test.sh delete mode 100644 .github/run-act.sh delete mode 100644 workflow_zstack/ingest.py diff --git a/.github/.staging_workflows/anchored_u24_workflow_before_release.yaml b/.github/.staging_workflows/anchored_u24_workflow_before_release.yaml deleted file mode 100644 index 76ea65f..0000000 --- a/.github/.staging_workflows/anchored_u24_workflow_before_release.yaml +++ /dev/null @@ -1,29 +0,0 @@ -name: u24_workflow_before_release_0.0.1 -on: - pull_request: - push: - branches: - - '**' - tags-ignore: - - '**' - workflow_dispatch: - -anchor-dev-build-call: &dev-build-call - uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main - -anchor-test-build-call: &test-build-call - uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main - -anchor-prod-build-call: &prod-build-call - uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main - -jobs: - call_context_check: - uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main - - call_u24_workflow_build_debian: - !!merge <<: *$STAGE-build-call - with: - jhub_ver: 1.4.2 - py_ver: 3.9 - dist: debian \ No newline at end of file diff --git a/.github/.staging_workflows/anchored_u24_workflow_release_call.yaml b/.github/.staging_workflows/anchored_u24_workflow_release_call.yaml deleted file mode 100644 index d9fc757..0000000 --- a/.github/.staging_workflows/anchored_u24_workflow_release_call.yaml +++ /dev/null @@ -1,46 +0,0 @@ -name: u24_workflow_release_call_0.0.1 - -on: - workflow_run: - workflows: ["u24_workflow_tag_to_release_0.0.1"] - types: - - completed - -anchor-dev-release-call: &dev-release-call - uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_release.yaml@main - -anchor-test-release-call: &test-release-call - uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_release.yaml@main - -anchor-prod-release-call: &prod-release-call - 
uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_release.yaml@main - -anchor-dev-release-if: &dev-release-if - if: >- - github.event.workflow_run.conclusion == 'success' && - github.repository_owner == 'yambottle' - -anchor-test-release-if: &test-release-if - if: >- - github.event.workflow_run.conclusion == 'success' && - github.repository_owner == 'yambottle' - -anchor-prod-release-if: &prod-release-if - if: >- - github.event.workflow_run.conclusion == 'success' && - github.repository_owner == 'datajoint' - -jobs: - call_context_check: - uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main - - call_u24_workflow_release_debian: - !!merge <<: *$STAGE-release-if - !!merge <<: *$STAGE-release-call - with: - jhub_ver: 1.4.2 - py_ver: 3.9 - dist: debian - secrets: - REGISTRY_USERNAME: ${{secrets.DOCKER_USERNAME}} - REGISTRY_PASSWORD: ${{secrets.DOCKER_PASSWORD}} \ No newline at end of file diff --git a/.github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml b/.github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml deleted file mode 100644 index 982f287..0000000 --- a/.github/.staging_workflows/anchored_u24_workflow_tag_to_release.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: u24_workflow_tag_to_release_0.0.1 - -on: - push: - tags: - - '*.*.*' - - 'test*.*.*' - -anchor-dev-build-call: &dev-build-call - uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main - -anchor-test-build-call: &test-build-call - uses: yambottle/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main - -anchor-prod-build-call: &prod-build-call - uses: dj-sciops/djsciops-cicd/.github/workflows/u24_workflow_build.yaml@main - -jobs: - call_context_check: - uses: dj-sciops/djsciops-cicd/.github/workflows/context_check.yaml@main - call_u24_workflow_build_debian: - !!merge <<: *$STAGE-build-call - with: - jhub_ver: 1.4.2 - py_ver: 3.9 - dist: debian \ No newline at end of file diff --git a/.github/.staging_workflows/normalize.sh b/.github/.staging_workflows/normalize.sh deleted file mode 100644 index 08619ee..0000000 --- a/.github/.staging_workflows/normalize.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -# For Github Action that doesn't support anchor yet... -# https://github.com/actions/runner/issues/1182 - -STAGE=$1 -# .yaml in .staging_workflows has to be named using a prefix 'anchored_', this will be removed when normalizing -PREFIX="anchored_" -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -for source in $(ls $SCRIPT_DIR | grep yaml) -do - target=${source#$PREFIX} - export STAGE - envsubst '${STAGE}' < $SCRIPT_DIR/$source | yq e 'explode(.) 
| del(.anchor-*)' > $SCRIPT_DIR/../workflows/$target -done \ No newline at end of file diff --git a/.github/.test/.secrets b/.github/.test/.secrets deleted file mode 100644 index 0cdd0de..0000000 --- a/.github/.test/.secrets +++ /dev/null @@ -1,4 +0,0 @@ -RAW_DEPLOY_KEY= -GITHUB_TOKEN= -REGISTRY_USERNAME= -REGISTRY_PASSWORD= \ No newline at end of file diff --git a/.github/.test/README.md b/.github/.test/README.md deleted file mode 100644 index 3ffc63a..0000000 --- a/.github/.test/README.md +++ /dev/null @@ -1,3 +0,0 @@ -For local testing using act, you need to create few things: -- make a `.test/artifacts` dir for `act --artifact-server-path ./.test/artifacts/` -- make a `.test/.secrets` file similar as `.env` for `act --secret-file ./.test/.secrets` \ No newline at end of file diff --git a/.github/make-dev.sh b/.github/make-dev.sh deleted file mode 100644 index e9bdb38..0000000 --- a/.github/make-dev.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -bash ./.staging_workflows/normalize.sh dev \ No newline at end of file diff --git a/.github/make-prod.sh b/.github/make-prod.sh deleted file mode 100644 index 806e39e..0000000 --- a/.github/make-prod.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -bash ./.staging_workflows/normalize.sh prod \ No newline at end of file diff --git a/.github/make-test.sh b/.github/make-test.sh deleted file mode 100644 index 3870e77..0000000 --- a/.github/make-test.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -bash ./.staging_workflows/normalize.sh test \ No newline at end of file diff --git a/.github/run-act.sh b/.github/run-act.sh deleted file mode 100644 index 4ae9cdf..0000000 --- a/.github/run-act.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -bash $SCRIPT_DIR/.staging_workflows/normalize.sh dev -cd .. 
-act -P ubuntu-latest=drewyangdev/ubuntu:act-latest \ - --secret-file $SCRIPT_DIR/.test/.secrets \ - --artifact-server-path $SCRIPT_DIR/.test/artifacts/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 77ae707..e0c9d23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ datajoint>=0.13.0 -element-animal -element-lab -element-session -element-calcium-imaging -element-zstack -intern +element-animal>=0.1.5 +element-lab>=0.2.0 +element-session>=0.1.2 +element-calcium-imaging>=0.5.5 +element-zstack>=0.1.0 +intern>=1.4.1 +ipykernel>=6.0.1 diff --git a/workflow_zstack/ingest.py b/workflow_zstack/ingest.py deleted file mode 100644 index e69de29..0000000 From 92d6dae6600665ec5a03eb821eaa97cc22d77434 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 27 Apr 2023 14:12:15 -0500 Subject: [PATCH 14/62] Delete .csv files --- user_data/sessions.csv | 0 user_data/subjects.csv | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 user_data/sessions.csv delete mode 100644 user_data/subjects.csv diff --git a/user_data/sessions.csv b/user_data/sessions.csv deleted file mode 100644 index e69de29..0000000 diff --git a/user_data/subjects.csv b/user_data/subjects.csv deleted file mode 100644 index e69de29..0000000 From 8fc3b7fedee7c65c4db4d9bc2968a3ee5ef776d0 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 14:15:57 -0500 Subject: [PATCH 15/62] Update tests/test_pipeline_generation.py Co-authored-by: Kabilar Gunalan --- tests/test_pipeline_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py index 9d05345..a81d53b 100644 --- a/tests/test_pipeline_generation.py +++ b/tests/test_pipeline_generation.py @@ -9,7 +9,7 @@ def test_generate_pipeline(pipeline): # Test connection from Subject to Session assert subject.Subject.full_table_name in session.Session.parents() - # test elements connection from Session to probe, ephys, ephys_report + # Test connection from Session to Scan and Scan to Volume assert session.Session.full_table_name in scan.Scan.parents() assert scan.Scan.full_table_name in volume.Volume.parents() assert "mask_npix" in (volume.Segmentation.Mask.heading.secondary_attributes) From 6a0cc3163dbecabfb241fdf08dc4d9bbb7c57e71 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 27 Apr 2023 14:22:42 -0500 Subject: [PATCH 16/62] Remove `Docker` directory + update devcontainer.json --- .devcontainer/local/devcontainer.json | 2 +- docker/Dockerfile | 25 ------------------- docker/apt_requirements.txt | 2 -- docker/setup.sh | 35 --------------------------- 4 files changed, 1 insertion(+), 63 deletions(-) delete mode 100644 docker/Dockerfile delete mode 100644 docker/apt_requirements.txt delete mode 100644 docker/setup.sh diff --git a/.devcontainer/local/devcontainer.json b/.devcontainer/local/devcontainer.json index af0d917..ad34d46 100644 --- a/.devcontainer/local/devcontainer.json +++ b/.devcontainer/local/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "Local", + "name": "Local Test", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", diff --git a/docker/Dockerfile b/docker/Dockerfile deleted file mode 100644 index cb1ef61..0000000 --- a/docker/Dockerfile +++ /dev/null @@ -1,25 +0,0 @@ -FROM datajoint/djbase:py3.9-debian-8eb1715 - -USER anaconda:anaconda - -COPY 
./workflow-volume/docker/apt_requirements.txt /tmp/ -RUN /entrypoint.sh echo "Installed dependencies." - -WORKDIR /main/workflow-volume - -# Always move local - conditional install in setup.sh -COPY --chown=anaconda:anaconda ./element-lab/ /main/element-lab/ -COPY --chown=anaconda:anaconda ./element-animal/ /main/element-animal/ -COPY --chown=anaconda:anaconda ./element-session/ /main/element-session/ -COPY --chown=anaconda:anaconda ./element-event/ /main/element-event/ -COPY --chown=anaconda:anaconda ./element-interface/ /main/element-interface/ -COPY --chown=anaconda:anaconda ./element-volume/ /main/element-volume/ -COPY --chown=anaconda:anaconda ./workflow-volume/ /main/workflow-volume/ - -# Conditional install - local-all, local-dlc, or git -COPY --chown=anaconda:anaconda ./workflow-volume/docker/setup.sh /main/ -COPY --chown=anaconda:anaconda ./workflow-volume/docker/.env /main/ -RUN chmod 755 /main/setup.sh -RUN chmod 755 /main/.env -RUN /main/setup.sh -RUN rm -f ./dj_local_conf.json diff --git a/docker/apt_requirements.txt b/docker/apt_requirements.txt deleted file mode 100644 index 3505bb3..0000000 --- a/docker/apt_requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -git -locales-all diff --git a/docker/setup.sh b/docker/setup.sh deleted file mode 100644 index 373f4c0..0000000 --- a/docker/setup.sh +++ /dev/null @@ -1,35 +0,0 @@ -#! /bin/bash -alias ll='ls -aGg' -export $(grep -v '^#' /main/.env | xargs) - -cd /main/ -echo "INSALL OPTION:" $INSTALL_OPTION - -# Always get djarchive -pip install --no-deps git+https://github.com/datajoint/djarchive-client.git - -if [ "$INSTALL_OPTION" == "local-all" ]; then # all local installs, mapped from host - for f in lab animal session interface; do - pip install -e ./element-${f} - done - pip install -e ./element-array-ephys[nwb] - pip install -e ./workflow-array-ephys -else # all except this repo pip installed - for f in lab animal session interface; do - pip install git+https://github.com/${GITHUB_USERNAME}/element-${f}.git - done - if [ "$INSTALL_OPTION" == "local-ephys" ]; then # only array-ephys items from local - pip install -e ./element-array-ephys[nwb] - pip install -e ./workflow-array-ephys - elif [ "$INSTALL_OPTION" == "git" ]; then # all from github - pip install git+https://github.com/${GITHUB_USERNAME}/element-array-ephys.git - pip install git+https://github.com/${GITHUB_USERNAME}/workflow-array-ephys.git - fi -fi - -# If test cmd contains pytest, install -if [[ "$TEST_CMD" == *pytest* ]]; then - pip install pytest - pip install pytest-cov - pip install opencv-python -fi From 7cdba76e24e62c68ab258dfc5ccd2e0a046c838f Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 14:55:28 -0500 Subject: [PATCH 17/62] Update .devcontainer/devcontainer.json Co-authored-by: Kabilar Gunalan --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 47d92ee..f575459 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "Tutorial", + "name": "Tutorial with remote data", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", From 3f5362837eecf6abe25393a186081bb7820e0291 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 14:55:39 -0500 Subject: [PATCH 18/62] Update 
.devcontainer/devcontainer.json Co-authored-by: Kabilar Gunalan --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index f575459..d3bcb7b 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -6,7 +6,7 @@ "remoteEnv": { "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" }, - "onCreateCommand": "pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "onCreateCommand": "mkdir -p ${DJ_PUBLIC_S3_MOUNT_PATH} && pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", "postStartCommand": "docker volume prune -f", "hostRequirements": { "cpus": 4, From 43cd684ffb89c7cfd21d0cbe5d67b48f345694f2 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Thu, 27 Apr 2023 14:55:52 -0500 Subject: [PATCH 19/62] Update .devcontainer/devcontainer.json Co-authored-by: Kabilar Gunalan --- .devcontainer/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d3bcb7b..97d9fbe 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -7,7 +7,7 @@ "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" }, "onCreateCommand": "mkdir -p ${DJ_PUBLIC_S3_MOUNT_PATH} && pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f", + "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${DJ_PUBLIC_S3_MOUNT_PATH} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", "hostRequirements": { "cpus": 4, "memory": "8gb", From 66a2a7ad1d10ab86d50cb0093b8efd56951a5ba1 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Wed, 3 May 2023 17:01:34 -0500 Subject: [PATCH 20/62] Update .devcontainer/local/devcontainer.json Co-authored-by: Kabilar Gunalan --- .devcontainer/local/devcontainer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/local/devcontainer.json b/.devcontainer/local/devcontainer.json index ad34d46..29b7bca 100644 --- a/.devcontainer/local/devcontainer.json +++ b/.devcontainer/local/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "Local Test", + "name": "Local test environment with remote data", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", From 833e5bef4c36bec717bde0b12754cc408c86692d Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Wed, 3 May 2023 17:03:24 -0500 Subject: [PATCH 21/62] Rename local to local-test --- .devcontainer/{local => local-test}/devcontainer.json | 7 +++++-- .devcontainer/{local => local-test}/docker-compose.yaml | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) rename .devcontainer/{local => local-test}/devcontainer.json (54%) rename .devcontainer/{local => local-test}/docker-compose.yaml (56%) diff --git a/.devcontainer/local/devcontainer.json b/.devcontainer/local-test/devcontainer.json similarity index 54% rename from .devcontainer/local/devcontainer.json rename to .devcontainer/local-test/devcontainer.json index ad34d46..1efc00a 100644 --- a/.devcontainer/local/devcontainer.json +++ b/.devcontainer/local-test/devcontainer.json @@ -1,12 +1,15 @@ +// For 
format details, see https://aka.ms/devcontainer.json. { - "name": "Local Test", + "name": "Local test environment with remote data", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + // Use this environment variable if you need to bind mount your local source code into a new container. "remoteEnv": { "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" }, - "onCreateCommand": "pip install -r ./requirements_dev.txt && pip install -e . && pip install -e ../element-zstack && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + // https://containers.dev/features + "onCreateCommand": "pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", "postStartCommand": "docker volume prune -f", "hostRequirements": { "cpus": 4, diff --git a/.devcontainer/local/docker-compose.yaml b/.devcontainer/local-test/docker-compose.yaml similarity index 56% rename from .devcontainer/local/docker-compose.yaml rename to .devcontainer/local-test/docker-compose.yaml index 0dd6db4..ffdd0e7 100644 --- a/.devcontainer/local/docker-compose.yaml +++ b/.devcontainer/local-test/docker-compose.yaml @@ -9,5 +9,8 @@ services: extra_hosts: - fakeservices.datajoint.io:127.0.0.1 volumes: - - ../../..:/workspaces - privileged: true # only because of dind \ No newline at end of file + - ../../..:/workspaces:cached + - docker_data:/var/lib/docker # persist docker images + privileged: true # only because of dind +volumes: + docker_data: \ No newline at end of file From db789d48d5dc73bc5beafb1941c52f2d23e36e96 Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Fri, 5 May 2023 14:15:58 -0500 Subject: [PATCH 22/62] Update gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 560b576..b2ec45f 100644 --- a/.gitignore +++ b/.gitignore @@ -75,3 +75,6 @@ ENV/ dj_local_c*.json temp* *nwb + +# vscode +*.code-workspace \ No newline at end of file From 67cf768d4d905423e59bd172d70eb2a67be8dc9f Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Fri, 5 May 2023 20:38:14 -0500 Subject: [PATCH 23/62] Add Compose file --- docker-compose.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 docker-compose.yaml diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 0000000..0a2c1fc --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,15 @@ +# MYSQL_VER=8.0 docker compose up --build +version: "2.4" +services: + db: + restart: always + image: datajoint/mysql:${MYSQL_VER} + environment: + - MYSQL_ROOT_PASSWORD=${DJ_PASS} + ports: + - "3306:3306" + healthcheck: + test: [ "CMD", "mysqladmin", "ping", "-h", "localhost" ] + timeout: 15s + retries: 10 + interval: 15s \ No newline at end of file From 73d78ef8bcad0e6a72b485b5acd9dbab5442c023 Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Sat, 6 May 2023 01:52:37 +0000 Subject: [PATCH 24/62] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 23a50c0..bfdbb1a 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ author_email="info@datajoint.com", license="MIT", url="https://github.com/datajoint/workflow-zstack", - keywords="neuroscience volumetric BossDB datajoint", + keywords="neuroscience volumetric bossdb datajoint", packages=find_packages(exclude=["contrib", "docs", "tests*"]), install_requires=requirements, ) From d1bb2bc36e28f7ca8dcda18254e82b7588b0fe90 Mon Sep 17 
00:00:00 2001 From: Kabilar Gunalan Date: Fri, 5 May 2023 20:58:04 -0500 Subject: [PATCH 25/62] Update Dev Container --- .devcontainer/Dockerfile | 4 ++++ .devcontainer/devcontainer.json | 10 +++++++--- .devcontainer/docker-compose.yaml | 6 +++++- .devcontainer/local-test/devcontainer.json | 13 +++++++------ 4 files changed, 23 insertions(+), 10 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 7ea8513..6c2ee6d 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -25,18 +25,22 @@ RUN \ # dircolors -b >> /home/vscode/.bashrc && \ # somehow fix colors apt-get clean COPY ./requirements.txt /tmp/ +COPY ./requirements_dev.txt /tmp/ RUN \ # workflow dependencies apt-get install gcc ffmpeg libsm6 libxext6 -y && \ pip install --no-cache-dir -r /tmp/requirements.txt && \ + pip install --no-cache-dir -r /tmp/requirements_dev.txt && \ # clean up rm /tmp/requirements.txt && \ + rm /tmp/requirements_dev.txt && \ apt-get clean ENV DJ_HOST fakeservices.datajoint.io ENV DJ_USER root ENV DJ_PASS simple +ENV DJ_PUBLIC_S3_MOUNT_PATH /workspaces/workflow-zstack/example_data ENV VOLUME_ROOT_DATA_DIR /workspaces/workflow-zstack/example_data ENV DATABASE_PREFIX neuro_ diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 97d9fbe..b2f1238 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -9,17 +9,21 @@ "onCreateCommand": "mkdir -p ${DJ_PUBLIC_S3_MOUNT_PATH} && pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${DJ_PUBLIC_S3_MOUNT_PATH} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", "hostRequirements": { - "cpus": 4, - "memory": "8gb", + "cpus": 8, + "memory": "16gb", "storage": "32gb" }, "forwardPorts": [ 3306 ], "customizations": { + "settings": { + "python.pythonPath": "/usr/local/bin/python" + }, "vscode": { "extensions": [ - "ms-python.python" + "ms-python.python", + "ms-toolsai.jupyter" ] } } diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index 28db3c5..0601f6e 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -4,9 +4,13 @@ services: extends: file: ./local-test/docker-compose.yaml service: app + environment: + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 devices: - /dev/fuse cap_add: - SYS_ADMIN security_opt: - - apparmor:unconfined \ No newline at end of file + - apparmor:unconfined +volumes: + docker_data: \ No newline at end of file diff --git a/.devcontainer/local-test/devcontainer.json b/.devcontainer/local-test/devcontainer.json index 1efc00a..4b843ec 100644 --- a/.devcontainer/local-test/devcontainer.json +++ b/.devcontainer/local-test/devcontainer.json @@ -1,28 +1,29 @@ -// For format details, see https://aka.ms/devcontainer.json. { "name": "Local test environment with remote data", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - // Use this environment variable if you need to bind mount your local source code into a new container. "remoteEnv": { "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" }, - // https://containers.dev/features "onCreateCommand": "pip install -e . 
&& MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", "postStartCommand": "docker volume prune -f", "hostRequirements": { - "cpus": 4, - "memory": "8gb", + "cpus": 8, + "memory": "16gb", "storage": "32gb" }, "forwardPorts": [ 3306 ], "customizations": { + "settings": { + "python.pythonPath": "/usr/local/bin/python" + }, "vscode": { "extensions": [ - "ms-python.python" + "ms-python.python", + "ms-toolsai.jupyter" ] } } From e2462d484825d3802ca066bbc7fede8716a16092 Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Sat, 6 May 2023 02:13:13 +0000 Subject: [PATCH 26/62] Update readme --- README.md | 73 ++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index b211863..eeb38b9 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,64 @@ -# Workflow for volumetric data +# DataJoint Workflow for ZStack -This directory provides an example workflow to save the information related to -volumetric data management, using the following DataJoint Elements +The DataJoint Workflow for ZStack combines four DataJoint Elements for cell +segmentation, volume registration, and cell matching - Elements Lab, +Animal, Session, and ZStack. DataJoint Elements collectively standardize and automate data collection and +analysis for neuroscience experiments. Each Element is a modular pipeline for data +storage and processing with corresponding database tables that can be combined with +other Elements to assemble a fully functional pipeline. This repository also provides +a tutorial environment and notebook to learn the pipeline. -+ [element-lab](https://github.com/datajoint/element-lab) -+ [element-animal](https://github.com/datajoint/element-animal) -+ [element-session](https://github.com/datajoint/element-session) -+ [element-volume](https://github.com/datajoint/element-volume) +## Experiment Flowchart -This repository provides a demonstration for setting up a workflow using these Elements -in the [pipeline script](workflow_session/pipeline.py)). +![flowchart](https://raw.githubusercontent.com/datajoint/element-zstack/main/images/flowchart.svg) -See the [Element Volume documentation](https://datajoint.com/docs/elements/element-volume/) -for the background information and development timeline. +## Data Pipeline Diagram -For more information on the DataJoint Elements project, please visit our -[general documentation](https://datajoint.com/docs/elements/). This work is supported by -the National Institutes of Health. +![pipeline](https://raw.githubusercontent.com/datajoint/element-zstack/main/images/pipeline.svg) + +## Getting Started + ++ [Interactive tutorial on GitHub Codespaces](#interactive-tutorial) + ++ Install Element ZStack from PyPI + + ```bash + pip install element-zstack + ``` + ++ [Documentation](https://datajoint.com/docs/elements/element-zstack) + +## Support + ++ If you need help getting started or run into any errors, please contact our team by email at support@datajoint.com. + +## Interactive Tutorial + ++ The easiest way to learn about DataJoint Elements is to use the tutorial notebook within the included interactive environment configured using [Dev Container](https://containers.dev/). 
+ +### Launch Environment + +Here are some options that provide a great experience: + +- Cloud-based Environment (*recommended*) + - Launch using [GitHub Codespaces](https://github.com/features/codespaces) using the `+` option which will `Create codespace on main` in the codebase repository on your fork with default options. For more control, see the `...` where you may create `New with options...`. + - Build time for a codespace is several minutes. This is done infrequently and cached for convenience. + - Start time for a codespace is less than 1 minute. This will pull the built codespace from cache when you need it. + - *Tip*: Each month, GitHub renews a [free-tier](https://docs.github.com/en/billing/managing-billing-for-github-codespaces/about-billing-for-github-codespaces#monthly-included-storage-and-core-hours-for-personal-accounts) quota of compute and storage. Typically we run into the storage limits before anything else since codespaces consume storage while stopped. It is best to delete Codespaces when not actively in use and recreate when needed. We'll soon be creating prebuilds to avoid larger build times. Once any portion of your quota is reached, you will need to wait for it to be reset at the end of your cycle or add billing info to your GitHub account to handle overages. + - *Tip*: GitHub auto names the codespace but you can rename the codespace so that it is easier to identify later. + +- Local Environment + - Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) + - Install [Docker](https://docs.docker.com/get-docker/) + - Install [VSCode](https://code.visualstudio.com/) + - Install the VSCode [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) + - `git clone` the codebase repository and open it in VSCode + - Use the `Dev Containers extension` to `Reopen in Container` (More info is in the `Getting started` included with the extension.) + +You will know your environment has finished loading once you either see a terminal open related to `Running postStartCommand` with a final message of `Done` or the `README.md` is opened in `Preview`. + +### Instructions + +1. We recommend you start by navigating to the `notebooks` directory on the left panel and go through the `tutorial.ipynb` Jupyter notebook. Execute the cells in the notebook to begin your walk through of the tutorial. + +1. Once you are done, see the options available to you in the menu in the bottom-left corner. For example, in codespace you will have an option to `Stop Current Codespace` but when running Dev Container on your own machine the equivalent option is `Reopen folder locally`. By default, GitHub will also automatically stop the Codespace after 30 minutes of inactivity. Once the codespace is no longer being used, we recommend deleting the codespace. 
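For readers who follow the README instructions outside the Dev Container, the workflow still needs a database connection and a data root. A minimal configuration sketch is below; the host, user, and password values mirror the `ENV` defaults in `.devcontainer/Dockerfile`, `volume_root_data_dir` is the `custom` key read by `workflow_zstack/paths.py`, and the directory shown is a placeholder for wherever the example data are mounted.

```python
import datajoint as dj

# Credentials mirror the Dev Container defaults (DJ_HOST / DJ_USER / DJ_PASS);
# substitute your own database settings when running elsewhere.
dj.config["database.host"] = "fakeservices.datajoint.io"
dj.config["database.user"] = "root"
dj.config["database.password"] = "simple"

# Root directory (or list of directories) searched by
# workflow_zstack.paths.get_volume_root_data_dir(); the path is a placeholder.
dj.config["custom"] = {
    "volume_root_data_dir": ["/workspaces/workflow-zstack/example_data"]
}

# Persist the settings to dj_local_conf.json in the current working directory.
dj.config.save_local()
```

With this configuration saved, the tutorial notebook can also be run on a local machine rather than in Codespaces.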
From cd10451f1511168a962bccaeea2fdc4676b90fe9 Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Sat, 6 May 2023 02:16:13 +0000 Subject: [PATCH 27/62] Update notebook --- notebooks/tutorial.ipynb | 294 ++------------------------------------- 1 file changed, 12 insertions(+), 282 deletions(-) diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index 60af172..a362713 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -40,16 +40,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import os\n", - "\n", - "# change to the upper level folder to detect dj_local_conf.json\n", - "if os.path.basename(os.getcwd()) == \"notebooks\":\n", - " os.chdir(\"..\")\n", - "\n", "import datajoint as dj\n", "import datetime\n", "import numpy as np" @@ -120,270 +114,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatch.CommonMask\n", - "\n", - "\n", - "volume_matching.VolumeMatch.CommonMask\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatch.VolumeMask\n", - "\n", - "\n", - "volume_matching.VolumeMatch.VolumeMask\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatch.CommonMask->volume_matching.VolumeMatch.VolumeMask\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask.Volume\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask.Volume\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask->volume_matching.VolumeMatchTask.Volume\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatch\n", - "\n", - "\n", - "volume_matching.VolumeMatch\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask->volume_matching.VolumeMatch\n", - "\n", - "\n", - "\n", - "\n", - "volume.Segmentation\n", - "\n", - "\n", - "volume.Segmentation\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume.Segmentation.Mask\n", - "\n", - "\n", - "volume.Segmentation.Mask\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume.Segmentation->volume.Segmentation.Mask\n", - "\n", - "\n", - "\n", - "\n", - "volume.Segmentation->volume_matching.VolumeMatchTask.Volume\n", - "\n", - "\n", - "\n", - "\n", - "volume.Segmentation.Mask->volume_matching.VolumeMatch.VolumeMask\n", - "\n", - "\n", - "\n", - "\n", - "volume.Volume\n", - "\n", - "\n", - "volume.Volume\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume.VoxelSize\n", - "\n", - "\n", - "volume.VoxelSize\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume.Volume->volume.VoxelSize\n", - "\n", - "\n", - "\n", - "\n", - "bossdb.VolumeUploadTask\n", - "\n", - "\n", - "bossdb.VolumeUploadTask\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume.Volume->bossdb.VolumeUploadTask\n", - "\n", - "\n", - "\n", - "\n", - "volume.SegmentationTask\n", - "\n", - "\n", - "volume.SegmentationTask\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume.Volume->volume.SegmentationTask\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatch.Transformation\n", - "\n", - "\n", - "volume_matching.VolumeMatch.Transformation\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "bossdb.BossDBURLs\n", - "\n", - "\n", - "bossdb.BossDBURLs\n", - "\n", - "\n", - "\n", - "\n", - "\n", - 
"bossdb.VolumeUploadTask->bossdb.BossDBURLs\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.Transformation\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatchTask.Volume->volume_matching.VolumeMatch.VolumeMask\n", - "\n", - "\n", - "\n", - "\n", - "volume.SegmentationParamSet\n", - "\n", - "\n", - "volume.SegmentationParamSet\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "volume.SegmentationParamSet->volume.SegmentationTask\n", - "\n", - "\n", - "\n", - "\n", - "volume_matching.VolumeMatch->volume_matching.VolumeMatch.Transformation\n", - "\n", - "\n", - "\n", - "\n", - "volume.SegmentationTask->volume.Segmentation\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject\n", - "\n", - "\n", - "subject.Subject\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "session.Session\n", - "\n", - "\n", - "session.Session\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "subject.Subject->session.Session\n", - "\n", - "\n", - "\n", - "\n", - "scan.Scan\n", - "\n", - "\n", - "scan.Scan\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "scan.Scan->volume.Volume\n", - "\n", - "\n", - "\n", - "\n", - "session.Session->scan.Scan\n", - "\n", - "\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "(\n", " dj.Diagram(subject.Subject)\n", @@ -457,7 +190,7 @@ "metadata": {}, "outputs": [], "source": [ - "subject.Subject.describe()" + "print(subject.Subject.describe())" ] }, { @@ -481,8 +214,7 @@ " sex=\"M\",\n", " subject_birth_date=\"2023-01-01\",\n", " subject_description=\"Cellpose segmentation of volumetric data.\",\n", - " ),\n", - " skip_duplicates=True,\n", + " )\n", ")\n", "subject.Subject()" ] @@ -500,7 +232,7 @@ "metadata": {}, "outputs": [], "source": [ - "session.Session.describe()" + "print(session.Session.describe())" ] }, { @@ -544,7 +276,6 @@ " session_key,\n", " session_datetime=datetime.datetime.now(),\n", " ),\n", - " skip_duplicates=True,\n", ")\n", "session.Session()" ] @@ -566,7 +297,7 @@ "metadata": {}, "outputs": [], "source": [ - "session.SessionDirectory.describe()" + "print(session.SessionDirectory.describe())" ] }, { @@ -607,7 +338,7 @@ "metadata": {}, "outputs": [], "source": [ - "scan.Scan.describe()" + "print(scan.Scan.describe())" ] }, { @@ -764,7 +495,7 @@ "metadata": {}, "outputs": [], "source": [ - "volume.SegmentationTask.describe()" + "print(volume.SegmentationTask.describe())" ] }, { @@ -807,7 +538,6 @@ " paramset_idx=1,\n", " task_mode=\"trigger\",\n", " ),\n", - " skip_duplicates=True,\n", ")" ] }, @@ -877,7 +607,7 @@ "metadata": {}, "outputs": [], "source": [ - "bossdb.VolumeUploadTask.describe()" + "print(bossdb.VolumeUploadTask.describe())" ] }, { @@ -996,7 +726,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.10" + "version": "3.9.16" }, "vscode": { "interpreter": { From a4926301b5567844e79fbf221dddabb994f2819f Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Wed, 10 May 2023 09:13:28 -0500 Subject: [PATCH 28/62] Update Dev Container --- .devcontainer/Dockerfile | 4 --- .devcontainer/devcontainer.json | 4 +-- .devcontainer/docker-compose.yaml | 4 +-- .devcontainer/local-data/devcontainer.json | 30 ++++++++++++++++++++ .devcontainer/local-data/docker-compose.yaml | 18 ++++++++++++ .devcontainer/local-test/devcontainer.json | 4 +-- .devcontainer/local-test/docker-compose.yaml | 11 ++++--- 7 
files changed, 58 insertions(+), 17 deletions(-) create mode 100644 .devcontainer/local-data/devcontainer.json create mode 100644 .devcontainer/local-data/docker-compose.yaml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 6c2ee6d..7ea8513 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -25,22 +25,18 @@ RUN \ # dircolors -b >> /home/vscode/.bashrc && \ # somehow fix colors apt-get clean COPY ./requirements.txt /tmp/ -COPY ./requirements_dev.txt /tmp/ RUN \ # workflow dependencies apt-get install gcc ffmpeg libsm6 libxext6 -y && \ pip install --no-cache-dir -r /tmp/requirements.txt && \ - pip install --no-cache-dir -r /tmp/requirements_dev.txt && \ # clean up rm /tmp/requirements.txt && \ - rm /tmp/requirements_dev.txt && \ apt-get clean ENV DJ_HOST fakeservices.datajoint.io ENV DJ_USER root ENV DJ_PASS simple -ENV DJ_PUBLIC_S3_MOUNT_PATH /workspaces/workflow-zstack/example_data ENV VOLUME_ROOT_DATA_DIR /workspaces/workflow-zstack/example_data ENV DATABASE_PREFIX neuro_ diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b2f1238..20bde70 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -6,8 +6,8 @@ "remoteEnv": { "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" }, - "onCreateCommand": "mkdir -p ${DJ_PUBLIC_S3_MOUNT_PATH} && pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${DJ_PUBLIC_S3_MOUNT_PATH} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", + "onCreateCommand": "mkdir -p ${VOLUME_ROOT_DATA_DIR} && pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", "hostRequirements": { "cpus": 8, "memory": "16gb", diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index 0601f6e..ce313b7 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -2,10 +2,8 @@ version: "3" services: app: extends: - file: ./local-test/docker-compose.yaml + file: ./local-data/docker-compose.yaml service: app - environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 devices: - /dev/fuse cap_add: diff --git a/.devcontainer/local-data/devcontainer.json b/.devcontainer/local-data/devcontainer.json new file mode 100644 index 0000000..02acd0a --- /dev/null +++ b/.devcontainer/local-data/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "Tutorial with local data", + "dockerComposeFile": "docker-compose.yaml", + "service": "app", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + "onCreateCommand": "pip install -e . 
&& MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f", + "hostRequirements": { + "cpus": 2, + "memory": "16gb", + "storage": "32gb" + }, + "forwardPorts": [ + 3306 + ], + "customizations": { + "settings": { + "python.pythonPath": "/usr/local/bin/python" + }, + "vscode": { + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/local-data/docker-compose.yaml b/.devcontainer/local-data/docker-compose.yaml new file mode 100644 index 0000000..7ec3f1f --- /dev/null +++ b/.devcontainer/local-data/docker-compose.yaml @@ -0,0 +1,18 @@ +version: "3" +services: + app: + cpus: 2 + mem_limit: 16g + environment: + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 + build: + context: ../.. + dockerfile: ./.devcontainer/Dockerfile + extra_hosts: + - fakeservices.datajoint.io:127.0.0.1 + volumes: + - ../../..:/workspaces:cached + - docker_data:/var/lib/docker # persist docker images + privileged: true # only because of dind +volumes: + docker_data: diff --git a/.devcontainer/local-test/devcontainer.json b/.devcontainer/local-test/devcontainer.json index 4b843ec..d8bbbfc 100644 --- a/.devcontainer/local-test/devcontainer.json +++ b/.devcontainer/local-test/devcontainer.json @@ -6,8 +6,8 @@ "remoteEnv": { "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" }, - "onCreateCommand": "pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f", + "onCreateCommand": "pip install -r ./requirements_dev.txt && pip install -e . && pip install -e ../element-zstack && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", "hostRequirements": { "cpus": 8, "memory": "16gb", diff --git a/.devcontainer/local-test/docker-compose.yaml b/.devcontainer/local-test/docker-compose.yaml index ffdd0e7..f036388 100644 --- a/.devcontainer/local-test/docker-compose.yaml +++ b/.devcontainer/local-test/docker-compose.yaml @@ -1,8 +1,10 @@ version: "3" services: app: - cpus: 4 - mem_limit: 8g + cpus: 2 + mem_limit: 16g + environment: + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 build: context: ../.. 
dockerfile: ./.devcontainer/Dockerfile @@ -10,7 +12,4 @@ services: - fakeservices.datajoint.io:127.0.0.1 volumes: - ../../..:/workspaces:cached - - docker_data:/var/lib/docker # persist docker images - privileged: true # only because of dind -volumes: - docker_data: \ No newline at end of file + privileged: true # only because of dind \ No newline at end of file From 836f40aa00b15f7af098ae92a232de1082a4d0c4 Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Wed, 10 May 2023 16:52:04 +0000 Subject: [PATCH 29/62] Update dev container --- .devcontainer/devcontainer.json | 4 ++-- .devcontainer/docker-compose.yaml | 2 ++ .devcontainer/local-data/devcontainer.json | 2 +- .devcontainer/local-data/docker-compose.yaml | 2 -- .devcontainer/local-test/devcontainer.json | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 20bde70..d7b9b0d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "Tutorial with remote data", + "name": "Tutorial environment", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", @@ -9,7 +9,7 @@ "onCreateCommand": "mkdir -p ${VOLUME_ROOT_DATA_DIR} && pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", "hostRequirements": { - "cpus": 8, + "cpus": 2, "memory": "16gb", "storage": "32gb" }, diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index ce313b7..d194e95 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -4,6 +4,8 @@ services: extends: file: ./local-data/docker-compose.yaml service: app + environment: + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 devices: - /dev/fuse cap_add: diff --git a/.devcontainer/local-data/devcontainer.json b/.devcontainer/local-data/devcontainer.json index 02acd0a..5cc077e 100644 --- a/.devcontainer/local-data/devcontainer.json +++ b/.devcontainer/local-data/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "Tutorial with local data", + "name": "Dev environment with local data", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", diff --git a/.devcontainer/local-data/docker-compose.yaml b/.devcontainer/local-data/docker-compose.yaml index 7ec3f1f..80d22c4 100644 --- a/.devcontainer/local-data/docker-compose.yaml +++ b/.devcontainer/local-data/docker-compose.yaml @@ -3,8 +3,6 @@ services: app: cpus: 2 mem_limit: 16g - environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 build: context: ../.. 
dockerfile: ./.devcontainer/Dockerfile diff --git a/.devcontainer/local-test/devcontainer.json b/.devcontainer/local-test/devcontainer.json index d8bbbfc..9725463 100644 --- a/.devcontainer/local-test/devcontainer.json +++ b/.devcontainer/local-test/devcontainer.json @@ -1,5 +1,5 @@ { - "name": "Local test environment with remote data", + "name": "Dev environment for local pytests with remote data", "dockerComposeFile": "docker-compose.yaml", "service": "app", "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", @@ -9,7 +9,7 @@ "onCreateCommand": "pip install -r ./requirements_dev.txt && pip install -e . && pip install -e ../element-zstack && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", "hostRequirements": { - "cpus": 8, + "cpus": 2, "memory": "16gb", "storage": "32gb" }, From 27d311cdd3cc1f45937f134ccdf4de1e6178a40f Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Wed, 10 May 2023 14:41:23 -0500 Subject: [PATCH 30/62] Update requirement --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e0c9d23..e90b60c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ element-animal>=0.1.5 element-lab>=0.2.0 element-session>=0.1.2 element-calcium-imaging>=0.5.5 -element-zstack>=0.1.0 +element-zstack @ git+https://github.com/kabilar/element-zstack.git intern>=1.4.1 ipykernel>=6.0.1 From adcfa2f67b66c61b7413a0ea446fe185d3ba76d3 Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Wed, 10 May 2023 14:51:08 -0500 Subject: [PATCH 31/62] Update requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e90b60c..e0c9d23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ element-animal>=0.1.5 element-lab>=0.2.0 element-session>=0.1.2 element-calcium-imaging>=0.5.5 -element-zstack @ git+https://github.com/kabilar/element-zstack.git +element-zstack>=0.1.0 intern>=1.4.1 ipykernel>=6.0.1 From 4d2ce8c70f38fe3c779d9b7fddc2d72aa3b375cd Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Wed, 10 May 2023 21:07:17 -0500 Subject: [PATCH 32/62] Update readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index eeb38b9..64fe1af 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # DataJoint Workflow for ZStack -The DataJoint Workflow for ZStack combines four DataJoint Elements for cell +The DataJoint Workflow for ZStack combines five DataJoint Elements for cell segmentation, volume registration, and cell matching - Elements Lab, -Animal, Session, and ZStack. DataJoint Elements collectively standardize and automate data collection and +Animal, Session, Calcium Imaging, and ZStack. DataJoint Elements collectively standardize and automate data collection and analysis for neuroscience experiments. Each Element is a modular pipeline for data storage and processing with corresponding database tables that can be combined with other Elements to assemble a fully functional pipeline. 
This repository also provides From 55dd878f720c8e1c65cfe276f40392a1019e2259 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Fri, 12 May 2023 21:50:10 -0500 Subject: [PATCH 33/62] Update notebook + workflow --- notebooks/tutorial.ipynb | 59 ++++++++++++------------------------- workflow_zstack/pipeline.py | 1 - 2 files changed, 19 insertions(+), 41 deletions(-) diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index a362713..a9ec665 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -565,10 +565,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, we can upload our data to BossDB. The `bossdb` schema contains two\n", - "tables to the upload tasks, and execute the upload. The structure of these\n", - "tables mirrors the `volume` schema.\n", - "\n", "Volumetric data uploaded to BossDB requires information about voxel size. The\n", "DataJoint table `volume.VoxelSize` can be used to insert this information for a\n", "given dataset." @@ -592,12 +588,27 @@ "volume.VoxelSize.insert1(dict(scan_key, width=0.001, height=0.001, depth=0.001))" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "upload_key = dict(\n", + " scan_key,\n", + " paramset_idx=1\n", + ")" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Now, we can define the upload task by naming the collection, experiment,\n", + "Finally, we can upload our data to BossDB. The `bossdb` schema contains two\n", + "tables to upload tasks, and execute the upload.\n", + "\n", + "Define the upload task by naming the collection, experiment,\n", "and channel where the data should be uploaded. " ] }, @@ -630,11 +641,10 @@ "chn_name = \"test1-seg\"\n", "bossdb.VolumeUploadTask.insert1(\n", " dict(\n", - " scan_key,\n", + " upload_key,\n", " collection_name=col_name,\n", " experiment_name=exp_name,\n", " channel_name=chn_name,\n", - " upload_type=\"annotation\",\n", " )\n", ")" ] @@ -654,25 +664,7 @@ "metadata": {}, "outputs": [], "source": [ - "upload_key = (bossdb.VolumeUploadTask & scan_key & \"upload_type = 'image'\").fetch(\"KEY\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bossdb.BossDBURLs.populate(upload_key)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bossdb.BossDBURLs()" + "bossdb.VolumeUpload.populate(upload_key)" ] }, { @@ -690,20 +682,7 @@ "metadata": {}, "outputs": [], "source": [ - "(bossdb.BossDBURLs & scan_key).fetch1(\"neuroglancer_url\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Explain how to get permission from APL to upload data. \n", - "\n", - "Create a schema to automatically generate neuroglancer link and insert into DJ\n", - "table. \n", - "\n", - "Include BossDBUpload in BossDBURLs as a computed/imported table. 
" + "(bossdb.VolumeUpload.WebAddress & upload_key & \"upload_type='image+annotation'\").fetch1(\"web_address\")" ] } ], diff --git a/workflow_zstack/pipeline.py b/workflow_zstack/pipeline.py index 1d40ee2..575dcdf 100644 --- a/workflow_zstack/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -16,7 +16,6 @@ "lab", "scan", "imaging", - "imaging_report", "session", "subject", "surgery", From 7e75c34394b7641772466324c889264721647a83 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Sat, 13 May 2023 15:06:55 -0500 Subject: [PATCH 34/62] Update notebook + tests --- notebooks/tutorial.ipynb | 246 ++++++++++++++++-------------- tests/test_export.py | 9 +- tests/test_pipeline_generation.py | 6 +- 3 files changed, 145 insertions(+), 116 deletions(-) diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index a9ec665..4633f39 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -7,35 +7,37 @@ "tags": [] }, "source": [ - "# Process volumetric fluorescent microscopy data with DataJoint Elements\n", + "# Manage volumetric fluorescent microscopy experiments with DataJoint Elements\n", "\n", - "This notebook will walk through processing volumetric two-photon calcium imaging data collected\n", - "from ScanImage and segmented with cellpose. While anyone can work through this\n", - "notebook to process volumetric fluorescent microscopy data through DataJoint's\n", - "`element-zstack` pipeline, for a detailed tutorial about the fundamentals of\n", - "DataJoint including table types, make functions, and querying, please see the\n", - "[DataJoint Tutorial](https://github.com/datajoint/datajoint-tutorials).\n", + "This notebook will walk through processing volumetric two-photon calcium imaging\n", + "data with the DataJoint Workflow for volumetric image processing. The workflow\n", + "currently supports volumetric data collected\n", + "from ScanImage. \n", "\n", "**Please note that uploading data to BossDB via this pipeline requires an API\n", "token which can be obtained by creating an account at\n", "[api.bossdb.io](https://api.bossdb.io). 
You will also need resource manager\n", "permissions from the team at [BossDB](https://bossdb.org).**\n", "\n", - "The DataJoint Python API and Element ZStack offer a lot of features to\n", - "support collaboration, automation, reproducibility, and visualizations.\n", - "For more information on these topics, please visit our documentation: \n", - " \n", - "- [DataJoint Core](https://datajoint.com/docs/core/): General principles\n", + "We will explain the following concepts as they relate to this workflow:\n", + "- What is an Element versus a Workflow?\n", + "- Plot the workflow with `dj.Diagram`\n", + "- Insert data into tables\n", + "- Query table contents\n", + "- Fetch table contents\n", + "- Run the workflow for your experiments\n", "\n", - "- DataJoint [Python](https://datajoint.com/docs/core/datajoint-python/) and\n", - " [MATLAB](https://datajoint.com/docs/core/datajoint-matlab/) APIs: in-depth reviews of\n", - " specifics\n", + "For detailed documentation and tutorials on general DwataJoint principles that support collaboration, automation, reproducibility, and visualizations:\n", "\n", - "- [DataJoint Element ZStack](https://datajoint.com/docs/elements/element-zstack/):\n", - " A modular pipeline for volumetric calcium imaging data analysis\n", + "- [DataJoint Interactive Tutorials](https://github.com/datajoint/datajoint-tutorials) - Fundamentals including table tiers, query operations, fetch operations, automated computations with the `make` function, etc.\n", "\n", + "- [DataJoint Core - Documentation](https://datajoint.com/docs/core/) - Relational data model principles\n", "\n", - "Let's start by importing the packages necessary to run this tutorial." + "- [DataJoint API for Python - Documentation](https://datajoint.com/docs/core/datajoint-python/)\n", + "\n", + "- [DataJoint Element for Volumetric Calcium Imaging - Documentation](https://datajoint.com/docs/elements/element-zstack/)\n", + "\n", + "Let's start by importing the packages necessary to run this workflow." ] }, { @@ -45,8 +47,7 @@ "outputs": [], "source": [ "import datajoint as dj\n", - "import datetime\n", - "import numpy as np" + "import datetime" ] }, { @@ -54,45 +55,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### The Basics:\n", - "\n", - "Any DataJoint workflow can be broken down into basic 3 parts:\n", + "## The DataJoint Workflow for volumetric Calcium Imaging is assembled from 5 DataJoint Elements\n", "\n", - "- `Insert`\n", - "- `Populate` (or process)\n", - "- `Query`\n", + "| Element | Source Code | Documentation | Description |\n", + "| -- | -- | -- | -- |\n", + "| Element Lab | [Link](https://github.com/datajoint/element-lab) | [Link](https://datajoint.com/docs/elements/element-lab) | Lab management related information, such as Lab, User, Project, Protocol, Source. |\n", + "| Element Animal | [Link](https://github.com/datajoint/element-animal) | [Link](https://datajoint.com/docs/elements/element-animal) | General animal metadata and surgery information. |\n", + "| Element Session | [Link](https://github.com/datajoint/element-session) | [Link](https://datajoint.com/docs/elements/element-session) | General information of experimental sessions. |\n", + "| Element Calcium Imaging | [Link](https://github.com/datajoint/element-calcium-imaging) | [Link](https://datajoint.com/docs/elements/element-calcium-imaging) | General information about the calcium imaging scan. 
|\n", + "| Element zstack | [Link](https://github.com/datajoint/element-zstack) | [Link](https://datajoint.com/docs/elements/element-zstack) | Volumetric data segmentation and export. |\n", "\n", - "In this demo we will:\n", - "- `Insert` metadata about an animal subject, recording session, and \n", - " parameters related to processing calcium imaging data through Suite2p.\n", - "- `Populate` tables with outputs of image processing including motion correction,\n", - " segmentation, mask classification, fluorescence traces and deconvolved activity traces.\n", - "- `Query` the processed data from the database and plot calcium activity traces.\n", + "Each workflow is composed of multiple Elements. Each Element contains 1 or more modules, and each module declares its own schema in the database.\n", "\n", - "Each of these topics will be explained thoroughly in this notebook." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Workflow diagram\n", + "The Elements are imported within the `workflow_zstack.pipeline` script.\n", "\n", - "This workflow is assembled from 5 DataJoint elements:\n", - "+ [element-lab](https://github.com/datajoint/element-lab)\n", - "+ [element-animal](https://github.com/datajoint/element-animal)\n", - "+ [element-session](https://github.com/datajoint/element-session)\n", - "+ [element-calcium-imaging](https://github.com/datajoint/element-calcium-imaging)\n", - "+ [element-zstack](https://github.com/datajoint/element-zstack)\n", + "By importing the modules for the first time, the schemas and tables will be created in the database. Once created, importing modules will not create schemas and tables again, but the existing schemas/tables can be accessed.\n", "\n", - "Each element declares its own schema in the database. These schemas can be imported like\n", - "any other Python package. This workflow is composed of schemas from each of the Elements\n", - "above and correspond to a module within `workflow_zstack.pipeline`.\n", + "The schema diagram (shown below) is a good reference for understanding the order of the tables within the workflow.\n", "\n", - "The schema diagram is a good reference for understanding the order of the tables\n", - "within the workflow, as well as the corresponding table type.\n", - "Let's activate the elements and view the schema diagram." + "Let's activate the Elements." ] }, { @@ -112,6 +93,16 @@ ")" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Diagram\n", + "\n", + "We can plot the diagram of tables within multiple schemas and their dependencies using `dj.Diagram()`. For details, see the [documentation](https://datajoint.com/docs/core/concepts/getting-started/diagrams/)." + ] + }, { "cell_type": "code", "execution_count": null, @@ -133,38 +124,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Diagram Breakdown\n", + "While the diagram above seems complex at first, it becomes more clear when it's approached as a hierarchy of tables that define the order in which the workflow expects to receive data in each of its tables.\n", "\n", - "While the diagram above seems complex at first, it becomes more clear when it's\n", - "approached as a hierarchy of tables that **define the order** in which the\n", - "workflow **expects to receive data** in each of its tables. 
\n", + "The tables higher up in the diagram such as `subject.Subject()` should be the first to receive data.\n", "\n", - "- Tables with a green, or rectangular shape expect to receive data manually using the\n", - "`insert()` function. \n", - "- The tables higher up in the diagram such as `subject.Subject()`\n", - "should be the first to receive data. This ensures data integrity by preventing orphaned\n", - "data within DataJoint schemas. \n", - "- Tables with a purple oval or red circle can be automatically filled with relevant data\n", - " by calling `populate()`. For example `volume.Segmentation` and its part-table\n", - " `volume.Segmentation.Mask` are both populated with `volume.Segmentation.populate()`.\n", - "- Tables connected by a solid line depend on attributes (entries) in the table\n", - " above it.\n", + "Data is manually entered into the green, rectangular tables with the `insert1()` method.\n", "\n", - "#### Table Types\n", + "Tables connected by a solid line depend on entries from the table above it.\n", "\n", - "There are 5 table types in DataJoint. Each of these appear in the diagram above.\n", + "There are 5 table tiers in DataJoint. Some of these tables appear in the diagram above.\n", "\n", - "- **Manual table**: green box, manually inserted table, expect new entries daily, e.g. `Subject`, `Scan`. \n", - "- **Lookup table**: gray box, pre inserted table, commonly used for general facts or parameters. e.g. `bossdb.UploadParamset`, `volume.SegmentationParamset`. \n", - "- **Imported table**: blue oval, auto-processing table, the processing depends\n", - " on the importing of external files. e.g. process of obtaining the `Volume` data requires\n", - " raw data stored outside the database. \n", - "- **Computed table**: red circle, auto-processing table, the processing does not\n", - " depend on files external to the database, commonly used for computations such\n", - " as `volume.Segmentation`, `volume_match.VolumeMatch`. \n", - "- **Part table**: plain text, as an appendix to the master table, all the part\n", - " entries of a given master entry represent a intact set of the master entry.\n", - " e.g. Masks of `Segmentation`." + "| Table tier | Color and shape | Description |\n", + "| -- | -- | -- |\n", + "| Manual table | Green box | Data entered from outside the pipeline, either by hand or with external helper scripts. |\n", + "| Lookup table | Gray box | Small tables containing general facts and settings of the data pipeline; not specific to any experiment or dataset. | \n", + "| Imported table | Blue oval | Data ingested automatically inside the pipeline but requiring access to data outside the pipeline. |\n", + "| Computed table | Red circle | Data computed automatically entirely inside the pipeline. |\n", + "| Part table | Plain text | Part tables share the same tier as their master table. |" ] }, { @@ -172,16 +148,47 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Starting the workflow: Insert\n", + "## Insert entries into manual tables\n", "\n", - "### Insert entries into manual tables\n", + "In this section, we will insert metadata about an animal subject, experiment session, and optogenetic stimulation parameters.\n", "\n", - "To view details about a table's dependencies and attributes, use functions `.describe()`\n", - "and `.heading`, respectively.\n", + "Let's start with the first schema and table in the schema diagram (i.e. 
`subject.Subject` table).\n", "\n", - "Let's start with the first table in the schema diagram (the `subject` table) and view\n", - "the table attributes we need to insert. There are two ways you can do this: *run each\n", - "of the two cells below*" + "Each module (e.g. `subject`) contains a schema object that enables interaction with the schema in the database." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "subject.schema" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The table classes in the module correspond to a table in the database." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "subject.Subject()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can view the table dependencies and the attributes we need to insert by using the functions `.describe()` and `.heading`. The `describe()` function displays the table definition with foreign key references and the `heading` function displays the attributes of the table definition. These are particularly useful functions if you are new to DataJoint Elements and are unsure of the attributes required for each table." ] }, { @@ -202,6 +209,14 @@ "subject.Subject.heading" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will insert data into the `subject.Subject` table. " + ] + }, { "cell_type": "code", "execution_count": null, @@ -220,10 +235,11 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Let's repeat the steps above for the `Session` table and see how the output varies between `.describe` and `.heading`. " + "Let's continue inserting in the other manual tables. The `Session` table is next." ] }, { @@ -250,15 +266,11 @@ "metadata": {}, "source": [ "The cells above show the dependencies and attributes for the `session.Session` table.\n", - "Notice that `describe` shows the dependencies of the table on upstream tables. The\n", - "`Session` table depends on the upstream `Subject` table. \n", "\n", - "Whereas `heading` lists all the attributes of the `Session` table, regardless of\n", - "whether they are declared in an upstream table. \n", + "Notice that `describe` shows the dependencies of the table on upstream tables (i.e. foreign key references). The `Session` table depends on the upstream `Subject` table. \n", "\n", - "Here we will demonstrate a very useful way of inserting data by assigning the dictionary\n", - "to a variable `session_key`. This variable can be used to insert entries into tables that\n", - "contain the `Session` table as one of its attributes." + "Whereas `heading` lists all the attributes of the `Session` table, regardless of\n", + "whether they are declared in an upstream table." ] }, { @@ -378,7 +390,7 @@ "\n", "`volume.Volume` is the first table in the pipeline that can be populated automatically.\n", "If a table contains a part table, this part table is also populated during the\n", - "`populate()` call. `populate()` takes several arguments including the a session\n", + "`populate()` call. `populate()` takes several arguments including a session\n", "key. 
This key restricts `populate()` to performing the operation on the session\n", "of interest rather than all possible sessions which could be a time-intensive\n", "process for databases with lots of entries.\n", @@ -481,8 +493,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now that we've inserted cellpose parameters into the `SegmentationParamSet` table,\n", - "we're almost ready to run image processing. DataJoint uses a `SegmentationTask` table to\n", + "DataJoint uses a `SegmentationTask` table to\n", "manage which `Volume` and `SegmentationParamSet` should be used during processing. \n", "\n", "This table is important for defining several important aspects of\n", @@ -588,16 +599,23 @@ "volume.VoxelSize.insert1(dict(scan_key, width=0.001, height=0.001, depth=0.001))" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's define an `upload_key` to easily upload the volume to BossDB via this\n", + "workflow. The `upload_key` combines information about the current scan from\n", + "`scan.Scan` and the `paramset_idx` from `SegmentationParamSet`." + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "upload_key = dict(\n", - " scan_key,\n", - " paramset_idx=1\n", - ")" + "upload_key = dict(scan_key, paramset_idx=1)" ] }, { @@ -605,10 +623,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, we can upload our data to BossDB. The `bossdb` schema contains two\n", - "tables to upload tasks, and execute the upload.\n", + "Finally, we can upload the volume and its corresponding segmentation data to\n", + "BossDB and generate a neuroglancer link to visualize the data ..\n", "\n", - "Define the upload task by naming the collection, experiment,\n", + "The first table is `VolumeUploadTask`. Let's define the upload task by naming the collection, experiment,\n", "and channel where the data should be uploaded. " ] }, @@ -650,12 +668,16 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "attachments": {}, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "bossdb.VolumeUploadTask()" + "Finally, we can upload data. \n", + "\n", + "As a reminder, uploading data to BossDB via this pipeline requires an API\n", + "token which can be obtained by creating an account at\n", + "[api.bossdb.io](https://api.bossdb.io). You will also need resource manager\n", + "permissions from the team at [BossDB](https://bossdb.org)." 
] }, { @@ -682,7 +704,9 @@ "metadata": {}, "outputs": [], "source": [ - "(bossdb.VolumeUpload.WebAddress & upload_key & \"upload_type='image+annotation'\").fetch1(\"web_address\")" + "(bossdb.VolumeUpload.WebAddress & upload_key & \"upload_type='image+annotation'\").fetch1(\n", + " \"web_address\"\n", + ")" ] } ], @@ -705,7 +729,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.10" }, "vscode": { "interpreter": { diff --git a/tests/test_export.py b/tests/test_export.py index 3112180..dd3cae7 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -3,18 +3,21 @@ def test_export(pipeline): bossdb = pipeline["bossdb"] scan_key = (scan.Scan & "subject = 'subject1'").fetch1("KEY") + upload_key = dict( + scan_key, + paramset_idx=1, + ) col_name = "dataJointTestUpload" exp_name = "CaImagingFinal" chn_name = "test1" bossdb.VolumeUploadTask.insert1( dict( - scan_key, + upload_key, collection_name=col_name, experiment_name=exp_name, channel_name=chn_name, - upload_type="image", ), skip_duplicates=True ) - bossdb.BossDBURLs.populate(scan_key) \ No newline at end of file + bossdb.VolumeUpload.populate(upload_key) \ No newline at end of file diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py index a81d53b..6f50420 100644 --- a/tests/test_pipeline_generation.py +++ b/tests/test_pipeline_generation.py @@ -16,11 +16,13 @@ def test_generate_pipeline(pipeline): assert all( [ - bossdb.VolumeUploadTask.full_table_name in bossdb.BossDBURLs.parents(), - volume.Volume.full_table_name in bossdb.VolumeUploadTask.parents(), + bossdb.VolumeUploadTask.full_table_name in bossdb.VolumeUpload.parents(), + volume.Segmentation.full_table_name in bossdb.VolumeUploadTask.parents(), ] ) assert "confidence" in ( volume_matching.VolumeMatch.VolumeMask.heading.secondary_attributes ) + + assert "web_adress" in (bossdb.VolumeUpload.WebAddress.heading.secondary_attributes) From 3ae484eaf26ffdc0ffdf1984b983c92cc54f47e6 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Mon, 15 May 2023 14:53:28 -0500 Subject: [PATCH 35/62] Apply suggestions from code review Co-authored-by: Kabilar Gunalan --- README.md | 4 ++-- tests/conftest.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 64fe1af..32cb857 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# DataJoint Workflow for ZStack +# DataJoint Workflow for ZStack Imaging -The DataJoint Workflow for ZStack combines five DataJoint Elements for cell +The DataJoint Workflow for ZStack Imaging combines five DataJoint Elements for cell segmentation, volume registration, and cell matching - Elements Lab, Animal, Session, Calcium Imaging, and ZStack. DataJoint Elements collectively standardize and automate data collection and analysis for neuroscience experiments. 
Each Element is a modular pipeline for data diff --git a/tests/conftest.py b/tests/conftest.py index 0b3e7ff..f8610d8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,7 +18,7 @@ pathlib.Path("../example_data").mkdir(exist_ok=True) sessions_dirs = [ - "sub1", + "subject1", ] From 2836ab6f3590531b459e1b3d7d5f561125945aff Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Mon, 15 May 2023 14:56:02 -0500 Subject: [PATCH 36/62] Remove `dj_config` from pytests --- tests/conftest.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0b3e7ff..a20aeb4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -81,34 +81,6 @@ def null_function(*args, **kwargs): # ---------------------- FIXTURES ---------------------- -@pytest.fixture(autouse=True, scope="session") -def dj_config(setup): - """If dj_local_config exists, load""" - if pathlib.Path("./dj_local_conf.json").exists(): - dj.config.load("./dj_local_conf.json") - dj.config.update( - { - "safemode": False, - "database.host": os.environ.get("DJ_HOST") or dj.config["database.host"], - "database.password": os.environ.get("DJ_PASS") - or dj.config["database.password"], - "database.user": os.environ.get("DJ_USER") or dj.config["database.user"], - "custom": { - "database.prefix": ( - os.environ.get("DATABASE_PREFIX") - or dj.config["custom"]["database.prefix"] - ), - "volume_root_data_dir": ( - os.environ.get("VOLUME_ROOT_DATA_DIR").split(",") - if os.environ.get("VOLUME_ROOT_DATA_DIR") - else dj.config["custom"]["volume_root_data_dir"] - ), - }, - } - ) - return - - @pytest.fixture(scope="session") def test_data(dj_config): test_data_exists = True From 758a11604a1e9dab76b7478b187b90b3e4ba7675 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Mon, 15 May 2023 15:29:20 -0500 Subject: [PATCH 37/62] Apply suggestions from code review Co-authored-by: Kabilar Gunalan --- tests/conftest.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 46a559a..f80648b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,42 +15,12 @@ logger = logging.getLogger("datajoint") -pathlib.Path("../example_data").mkdir(exist_ok=True) sessions_dirs = [ "subject1", ] -def pytest_addoption(parser): - """ - Permit constants when calling pytest at command line e.g., pytest --dj-verbose False - - Arguments: - --dj-verbose (bool): Default True. Pass print statements from Elements. - --dj-teardown (bool): Default True. Delete pipeline on close. - --dj-datadir (str): Default ./tests/user_data. Relative path of test CSV data. 
- """ - parser.addoption( - "--dj-verbose", - action="store", - default="True", - help="Verbose for dj items: True or False", - choices=("True", "False"), - ) - parser.addoption( - "--dj-teardown", - action="store", - default="True", - help="Verbose for dj items: True or False", - choices=("True", "False"), - ) - parser.addoption( - "--dj-datadir", - action="store", - default="./tests/user_data", - help="Relative path for saving tests data", - ) @pytest.fixture(autouse=True, scope="session") From 5fe35dce5a440cd0a5555aabd3e7f4649a46420a Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Mon, 15 May 2023 15:30:47 -0500 Subject: [PATCH 38/62] Add BossDB API info to `test_export` --- tests/test_export.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_export.py b/tests/test_export.py index dd3cae7..b26c8c7 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -1,4 +1,11 @@ def test_export(pipeline): + """Test export to BossDB. + + Please note that uploading data to BossDB via this pipeline requires an API + token which can be obtained by creating an account at + https://api.bossdb.io. You will also need resource manager permissions from the team at https://bossdb.org. + """ + scan = pipeline["scan"] bossdb = pipeline["bossdb"] From fb43b49d908968c1dcfc8065d7d090159729def4 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Tue, 16 May 2023 10:19:35 -0500 Subject: [PATCH 39/62] Apply suggestions from code review Co-authored-by: Kabilar Gunalan --- tests/conftest.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index f80648b..c95db87 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,22 +23,11 @@ -@pytest.fixture(autouse=True, scope="session") -def setup(request): - """Take passed commandline variables, set as global""" - global verbose, _tear_down, test_user_data_dir, verbose_context - - verbose = value_to_bool(request.config.getoption("--dj-verbose")) - _tear_down = value_to_bool(request.config.getoption("--dj-teardown")) - test_user_data_dir = Path(request.config.getoption("--dj-datadir")) - test_user_data_dir.mkdir(exist_ok=True) - if not verbose: logging.getLogger("datajoint").setLevel(logging.CRITICAL) verbose_context = nullcontext() if verbose else QuietStdOut() - yield verbose_context, verbose # -------------------- HELPER CLASS -------------------- From c9c44c617dad5f5910364ba313029f9f3bbe3f49 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 16 May 2023 10:36:04 -0500 Subject: [PATCH 40/62] Remove old functions from conftest.py --- tests/conftest.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index c95db87..ce2f68e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,23 +20,6 @@ "subject1", ] - - - - if not verbose: - logging.getLogger("datajoint").setLevel(logging.CRITICAL) - - verbose_context = nullcontext() if verbose else QuietStdOut() - - - -# -------------------- HELPER CLASS -------------------- - - -def null_function(*args, **kwargs): - pass - - # ---------------------- FIXTURES ---------------------- @@ -66,14 +49,6 @@ def pipeline(): "bossdb": pipeline.bossdb, } - if _tear_down: - with verbose_context: - pipeline.subject.Subject.delete() - - -@pytest.fixture(scope="session") -def testdata_paths(): - return {"test1_stitched": "sub1"} @pytest.fixture(scope="session") def insert_upstream(pipeline): From ae643b6fd85eef369d96e59c5e1a3a72f2b045e6 Mon Sep 17 
00:00:00 2001 From: Kabilar Gunalan Date: Tue, 16 May 2023 12:26:19 -0500 Subject: [PATCH 41/62] Update requirements --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e0c9d23..c69e8fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ element-animal>=0.1.5 element-lab>=0.2.0 element-session>=0.1.2 element-calcium-imaging>=0.5.5 -element-zstack>=0.1.0 +element-zstack @ git+https://github.com/kabilar/element-zstack intern>=1.4.1 ipykernel>=6.0.1 From 5f9ec99d985950d210ae1914ff90a20dd529c22c Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 16 May 2023 12:27:24 -0500 Subject: [PATCH 42/62] Update requirements for Dev Container --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e0c9d23..05b012c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ element-animal>=0.1.5 element-lab>=0.2.0 element-session>=0.1.2 element-calcium-imaging>=0.5.5 -element-zstack>=0.1.0 +element-zstack @ git+https://github.com/kushalbakshi/element-zstack intern>=1.4.1 ipykernel>=6.0.1 From adae8bd52a3ade9d116148a19132a6a62886c5a4 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 16 May 2023 13:34:35 -0500 Subject: [PATCH 43/62] Update S3 location --- .devcontainer/docker-compose.yaml | 2 +- .devcontainer/local-test/docker-compose.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml index d194e95..e8107cd 100644 --- a/.devcontainer/docker-compose.yaml +++ b/.devcontainer/docker-compose.yaml @@ -5,7 +5,7 @@ services: file: ./local-data/docker-compose.yaml service: app environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 devices: - /dev/fuse cap_add: diff --git a/.devcontainer/local-test/docker-compose.yaml b/.devcontainer/local-test/docker-compose.yaml index f036388..add45d0 100644 --- a/.devcontainer/local-test/docker-compose.yaml +++ b/.devcontainer/local-test/docker-compose.yaml @@ -4,7 +4,7 @@ services: cpus: 2 mem_limit: 16g environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-zstack-v1 + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 build: context: ../.. 
dockerfile: ./.devcontainer/Dockerfile From 8e54e59e0238e36599fcb93b6571a3e84edd4ec3 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 16 May 2023 14:22:54 -0500 Subject: [PATCH 44/62] Update __init__.py to get volume_root_data_dir --- workflow_zstack/__init__.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/workflow_zstack/__init__.py b/workflow_zstack/__init__.py index 111bd2f..8d01cc8 100644 --- a/workflow_zstack/__init__.py +++ b/workflow_zstack/__init__.py @@ -1,6 +1,17 @@ +import os import datajoint as dj if "custom" not in dj.config: dj.config["custom"] = {} -db_prefix = dj.config["custom"].get("database.prefix", "") +# overwrite dj.config['custom'] values with environment variables if available + +dj.config["custom"]["database.prefix"] = os.getenv( + "DATABASE_PREFIX", dj.config["custom"].get("database.prefix", "") +) + +dj.config["custom"]["volume_root_data_dir"] = os.getenv( + "VOLUME_ROOT_DATA_DIR", dj.config["custom"].get("volume_root_data_dir", "") +) + +db_prefix = dj.config["custom"].get("database.prefix", "") \ No newline at end of file From ab844202f3bf3a565a57584cdb6a686a77e6fc0b Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 16 May 2023 14:47:22 -0500 Subject: [PATCH 45/62] Update `paths.py` to work with Codespaces --- workflow_zstack/paths.py | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/workflow_zstack/paths.py b/workflow_zstack/paths.py index 40cab54..73f8897 100644 --- a/workflow_zstack/paths.py +++ b/workflow_zstack/paths.py @@ -6,33 +6,8 @@ from element_session import session_with_id as session -def get_session_directory(session_key: dict) -> str: - """Return relative path from SessionDirectory table given key - - Args: - session_key (dict): Key uniquely identifying a session - - Returns: - path (str): Relative path of session directory - """ - from .pipeline import session - - # NOTE: fetch (vs. fetch1) permits dir to not exist, may be the case when saving - # slices directly from from BossDB into inferred dir based on BossDB structure - session_dir = (session.SessionDirectory & session_key).fetch("session_dir") - - if len(session_dir) > 1: - raise ValueError( - f"Found >1 directory for this key:\n\t{session_key}\n\t{session_dir}" - ) - elif len(session_dir) == 1: - return session_dir[0] - else: - return None - - def get_volume_root_data_dir() -> List[str]: - """Return root directory for ephys from 'vol_root_data_dir' in dj.config + """Return root directory for volumetric data in dj.config Returns: path (any): List of path(s) if available or None @@ -43,7 +18,7 @@ def get_volume_root_data_dir() -> List[str]: elif not isinstance(vol_root_dirs, Sequence): return list(vol_root_dirs) else: - return pathlib.Path(vol_root_dirs[0]) + return pathlib.Path(vol_root_dirs) def _find_files_by_type(scan_key, filetype: str): @@ -68,7 +43,7 @@ def get_volume_tif_file(scan_key): FileNotFoundError: If the session directory or tiff files are not found. 
""" # Folder structure: root / subject / session / .tif (raw) - sess_dir, tiff_filepaths = _find_files_by_type(scan_key, "*_stitched.tif") + sess_dir, tiff_filepaths = _find_files_by_type(scan_key, "*.tif") if tiff_filepaths: return tiff_filepaths else: From 41beccfb2f14f9bb5a40a9958f060c1b971a0881 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Tue, 16 May 2023 20:08:45 +0000 Subject: [PATCH 46/62] Update `paths.py` and `pipeline.py` to fix workflow errors --- workflow_zstack/paths.py | 2 +- workflow_zstack/pipeline.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/workflow_zstack/paths.py b/workflow_zstack/paths.py index 73f8897..504cc61 100644 --- a/workflow_zstack/paths.py +++ b/workflow_zstack/paths.py @@ -27,7 +27,7 @@ def _find_files_by_type(scan_key, filetype: str): get_volume_root_data_dir(), pathlib.Path((session.SessionDirectory & scan_key).fetch1("session_dir")), ) - return sess_dir, [fp.as_posix() for fp in sess_dir.rglob(filetype)] + return sess_dir, [fp.as_posix() for fp in sess_dir.rglob(filetype)][0] def get_volume_tif_file(scan_key): diff --git a/workflow_zstack/pipeline.py b/workflow_zstack/pipeline.py index 575dcdf..a7a4aac 100644 --- a/workflow_zstack/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -8,7 +8,7 @@ from element_zstack import volume, volume_matching, bossdb from . import db_prefix -from .paths import get_session_directory, get_volume_root_data_dir, get_volume_tif_file +from .paths import get_volume_root_data_dir, get_volume_tif_file from .reference import Device __all__ = [ @@ -23,7 +23,6 @@ "volume_matching", "bossdb", "Device", - "get_session_directory", "get_volume_root_data_dir", "get_volume_tif_file", ] From 9d89473f200ad1237c12e7a093edb6b0efabd124 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Tue, 16 May 2023 15:23:26 -0500 Subject: [PATCH 47/62] Update workflow_zstack/paths.py Co-authored-by: Kabilar Gunalan --- workflow_zstack/paths.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/workflow_zstack/paths.py b/workflow_zstack/paths.py index 504cc61..f2ef680 100644 --- a/workflow_zstack/paths.py +++ b/workflow_zstack/paths.py @@ -21,15 +21,6 @@ def get_volume_root_data_dir() -> List[str]: return pathlib.Path(vol_root_dirs) -def _find_files_by_type(scan_key, filetype: str): - """Uses roots + relative SessionDirectory, returns list of files with filetype""" - sess_dir = find_full_path( - get_volume_root_data_dir(), - pathlib.Path((session.SessionDirectory & scan_key).fetch1("session_dir")), - ) - return sess_dir, [fp.as_posix() for fp in sess_dir.rglob(filetype)][0] - - def get_volume_tif_file(scan_key): """Retrieve the list of ScanImage files associated with a given Scan. @@ -40,10 +31,16 @@ def get_volume_tif_file(scan_key): path (list): Absolute path(s) of the scan files. Raises: - FileNotFoundError: If the session directory or tiff files are not found. + FileNotFoundError: If the tiff file(s) are not found. 
""" - # Folder structure: root / subject / session / .tif (raw) - sess_dir, tiff_filepaths = _find_files_by_type(scan_key, "*.tif") + # Folder structure: root / subject / session / .tif (raw) + sess_dir = find_full_path( + get_volume_root_data_dir(), + pathlib.Path((session.SessionDirectory & scan_key).fetch1("session_dir")), + ) + + tiff_filepaths = [fp.as_posix() for fp in sess_dir.rglob("*.tif")][0] + if tiff_filepaths: return tiff_filepaths else: From cda192ffc66cccbea91be280d7f140dc474c3d4c Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 16 May 2023 16:05:07 -0500 Subject: [PATCH 48/62] Lost transaction error: Attempted fix 1 --- docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 0a2c1fc..f9765c9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -11,5 +11,5 @@ services: healthcheck: test: [ "CMD", "mysqladmin", "ping", "-h", "localhost" ] timeout: 15s - retries: 10 + retries: 100 interval: 15s \ No newline at end of file From e8368c4941115631b3c387ccee74c445a218d0e8 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Tue, 16 May 2023 16:05:47 -0500 Subject: [PATCH 49/62] Update session directory in notebook --- notebooks/tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index 4633f39..6579f9b 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -328,7 +328,7 @@ "outputs": [], "source": [ "session.SessionDirectory.insert1(\n", - " dict(session_key, session_dir=\"sub1\"),\n", + " dict(session_key, session_dir=\"subject1/session1\"),\n", " skip_duplicates=True,\n", ")\n", "session.SessionDirectory()" From b35d7cbf7bb8256508efe232ea7c6b115cc6d86b Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Tue, 16 May 2023 16:52:15 -0500 Subject: [PATCH 50/62] Apply suggestions from code review Co-authored-by: Kabilar Gunalan --- tests/conftest.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ce2f68e..b330381 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,12 +1,11 @@ import logging import os import pathlib -from contextlib import nullcontext from pathlib import Path import datajoint as dj import pytest -from element_interface.utils import QuietStdOut, find_full_path, value_to_bool +from element_interface.utils import find_full_path from workflow_zstack.paths import get_volume_root_data_dir @@ -24,15 +23,14 @@ @pytest.fixture(scope="session") -def test_data(dj_config): +def test_data(): test_data_exists = True for p in sessions_dirs: try: find_full_path(get_volume_root_data_dir, p).as_posix() - except FileNotFoundError: - test_data_exists = False - break + except FileNotFoundError as e: + print(e) @pytest.fixture(autouse=True, scope="session") @@ -81,7 +79,7 @@ def insert_upstream(pipeline): ) session.SessionDirectory.insert1( - dict(session_key, session_dir="sub1"), + dict(session_key, session_dir="subject1/session1"), skip_duplicates=True, ) scan.Scan.insert1( From aeec2f84c62ff37367bf176e24c9e14aa74245da Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Tue, 16 May 2023 17:13:26 -0500 Subject: [PATCH 51/62] Update tests/conftest.py Co-authored-by: Kabilar Gunalan --- tests/conftest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index b330381..99f3fe5 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,6 @@ @pytest.fixture(scope="session") def test_data(): - test_data_exists = True for p in sessions_dirs: try: From 4796a9db872f5a1f8287b7c989a8ddae77941840 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Wed, 17 May 2023 14:41:37 +0000 Subject: [PATCH 52/62] Add assertion when returning tif files --- workflow_zstack/paths.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workflow_zstack/paths.py b/workflow_zstack/paths.py index f2ef680..e53f4af 100644 --- a/workflow_zstack/paths.py +++ b/workflow_zstack/paths.py @@ -39,9 +39,10 @@ def get_volume_tif_file(scan_key): pathlib.Path((session.SessionDirectory & scan_key).fetch1("session_dir")), ) - tiff_filepaths = [fp.as_posix() for fp in sess_dir.rglob("*.tif")][0] + tiff_filepaths = [fp.as_posix() for fp in sess_dir.rglob("*.tif")] if tiff_filepaths: - return tiff_filepaths + assert len(tiff_filepaths) == 1, "More than 1 `.tif` file in file path. Please ensure the session directory contains only 1 image file." + return tiff_filepaths[0] else: raise FileNotFoundError(f"No tiff file found in {sess_dir}") From eb3bd43da474db86c03965d5663b1d5f51982004 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Wed, 17 May 2023 15:04:35 +0000 Subject: [PATCH 53/62] Update pytest options --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4298ca6..24c287d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.pytest.ini_options] minversion = "6.0" -addopts = "--capture=tee-sys -p no:warnings --dj-teardown False --dj-verbose True --sw --cov=element_zstack --cov-report term-missing" +addopts = "--capture=tee-sys -p no:warnings --sw" # Verbosity: -v for pytest more verbose # Warnings: -p no:warnings to disable # Stepwise: --sw to restart pytest at last failure point @@ -8,8 +8,6 @@ addopts = "--capture=tee-sys -p no:warnings --dj-teardown False --dj-verbose Tru # Capturing output: -s for none, --capture=tee-sys for both stdout and stderr # Coverage: --cov={package} - package for which we're measuring coverage # Coverage report: --cov-report term-missing send report to stdout with line numbers of missed -# Custom teardown: --dj-teardown {True, False} to teardown tables, -# Custom verbose: --dj-verbose {True,False} print out dj info like table inserts testpaths = [ "tests", ] From cf85d234a6cd48fc3e6b6d7703b01ec307ccc2ea Mon Sep 17 00:00:00 2001 From: Kabilar Gunalan Date: Wed, 17 May 2023 18:06:37 +0000 Subject: [PATCH 54/62] Update requirements --- .devcontainer/Dockerfile | 1 - requirements.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 7ea8513..364338f 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -27,7 +27,6 @@ RUN \ COPY ./requirements.txt /tmp/ RUN \ # workflow dependencies - apt-get install gcc ffmpeg libsm6 libxext6 -y && \ pip install --no-cache-dir -r /tmp/requirements.txt && \ # clean up rm /tmp/requirements.txt && \ diff --git a/requirements.txt b/requirements.txt index 05b012c..c69e8fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ element-animal>=0.1.5 element-lab>=0.2.0 element-session>=0.1.2 element-calcium-imaging>=0.5.5 -element-zstack @ git+https://github.com/kushalbakshi/element-zstack +element-zstack @ 
git+https://github.com/kabilar/element-zstack intern>=1.4.1 ipykernel>=6.0.1 From 6c1c08aecc7d85a77dd78c1950c73301df84c562 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Wed, 17 May 2023 13:27:12 -0500 Subject: [PATCH 55/62] Update workflow_zstack/paths.py Co-authored-by: Kabilar Gunalan --- workflow_zstack/paths.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow_zstack/paths.py b/workflow_zstack/paths.py index e53f4af..9da17a0 100644 --- a/workflow_zstack/paths.py +++ b/workflow_zstack/paths.py @@ -22,13 +22,13 @@ def get_volume_root_data_dir() -> List[str]: def get_volume_tif_file(scan_key): - """Retrieve the list of ScanImage files associated with a given Scan. + """Retrieve the ScanImage file associated with a given Scan. Args: scan_key (dict): Primary key from Scan. Returns: - path (list): Absolute path(s) of the scan files. + path (str): Absolute path of the scan file. Raises: FileNotFoundError: If the tiff file(s) are not found. From 90ecb3446f254ff4be925931a9d8118e203da474 Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Wed, 17 May 2023 19:36:25 +0000 Subject: [PATCH 56/62] Update pytests --- pyproject.toml | 2 +- tests/conftest.py | 80 ++++++++++++++++++++++++++++++- tests/test_export.py | 73 ++++++++++++++++++++++++++-- tests/test_pipeline_generation.py | 2 +- 4 files changed, 149 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 24c287d..8e45a4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.pytest.ini_options] minversion = "6.0" -addopts = "--capture=tee-sys -p no:warnings --sw" +addopts = "--capture=tee-sys -p no:warnings --cov=element_zstack --cov-report term-missing" # Verbosity: -v for pytest more verbose # Warnings: -p no:warnings to disable # Stepwise: --sw to restart pytest at last failure point diff --git a/tests/conftest.py b/tests/conftest.py index 99f3fe5..cabe1de 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -100,4 +100,82 @@ def volume_volume(pipeline): volume.Volume.populate() yield - \ No newline at end of file + + +@pytest.fixture(scope="session") +def volume_segmentation_task(pipeline): + volume = pipeline["volume"] + key = (volume.Volume & "subject='subject1'").fetch1("KEY") + volume.SegmentationParamSet.insert_new_params( + segmentation_method="cellpose", + paramset_idx=1, + params=dict( + diameter=8, + min_size=2, + do_3d=False, + anisotropy=0.5, + model_type="nuclei", + channels=[[0, 0]], + z_axis=0, + skip_duplicates=True, + ), + ) + yield + + +@pytest.fixture(scope="session") +def volume_segmentation_task(pipeline): + volume = pipeline["volume"] + volume.SegmentationTask.insert1(dict( + key, + paramset_idx=1, + )) + yield + + +@pytest.fixture(scope="session") +def volume_segmentation(pipeline): + volume = pipeline["volume"] + key = (volume.Volume & "subject='subject1'").fetch1("KEY") + volume.Segmentation.populate(key) + yield + + +@pytest.fixture(scope="session") +def volume_voxel_size(pipeline): + volume = pipeline["volume"] + key = (volume.Volume & "subject='subject1'").fetch1("KEY") + volume.VoxelSize.insert1( + dict( + key, + width=0.001, + height=0.001, + depth=0.001, + ) + ) + yield + +@pytest.fixture(scope="session") +def bossdb_volume_upload_task(pipeline): + bossdb = pipeline["bossdb"] + volume = pipeline["volume"] + key = (volume.Segmentation & "subject='subject1'").fetch1("KEY") + col_name = "dataJointTestUpload" + exp_name = "CaImagingFinal" + 
chn_name = "test1" + + bossdb.VolumeUploadTask.insert1( + dict( + key, + collection_name=col_name, + experiment_name=exp_name, + channel_name=chn_name, + ), skip_duplicates=True + ) + yield + +@pytest.fixture(scope="session") +def bossdb_volume_upload(pipeline): + bossdb = pipeline["bossdb"] + bossdb.VolumeUpload.populate() + yield \ No newline at end of file diff --git a/tests/test_export.py b/tests/test_export.py index b26c8c7..3db840c 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -5,22 +5,85 @@ def test_export(pipeline): token which can be obtained by creating an account at https://api.bossdb.io. You will also need resource manager permissions from the team at https://bossdb.org. """ - + import datetime + + subject = pipeline["subject"] + session = pipeline["session"] scan = pipeline["scan"] + volume = pipeline["volume"] bossdb = pipeline["bossdb"] - scan_key = (scan.Scan & "subject = 'subject1'").fetch1("KEY") - upload_key = dict( - scan_key, + subject.Subject.insert1( + dict( + subject="subject1", + sex="M", + subject_birth_date="2023-01-01", + subject_description="Cellpose segmentation of volumetric data."), + skip_duplicates=True, + ) + + session_key = dict( + subject="subject1", + session_id=0, + ) + session.Session.insert1( + dict( + session_key, + session_datetime=datetime.datetime.now(), + ), + skip_duplicates=True, + ) + + session.SessionDirectory.insert1( + dict(session_key, session_dir="subject1/session1"), + skip_duplicates=True, + ) + scan.Scan.insert1( + dict( + session_key, + scan_id=0, + acq_software="ScanImage", + ), + skip_duplicates=True, + ) + volume.Volume.populate() + key = (volume.Volume & "subject='subject1'").fetch1("KEY") + volume.SegmentationParamSet.insert_new_params( + segmentation_method="cellpose", paramset_idx=1, + params=dict( + diameter=8, + min_size=2, + do_3d=False, + anisotropy=0.5, + model_type="nuclei", + channels=[[0, 0]], + z_axis=0, + skip_duplicates=True, + ), ) + volume.SegmentationTask.update1(dict( + key, + paramset_idx=1, + task_mode="load", + )) + segmentation_key = (volume.SegmentationTask & "subject='subject1'").fetch1("KEY") + volume.Segmentation.populate(segmentation_key) + volume.VoxelSize.insert1( + dict( + segmentation_key, + width=0.001, + height=0.001, + depth=0.001, + ) + ) col_name = "dataJointTestUpload" exp_name = "CaImagingFinal" chn_name = "test1" bossdb.VolumeUploadTask.insert1( dict( - upload_key, + segmentation_key, collection_name=col_name, experiment_name=exp_name, channel_name=chn_name, diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py index 6f50420..7bdef77 100644 --- a/tests/test_pipeline_generation.py +++ b/tests/test_pipeline_generation.py @@ -25,4 +25,4 @@ def test_generate_pipeline(pipeline): volume_matching.VolumeMatch.VolumeMask.heading.secondary_attributes ) - assert "web_adress" in (bossdb.VolumeUpload.WebAddress.heading.secondary_attributes) + assert "web_address" in (bossdb.VolumeUpload.WebAddress.heading.secondary_attributes) From 74330f920e09fe52f58880914be7ff7419573d23 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 18 May 2023 13:22:26 -0500 Subject: [PATCH 57/62] Remove dev container files and update README --- .devcontainer/Dockerfile | 43 -------------------- .devcontainer/devcontainer.json | 30 -------------- .devcontainer/docker-compose.yaml | 16 -------- .devcontainer/local-data/devcontainer.json | 30 -------------- .devcontainer/local-data/docker-compose.yaml | 16 -------- .devcontainer/local-test/devcontainer.json | 30 -------------- 
.devcontainer/local-test/docker-compose.yaml | 15 ------- README.md | 33 ++++++--------- 8 files changed, 12 insertions(+), 201 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 .devcontainer/devcontainer.json delete mode 100644 .devcontainer/docker-compose.yaml delete mode 100644 .devcontainer/local-data/devcontainer.json delete mode 100644 .devcontainer/local-data/docker-compose.yaml delete mode 100644 .devcontainer/local-test/devcontainer.json delete mode 100644 .devcontainer/local-test/docker-compose.yaml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 364338f..0000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -FROM python:3.9-slim - -RUN \ - adduser --system --disabled-password --shell /bin/bash vscode && \ - # install docker - apt-get update && \ - apt-get install ca-certificates curl gnupg lsb-release -y && \ - mkdir -m 0755 -p /etc/apt/keyrings && \ - curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - apt-get update && \ - apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y && \ - usermod -aG docker vscode && \ - apt-get clean - -RUN \ - # dev setup - apt update && \ - apt-get install sudo git bash-completion graphviz default-mysql-client s3fs procps -y && \ - usermod -aG sudo vscode && \ - echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \ - pip install --no-cache-dir --upgrade black pip && \ - echo '. /etc/bash_completion' >> /home/vscode/.bashrc && \ - echo 'export PS1="\[\e[32;1m\]\u\[\e[m\]@\[\e[34;1m\]\H\[\e[m\]:\[\e[33;1m\]\w\[\e[m\]$ "' >> /home/vscode/.bashrc && \ - # dircolors -b >> /home/vscode/.bashrc && \ # somehow fix colors - apt-get clean -COPY ./requirements.txt /tmp/ -RUN \ - # workflow dependencies - pip install --no-cache-dir -r /tmp/requirements.txt && \ - # clean up - rm /tmp/requirements.txt && \ - apt-get clean - -ENV DJ_HOST fakeservices.datajoint.io -ENV DJ_USER root -ENV DJ_PASS simple - -ENV VOLUME_ROOT_DATA_DIR /workspaces/workflow-zstack/example_data -ENV DATABASE_PREFIX neuro_ - -USER vscode -CMD bash -c "sudo rm /var/run/docker.pid; sudo dockerd" \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index d7b9b0d..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Tutorial environment", - "dockerComposeFile": "docker-compose.yaml", - "service": "app", - "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - "remoteEnv": { - "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" - }, - "onCreateCommand": "mkdir -p ${VOLUME_ROOT_DATA_DIR} && pip install -e . 
&& MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", - "hostRequirements": { - "cpus": 2, - "memory": "16gb", - "storage": "32gb" - }, - "forwardPorts": [ - 3306 - ], - "customizations": { - "settings": { - "python.pythonPath": "/usr/local/bin/python" - }, - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter" - ] - } - } -} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml deleted file mode 100644 index e8107cd..0000000 --- a/.devcontainer/docker-compose.yaml +++ /dev/null @@ -1,16 +0,0 @@ -version: "3" -services: - app: - extends: - file: ./local-data/docker-compose.yaml - service: app - environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 - devices: - - /dev/fuse - cap_add: - - SYS_ADMIN - security_opt: - - apparmor:unconfined -volumes: - docker_data: \ No newline at end of file diff --git a/.devcontainer/local-data/devcontainer.json b/.devcontainer/local-data/devcontainer.json deleted file mode 100644 index 5cc077e..0000000 --- a/.devcontainer/local-data/devcontainer.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Dev environment with local data", - "dockerComposeFile": "docker-compose.yaml", - "service": "app", - "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - "remoteEnv": { - "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" - }, - "onCreateCommand": "pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f", - "hostRequirements": { - "cpus": 2, - "memory": "16gb", - "storage": "32gb" - }, - "forwardPorts": [ - 3306 - ], - "customizations": { - "settings": { - "python.pythonPath": "/usr/local/bin/python" - }, - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter" - ] - } - } -} \ No newline at end of file diff --git a/.devcontainer/local-data/docker-compose.yaml b/.devcontainer/local-data/docker-compose.yaml deleted file mode 100644 index 80d22c4..0000000 --- a/.devcontainer/local-data/docker-compose.yaml +++ /dev/null @@ -1,16 +0,0 @@ -version: "3" -services: - app: - cpus: 2 - mem_limit: 16g - build: - context: ../.. - dockerfile: ./.devcontainer/Dockerfile - extra_hosts: - - fakeservices.datajoint.io:127.0.0.1 - volumes: - - ../../..:/workspaces:cached - - docker_data:/var/lib/docker # persist docker images - privileged: true # only because of dind -volumes: - docker_data: diff --git a/.devcontainer/local-test/devcontainer.json b/.devcontainer/local-test/devcontainer.json deleted file mode 100644 index 9725463..0000000 --- a/.devcontainer/local-test/devcontainer.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Dev environment for local pytests with remote data", - "dockerComposeFile": "docker-compose.yaml", - "service": "app", - "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - "remoteEnv": { - "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" - }, - "onCreateCommand": "pip install -r ./requirements_dev.txt && pip install -e . 
&& pip install -e ../element-zstack && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", - "hostRequirements": { - "cpus": 2, - "memory": "16gb", - "storage": "32gb" - }, - "forwardPorts": [ - 3306 - ], - "customizations": { - "settings": { - "python.pythonPath": "/usr/local/bin/python" - }, - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter" - ] - } - } -} \ No newline at end of file diff --git a/.devcontainer/local-test/docker-compose.yaml b/.devcontainer/local-test/docker-compose.yaml deleted file mode 100644 index add45d0..0000000 --- a/.devcontainer/local-test/docker-compose.yaml +++ /dev/null @@ -1,15 +0,0 @@ -version: "3" -services: - app: - cpus: 2 - mem_limit: 16g - environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 - build: - context: ../.. - dockerfile: ./.devcontainer/Dockerfile - extra_hosts: - - fakeservices.datajoint.io:127.0.0.1 - volumes: - - ../../..:/workspaces:cached - privileged: true # only because of dind \ No newline at end of file diff --git a/README.md b/README.md index 32cb857..74eadb2 100644 --- a/README.md +++ b/README.md @@ -36,29 +36,20 @@ a tutorial environment and notebook to learn the pipeline. + The easiest way to learn about DataJoint Elements is to use the tutorial notebook within the included interactive environment configured using [Dev Container](https://containers.dev/). -### Launch Environment - -Here are some options that provide a great experience: - -- Cloud-based Environment (*recommended*) - - Launch using [GitHub Codespaces](https://github.com/features/codespaces) using the `+` option which will `Create codespace on main` in the codebase repository on your fork with default options. For more control, see the `...` where you may create `New with options...`. - - Build time for a codespace is several minutes. This is done infrequently and cached for convenience. - - Start time for a codespace is less than 1 minute. This will pull the built codespace from cache when you need it. - - *Tip*: Each month, GitHub renews a [free-tier](https://docs.github.com/en/billing/managing-billing-for-github-codespaces/about-billing-for-github-codespaces#monthly-included-storage-and-core-hours-for-personal-accounts) quota of compute and storage. Typically we run into the storage limits before anything else since codespaces consume storage while stopped. It is best to delete Codespaces when not actively in use and recreate when needed. We'll soon be creating prebuilds to avoid larger build times. Once any portion of your quota is reached, you will need to wait for it to be reset at the end of your cycle or add billing info to your GitHub account to handle overages. - - *Tip*: GitHub auto names the codespace but you can rename the codespace so that it is easier to identify later. 
+## Interactive Tutorial -- Local Environment - - Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) - - Install [Docker](https://docs.docker.com/get-docker/) - - Install [VSCode](https://code.visualstudio.com/) - - Install the VSCode [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) - - `git clone` the codebase repository and open it in VSCode - - Use the `Dev Containers extension` to `Reopen in Container` (More info is in the `Getting started` included with the extension.) +### Launch Environment -You will know your environment has finished loading once you either see a terminal open related to `Running postStartCommand` with a final message of `Done` or the `README.md` is opened in `Preview`. ++ Local Environment + + Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) + + Install [VSCode](https://code.visualstudio.com/) + + Install [Conda](https://docs.conda.io/en/latest/miniconda.html) + + Configure a database. See [here](https://tutorials.datajoint.org/setting-up/local-database.html) for details. + + `git clone` the code repository and open it in VSCode + + Install the repository with `pip install -e .` + + Setup a `dj_local_conf.json` with the `database.prefix` and `volume_root_data_dir`. See [User Guide](https://datajoint.com/docs/elements/user-guide/) for details. + + Add your example data to the `volume_root_data_dir`. ### Instructions -1. We recommend you start by navigating to the `notebooks` directory on the left panel and go through the `tutorial.ipynb` Jupyter notebook. Execute the cells in the notebook to begin your walk through of the tutorial. - -1. Once you are done, see the options available to you in the menu in the bottom-left corner. For example, in codespace you will have an option to `Stop Current Codespace` but when running Dev Container on your own machine the equivalent option is `Reopen folder locally`. By default, GitHub will also automatically stop the Codespace after 30 minutes of inactivity. Once the codespace is no longer being used, we recommend deleting the codespace. +1. We recommend you start by navigating to the `notebooks` directory. Execute the cells in the notebooks to begin your walk through of the tutorial. From 41c5a39d79918eb27aaee2aad2f22f811f6ae468 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Thu, 18 May 2023 13:28:33 -0500 Subject: [PATCH 58/62] Revert "Remove dev container files and update README" This reverts commit 74330f920e09fe52f58880914be7ff7419573d23. 
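Note: the local-environment steps added to the README in the previous patch reference a `dj_local_conf.json` carrying the `database.prefix` and `volume_root_data_dir` settings. A minimal sketch of generating such a file with DataJoint's config API is shown below; the host, credentials, and the exact `custom` key names are placeholders for illustration, not values confirmed by this repository.

```python
import datajoint as dj

# Placeholder credentials -- replace with your own database settings.
dj.config["database.host"] = "localhost"
dj.config["database.user"] = "root"
dj.config["database.password"] = "simple"

# The key names under "custom" are assumptions; check workflow_zstack/paths.py
# and the package __init__ for the names this workflow actually reads.
dj.config["custom"] = {
    "database.prefix": "neuro_",
    "volume_root_data_dir": "/path/to/example_data",
}

dj.config.save_local()  # writes dj_local_conf.json in the current working directory
```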
--- .devcontainer/Dockerfile | 43 ++++++++++++++++++++ .devcontainer/devcontainer.json | 30 ++++++++++++++ .devcontainer/docker-compose.yaml | 16 ++++++++ .devcontainer/local-data/devcontainer.json | 30 ++++++++++++++ .devcontainer/local-data/docker-compose.yaml | 16 ++++++++ .devcontainer/local-test/devcontainer.json | 30 ++++++++++++++ .devcontainer/local-test/docker-compose.yaml | 15 +++++++ README.md | 33 +++++++++------ 8 files changed, 201 insertions(+), 12 deletions(-) create mode 100644 .devcontainer/Dockerfile create mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/docker-compose.yaml create mode 100644 .devcontainer/local-data/devcontainer.json create mode 100644 .devcontainer/local-data/docker-compose.yaml create mode 100644 .devcontainer/local-test/devcontainer.json create mode 100644 .devcontainer/local-test/docker-compose.yaml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 0000000..364338f --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,43 @@ +FROM python:3.9-slim + +RUN \ + adduser --system --disabled-password --shell /bin/bash vscode && \ + # install docker + apt-get update && \ + apt-get install ca-certificates curl gnupg lsb-release -y && \ + mkdir -m 0755 -p /etc/apt/keyrings && \ + curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \ + apt-get update && \ + apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y && \ + usermod -aG docker vscode && \ + apt-get clean + +RUN \ + # dev setup + apt update && \ + apt-get install sudo git bash-completion graphviz default-mysql-client s3fs procps -y && \ + usermod -aG sudo vscode && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \ + pip install --no-cache-dir --upgrade black pip && \ + echo '. /etc/bash_completion' >> /home/vscode/.bashrc && \ + echo 'export PS1="\[\e[32;1m\]\u\[\e[m\]@\[\e[34;1m\]\H\[\e[m\]:\[\e[33;1m\]\w\[\e[m\]$ "' >> /home/vscode/.bashrc && \ + # dircolors -b >> /home/vscode/.bashrc && \ # somehow fix colors + apt-get clean +COPY ./requirements.txt /tmp/ +RUN \ + # workflow dependencies + pip install --no-cache-dir -r /tmp/requirements.txt && \ + # clean up + rm /tmp/requirements.txt && \ + apt-get clean + +ENV DJ_HOST fakeservices.datajoint.io +ENV DJ_USER root +ENV DJ_PASS simple + +ENV VOLUME_ROOT_DATA_DIR /workspaces/workflow-zstack/example_data +ENV DATABASE_PREFIX neuro_ + +USER vscode +CMD bash -c "sudo rm /var/run/docker.pid; sudo dockerd" \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..d7b9b0d --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "Tutorial environment", + "dockerComposeFile": "docker-compose.yaml", + "service": "app", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + "onCreateCommand": "mkdir -p ${VOLUME_ROOT_DATA_DIR} && pip install -e . 
&& MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", + "hostRequirements": { + "cpus": 2, + "memory": "16gb", + "storage": "32gb" + }, + "forwardPorts": [ + 3306 + ], + "customizations": { + "settings": { + "python.pythonPath": "/usr/local/bin/python" + }, + "vscode": { + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml new file mode 100644 index 0000000..e8107cd --- /dev/null +++ b/.devcontainer/docker-compose.yaml @@ -0,0 +1,16 @@ +version: "3" +services: + app: + extends: + file: ./local-data/docker-compose.yaml + service: app + environment: + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 + devices: + - /dev/fuse + cap_add: + - SYS_ADMIN + security_opt: + - apparmor:unconfined +volumes: + docker_data: \ No newline at end of file diff --git a/.devcontainer/local-data/devcontainer.json b/.devcontainer/local-data/devcontainer.json new file mode 100644 index 0000000..5cc077e --- /dev/null +++ b/.devcontainer/local-data/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "Dev environment with local data", + "dockerComposeFile": "docker-compose.yaml", + "service": "app", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + "onCreateCommand": "pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f", + "hostRequirements": { + "cpus": 2, + "memory": "16gb", + "storage": "32gb" + }, + "forwardPorts": [ + 3306 + ], + "customizations": { + "settings": { + "python.pythonPath": "/usr/local/bin/python" + }, + "vscode": { + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/local-data/docker-compose.yaml b/.devcontainer/local-data/docker-compose.yaml new file mode 100644 index 0000000..80d22c4 --- /dev/null +++ b/.devcontainer/local-data/docker-compose.yaml @@ -0,0 +1,16 @@ +version: "3" +services: + app: + cpus: 2 + mem_limit: 16g + build: + context: ../.. + dockerfile: ./.devcontainer/Dockerfile + extra_hosts: + - fakeservices.datajoint.io:127.0.0.1 + volumes: + - ../../..:/workspaces:cached + - docker_data:/var/lib/docker # persist docker images + privileged: true # only because of dind +volumes: + docker_data: diff --git a/.devcontainer/local-test/devcontainer.json b/.devcontainer/local-test/devcontainer.json new file mode 100644 index 0000000..9725463 --- /dev/null +++ b/.devcontainer/local-test/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "Dev environment for local pytests with remote data", + "dockerComposeFile": "docker-compose.yaml", + "service": "app", + "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", + "remoteEnv": { + "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" + }, + "onCreateCommand": "pip install -r ./requirements_dev.txt && pip install -e . 
&& pip install -e ../element-zstack && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", + "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", + "hostRequirements": { + "cpus": 2, + "memory": "16gb", + "storage": "32gb" + }, + "forwardPorts": [ + 3306 + ], + "customizations": { + "settings": { + "python.pythonPath": "/usr/local/bin/python" + }, + "vscode": { + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter" + ] + } + } +} \ No newline at end of file diff --git a/.devcontainer/local-test/docker-compose.yaml b/.devcontainer/local-test/docker-compose.yaml new file mode 100644 index 0000000..add45d0 --- /dev/null +++ b/.devcontainer/local-test/docker-compose.yaml @@ -0,0 +1,15 @@ +version: "3" +services: + app: + cpus: 2 + mem_limit: 16g + environment: + - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 + build: + context: ../.. + dockerfile: ./.devcontainer/Dockerfile + extra_hosts: + - fakeservices.datajoint.io:127.0.0.1 + volumes: + - ../../..:/workspaces:cached + privileged: true # only because of dind \ No newline at end of file diff --git a/README.md b/README.md index 74eadb2..32cb857 100644 --- a/README.md +++ b/README.md @@ -36,20 +36,29 @@ a tutorial environment and notebook to learn the pipeline. + The easiest way to learn about DataJoint Elements is to use the tutorial notebook within the included interactive environment configured using [Dev Container](https://containers.dev/). -## Interactive Tutorial - ### Launch Environment -+ Local Environment - + Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) - + Install [VSCode](https://code.visualstudio.com/) - + Install [Conda](https://docs.conda.io/en/latest/miniconda.html) - + Configure a database. See [here](https://tutorials.datajoint.org/setting-up/local-database.html) for details. - + `git clone` the code repository and open it in VSCode - + Install the repository with `pip install -e .` - + Setup a `dj_local_conf.json` with the `database.prefix` and `volume_root_data_dir`. See [User Guide](https://datajoint.com/docs/elements/user-guide/) for details. - + Add your example data to the `volume_root_data_dir`. +Here are some options that provide a great experience: + +- Cloud-based Environment (*recommended*) + - Launch using [GitHub Codespaces](https://github.com/features/codespaces) using the `+` option which will `Create codespace on main` in the codebase repository on your fork with default options. For more control, see the `...` where you may create `New with options...`. + - Build time for a codespace is several minutes. This is done infrequently and cached for convenience. + - Start time for a codespace is less than 1 minute. This will pull the built codespace from cache when you need it. + - *Tip*: Each month, GitHub renews a [free-tier](https://docs.github.com/en/billing/managing-billing-for-github-codespaces/about-billing-for-github-codespaces#monthly-included-storage-and-core-hours-for-personal-accounts) quota of compute and storage. Typically we run into the storage limits before anything else since codespaces consume storage while stopped. It is best to delete Codespaces when not actively in use and recreate when needed. We'll soon be creating prebuilds to avoid larger build times. 
Once any portion of your quota is reached, you will need to wait for it to be reset at the end of your cycle or add billing info to your GitHub account to handle overages. + - *Tip*: GitHub auto names the codespace but you can rename the codespace so that it is easier to identify later. + +- Local Environment + - Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) + - Install [Docker](https://docs.docker.com/get-docker/) + - Install [VSCode](https://code.visualstudio.com/) + - Install the VSCode [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) + - `git clone` the codebase repository and open it in VSCode + - Use the `Dev Containers extension` to `Reopen in Container` (More info is in the `Getting started` included with the extension.) + +You will know your environment has finished loading once you either see a terminal open related to `Running postStartCommand` with a final message of `Done` or the `README.md` is opened in `Preview`. ### Instructions -1. We recommend you start by navigating to the `notebooks` directory. Execute the cells in the notebooks to begin your walk through of the tutorial. +1. We recommend you start by navigating to the `notebooks` directory on the left panel and go through the `tutorial.ipynb` Jupyter notebook. Execute the cells in the notebook to begin your walk through of the tutorial. + +1. Once you are done, see the options available to you in the menu in the bottom-left corner. For example, in codespace you will have an option to `Stop Current Codespace` but when running Dev Container on your own machine the equivalent option is `Reopen folder locally`. By default, GitHub will also automatically stop the Codespace after 30 minutes of inactivity. Once the codespace is no longer being used, we recommend deleting the codespace. 
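Note: patches 47 and 52 earlier in this series change `get_volume_tif_file` to return a single absolute path and to assert that the session directory contains exactly one `.tif`. A short sketch of the expected call is below, assuming the `subject1/session1` layout used in the tests and notebook; the key values are illustrative.

```python
from workflow_zstack.paths import get_volume_tif_file

# Expected layout: <volume_root_data_dir>/subject1/session1/<single raw .tif file>
scan_key = {"subject": "subject1", "session_id": 0, "scan_id": 0}

tif_path = get_volume_tif_file(scan_key)  # absolute path of the one raw .tif, as a string
# Raises FileNotFoundError if no .tif exists under the session directory,
# and AssertionError if more than one is found.
```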
From f74df61f8c2b4392fe4ae7a7e0a74cadc665fd8a Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Fri, 19 May 2023 14:30:58 -0500 Subject: [PATCH 59/62] Update requirements.txt Co-authored-by: Kabilar Gunalan --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c69e8fd..e0c9d23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,6 @@ element-animal>=0.1.5 element-lab>=0.2.0 element-session>=0.1.2 element-calcium-imaging>=0.5.5 -element-zstack @ git+https://github.com/kabilar/element-zstack +element-zstack>=0.1.0 intern>=1.4.1 ipykernel>=6.0.1 From 84c1ee18ce8329292ad0240d982b0c4886a44260 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Fri, 19 May 2023 14:53:54 -0500 Subject: [PATCH 60/62] Remove dev container + volume_matching, update notebook + README --- .devcontainer/Dockerfile | 43 -------------- .devcontainer/devcontainer.json | 30 ---------- .devcontainer/docker-compose.yaml | 16 ----- .devcontainer/local-data/devcontainer.json | 30 ---------- .devcontainer/local-data/docker-compose.yaml | 16 ----- .devcontainer/local-test/devcontainer.json | 30 ---------- .devcontainer/local-test/docker-compose.yaml | 15 ----- README.md | 62 +++++++++++--------- notebooks/tutorial.ipynb | 59 ++++++++++++------- workflow_zstack/pipeline.py | 4 +- 10 files changed, 72 insertions(+), 233 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 .devcontainer/devcontainer.json delete mode 100644 .devcontainer/docker-compose.yaml delete mode 100644 .devcontainer/local-data/devcontainer.json delete mode 100644 .devcontainer/local-data/docker-compose.yaml delete mode 100644 .devcontainer/local-test/devcontainer.json delete mode 100644 .devcontainer/local-test/docker-compose.yaml diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 364338f..0000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -FROM python:3.9-slim - -RUN \ - adduser --system --disabled-password --shell /bin/bash vscode && \ - # install docker - apt-get update && \ - apt-get install ca-certificates curl gnupg lsb-release -y && \ - mkdir -m 0755 -p /etc/apt/keyrings && \ - curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - apt-get update && \ - apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y && \ - usermod -aG docker vscode && \ - apt-get clean - -RUN \ - # dev setup - apt update && \ - apt-get install sudo git bash-completion graphviz default-mysql-client s3fs procps -y && \ - usermod -aG sudo vscode && \ - echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \ - pip install --no-cache-dir --upgrade black pip && \ - echo '. 
/etc/bash_completion' >> /home/vscode/.bashrc && \ - echo 'export PS1="\[\e[32;1m\]\u\[\e[m\]@\[\e[34;1m\]\H\[\e[m\]:\[\e[33;1m\]\w\[\e[m\]$ "' >> /home/vscode/.bashrc && \ - # dircolors -b >> /home/vscode/.bashrc && \ # somehow fix colors - apt-get clean -COPY ./requirements.txt /tmp/ -RUN \ - # workflow dependencies - pip install --no-cache-dir -r /tmp/requirements.txt && \ - # clean up - rm /tmp/requirements.txt && \ - apt-get clean - -ENV DJ_HOST fakeservices.datajoint.io -ENV DJ_USER root -ENV DJ_PASS simple - -ENV VOLUME_ROOT_DATA_DIR /workspaces/workflow-zstack/example_data -ENV DATABASE_PREFIX neuro_ - -USER vscode -CMD bash -c "sudo rm /var/run/docker.pid; sudo dockerd" \ No newline at end of file diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index d7b9b0d..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Tutorial environment", - "dockerComposeFile": "docker-compose.yaml", - "service": "app", - "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - "remoteEnv": { - "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" - }, - "onCreateCommand": "mkdir -p ${VOLUME_ROOT_DATA_DIR} && pip install -e . && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", - "hostRequirements": { - "cpus": 2, - "memory": "16gb", - "storage": "32gb" - }, - "forwardPorts": [ - 3306 - ], - "customizations": { - "settings": { - "python.pythonPath": "/usr/local/bin/python" - }, - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter" - ] - } - } -} \ No newline at end of file diff --git a/.devcontainer/docker-compose.yaml b/.devcontainer/docker-compose.yaml deleted file mode 100644 index e8107cd..0000000 --- a/.devcontainer/docker-compose.yaml +++ /dev/null @@ -1,16 +0,0 @@ -version: "3" -services: - app: - extends: - file: ./local-data/docker-compose.yaml - service: app - environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 - devices: - - /dev/fuse - cap_add: - - SYS_ADMIN - security_opt: - - apparmor:unconfined -volumes: - docker_data: \ No newline at end of file diff --git a/.devcontainer/local-data/devcontainer.json b/.devcontainer/local-data/devcontainer.json deleted file mode 100644 index 5cc077e..0000000 --- a/.devcontainer/local-data/devcontainer.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Dev environment with local data", - "dockerComposeFile": "docker-compose.yaml", - "service": "app", - "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - "remoteEnv": { - "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" - }, - "onCreateCommand": "pip install -e . 
&& MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f", - "hostRequirements": { - "cpus": 2, - "memory": "16gb", - "storage": "32gb" - }, - "forwardPorts": [ - 3306 - ], - "customizations": { - "settings": { - "python.pythonPath": "/usr/local/bin/python" - }, - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter" - ] - } - } -} \ No newline at end of file diff --git a/.devcontainer/local-data/docker-compose.yaml b/.devcontainer/local-data/docker-compose.yaml deleted file mode 100644 index 80d22c4..0000000 --- a/.devcontainer/local-data/docker-compose.yaml +++ /dev/null @@ -1,16 +0,0 @@ -version: "3" -services: - app: - cpus: 2 - mem_limit: 16g - build: - context: ../.. - dockerfile: ./.devcontainer/Dockerfile - extra_hosts: - - fakeservices.datajoint.io:127.0.0.1 - volumes: - - ../../..:/workspaces:cached - - docker_data:/var/lib/docker # persist docker images - privileged: true # only because of dind -volumes: - docker_data: diff --git a/.devcontainer/local-test/devcontainer.json b/.devcontainer/local-test/devcontainer.json deleted file mode 100644 index 9725463..0000000 --- a/.devcontainer/local-test/devcontainer.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Dev environment for local pytests with remote data", - "dockerComposeFile": "docker-compose.yaml", - "service": "app", - "workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}", - "remoteEnv": { - "LOCAL_WORKSPACE_FOLDER": "${localWorkspaceFolder}" - }, - "onCreateCommand": "pip install -r ./requirements_dev.txt && pip install -e . && pip install -e ../element-zstack && MYSQL_VER=8.0 docker compose down && MYSQL_VER=8.0 docker compose up --build --wait", - "postStartCommand": "docker volume prune -f && s3fs ${DJ_PUBLIC_S3_LOCATION} ${VOLUME_ROOT_DATA_DIR} -o nonempty,multipart_size=530,endpoint=us-east-1,url=http://s3.amazonaws.com,public_bucket=1", - "hostRequirements": { - "cpus": 2, - "memory": "16gb", - "storage": "32gb" - }, - "forwardPorts": [ - 3306 - ], - "customizations": { - "settings": { - "python.pythonPath": "/usr/local/bin/python" - }, - "vscode": { - "extensions": [ - "ms-python.python", - "ms-toolsai.jupyter" - ] - } - } -} \ No newline at end of file diff --git a/.devcontainer/local-test/docker-compose.yaml b/.devcontainer/local-test/docker-compose.yaml deleted file mode 100644 index add45d0..0000000 --- a/.devcontainer/local-test/docker-compose.yaml +++ /dev/null @@ -1,15 +0,0 @@ -version: "3" -services: - app: - cpus: 2 - mem_limit: 16g - environment: - - DJ_PUBLIC_S3_LOCATION=djhub.vathes.datapub.elements:/workflow-calcium-imaging-data-v2 - build: - context: ../.. - dockerfile: ./.devcontainer/Dockerfile - extra_hosts: - - fakeservices.datajoint.io:127.0.0.1 - volumes: - - ../../..:/workspaces:cached - privileged: true # only because of dind \ No newline at end of file diff --git a/README.md b/README.md index 32cb857..072f73d 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # DataJoint Workflow for ZStack Imaging -The DataJoint Workflow for ZStack Imaging combines five DataJoint Elements for cell -segmentation, volume registration, and cell matching - Elements Lab, -Animal, Session, Calcium Imaging, and ZStack. DataJoint Elements collectively standardize and automate data collection and -analysis for neuroscience experiments. 
Each Element is a modular pipeline for data +The DataJoint Workflow for ZStack Imaging combines five DataJoint Elements for +volume cell segmentation - Elements Lab, Animal, Session, Calcium Imaging, and +ZStack. DataJoint Elements collectively standardize and automate data collection +and analysis for neuroscience experiments. Each Element is a modular pipeline for data storage and processing with corresponding database tables that can be combined with -other Elements to assemble a fully functional pipeline. This repository also provides +other Elements to assemble a fully functional pipeline. This repository also provides a tutorial environment and notebook to learn the pipeline. ## Experiment Flowchart @@ -18,7 +18,7 @@ a tutorial environment and notebook to learn the pipeline. ## Getting Started -+ [Interactive tutorial on GitHub Codespaces](#interactive-tutorial) ++ [Interactive tutorial](#interactive-tutorial) + Install Element ZStack from PyPI @@ -34,31 +34,35 @@ a tutorial environment and notebook to learn the pipeline. ## Interactive Tutorial -+ The easiest way to learn about DataJoint Elements is to use the tutorial notebook within the included interactive environment configured using [Dev Container](https://containers.dev/). - ### Launch Environment -Here are some options that provide a great experience: - -- Cloud-based Environment (*recommended*) - - Launch using [GitHub Codespaces](https://github.com/features/codespaces) using the `+` option which will `Create codespace on main` in the codebase repository on your fork with default options. For more control, see the `...` where you may create `New with options...`. - - Build time for a codespace is several minutes. This is done infrequently and cached for convenience. - - Start time for a codespace is less than 1 minute. This will pull the built codespace from cache when you need it. - - *Tip*: Each month, GitHub renews a [free-tier](https://docs.github.com/en/billing/managing-billing-for-github-codespaces/about-billing-for-github-codespaces#monthly-included-storage-and-core-hours-for-personal-accounts) quota of compute and storage. Typically we run into the storage limits before anything else since codespaces consume storage while stopped. It is best to delete Codespaces when not actively in use and recreate when needed. We'll soon be creating prebuilds to avoid larger build times. Once any portion of your quota is reached, you will need to wait for it to be reset at the end of your cycle or add billing info to your GitHub account to handle overages. - - *Tip*: GitHub auto names the codespace but you can rename the codespace so that it is easier to identify later. - -- Local Environment - - Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) - - Install [Docker](https://docs.docker.com/get-docker/) - - Install [VSCode](https://code.visualstudio.com/) - - Install the VSCode [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) - - `git clone` the codebase repository and open it in VSCode - - Use the `Dev Containers extension` to `Reopen in Container` (More info is in the `Getting started` included with the extension.) - -You will know your environment has finished loading once you either see a terminal open related to `Running postStartCommand` with a final message of `Done` or the `README.md` is opened in `Preview`. 
++ Local Environment + + Install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) + + Install [VSCode](https://code.visualstudio.com/) + + Install [Conda](https://docs.conda.io/en/latest/miniconda.html) + + Configure a database. See [here](https://tutorials.datajoint.org/setting-up/local-database.html) for details. + + `git clone` the code repository and open it in VSCode + + Install the repository with `pip install -e .` + + Setup a `dj_local_conf.json` with the `database.prefix` and `volume_root_data_dir`. See [User Guide](https://datajoint.com/docs/elements/user-guide/) for details. + + Add your example data to the `volume_root_data_dir`. ### Instructions -1. We recommend you start by navigating to the `notebooks` directory on the left panel and go through the `tutorial.ipynb` Jupyter notebook. Execute the cells in the notebook to begin your walk through of the tutorial. - -1. Once you are done, see the options available to you in the menu in the bottom-left corner. For example, in codespace you will have an option to `Stop Current Codespace` but when running Dev Container on your own machine the equivalent option is `Reopen folder locally`. By default, GitHub will also automatically stop the Codespace after 30 minutes of inactivity. Once the codespace is no longer being used, we recommend deleting the codespace. +1. To upload data to BossDB, [create an account](https://api.bossdb.io) to + access the BossDB API and generate an API token. Please contact the team at [BossDB](https://bossdb.org) + to ensure you have `resource-manager` permissions for your account. +2. Follow the instructions below to set up the + [intern](https://github.com/jhuapl-boss/intern) REST API locally. + + Create a new folder `.intern` in your root directory. + + Create a configuration file `intern.cfg` within the `.intern` folder. The + path to this file should be `~/.intern/intern.cfg`. + + The `intern.cfg` file should contain the following exactly as shown below: + ```bash + # ~/.intern/intern.cfg + [Default] + protocol = https + host = api.bossdb.io + token = + ``` +3. Use the instructions above to set up a local environment. +4. Navigate to the `notebooks` directory. Execute the cells in the notebooks to begin your walk through of the tutorial. \ No newline at end of file diff --git a/notebooks/tutorial.ipynb b/notebooks/tutorial.ipynb index 6579f9b..171f504 100644 --- a/notebooks/tutorial.ipynb +++ b/notebooks/tutorial.ipynb @@ -12,14 +12,29 @@ "This notebook will walk through processing volumetric two-photon calcium imaging\n", "data with the DataJoint Workflow for volumetric image processing. The workflow\n", "currently supports volumetric data collected\n", - "from ScanImage. \n", + "from `ScanImage`. \n", "\n", - "**Please note that uploading data to BossDB via this pipeline requires an API\n", - "token which can be obtained by creating an account at\n", + "**Please note that uploading data to BossDB via this pipeline requires the\n", + "following:**\n", + "+ An API token which can be obtained by creating an account at\n", "[api.bossdb.io](https://api.bossdb.io). 
You will also need resource manager\n", - "permissions from the team at [BossDB](https://bossdb.org).**\n", - "\n", - "We will explain the following concepts as they relate to this workflow:\n", + "permissions from the team at [BossDB](https://bossdb.org).\n", + "+ A local setup of the [intern](https://github.com/jhuapl-boss/intern) REST API\n", + " using the instructions below:\n", + " + Create a new folder `.intern` in your root directory.\n", + " + Create a configuration file `intern.cfg` within the `.intern` folder. The\n", + " path to this file should be `~/.intern/intern.cfg`. \n", + " + The `intern.cfg` file should contain the following exactly as shown below:\n", + " ```bash\n", + " # ~/.intern/intern.cfg\n", + " [Default]\n", + " protocol = https\n", + " host = api.bossdb.io\n", + " token = \n", + " ```\n", + "\n", + "\n", + "In this notebook, we will explain the following concepts as they relate to this workflow:\n", "- What is an Element versus a Workflow?\n", "- Plot the workflow with `dj.Diagram`\n", "- Insert data into tables\n", @@ -46,6 +61,12 @@ "metadata": {}, "outputs": [], "source": [ + "import os\n", + "\n", + "if os.path.basename(os.getcwd()) == \"notebooks\":\n", + " os.chdir(\"..\")\n", + "\n", + "\n", "import datajoint as dj\n", "import datetime" ] @@ -114,7 +135,6 @@ " + dj.Diagram(session.Session)\n", " + dj.Diagram(scan.Scan)\n", " + dj.Diagram(volume)\n", - " + dj.Diagram(volume_matching)\n", " + dj.Diagram(bossdb)\n", ")" ] @@ -476,7 +496,7 @@ " segmentation_method=\"cellpose\",\n", " paramset_idx=1,\n", " params=dict(\n", - " diameter=8,\n", + " diameter=None,\n", " min_size=2,\n", " do_3d=False,\n", " anisotropy=0.5,\n", @@ -604,8 +624,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's define an `upload_key` to easily upload the volume to BossDB via this\n", - "workflow. The `upload_key` combines information about the current scan from\n", + "Let's define an `upload_key` to automatically upload the volume to BossDB via the\n", + "`bossdb` schema. The `upload_key` combines information about the current scan from\n", "`scan.Scan` and the `paramset_idx` from `SegmentationParamSet`." ] }, @@ -623,11 +643,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Finally, we can upload the volume and its corresponding segmentation data to\n", - "BossDB and generate a neuroglancer link to visualize the data ..\n", - "\n", - "The first table is `VolumeUploadTask`. Let's define the upload task by naming the collection, experiment,\n", - "and channel where the data should be uploaded. " + "The first table in this schema is `VolumeUploadTask`. Let's define the upload task by naming the collection, experiment,\n", + "and channel where the data should be uploaded." 
] }, { @@ -654,15 +671,15 @@ "metadata": {}, "outputs": [], "source": [ - "col_name = \"dataJointTestUpload\"\n", - "exp_name = \"CaImagingFinal\"\n", - "chn_name = \"test1-seg\"\n", + "collection = \"dataJointTestUpload\"\n", + "experiment = \"CalciumImaging\"\n", + "channel = \"volume-image\"\n", "bossdb.VolumeUploadTask.insert1(\n", " dict(\n", " upload_key,\n", - " collection_name=col_name,\n", - " experiment_name=exp_name,\n", - " channel_name=chn_name,\n", + " collection_name=collection,\n", + " experiment_name=experiment,\n", + " channel_name=channel,\n", " )\n", ")" ] diff --git a/workflow_zstack/pipeline.py b/workflow_zstack/pipeline.py index a7a4aac..991828a 100644 --- a/workflow_zstack/pipeline.py +++ b/workflow_zstack/pipeline.py @@ -5,7 +5,7 @@ from element_animal.subject import Subject from element_session import session_with_id as session from element_calcium_imaging import imaging, scan -from element_zstack import volume, volume_matching, bossdb +from element_zstack import volume, bossdb from . import db_prefix from .paths import get_volume_root_data_dir, get_volume_tif_file @@ -20,7 +20,6 @@ "subject", "surgery", "volume", - "volume_matching", "bossdb", "Device", "get_volume_root_data_dir", @@ -44,5 +43,4 @@ Mask = imaging.Segmentation.Mask Scan = scan.Scan volume.activate(db_prefix + "volume", linking_module=__name__) -volume_matching.activate(db_prefix + "volume_matching") bossdb.activate(db_prefix + "bossdb", linking_module=__name__) From 521f63cce4341e5a5722b545263fa12fa7a8ffcd Mon Sep 17 00:00:00 2001 From: Kushal Bakshi <52367253+kushalbakshi@users.noreply.github.com> Date: Fri, 19 May 2023 14:56:48 -0500 Subject: [PATCH 61/62] Update tests/test_export.py Co-authored-by: Kabilar Gunalan --- tests/test_export.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_export.py b/tests/test_export.py index 3db840c..a52e805 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -62,11 +62,13 @@ def test_export(pipeline): skip_duplicates=True, ), ) - volume.SegmentationTask.update1(dict( + volume.SegmentationTask.insert1(dict( key, paramset_idx=1, - task_mode="load", - )) + task_mode="trigger", + ), + skip_duplicates=True, + ) segmentation_key = (volume.SegmentationTask & "subject='subject1'").fetch1("KEY") volume.Segmentation.populate(segmentation_key) volume.VoxelSize.insert1( From ad4e20633f44f90fd23a72468473095a1f769c96 Mon Sep 17 00:00:00 2001 From: kushalbakshi Date: Fri, 19 May 2023 15:10:55 -0500 Subject: [PATCH 62/62] Remove volume_matching from tests --- tests/conftest.py | 1 - tests/test_export.py | 8 ++++---- tests/test_pipeline_generation.py | 5 ----- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index cabe1de..2e15ba6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,7 +42,6 @@ def pipeline(): "session": pipeline.session, "scan": pipeline.scan, "volume": pipeline.volume, - "volume_matching": pipeline.volume_matching, "bossdb": pipeline.bossdb, } diff --git a/tests/test_export.py b/tests/test_export.py index a52e805..096886e 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -50,9 +50,9 @@ def test_export(pipeline): key = (volume.Volume & "subject='subject1'").fetch1("KEY") volume.SegmentationParamSet.insert_new_params( segmentation_method="cellpose", - paramset_idx=1, + paramset_idx=2, params=dict( - diameter=8, + diameter=None, min_size=2, do_3d=False, anisotropy=0.5, @@ -64,7 +64,7 @@ def test_export(pipeline): ) 
volume.SegmentationTask.insert1(dict( key, - paramset_idx=1, + paramset_idx=2, task_mode="trigger", ), skip_duplicates=True, @@ -92,4 +92,4 @@ def test_export(pipeline): ), skip_duplicates=True ) - bossdb.VolumeUpload.populate(upload_key) \ No newline at end of file + bossdb.VolumeUpload.populate(segmentation_key) \ No newline at end of file diff --git a/tests/test_pipeline_generation.py b/tests/test_pipeline_generation.py index 7bdef77..bedf5e1 100644 --- a/tests/test_pipeline_generation.py +++ b/tests/test_pipeline_generation.py @@ -3,7 +3,6 @@ def test_generate_pipeline(pipeline): session = pipeline["session"] scan = pipeline["scan"] volume = pipeline["volume"] - volume_matching = pipeline["volume_matching"] bossdb = pipeline["bossdb"] # Test connection from Subject to Session @@ -21,8 +20,4 @@ def test_generate_pipeline(pipeline): ] ) - assert "confidence" in ( - volume_matching.VolumeMatch.VolumeMask.heading.secondary_attributes - ) - assert "web_address" in (bossdb.VolumeUpload.WebAddress.heading.secondary_attributes)
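Note: once `test_export` (or the tutorial notebook) has populated `bossdb.VolumeUpload`, the `web_address` attribute checked in the final assertion above carries the upload link, which the notebook describes as a Neuroglancer address. A minimal sketch of retrieving it, assuming a single upload task for `subject1` as in the tests:

```python
from workflow_zstack.pipeline import bossdb

# Assumes exactly one VolumeUploadTask exists for subject1; otherwise restrict further.
upload_key = (bossdb.VolumeUploadTask & "subject='subject1'").fetch1("KEY")
bossdb.VolumeUpload.populate(upload_key)

# Fetch the generated address(es) and open in a browser to view the uploaded volume.
web_addresses = (bossdb.VolumeUpload.WebAddress & upload_key).fetch("web_address")
for address in web_addresses:
    print(address)
```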