Commit 50d88e4

Merge pull request #1327 from mvdbeek/mount_test_data_dir_ro
Mount test data dir read-only in docker
jmchilton committed Nov 29, 2022
2 parents baa27f0 + 9951a6b commit 50d88e4
Showing 18 changed files with 585 additions and 24 deletions.
3 changes: 0 additions & 3 deletions planemo/config.py
@@ -56,9 +56,6 @@ def callback(ctx, param, value):
     assert option_source is not None
     assert result is not VALUE_UNSET

-    if resolve_path and result is not None:
-        result = os.path.abspath(result)
-
     planemo_ctx.set_option_source(param_name, option_source)
     return result

63 changes: 47 additions & 16 deletions planemo/galaxy/config.py
@@ -17,6 +17,9 @@
     NamedTemporaryFile,
 )
 from typing import (
+    Any,
+    Dict,
+    Iterable,
     List,
     Optional,
     Set,
@@ -118,6 +121,7 @@
         <param id="docker_sudo">${docker_sudo}</param>
         <param id="docker_sudo_cmd">${docker_sudo_cmd}</param>
         <param id="docker_cmd">${docker_cmd}</param>
+        <param id="docker_volumes">${docker_volumes}</param>
         ${docker_host_param}
     </destination>
     <destination id="upload_dest" runner="planemo_runner">
@@ -194,9 +198,20 @@ def read_log(ctx, log_path, e: threading.Event):
     log_fh.close()


-def simple_docker_volume(path):
-    path = os.path.abspath(path)
-    return DockerVolume(f"{path}:{path}:rw")
+def create_docker_volumes(paths: Iterable[str]) -> Iterable[DockerVolume]:
+    """
+    Create DockerVolume objects from "host_path:target_path:mode" strings, deduplicating overlapping mounts.
+    """
+    docker_volumes: Dict[str, DockerVolume] = {}
+    for path in paths:
+        docker_volume = DockerVolume.from_str(path)
+        if docker_volume.path in docker_volumes:
+            # volume has been specified already; make sure we use "rw" if any of the modes are "rw"
+            if docker_volume.mode == "rw" or docker_volumes[docker_volume.path].mode == "rw":
+                docker_volumes[docker_volume.path].mode = "rw"
+        else:
+            docker_volumes[docker_volume.path] = docker_volume
+    return docker_volumes.values()


 @contextlib.contextmanager
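A minimal sketch of the deduplication behavior above, not part of the commit; it assumes DockerVolume.from_str parses "path[:mode]" strings the way galaxy-lib's docker_util does:

    # Illustrative only: overlapping mounts collapse into one volume, and "rw" wins.
    volumes = create_docker_volumes([
        "/data/test-data:ro",  # requested read-only
        "/data/test-data:rw",  # same host path requested read-write
        "/refs:rw",
    ])
    # -> two volumes: /data/test-data mounted rw (because one request asked
    #    for rw), and /refs mounted rw.
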
@@ -223,11 +238,9 @@ def config_join(*args):
         if os.path.exists(directory):
             tool_directories.add(directory)

-    # TODO: remap these.
-    tool_volumes = []
+    volumes = []
     for tool_directory in tool_directories:
-        volume = simple_docker_volume(tool_directory)
-        tool_volumes.append(volume)
+        volumes.append(tool_directory)

     empty_tool_conf = config_join("empty_tool_conf.xml")

@@ -272,15 +285,15 @@ def config_join(*args):
     # TODO: setup FTP upload dir and disable FTP server in container.

     docker_target_kwds = docker_host_args(**kwds)
-    volumes = tool_volumes + [simple_docker_volume(config_directory)]
+    volumes.append(config_directory)
     export_directory = kwds.get("export_directory", None)
     if export_directory is not None:
-        volumes.append(DockerVolume(f"{export_directory}:/export:rw"))
+        volumes.append(f"{export_directory}:/export:rw")

-    # TODO: Allow this to real Docker volumes and allow multiple.
-    extra_volume = kwds.get("docker_extra_volume")
-    if extra_volume:
-        volumes.append(simple_docker_volume(extra_volume))
+    extra_volumes = kwds.get("docker_extra_volume") or []
+    volumes.extend(extra_volumes)
+    docker_volumes = create_docker_volumes(volumes)
     yield DockerGalaxyConfig(
         ctx,
         config_directory,
@@ -291,7 +304,7 @@ def config_join(*args):
         master_api_key,
         runnables,
         docker_target_kwds=docker_target_kwds,
-        volumes=volumes,
+        volumes=docker_volumes,
         export_directory=export_directory,
         kwds=kwds,
     )
@@ -346,7 +359,8 @@ def config_join(*args):
     log_file = f"{server_name}.log"
     pid_file = f"{server_name}.pid"
     ensure_dependency_resolvers_conf_configured(ctx, kwds, os.path.join(config_directory, "resolvers_conf.xml"))
-    _handle_job_config_file(config_directory, server_name, kwds)
+    all_tool_paths = _all_tool_paths(runnables, galaxy_root=galaxy_root, extra_tools=kwds.get("extra_tools"))
+    _handle_job_config_file(config_directory, server_name, test_data_dir, all_tool_paths, kwds)
     _handle_job_metrics(config_directory, kwds)
     _handle_refgenie_config(config_directory, galaxy_root, kwds)
     file_path = kwds.get("file_path") or config_join("files")
@@ -356,7 +370,6 @@ def config_join(*args):
     _ensure_directory(tool_dependency_dir)

     shed_tool_conf = kwds.get("shed_tool_conf") or config_join("shed_tools_conf.xml")
-    all_tool_paths = _all_tool_paths(runnables, galaxy_root=galaxy_root, extra_tools=kwds.get("extra_tools"))
     empty_tool_conf = config_join("empty_tool_conf.xml")

     tool_conf = config_join("tool_conf.xml")
@@ -1289,7 +1302,13 @@ def _build_env_for_galaxy(properties, template_args):
     return env


-def _handle_job_config_file(config_directory, server_name, kwds):
+def _handle_job_config_file(
+    config_directory: str,
+    server_name: str,
+    test_data_dir: Optional[str],
+    all_tool_paths: Set[str],
+    kwds: Dict[str, Any],
+):
     job_config_file = kwds.get("job_config_file", None)
     if not job_config_file:
         template_str = JOB_CONFIG_LOCAL
@@ -1303,6 +1322,17 @@ def _handle_job_config_file(config_directory, server_name, kwds):
         if docker_host:
             docker_host_param = f"""<param id="docker_host">{docker_host}</param>"""

+        volumes = list(kwds.get("docker_extra_volume") or [])
+        if test_data_dir:
+            volumes.append(f"{test_data_dir}:ro")
+
+        docker_volumes_str = "$defaults"
+        if volumes:
+            # exclude tool directories; these are mounted :ro by $defaults
+            all_tool_dirs = {os.path.dirname(tool_path) for tool_path in all_tool_paths}
+            extra_volumes_str = ",".join(str(v) for v in create_docker_volumes(volumes) if v.path not in all_tool_dirs)
+            docker_volumes_str = f"{docker_volumes_str},{extra_volumes_str}"
+
         conf_contents = Template(template_str).safe_substitute(
             {
                 "server_name": server_name,
@@ -1312,6 +1342,7 @@ def _handle_job_config_file(config_directory, server_name, kwds):
                 "docker_sudo_cmd": str(kwds.get("docker_sudo_cmd", docker_util.DEFAULT_SUDO_COMMAND)),
                 "docker_cmd": str(kwds.get("docker_cmd", docker_util.DEFAULT_DOCKER_COMMAND)),
                 "docker_host_param": docker_host_param,
+                "docker_volumes": docker_volumes_str,
             }
         )
     write_file(job_config_file, conf_contents)
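For context: after this change the generated job configuration's docker_volumes param starts from $defaults and appends any extra volumes plus the test data directory mounted read-only. A hypothetical substituted value (the test-data path is invented for illustration):

    <param id="docker_volumes">$defaults,/home/user/my_tool/test-data:/home/user/my_tool/test-data:ro</param>
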
5 changes: 3 additions & 2 deletions planemo/options.py
@@ -164,7 +164,7 @@ def galaxy_python_version():
     "--galaxy_python_version",
     use_global_config=True,
     default=None,
-    type=click.Choice(["3", "3.7", "3.8", "3.9"]),
+    type=click.Choice(["3", "3.7", "3.8", "3.9", "3.10", "3.11"]),
     help="Python version to start Galaxy under",
 )

@@ -463,7 +463,8 @@ def docker_extra_volume_option():
     type=arg_type,
     default=None,
     use_global_config=True,
-    help=("Extra path to mount if --engine docker."),
+    multiple=True,
+    help=("Extra path to mount if --engine docker or `--biocontainers` or `--docker`."),
 )

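With multiple=True the flag can now be repeated, and, per the DockerVolume.from_str parsing above, each value may carry an optional :ro/:rw suffix. A hypothetical invocation (tool name and paths invented for illustration):

    planemo test --biocontainers \
        --docker_extra_volume /data/reference:ro \
        --docker_extra_volume /tmp/scratch \
        my_tool.xml
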
130 changes: 130 additions & 0 deletions tests/data/data_manager/bowtie2_index_builder.py
@@ -0,0 +1,130 @@
#!/usr/bin/env python
# Dan Blankenberg
from __future__ import print_function

import json
import optparse
import os
import subprocess
import sys

DEFAULT_DATA_TABLE_NAMES = ["bowtie2_indexes"]


def get_id_name(params, dbkey, fasta_description=None):
# TODO: ensure sequence_id is unique and does not already appear in location file
sequence_id = params["param_dict"]["sequence_id"]
if not sequence_id:
sequence_id = dbkey

sequence_name = params["param_dict"]["sequence_name"]
if not sequence_name:
sequence_name = fasta_description
if not sequence_name:
sequence_name = dbkey
return sequence_id, sequence_name


def build_bowtie2_index(
data_manager_dict,
fasta_filename,
params,
target_directory,
dbkey,
sequence_id,
sequence_name,
data_table_names=DEFAULT_DATA_TABLE_NAMES,
):
# TODO: allow multiple FASTA input files
fasta_base_name = os.path.split(fasta_filename)[-1]
sym_linked_fasta_filename = os.path.join(target_directory, fasta_base_name)
os.symlink(fasta_filename, sym_linked_fasta_filename)
args = ["bowtie2-build", sym_linked_fasta_filename, sequence_id]
threads = os.environ.get("GALAXY_SLOTS")
if threads:
args.extend(["--threads", threads])
proc = subprocess.Popen(args=args, shell=False, cwd=target_directory)
return_code = proc.wait()
if return_code:
print("Error building index.", file=sys.stderr)
sys.exit(return_code)
data_table_entry = dict(value=sequence_id, dbkey=dbkey, name=sequence_name, path=sequence_id)
for data_table_name in data_table_names:
_add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)


def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
data_manager_dict["data_tables"] = data_manager_dict.get("data_tables", {})
data_manager_dict["data_tables"][data_table_name] = data_manager_dict["data_tables"].get(data_table_name, [])
data_manager_dict["data_tables"][data_table_name].append(data_table_entry)
return data_manager_dict


def main():
parser = optparse.OptionParser()
parser.add_option(
"-f",
"--fasta_filename",
dest="fasta_filename",
action="store",
type="string",
default=None,
help="fasta_filename",
)
parser.add_option(
"-d", "--fasta_dbkey", dest="fasta_dbkey", action="store", type="string", default=None, help="fasta_dbkey"
)
parser.add_option(
"-t",
"--fasta_description",
dest="fasta_description",
action="store",
type="string",
default=None,
help="fasta_description",
)
parser.add_option(
"-n",
"--data_table_name",
dest="data_table_name",
action="append",
type="string",
default=None,
help="data_table_name",
)
(options, args) = parser.parse_args()

filename = args[0]

with open(filename) as fh:
params = json.load(fh)
target_directory = params["output_data"][0]["extra_files_path"]
os.mkdir(target_directory)
data_manager_dict = {}

dbkey = options.fasta_dbkey

if dbkey in [None, "", "?"]:
raise Exception('"%s" is not a valid dbkey. You must specify a valid dbkey.' % (dbkey))

sequence_id, sequence_name = get_id_name(params, dbkey=dbkey, fasta_description=options.fasta_description)

# build the index
build_bowtie2_index(
data_manager_dict,
options.fasta_filename,
params,
target_directory,
dbkey,
sequence_id,
sequence_name,
data_table_names=options.data_table_name or DEFAULT_DATA_TABLE_NAMES,
)

# save info to json file
with open(filename, "w") as fh:
json.dump(data_manager_dict, fh, sort_keys=True)


if __name__ == "__main__":
main()
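
A rough sketch of how this script gets called; the real command line is generated from the tool XML below, and these argument values are invented:

    python bowtie2_index_builder.py galaxy.json \
        --fasta_filename /data/phiX174.fasta \
        --fasta_dbkey phiX174 \
        --fasta_description 'phiX174 genome' \
        --data_table_name bowtie2_indexes

Here galaxy.json stands for the data manager parameter file Galaxy passes as the positional argument; the script reads output_data[0].extra_files_path from it and writes the new data table entries back to the same file.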
@@ -0,0 +1,40 @@
<tool id="bowtie2_index_builder_data_manager" name="Bowtie2 index" tool_type="manage_data" version="@WRAPPER_VERSION@+galaxy0" profile="18.09">
<description>builder</description>
<requirements>
<requirement type="package" version="@WRAPPER_VERSION@">bowtie2</requirement>
</requirements>
<macros>
<token name="@WRAPPER_VERSION@">2.4.4</token>
</macros>
<command detect_errors="exit_code"><![CDATA[
python '$__tool_directory__/bowtie2_index_builder.py'
'${out_file}'
--fasta_filename '${all_fasta_source.fields.path}'
--fasta_dbkey '${all_fasta_source.fields.dbkey}'
--fasta_description '${all_fasta_source.fields.name}'
--data_table_name bowtie2_indexes ${tophat2}
]]></command>
<inputs>
<param name="all_fasta_source" type="select" label="Source FASTA Sequence">
<options from_data_table="all_fasta"/>
</param>
<param name="sequence_name" type="text" value="" label="Name of sequence" />
<param name="sequence_id" type="text" value="" label="ID for sequence" />
<param name="tophat2" type="boolean" truevalue="--data_table_name tophat2_indexes" falsevalue="" checked="True" label="Also make available for TopHat" help="Adds values to tophat2_indexes tool data table" />
</inputs>
<outputs>
<data name="out_file" format="data_manager_json"/>
</outputs>
<tests>
<test>
<param name="all_fasta_source" value="phiX174"/>
<output name="out_file" value="bowtie2_data_manager.json"/>
</test>
</tests>

<help>
.. class:: infomark

**Notice:** If you leave name, description, or id blank, it will be generated automatically.
</help>
</tool>
@@ -0,0 +1,35 @@
<?xml version="1.0"?>
<data_managers>

<data_manager tool_file="data_manager/bowtie2_index_builder.xml" id="bowtie2_index_builder" version="2.2.6">
<data_table name="bowtie2_indexes">
<output>
<column name="value" />
<column name="dbkey" />
<column name="name" />
<column name="path" output_ref="out_file" >
<move type="directory" relativize_symlinks="True">
<!-- <source>${path}</source>--> <!-- out_file.extra_files_path is used as base by default --> <!-- if no source, eg for type=directory, then refers to base -->
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/bowtie2_index/${value}</target>
</move>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie2_index/${value}/${path}</value_translation>
<value_translation type="function">abspath</value_translation>
</column>
</output>
</data_table>

<data_table name="tophat2_indexes">
<output>
<column name="value" />
<column name="dbkey" />
<column name="name" />
<column name="path" output_ref="out_file" >
<!-- no move, always happens as part of bowtie2 and uses that path -->
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie2_index/${value}/${path}</value_translation>
<value_translation type="function">abspath</value_translation>
</column>
</output>
</data_table>
</data_manager>

</data_managers>
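
For orientation, the move/value_translation pair relocates the built index under ${GALAXY_DATA_MANAGER_DATA_PATH} and rewrites the path column to an absolute location, so a resulting bowtie2_indexes.loc row would look roughly like this (tab-separated: value, dbkey, name, path; paths invented):

    phiX174	phiX174	phiX174	/galaxy/tool-data/phiX174/bowtie2_index/phiX174/phiX174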
